From eacbf1cd3aff3dbf47a71dc7fdb1d01dce8e777e Mon Sep 17 00:00:00 2001 From: bstarynk Date: Mon, 9 Sep 2013 15:57:43 +0000 Subject: 2013-09-09 Basile Starynkevitch MELT branch merged with trunk rev 202389 using svnmerge.py; notice that gcc/melt/xtramelt-ana-base.melt has been significantly updated, but some updates are yet missing... [gcc/] 2013-09-09 Basile Starynkevitch {{When merging trunk GCC 4.9 with C++ passes}} * melt/xtramelt-ana-base.melt: Add GCC 4.9 specific code, still incomplete, for classy passes.... Only Gimple passes are yet possible... git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@202408 138bc75d-0d04-0410-961f-82ee72b054a4 --- ChangeLog | 60 +- ChangeLog.MELT | 6 + MAINTAINERS | 12 +- Makefile.def | 10 +- Makefile.in | 1026 +- boehm-gc/ChangeLog | 39 +- boehm-gc/Makefile.am | 2 +- boehm-gc/Makefile.in | 2 +- config/ChangeLog | 4 + config/bootstrap-ubsan.mk | 7 + configure | 89 +- configure.ac | 40 +- contrib/ChangeLog | 12 + contrib/gcc_update | 5 +- .../x86_64-unknown-linux-gnu.xfail | 201 +- fixincludes/ChangeLog | 6 + fixincludes/fixincl.x | 55 +- fixincludes/inclhack.def | 14 + fixincludes/tests/base/assert.h | 7 + gcc/ChangeLog | 5813 +++++- gcc/ChangeLog.MELT | 5 + gcc/DATESTAMP | 2 +- gcc/Makefile.in | 93 +- gcc/aclocal.m4 | 10 +- gcc/ada/ChangeLog | 76 +- gcc/ada/gcc-interface/Make-lang.in | 10 + gcc/ada/gcc-interface/Makefile.in | 23 +- gcc/ada/gcc-interface/decl.c | 22 +- gcc/ada/gcc-interface/misc.c | 4 +- gcc/ada/gcc-interface/trans.c | 87 +- gcc/ada/gcc-interface/utils.c | 2 +- gcc/ada/gcc-interface/utils2.c | 32 +- gcc/ada/sigtramp-ppcvxw.c | 2 +- gcc/ada/terminals.c | 2 +- gcc/asan.c | 183 +- gcc/auto-inc-dec.c | 52 +- gcc/basic-block.h | 21 +- gcc/bb-reorder.c | 292 +- gcc/bt-load.c | 108 +- gcc/builtin-attrs.def | 5 + gcc/builtins.c | 60 +- gcc/builtins.def | 152 +- gcc/c-family/ChangeLog | 263 + gcc/c-family/c-ada-spec.c | 43 +- gcc/c-family/c-common.c | 80 +- gcc/c-family/c-common.h | 5 +- 
gcc/c-family/c-pretty-print.c | 706 +- gcc/c-family/c-pretty-print.h | 158 +- gcc/c-family/c-ubsan.c | 158 + gcc/c-family/c-ubsan.h | 27 + gcc/c/ChangeLog | 44 + gcc/c/c-array-notation.c | 14 +- gcc/c/c-objc-common.c | 23 +- gcc/c/c-typeck.c | 52 +- gcc/cfg.c | 15 + gcc/cfganal.c | 114 + gcc/cfgcleanup.c | 119 +- gcc/cfgexpand.c | 63 +- gcc/cfgloop.h | 14 + gcc/cfgrtl.c | 309 +- gcc/cgraph.c | 648 +- gcc/cgraph.h | 77 +- gcc/cgraphbuild.c | 173 +- gcc/cgraphclones.c | 55 +- gcc/cgraphunit.c | 302 +- gcc/combine-stack-adj.c | 52 +- gcc/combine.c | 71 +- gcc/common.opt | 47 +- gcc/common/config/i386/i386-common.c | 69 +- gcc/compare-elim.c | 55 +- gcc/config.gcc | 82 +- gcc/config.in | 12 + gcc/config/aarch64/aarch64-option-extensions.def | 1 + gcc/config/aarch64/aarch64-simd-builtins.def | 8 +- gcc/config/aarch64/aarch64-simd.md | 123 +- gcc/config/aarch64/aarch64.c | 51 + gcc/config/aarch64/aarch64.h | 2 + gcc/config/aarch64/aarch64.md | 524 +- gcc/config/aarch64/arm_neon.h | 14526 +++++++-------- gcc/config/aarch64/iterators.md | 31 +- gcc/config/aarch64/predicates.md | 5 + gcc/config/aarch64/t-aarch64 | 5 + gcc/config/alpha/alpha.c | 1 + gcc/config/alpha/linux.h | 14 +- gcc/config/arm/aarch-common-protos.h | 36 + gcc/config/arm/aarch-common.c | 278 + gcc/config/arm/arm-fixed.md | 31 +- gcc/config/arm/arm-protos.h | 8 - gcc/config/arm/arm.c | 524 +- gcc/config/arm/arm.h | 2 + gcc/config/arm/arm.md | 1181 +- gcc/config/arm/arm1020e.md | 29 +- gcc/config/arm/arm1026ejs.md | 17 +- gcc/config/arm/arm1136jfs.md | 17 +- gcc/config/arm/arm926ejs.md | 15 +- gcc/config/arm/cortex-a15-neon.md | 170 +- gcc/config/arm/cortex-a15.md | 50 +- gcc/config/arm/cortex-a5.md | 26 +- gcc/config/arm/cortex-a53.md | 33 +- gcc/config/arm/cortex-a7.md | 150 +- gcc/config/arm/cortex-a8-neon.md | 128 +- gcc/config/arm/cortex-a8.md | 19 +- gcc/config/arm/cortex-a9-neon.md | 120 +- gcc/config/arm/cortex-a9.md | 30 +- gcc/config/arm/cortex-m4-fpu.md | 8 +- gcc/config/arm/cortex-m4.md | 14 +- 
gcc/config/arm/cortex-r4.md | 15 +- gcc/config/arm/cortex-r4f.md | 12 +- gcc/config/arm/fa526.md | 15 +- gcc/config/arm/fa606te.md | 14 +- gcc/config/arm/fa626te.md | 15 +- gcc/config/arm/fa726te.md | 13 +- gcc/config/arm/fmp626.md | 10 +- gcc/config/arm/iterators.md | 2 +- gcc/config/arm/iwmmxt.md | 6 +- gcc/config/arm/linux-eabi.h | 4 +- gcc/config/arm/linux-elf.h | 4 +- gcc/config/arm/marvell-pj4.md | 47 +- gcc/config/arm/neon-schedgen.ml | 2 +- gcc/config/arm/neon.md | 542 +- gcc/config/arm/t-arm | 5 + gcc/config/arm/t-linux-eabi | 2 + gcc/config/arm/thumb2.md | 140 +- gcc/config/arm/types.md | 563 + gcc/config/arm/vfp.md | 70 +- gcc/config/arm/vfp11.md | 13 +- gcc/config/avr/avr-stdint.h | 4 +- gcc/config/avr/avr.c | 198 +- gcc/config/avr/avr.opt | 4 + gcc/config/bfin/bfin.c | 1 + gcc/config/bfin/uclinux.h | 3 + gcc/config/c6x/uclinux-elf.h | 2 + gcc/config/darwin-protos.h | 1 + gcc/config/darwin.c | 13 + gcc/config/darwin.h | 7 +- gcc/config/elfos.h | 3 + gcc/config/epiphany/epiphany.c | 35 +- gcc/config/epiphany/epiphany.h | 4 +- gcc/config/epiphany/epiphany.md | 124 +- gcc/config/epiphany/mode-switch-use.c | 51 +- gcc/config/epiphany/predicates.md | 8 +- gcc/config/epiphany/resolve-sw-modes.c | 53 +- gcc/config/freebsd.h | 3 + gcc/config/gnu-user.h | 31 +- gcc/config/i386/constraints.md | 9 +- gcc/config/i386/cpuid.h | 4 + gcc/config/i386/cygming.h | 3 + gcc/config/i386/djgpp.h | 14 + gcc/config/i386/driver-i386.c | 43 +- gcc/config/i386/i386-c.c | 8 + gcc/config/i386/i386-interix.h | 3 + gcc/config/i386/i386-modes.def | 3 + gcc/config/i386/i386-opts.h | 20 +- gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c | 1285 +- gcc/config/i386/i386.h | 217 +- gcc/config/i386/i386.md | 304 +- gcc/config/i386/i386.opt | 35 + gcc/config/i386/linux-common.h | 2 +- gcc/config/i386/mmx.md | 12 +- gcc/config/i386/predicates.md | 6 + gcc/config/i386/sse.md | 376 +- gcc/config/i386/stringop.def | 37 + gcc/config/i386/stringop.opt | 31 + gcc/config/i386/t-i386 | 2 +- 
gcc/config/i386/x86-64.h | 3 + gcc/config/i386/x86-tune.def | 232 + gcc/config/ia64/hpux.h | 7 +- gcc/config/linux-android.c | 14 + gcc/config/linux-protos.h | 2 + gcc/config/linux.h | 14 +- gcc/config/lm32/uclinux-elf.h | 2 + gcc/config/m68k/uclinux.h | 3 + gcc/config/microblaze/microblaze.h | 4 + gcc/config/mips/linux-common.h | 2 +- gcc/config/mips/linux.h | 7 +- gcc/config/mips/linux64.h | 20 +- gcc/config/mips/mips-modes.def | 7 +- gcc/config/mips/mips-opts.h | 7 + gcc/config/mips/mips.c | 86 +- gcc/config/mips/mips.h | 14 +- gcc/config/mips/mips.md | 27 +- gcc/config/mips/mips.opt | 18 + gcc/config/mips/mti-linux.h | 2 +- gcc/config/mips/t-mti-elf | 8 +- gcc/config/mips/t-mti-linux | 8 +- gcc/config/mmix/mmix.h | 4 + gcc/config/moxie/uclinux.h | 3 + gcc/config/netbsd.h | 3 + gcc/config/openbsd.h | 6 +- gcc/config/pa/pa-hpux.h | 3 + gcc/config/pa/pa.md | 16 +- gcc/config/pdp11/pdp11.h | 4 + gcc/config/picochip/picochip.h | 4 + gcc/config/rl78/rl78.c | 63 +- gcc/config/rs6000/aix43.h | 3 + gcc/config/rs6000/aix51.h | 3 + gcc/config/rs6000/aix52.h | 4 - gcc/config/rs6000/aix53.h | 4 - gcc/config/rs6000/aix61.h | 4 - gcc/config/rs6000/darwin.h | 6 +- gcc/config/rs6000/dfp.md | 33 +- gcc/config/rs6000/linux.h | 15 +- gcc/config/rs6000/linux64.h | 15 +- gcc/config/rs6000/predicates.md | 96 + gcc/config/rs6000/rs6000-modes.def | 4 +- gcc/config/rs6000/rs6000-protos.h | 3 + gcc/config/rs6000/rs6000.c | 420 +- gcc/config/rs6000/rs6000.h | 15 +- gcc/config/rs6000/rs6000.md | 41 +- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/rtems.h | 3 + gcc/config/rs6000/t-linux64 | 10 +- gcc/config/rs6000/t-linux64bele | 7 + gcc/config/rs6000/t-linux64le | 3 + gcc/config/rs6000/t-linux64lebe | 7 + gcc/config/rs6000/vsx.md | 32 + gcc/config/rx/rx.c | 6 +- gcc/config/rx/rx.h | 1 + gcc/config/s390/2827.md | 23 +- gcc/config/s390/linux.h | 3 + gcc/config/s390/s390.md | 106 +- gcc/config/s390/tpf.h | 7 +- gcc/config/sol2-10.h | 6 +- gcc/config/sol2.h | 3 + 
gcc/config/sparc/sparc.c | 84 +- gcc/config/sparc/sparc.h | 12 +- gcc/config/sparc/sparc.opt | 6 + gcc/config/sparc/sync.md | 7 +- gcc/config/sparc/t-sparc | 2 +- gcc/config/vms/vms.h | 3 + gcc/config/vxworks.h | 3 + gcc/configure | 78 +- gcc/configure.ac | 30 +- gcc/context.c | 6 + gcc/context.h | 11 +- gcc/convert.c | 9 +- gcc/coretypes.h | 19 +- gcc/coverage.c | 66 +- gcc/coverage.h | 3 + gcc/cp/ChangeLog | 613 + gcc/cp/Make-lang.in | 11 +- gcc/cp/call.c | 56 +- gcc/cp/class.c | 153 +- gcc/cp/config-lang.in | 2 +- gcc/cp/cp-array-notation.c | 21 +- gcc/cp/cp-objcp-common.c | 18 +- gcc/cp/cp-tree.h | 50 +- gcc/cp/cvt.c | 23 +- gcc/cp/cxx-pretty-print.c | 858 +- gcc/cp/cxx-pretty-print.h | 77 +- gcc/cp/decl.c | 142 +- gcc/cp/decl2.c | 39 +- gcc/cp/error.c | 1420 +- gcc/cp/except.c | 49 +- gcc/cp/friend.c | 4 +- gcc/cp/init.c | 11 +- gcc/cp/mangle.c | 44 +- gcc/cp/name-lookup.c | 51 +- gcc/cp/name-lookup.h | 1 + gcc/cp/parser.c | 50 +- gcc/cp/pt.c | 98 +- gcc/cp/rtti.c | 4 +- gcc/cp/semantics.c | 27 +- gcc/cp/tree.c | 2 + gcc/cp/typeck.c | 78 +- gcc/cp/typeck2.c | 9 +- gcc/cp/vtable-class-hierarchy.c | 1342 ++ gcc/cppbuiltin.c | 2 +- gcc/cprop.c | 55 +- gcc/cse.c | 166 +- gcc/dbgcnt.def | 2 + gcc/dce.c | 104 +- gcc/defaults.h | 12 - gcc/df-core.c | 161 +- gcc/diagnostic.c | 27 +- gcc/diagnostic.h | 2 +- gcc/doc/extend.texi | 5 +- gcc/doc/install.texi | 40 +- gcc/doc/invoke.texi | 235 +- gcc/doc/md.texi | 6 +- gcc/doc/rtl.texi | 4 + gcc/doc/tm.texi | 26 +- gcc/doc/tm.texi.in | 21 +- gcc/double-int.c | 8 +- gcc/dse.c | 108 +- gcc/dumpfile.c | 16 +- gcc/dumpfile.h | 4 +- gcc/dwarf2cfi.c | 54 +- gcc/dwarf2out.c | 14 +- gcc/except.c | 107 +- gcc/expmed.c | 6 +- gcc/expr.c | 21 +- gcc/final.c | 291 +- gcc/flag-types.h | 19 + gcc/fold-const.c | 79 +- gcc/fortran/ChangeLog | 150 + gcc/fortran/array.c | 66 +- gcc/fortran/class.c | 192 +- gcc/fortran/dependency.c | 105 +- gcc/fortran/expr.c | 5 +- gcc/fortran/f95-lang.c | 4 +- gcc/fortran/gfortran.h | 6 +- 
gcc/fortran/interface.c | 92 +- gcc/fortran/intrinsic.c | 7 + gcc/fortran/invoke.texi | 5 + gcc/fortran/io.c | 18 +- gcc/fortran/lang.opt | 4 + gcc/fortran/match.c | 105 +- gcc/fortran/openmp.c | 4 +- gcc/fortran/options.c | 6 + gcc/fortran/parse.c | 4 +- gcc/fortran/resolve.c | 59 +- gcc/fortran/st.c | 6 +- gcc/fortran/symbol.c | 3 + gcc/fortran/trans-array.c | 40 +- gcc/fortran/trans-decl.c | 35 +- gcc/fortran/trans-expr.c | 222 +- gcc/fortran/trans-intrinsic.c | 10 +- gcc/fortran/trans-io.c | 5 +- gcc/fortran/trans-stmt.c | 3 +- gcc/function.c | 293 +- gcc/function.h | 8 + gcc/fwprop.c | 107 +- gcc/gcc.c | 127 +- gcc/gcov-io.h | 2 +- gcc/gcse.c | 108 +- gcc/gdbhooks.py | 397 + gcc/gen-pass-instances.awk | 66 + gcc/gengtype.c | 23 + gcc/genoutput.c | 4 +- gcc/gimple-fold.c | 33 +- gcc/gimple-low.c | 51 +- gcc/gimple-pretty-print.c | 235 +- gcc/gimple-ssa-strength-reduction.c | 94 +- gcc/gimple-streamer-in.c | 7 +- gcc/gimple.c | 11 +- gcc/gimple.def | 8 +- gcc/gimple.h | 28 +- gcc/gimplify.c | 200 +- gcc/go/ChangeLog | 16 + gcc/go/go-gcc.cc | 27 +- gcc/go/gofrontend/expressions.cc | 157 +- gcc/go/gofrontend/expressions.h | 13 +- gcc/go/gofrontend/gogo-tree.cc | 87 +- gcc/go/gofrontend/gogo.cc | 152 +- gcc/go/gofrontend/gogo.h | 13 - gcc/go/gofrontend/parse.cc | 3 +- gcc/go/gofrontend/statements.cc | 59 +- gcc/go/gofrontend/statements.h | 46 + gcc/go/gofrontend/types.cc | 52 +- gcc/go/gofrontend/types.h | 12 +- gcc/graph.c | 32 +- gcc/gtm-builtins.def | 2 +- gcc/hwint.h | 4 + gcc/ifcvt.c | 157 +- gcc/init-regs.c | 52 +- gcc/internal-fn.c | 24 + gcc/internal-fn.def | 3 + gcc/ipa-cp.c | 182 +- gcc/ipa-devirt.c | 1181 ++ gcc/ipa-inline-analysis.c | 72 +- gcc/ipa-inline-transform.c | 31 +- gcc/ipa-inline.c | 484 +- gcc/ipa-inline.h | 1 + gcc/ipa-profile.c | 754 + gcc/ipa-prop.c | 517 +- gcc/ipa-prop.h | 47 +- gcc/ipa-pure-const.c | 131 +- gcc/ipa-ref.c | 92 +- gcc/ipa-ref.h | 6 +- gcc/ipa-reference.c | 72 +- gcc/ipa-split.c | 112 +- gcc/ipa-utils.c | 184 + 
gcc/ipa-utils.h | 63 +- gcc/ipa.c | 790 +- gcc/ira.c | 113 +- gcc/jump.c | 51 +- gcc/langhooks.c | 4 +- gcc/loop-init.c | 378 +- gcc/loop-unroll.c | 19 +- gcc/lower-subreg.c | 106 +- gcc/lra-constraints.c | 203 +- gcc/lra.c | 18 +- gcc/lto-cgraph.c | 74 +- gcc/lto-section-in.c | 35 + gcc/lto-streamer-in.c | 104 +- gcc/lto-streamer-out.c | 299 +- gcc/lto-streamer.h | 14 +- gcc/lto-symtab.c | 637 - gcc/lto/ChangeLog | 97 + gcc/lto/Make-lang.in | 8 +- gcc/lto/config-lang.in | 2 +- gcc/lto/lto-partition.c | 38 +- gcc/lto/lto-symtab.c | 663 + gcc/lto/lto.c | 386 +- gcc/lto/lto.h | 4 + gcc/melt-run.proto.h | 5 + gcc/melt/xtramelt-ana-base.melt | 55 + gcc/mode-switching.c | 63 +- gcc/modulo-sched.c | 57 +- gcc/omp-low.c | 1483 +- gcc/opts.c | 70 + gcc/output.h | 4 + gcc/pass_manager.h | 138 + gcc/passes.c | 631 +- gcc/passes.def | 3 +- gcc/postreload-gcse.c | 53 +- gcc/postreload.c | 53 +- gcc/predict.c | 151 +- gcc/pretty-print.c | 176 +- gcc/pretty-print.h | 153 +- gcc/print-tree.c | 2 - gcc/profile.c | 17 +- gcc/profile.h | 2 +- gcc/recog.c | 400 +- gcc/recog.h | 52 +- gcc/ree.c | 53 +- gcc/reg-stack.c | 102 +- gcc/regcprop.c | 53 +- gcc/reginfo.c | 72 +- gcc/regmove.c | 52 +- gcc/regrename.c | 53 +- gcc/reload.c | 4 +- gcc/reload.h | 2 +- gcc/reorg.c | 132 +- gcc/resource.c | 27 +- gcc/rtl.h | 1 + gcc/rtlanal.c | 2 +- gcc/sanitizer.def | 14 + gcc/sched-rgn.c | 112 +- gcc/sched-vis.c | 91 +- gcc/sel-sched.c | 15 +- gcc/simplify-rtx.c | 7 +- gcc/stack-ptr-mod.c | 51 +- gcc/statistics.c | 7 +- gcc/store-motion.c | 54 +- gcc/symtab.c | 138 +- gcc/system.h | 2 +- gcc/target.def | 9 +- gcc/targhooks.h | 4 + gcc/testsuite/ChangeLog | 1162 +- gcc/testsuite/c-c++-common/cilk-plus/AN/pr57490.c | 28 + gcc/testsuite/c-c++-common/gomp/pr58257.c | 15 + gcc/testsuite/c-c++-common/opaque-vector.c | 22 + gcc/testsuite/c-c++-common/scal-to-vec1.c | 8 +- .../c-c++-common/simulate-thread/bitfields-2.c | 2 +- gcc/testsuite/c-c++-common/ubsan/const-char-1.c | 9 + 
gcc/testsuite/c-c++-common/ubsan/const-expr-1.c | 22 + gcc/testsuite/c-c++-common/ubsan/div-by-zero-1.c | 24 + gcc/testsuite/c-c++-common/ubsan/div-by-zero-2.c | 23 + gcc/testsuite/c-c++-common/ubsan/div-by-zero-3.c | 21 + gcc/testsuite/c-c++-common/ubsan/div-by-zero-4.c | 11 + gcc/testsuite/c-c++-common/ubsan/save-expr-1.c | 11 + gcc/testsuite/c-c++-common/ubsan/save-expr-2.c | 14 + gcc/testsuite/c-c++-common/ubsan/save-expr-3.c | 16 + gcc/testsuite/c-c++-common/ubsan/save-expr-4.c | 16 + gcc/testsuite/c-c++-common/ubsan/shift-1.c | 31 + gcc/testsuite/c-c++-common/ubsan/shift-2.c | 23 + gcc/testsuite/c-c++-common/ubsan/shift-3.c | 11 + gcc/testsuite/c-c++-common/ubsan/typedef-1.c | 12 + gcc/testsuite/c-c++-common/ubsan/unreachable-1.c | 10 + gcc/testsuite/g++.dg/abi/mangle33.C | 4 +- gcc/testsuite/g++.dg/conversion/ambig2.C | 18 + gcc/testsuite/g++.dg/cpp0x/alias-decl-0.C | 9 +- gcc/testsuite/g++.dg/cpp0x/alias-decl-33.C | 14 + gcc/testsuite/g++.dg/cpp0x/alias-decl-33a.C | 15 + gcc/testsuite/g++.dg/cpp0x/alias-decl-37.C | 21 + gcc/testsuite/g++.dg/cpp0x/alias-decl-dr1286.C | 13 + gcc/testsuite/g++.dg/cpp0x/alias-decl-dr1286a.C | 60 + gcc/testsuite/g++.dg/cpp0x/constexpr-function2.C | 2 +- gcc/testsuite/g++.dg/cpp0x/constexpr-ice8.C | 17 + gcc/testsuite/g++.dg/cpp0x/constexpr-neg1.C | 2 +- gcc/testsuite/g++.dg/cpp0x/constexpr-value4.C | 16 + gcc/testsuite/g++.dg/cpp0x/dc7.C | 7 + gcc/testsuite/g++.dg/cpp0x/dc8.C | 66 + gcc/testsuite/g++.dg/cpp0x/defaulted2.C | 4 +- gcc/testsuite/g++.dg/cpp0x/defaulted31.C | 2 +- gcc/testsuite/g++.dg/cpp0x/enum15.C | 2 +- gcc/testsuite/g++.dg/cpp0x/enum28.C | 17 + gcc/testsuite/g++.dg/cpp0x/error6.C | 2 +- gcc/testsuite/g++.dg/cpp0x/gen-attrs-32.C | 2 +- gcc/testsuite/g++.dg/cpp0x/lambda/lambda-defarg5.C | 30 + gcc/testsuite/g++.dg/cpp0x/nsdmi-sizeof.C | 7 + gcc/testsuite/g++.dg/cpp0x/override2.C | 2 +- gcc/testsuite/g++.dg/cpp0x/parse1.C | 2 +- gcc/testsuite/g++.dg/cpp0x/pr57416.C | 45 + gcc/testsuite/g++.dg/cpp0x/pr58072.C | 18 + 
gcc/testsuite/g++.dg/cpp0x/pr58080.C | 14 + gcc/testsuite/g++.dg/cpp0x/ref-qual14.C | 18 + gcc/testsuite/g++.dg/cpp0x/scoped_enum.C | 2 +- gcc/testsuite/g++.dg/cpp0x/temp_default4.C | 2 +- gcc/testsuite/g++.dg/debug/ra1.C | 77 + gcc/testsuite/g++.dg/dg.exp | 1 + gcc/testsuite/g++.dg/ext/attr-alias-3.C | 8 + gcc/testsuite/g++.dg/ext/attrib32.C | 2 +- .../g++.dg/ext/gnu-inline-global-reject.C | 18 +- gcc/testsuite/g++.dg/ext/mv13.C | 2 +- gcc/testsuite/g++.dg/ext/mv7.C | 2 +- gcc/testsuite/g++.dg/ext/mv9.C | 2 +- gcc/testsuite/g++.dg/ext/pr57362.C | 5 +- gcc/testsuite/g++.dg/ext/typeof10.C | 2 +- gcc/testsuite/g++.dg/ipa/devirt-11.C | 3 +- gcc/testsuite/g++.dg/ipa/devirt-13.C | 22 + gcc/testsuite/g++.dg/ipa/devirt-14.C | 34 + gcc/testsuite/g++.dg/ipa/devirt-15.C | 40 + gcc/testsuite/g++.dg/ipa/devirt-16.C | 39 + gcc/testsuite/g++.dg/ipa/devirt-17.C | 44 + gcc/testsuite/g++.dg/ipa/devirt-18.C | 37 + gcc/testsuite/g++.dg/ipa/remref-1.C | 36 + gcc/testsuite/g++.dg/ipa/remref-2.C | 37 + gcc/testsuite/g++.dg/ipa/type-inheritance-1.C | 28 + gcc/testsuite/g++.dg/lookup/anon6.C | 8 +- gcc/testsuite/g++.dg/lookup/crash6.C | 2 +- gcc/testsuite/g++.dg/lookup/name-clash5.C | 2 +- gcc/testsuite/g++.dg/lookup/name-clash6.C | 2 +- gcc/testsuite/g++.dg/lookup/using9.C | 4 +- gcc/testsuite/g++.dg/opt/pr57661.C | 76 + gcc/testsuite/g++.dg/opt/pr58006.C | 22 + gcc/testsuite/g++.dg/opt/pr58165.C | 14 + gcc/testsuite/g++.dg/other/anon4.C | 2 +- gcc/testsuite/g++.dg/other/error15.C | 28 +- gcc/testsuite/g++.dg/other/error8.C | 2 +- gcc/testsuite/g++.dg/other/redecl2.C | 2 +- gcc/testsuite/g++.dg/overload/new1.C | 1 - gcc/testsuite/g++.dg/overload/using2.C | 8 +- gcc/testsuite/g++.dg/overload/using3.C | 16 + gcc/testsuite/g++.dg/parse/access11.C | 35 + gcc/testsuite/g++.dg/parse/crash16.C | 2 +- gcc/testsuite/g++.dg/parse/crash21.C | 2 +- gcc/testsuite/g++.dg/parse/crash38.C | 4 +- gcc/testsuite/g++.dg/parse/crash63.C | 10 + gcc/testsuite/g++.dg/parse/redef2.C | 2 +- 
gcc/testsuite/g++.dg/parse/struct-as-enum1.C | 2 +- gcc/testsuite/g++.dg/plugin/dumb_plugin.c | 55 +- gcc/testsuite/g++.dg/plugin/selfassign.c | 55 +- gcc/testsuite/g++.dg/pr57878.C | 4 +- gcc/testsuite/g++.dg/template/abstract1.C | 12 + gcc/testsuite/g++.dg/template/crash39.C | 2 +- gcc/testsuite/g++.dg/template/delete2.C | 26 + gcc/testsuite/g++.dg/template/error54.C | 10 + gcc/testsuite/g++.dg/template/meminit1.C | 2 +- gcc/testsuite/g++.dg/template/redecl3.C | 2 +- gcc/testsuite/g++.dg/template/using24.C | 30 + gcc/testsuite/g++.dg/template/using25.C | 17 + gcc/testsuite/g++.dg/template/using26.C | 49 + gcc/testsuite/g++.dg/tls/diag-3.C | 4 +- gcc/testsuite/g++.dg/tm/noexcept-6.C | 23 + gcc/testsuite/g++.dg/torture/PR58294.C | 20 + gcc/testsuite/g++.dg/torture/pr58201.h | 24 + gcc/testsuite/g++.dg/torture/pr58201_0.C | 9 + gcc/testsuite/g++.dg/torture/pr58201_1.C | 10 + gcc/testsuite/g++.dg/tree-prof/pr57451.C | 26 + gcc/testsuite/g++.dg/tree-ssa/dom-invalid.C | 2 +- gcc/testsuite/g++.dg/tree-ssa/pr45453.C | 2 +- gcc/testsuite/g++.dg/ubsan/cxx11-shift-1.C | 9 + gcc/testsuite/g++.dg/ubsan/cxx11-shift-2.C | 10 + gcc/testsuite/g++.dg/ubsan/div-by-zero-1.C | 10 + gcc/testsuite/g++.dg/ubsan/ubsan.exp | 34 + gcc/testsuite/g++.dg/vect/slp-pr50413.cc | 2 +- gcc/testsuite/g++.dg/vect/slp-pr50819.cc | 2 +- gcc/testsuite/g++.dg/vect/slp-pr56812.cc | 2 +- gcc/testsuite/g++.dg/warn/Wredundant-decls-spec.C | 2 +- gcc/testsuite/g++.dg/warn/deprecated-7.C | 17 + gcc/testsuite/g++.dg/warn/deprecated-8.C | 15 + gcc/testsuite/g++.dg/warn/weak1.C | 1 + .../g++.old-deja/g++.benjamin/typedef01.C | 2 +- gcc/testsuite/g++.old-deja/g++.benjamin/warn02.C | 8 +- gcc/testsuite/g++.old-deja/g++.brendan/crash16.C | 2 +- gcc/testsuite/g++.old-deja/g++.brendan/crash18.C | 2 +- gcc/testsuite/g++.old-deja/g++.brendan/err-msg4.C | 4 +- gcc/testsuite/g++.old-deja/g++.brendan/redecl1.C | 2 +- gcc/testsuite/g++.old-deja/g++.brendan/static3.C | 2 +- gcc/testsuite/g++.old-deja/g++.bugs/900127_02.C | 
8 +- gcc/testsuite/g++.old-deja/g++.jason/binding.C | 2 +- gcc/testsuite/g++.old-deja/g++.jason/crash4.C | 2 +- gcc/testsuite/g++.old-deja/g++.jason/crash7.C | 2 +- gcc/testsuite/g++.old-deja/g++.jason/lineno.C | 2 +- gcc/testsuite/g++.old-deja/g++.jason/scoping7.C | 2 +- gcc/testsuite/g++.old-deja/g++.mike/misc3.C | 2 +- gcc/testsuite/g++.old-deja/g++.mike/net44.C | 2 +- gcc/testsuite/g++.old-deja/g++.mike/ns3.C | 2 +- gcc/testsuite/g++.old-deja/g++.ns/alias4.C | 2 +- gcc/testsuite/g++.old-deja/g++.ns/ns11.C | 2 +- gcc/testsuite/g++.old-deja/g++.other/crash23.C | 2 +- gcc/testsuite/g++.old-deja/g++.other/decl8.C | 2 +- gcc/testsuite/g++.old-deja/g++.other/linkage3.C | 2 +- gcc/testsuite/g++.old-deja/g++.other/typeck1.C | 2 +- gcc/testsuite/g++.old-deja/g++.other/typedef5.C | 2 +- gcc/testsuite/g++.old-deja/g++.pt/explicit34.C | 2 +- gcc/testsuite/g++.old-deja/g++.pt/friend36.C | 2 +- gcc/testsuite/gcc.c-torture/compile/pr58164.c | 8 + gcc/testsuite/gcc.c-torture/compile/pr58340.c | 16 + gcc/testsuite/gcc.c-torture/execute/20101011-1.c | 3 + gcc/testsuite/gcc.c-torture/execute/pr56799.x | 7 + gcc/testsuite/gcc.c-torture/execute/pr57860.c | 25 + gcc/testsuite/gcc.c-torture/execute/pr57861.c | 33 + gcc/testsuite/gcc.c-torture/execute/pr57875.c | 21 + gcc/testsuite/gcc.c-torture/execute/pr57876.c | 27 + gcc/testsuite/gcc.c-torture/execute/pr57877.c | 28 + gcc/testsuite/gcc.c-torture/execute/pr58209.c | 32 + gcc/testsuite/gcc.c-torture/execute/pr58277-1.c | 102 + gcc/testsuite/gcc.c-torture/execute/pr58277-2.c | 98 + gcc/testsuite/gcc.dg/asan/pr56417.c | 9 + gcc/testsuite/gcc.dg/attr-weakref-1.c | 2 +- gcc/testsuite/gcc.dg/autopar/pr49960.c | 6 +- gcc/testsuite/gcc.dg/builtin-apply2.c | 2 +- gcc/testsuite/gcc.dg/c99-stdint-1.c | 2 +- .../gcc.dg/debug/dwarf2/aranges-fnsec-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/asm-line1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/c99-typedef1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/const-2.c | 2 +- 
gcc/testsuite/gcc.dg/debug/dwarf2/const-2b.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/discriminator.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-char1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-char2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-char3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-dfp.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die5.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die6.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-die7.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-file1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-float.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-merge.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-uninit.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2.exp | 4 +- gcc/testsuite/gcc.dg/debug/dwarf2/fesd-any.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/fesd-baseonly.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/fesd-none.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/fesd-reduced.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/fesd-sys.c | 2 +- .../gcc.dg/debug/dwarf2/global-used-types.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/inline1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/inline2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/inline3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/ipa-cp1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr29609-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr29609-2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr31230.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr36690-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr36690-2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr36690-3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr37616.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr37726.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-1.c | 2 +- 
gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-4.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-5.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41445-6.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41543.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr41695.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr43237.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr47939-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr47939-2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr47939-3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr47939-4.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr51410.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr53948.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/short-circuit.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/static1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/struct-loc1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/var1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/var2.c | 2 +- gcc/testsuite/gcc.dg/fork-instrumentation.c | 8 + gcc/testsuite/gcc.dg/guality/param-1.c | 33 + gcc/testsuite/gcc.dg/guality/param-2.c | 33 + gcc/testsuite/gcc.dg/ipa/ipa-1.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-2.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-3.c | 6 +- gcc/testsuite/gcc.dg/ipa/ipa-4.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-5.c | 4 +- gcc/testsuite/gcc.dg/ipa/ipa-7.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-8.c | 4 +- gcc/testsuite/gcc.dg/ipa/ipa-pta-14.c | 3 +- gcc/testsuite/gcc.dg/ipa/ipcp-1.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipcp-2.c | 4 +- gcc/testsuite/gcc.dg/ipa/ipcp-4.c | 4 +- gcc/testsuite/gcc.dg/ipa/pr57539.c | 218 + gcc/testsuite/gcc.dg/ipa/pr58106.c | 50 + gcc/testsuite/gcc.dg/lower-subreg-1.c | 2 +- gcc/testsuite/gcc.dg/plugin/one_time_plugin.c | 66 +- gcc/testsuite/gcc.dg/plugin/selfassign.c | 55 +- gcc/testsuite/gcc.dg/pr26570.c | 2 +- gcc/testsuite/gcc.dg/pr32773.c | 4 +- gcc/testsuite/gcc.dg/pr40209.c | 2 +- gcc/testsuite/gcc.dg/pr44214-1.c | 2 +- 
gcc/testsuite/gcc.dg/pr44214-3.c | 2 +- gcc/testsuite/gcc.dg/pr46647.c | 4 +- gcc/testsuite/gcc.dg/pr57287-2.c | 35 + gcc/testsuite/gcc.dg/pr57662.c | 47 + gcc/testsuite/gcc.dg/pr57980.c | 19 + gcc/testsuite/gcc.dg/pr58010.c | 15 + gcc/testsuite/gcc.dg/pr58145-1.c | 37 + gcc/testsuite/gcc.dg/pr58145-2.c | 51 + gcc/testsuite/gcc.dg/stack-usage-1.c | 2 + gcc/testsuite/gcc.dg/strlenopt-10.c | 5 +- gcc/testsuite/gcc.dg/strlenopt-11.c | 14 +- gcc/testsuite/gcc.dg/strlenopt-13.c | 16 +- gcc/testsuite/gcc.dg/torture/pr37868.c | 2 +- gcc/testsuite/gcc.dg/torture/pr53922.c | 1 + gcc/testsuite/gcc.dg/torture/pr57521.c | 51 + gcc/testsuite/gcc.dg/torture/pr57656.c | 13 + gcc/testsuite/gcc.dg/torture/pr57685.c | 15 + gcc/testsuite/gcc.dg/torture/pr57993-2.cpp | 213 + gcc/testsuite/gcc.dg/torture/pr57993.c | 30 + gcc/testsuite/gcc.dg/torture/pr58041.c | 33 + gcc/testsuite/gcc.dg/torture/pr58079.c | 107 + gcc/testsuite/gcc.dg/torture/pr58223.c | 16 + gcc/testsuite/gcc.dg/torture/pr58228.c | 15 + gcc/testsuite/gcc.dg/torture/pr58246.c | 21 + .../gcc.dg/torture/stackalign/builtin-apply-2.c | 7 +- .../gcc.dg/tree-prof/crossmodule-indircall-1.c | 19 + .../gcc.dg/tree-prof/crossmodule-indircall-1a.c | 40 + gcc/testsuite/gcc.dg/tree-ssa/attr-alias.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-3.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-4.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/fnsplit-1.c | 23 + gcc/testsuite/gcc.dg/tree-ssa/ipa-cp-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 6 +- gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/loop-23.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/loop-4.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr31261.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/pr42585.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/pr44258.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/pr54245.c | 2 +- 
gcc/testsuite/gcc.dg/tree-ssa/pta-escape-1.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/pta-escape-2.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/pta-escape-3.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/reassoc-31.c | 17 + gcc/testsuite/gcc.dg/tree-ssa/sccp-1.c | 15 + gcc/testsuite/gcc.dg/tree-ssa/slsr-1.c | 6 +- gcc/testsuite/gcc.dg/tree-ssa/slsr-2.c | 6 +- gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/slsr-3.c | 9 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-23.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-3.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c | 6 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-21.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c | 1 + gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c | 31 + gcc/testsuite/gcc.dg/tree-ssa/vector-4.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/vrp55.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/vrp58.c | 3 +- gcc/testsuite/gcc.dg/tree-ssa/vrp87.c | 1 + gcc/testsuite/gcc.dg/ubsan/c99-shift-1.c | 10 + gcc/testsuite/gcc.dg/ubsan/c99-shift-2.c | 10 + gcc/testsuite/gcc.dg/ubsan/ubsan.exp | 36 + gcc/testsuite/gcc.dg/unroll_1.c | 10 +- gcc/testsuite/gcc.dg/unroll_2.c | 2 +- gcc/testsuite/gcc.dg/unroll_3.c | 2 +- gcc/testsuite/gcc.dg/unroll_4.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-1.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-10.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-11.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-13.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-14.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-15.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-16.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-17.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-18.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-19.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-2.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-20.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-21.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-22.c | 4 +- gcc/testsuite/gcc.dg/vect/bb-slp-23.c | 2 +- 
gcc/testsuite/gcc.dg/vect/bb-slp-24.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-25.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-26.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-27.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-28.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-29.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-3.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-30.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-31.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-4.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-5.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-6.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-7.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-8.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-8a.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-8b.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-9.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 2 +- .../gcc.dg/vect/fast-math-bb-slp-call-1.c | 2 +- .../gcc.dg/vect/fast-math-bb-slp-call-2.c | 2 +- .../gcc.dg/vect/no-tree-reassoc-bb-slp-12.c | 2 +- gcc/testsuite/gcc.dg/vect/pr56933.c | 6 + gcc/testsuite/gcc.dg/vect/vect-iv-5.c | 2 +- .../gcc.target/aarch64/scalar_intrinsics.c | 56 +- gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c | 263 + .../gcc.target/aarch64/table-intrinsics.c | 32 +- gcc/testsuite/gcc.target/arc/arc.exp | 41 + .../gcc.target/arc/builtin_arc_aligned-1.c | 16 + .../gcc.target/arc/builtin_arc_aligned-2.c | 28 + .../gcc.target/arc/builtin_arc_aligned-3.c | 67 + gcc/testsuite/gcc.target/arc/cond-set-use.c | 128 + gcc/testsuite/gcc.target/arc/interrupt-1.c | 5 + gcc/testsuite/gcc.target/arc/interrupt-2.c | 5 + gcc/testsuite/gcc.target/arc/interrupt-3.c | 14 + gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c | 28 + gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c | 30 + gcc/testsuite/gcc.target/arc/nv-cache.c | 9 + gcc/testsuite/gcc.target/arc/sdata-1.c | 10 + gcc/testsuite/gcc.target/arc/sdata-2.c | 10 + gcc/testsuite/gcc.target/arc/v-cache.c | 9 + .../arm/atomic-comp-swap-release-acquire.c | 2 +- 
gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-acquire.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-char.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-consume.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-int.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-release.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c | 2 +- gcc/testsuite/gcc.target/arm/atomic-op-short.c | 2 +- gcc/testsuite/gcc.target/arm/ivopts-orig_biv-inc.c | 19 + gcc/testsuite/gcc.target/arm/lp1189445.c | 18 + gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c | 57 - gcc/testsuite/gcc.target/arm/pr19599.c | 2 +- gcc/testsuite/gcc.target/arm/pr46975-2.c | 10 + gcc/testsuite/gcc.target/arm/pr57637.c | 206 + gcc/testsuite/gcc.target/arm/pr58041.c | 30 + gcc/testsuite/gcc.target/avr/progmem-error-1.cpp | 2 +- gcc/testsuite/gcc.target/i386/memcpy-strategy-1.c | 12 + gcc/testsuite/gcc.target/i386/memcpy-strategy-2.c | 12 + gcc/testsuite/gcc.target/i386/memcpy-strategy-3.c | 10 + gcc/testsuite/gcc.target/i386/memset-strategy-1.c | 10 + gcc/testsuite/gcc.target/i386/movabs-1.c | 10 + gcc/testsuite/gcc.target/i386/pr58048.c | 11 + gcc/testsuite/gcc.target/i386/pr58137.c | 33 + gcc/testsuite/gcc.target/i386/pr58218.c | 5 + gcc/testsuite/gcc.target/mips/code-readable-1.c | 8 + gcc/testsuite/gcc.target/mips/code-readable-2.c | 8 + gcc/testsuite/gcc.target/mips/code-readable-3.c | 8 + gcc/testsuite/gcc.target/mips/code-readable-4.c | 8 + gcc/testsuite/gcc.target/mips/fabs-2008.c | 10 + gcc/testsuite/gcc.target/mips/fabs-legacy.c | 10 + gcc/testsuite/gcc.target/mips/fabsf-2008.c | 10 + gcc/testsuite/gcc.target/mips/fabsf-legacy.c | 10 + gcc/testsuite/gcc.target/mips/fneg-2008.c | 10 + gcc/testsuite/gcc.target/mips/fneg-legacy.c | 10 + gcc/testsuite/gcc.target/mips/fnegf-2008.c | 10 + gcc/testsuite/gcc.target/mips/fnegf-legacy.c | 10 + gcc/testsuite/gcc.target/mips/mips.exp | 20 +- 
gcc/testsuite/gcc.target/mips/mulsize-1.c | 1 + gcc/testsuite/gcc.target/mips/mulsize-2.c | 3 +- gcc/testsuite/gcc.target/mips/nan-2008.c | 7 + gcc/testsuite/gcc.target/mips/nan-legacy.c | 7 + gcc/testsuite/gcc.target/mips/nanf-2008.c | 7 + gcc/testsuite/gcc.target/mips/nanf-legacy.c | 7 + gcc/testsuite/gcc.target/mips/nans-2008.c | 7 + gcc/testsuite/gcc.target/mips/nans-legacy.c | 7 + gcc/testsuite/gcc.target/mips/nansf-2008.c | 7 + gcc/testsuite/gcc.target/mips/nansf-legacy.c | 7 + gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c | 26 + gcc/testsuite/gcc.target/powerpc/dfp-td-2.c | 29 + gcc/testsuite/gcc.target/powerpc/dfp-td-3.c | 29 + gcc/testsuite/gcc.target/powerpc/fusion.c | 23 + gcc/testsuite/gcc.target/powerpc/pr57744.c | 2 + gcc/testsuite/gcc.target/powerpc/pr57949-1.c | 19 + gcc/testsuite/gcc.target/powerpc/pr57949-2.c | 19 + gcc/testsuite/gcc.target/s390/nearestint-1.c | 48 + gcc/testsuite/gfortran.dg/array_constructor_11.f90 | 1 + gcc/testsuite/gfortran.dg/array_constructor_18.f90 | 1 + gcc/testsuite/gfortran.dg/array_constructor_22.f90 | 1 + gcc/testsuite/gfortran.dg/bounds_check_18.f90 | 9 + gcc/testsuite/gfortran.dg/coarray_15.f90 | 2 +- gcc/testsuite/gfortran.dg/do_1.f90 | 1 + gcc/testsuite/gfortran.dg/do_3.F90 | 2 +- gcc/testsuite/gfortran.dg/do_check_10.f90 | 7 + gcc/testsuite/gfortran.dg/do_check_5.f90 | 2 +- gcc/testsuite/gfortran.dg/do_concurrent_3.f90 | 13 + gcc/testsuite/gfortran.dg/gomp/proc_ptr_1.f90 | 28 + gcc/testsuite/gfortran.dg/inline_sum_5.f90 | 33 + gcc/testsuite/gfortran.dg/intent_out_8.f90 | 17 + gcc/testsuite/gfortran.dg/pointer_assign_10.f90 | 36 + gcc/testsuite/gfortran.dg/pointer_assign_11.f90 | 51 + gcc/testsuite/gfortran.dg/pointer_assign_8.f90 | 46 + gcc/testsuite/gfortran.dg/pointer_assign_9.f90 | 36 + gcc/testsuite/gfortran.dg/pointer_init_8.f90 | 26 + gcc/testsuite/gfortran.dg/pr57987.f90 | 24 + gcc/testsuite/gfortran.dg/realloc_on_assign_14.f90 | 2 +- gcc/testsuite/gfortran.dg/realloc_on_assign_19.f90 | 21 + 
gcc/testsuite/gfortran.dg/reassoc_12.f90 | 74 + gcc/testsuite/gfortran.dg/select_type_34.f90 | 10 + gcc/testsuite/gfortran.dg/transfer_intrinsic_6.f90 | 20 + .../gfortran.dg/typebound_assignment_7.f90 | 66 + gcc/testsuite/gnat.dg/loop_optimization16.adb | 24 + gcc/testsuite/gnat.dg/loop_optimization16_pkg.adb | 8 + gcc/testsuite/gnat.dg/loop_optimization16_pkg.ads | 7 + gcc/testsuite/gnat.dg/specs/linker_alias.ads | 10 + gcc/testsuite/gnat.dg/stack_usage2.adb | 26 + gcc/testsuite/gnat.dg/valued_proc.adb | 12 + gcc/testsuite/gnat.dg/valued_proc_pkg.ads | 15 + gcc/testsuite/gnat.dg/warn10.adb | 12 + gcc/testsuite/gnat.dg/warn10.ads | 11 + gcc/testsuite/gnat.dg/warn10_pkg.ads | 12 + gcc/testsuite/gnat.dg/warn9.adb | 10 + gcc/testsuite/go.test/test/fixedbugs/bug086.go | 4 +- gcc/testsuite/lib/file-format.exp | 3 + gcc/testsuite/lib/plugin-support.exp | 4 +- gcc/testsuite/lib/target-supports.exp | 15 + gcc/testsuite/lib/ubsan-dg.exp | 104 + gcc/testsuite/obj-c++.dg/method-8.mm | 4 +- gcc/testsuite/obj-c++.dg/tls/diag-3.mm | 4 +- gcc/timevar.c | 5 +- gcc/timevar.def | 5 + gcc/timevar.h | 2 +- gcc/toplev.c | 46 +- gcc/toplev.h | 1 - gcc/tracer.c | 53 +- gcc/trans-mem.c | 378 +- gcc/tree-affine.c | 46 +- gcc/tree-call-cdce.c | 75 +- gcc/tree-cfg.c | 304 +- gcc/tree-cfgcleanup.c | 53 +- gcc/tree-complex.c | 106 +- gcc/tree-core.h | 1693 ++ gcc/tree-data-ref.c | 23 +- gcc/tree-eh.c | 261 +- gcc/tree-emutls.c | 52 +- gcc/tree-flow.h | 3 +- gcc/tree-if-conv.c | 146 +- gcc/tree-inline.c | 235 +- gcc/tree-inline.h | 4 + gcc/tree-into-ssa.c | 54 +- gcc/tree-loop-distribution.c | 73 +- gcc/tree-mudflap.c | 140 +- gcc/tree-nomudflap.c | 102 +- gcc/tree-nrv.c | 103 +- gcc/tree-object-size.c | 52 +- gcc/tree-optimize.c | 105 +- gcc/tree-parloops.c | 17 +- gcc/tree-pass.h | 636 +- gcc/tree-pretty-print.c | 339 +- gcc/tree-pretty-print.h | 7 +- gcc/tree-profile.c | 182 +- gcc/tree-scalar-evolution.c | 1 + gcc/tree-sra.c | 182 +- gcc/tree-ssa-ccp.c | 148 +- gcc/tree-ssa-copy.c | 64 +- 
gcc/tree-ssa-copyrename.c | 53 +- gcc/tree-ssa-dce.c | 307 +- gcc/tree-ssa-dom.c | 140 +- gcc/tree-ssa-dse.c | 53 +- gcc/tree-ssa-forwprop.c | 54 +- gcc/tree-ssa-ifcombine.c | 53 +- gcc/tree-ssa-loop-ch.c | 55 +- gcc/tree-ssa-loop-ivcanon.c | 10 +- gcc/tree-ssa-loop-ivopts.c | 30 +- gcc/tree-ssa-loop.c | 979 +- gcc/tree-ssa-math-opts.c | 218 +- gcc/tree-ssa-phiopt.c | 111 +- gcc/tree-ssa-phiprop.c | 53 +- gcc/tree-ssa-pre.c | 112 +- gcc/tree-ssa-reassoc.c | 82 +- gcc/tree-ssa-sink.c | 56 +- gcc/tree-ssa-strlen.c | 74 +- gcc/tree-ssa-structalias.c | 169 +- gcc/tree-ssa-threadedge.c | 173 +- gcc/tree-ssa-threadupdate.c | 82 +- gcc/tree-ssa-uncprop.c | 53 +- gcc/tree-ssa-uninit.c | 53 +- gcc/tree-ssa.c | 153 +- gcc/tree-ssanames.c | 51 +- gcc/tree-stdarg.c | 68 +- gcc/tree-streamer-in.c | 27 +- gcc/tree-streamer-out.c | 28 +- gcc/tree-switch-conversion.c | 57 +- gcc/tree-tailcall.c | 156 +- gcc/tree-vect-data-refs.c | 123 +- gcc/tree-vect-generic.c | 116 +- gcc/tree-vect-loop-manip.c | 25 +- gcc/tree-vect-loop.c | 63 +- gcc/tree-vect-slp.c | 2 +- gcc/tree-vect-stmts.c | 178 +- gcc/tree-vectorizer.c | 412 +- gcc/tree-vectorizer.h | 35 +- gcc/tree-vrp.c | 98 +- gcc/tree.c | 201 +- gcc/tree.def | 4 + gcc/tree.h | 1741 +- gcc/tsan.c | 109 +- gcc/ubsan.c | 417 + gcc/ubsan.h | 31 + gcc/value-prof.c | 192 +- gcc/value-prof.h | 4 + gcc/var-tracking.c | 77 +- gcc/varasm.c | 72 +- gcc/varpool.c | 112 +- gcc/vtable-verify.c | 793 + gcc/vtable-verify.h | 141 + gcc/web.c | 52 +- gnattools/ChangeLog | 9 + gnattools/Makefile.in | 9 + include/ChangeLog | 14 + include/dwarf2.def | 3 + include/floatformat.h | 3 +- include/vtv-change-permission.h | 55 + libcpp/ChangeLog | 5 + libcpp/configure | 4 +- libcpp/configure.ac | 4 +- libgcc/ChangeLog | 63 + libgcc/Makefile.in | 19 +- libgcc/config.host | 17 +- libgcc/config/aarch64/sfp-machine.h | 35 +- libgcc/config/aarch64/sync-cache.c | 29 +- libgcc/config/i386/cpuinfo.c | 11 +- libgcc/config/ia64/unwind-ia64.h | 35 +- 
libgcc/config/mips/libgcc-mips16.ver | 38 - libgcc/config/mips/mips16.S | 39 +- libgcc/config/mips/vr4120-div.S | 25 +- libgcc/configure | 16 + libgcc/configure.ac | 10 + libgcc/libgcov.c | 52 +- libgcc/vtv_end.c | 66 + libgcc/vtv_end_preinit.c | 71 + libgcc/vtv_start.c | 65 + libgcc/vtv_start_preinit.c | 73 + libgo/config/libtool.m4 | 12 +- libgo/configure | 16 +- libgo/go/net/cgo_unix.go | 3 +- libgo/go/reflect/value.go | 7 - libgo/go/syscall/mksyscall.awk | 4 +- libgo/runtime/go-reflect-call.c | 17 +- libgo/runtime/mgc0.c | 7 +- libgo/runtime/proc.c | 20 + libgo/runtime/runtime.h | 4 + libgo/runtime/time.goc | 8 +- libiberty/ChangeLog | 6 + libiberty/floatformat.c | 13 +- libitm/ChangeLog | 37 + libitm/beginend.cc | 46 +- libitm/config/linux/rwlock.h | 5 + libitm/config/posix/rwlock.cc | 4 +- libitm/config/posix/rwlock.h | 16 +- libitm/config/s390/target.h | 4 + libitm/config/x86/sjlj.S | 101 +- libitm/config/x86/target.h | 4 + libitm/configure.tgt | 2 +- libitm/libitm.h | 15 +- libitm/libitm_i.h | 26 +- libsanitizer/ChangeLog | 16 + libsanitizer/Makefile.am | 6 +- libsanitizer/Makefile.in | 8 +- libsanitizer/configure | 14 +- libsanitizer/configure.ac | 2 +- libsanitizer/merge.sh | 1 + libsanitizer/sanitizer_common/sanitizer_common.h | 3 + libsanitizer/sanitizer_common/sanitizer_printf.cc | 2 + .../sanitizer_common/sanitizer_report_decorator.h | 26 +- libsanitizer/ubsan/Makefile.am | 69 + libsanitizer/ubsan/Makefile.in | 580 + libsanitizer/ubsan/libtool-version | 6 + libsanitizer/ubsan/ubsan_diag.cc | 261 + libsanitizer/ubsan/ubsan_diag.h | 200 + libsanitizer/ubsan/ubsan_handlers.cc | 258 + libsanitizer/ubsan/ubsan_handlers.h | 115 + libsanitizer/ubsan/ubsan_handlers_cxx.cc | 72 + libsanitizer/ubsan/ubsan_handlers_cxx.h | 38 + libsanitizer/ubsan/ubsan_type_hash.cc | 246 + libsanitizer/ubsan/ubsan_type_hash.h | 61 + libsanitizer/ubsan/ubsan_value.cc | 99 + libsanitizer/ubsan/ubsan_value.h | 202 + libstdc++-v3/ChangeLog | 495 + libstdc++-v3/Makefile.in | 5 + 
libstdc++-v3/acinclude.m4 | 32 + libstdc++-v3/config/abi/pre/gnu.ver | 10 + libstdc++-v3/configure | 82 +- libstdc++-v3/configure.ac | 7 + libstdc++-v3/doc/Makefile.in | 5 + libstdc++-v3/doc/xml/manual/configure.xml | 12 + libstdc++-v3/fragment.am | 6 + libstdc++-v3/include/Makefile.am | 16 +- libstdc++-v3/include/Makefile.in | 21 +- libstdc++-v3/include/bits/basic_string.h | 7 +- libstdc++-v3/include/bits/hashtable.h | 349 +- libstdc++-v3/include/bits/hashtable_policy.h | 249 +- libstdc++-v3/include/bits/random.h | 8 +- libstdc++-v3/include/bits/random.tcc | 2 +- libstdc++-v3/include/bits/regex.h | 1170 +- libstdc++-v3/include/bits/regex_automaton.h | 285 + libstdc++-v3/include/bits/regex_automaton.tcc | 207 + libstdc++-v3/include/bits/regex_compiler.h | 1156 +- libstdc++-v3/include/bits/regex_compiler.tcc | 415 + libstdc++-v3/include/bits/regex_constants.h | 212 +- libstdc++-v3/include/bits/regex_cursor.h | 105 - libstdc++-v3/include/bits/regex_error.h | 2 +- libstdc++-v3/include/bits/regex_executor.h | 225 + libstdc++-v3/include/bits/regex_executor.tcc | 330 + libstdc++-v3/include/bits/regex_grep_matcher.h | 173 - libstdc++-v3/include/bits/regex_grep_matcher.tcc | 243 - libstdc++-v3/include/bits/regex_nfa.h | 415 - libstdc++-v3/include/bits/regex_nfa.tcc | 174 - libstdc++-v3/include/bits/regex_scanner.h | 196 + libstdc++-v3/include/bits/regex_scanner.tcc | 611 + libstdc++-v3/include/bits/stl_algobase.h | 2 +- libstdc++-v3/include/bits/stl_bvector.h | 2 +- libstdc++-v3/include/debug/formatter.h | 4 +- libstdc++-v3/include/debug/forward_list | 7 + libstdc++-v3/include/debug/functions.h | 276 +- libstdc++-v3/include/debug/list | 9 + libstdc++-v3/include/debug/macros.h | 61 +- libstdc++-v3/include/debug/safe_iterator.h | 3 +- libstdc++-v3/include/debug/safe_local_iterator.h | 3 +- libstdc++-v3/include/debug/string | 5 + libstdc++-v3/include/ext/atomicity.h | 2 + libstdc++-v3/include/ext/random | 2 +- libstdc++-v3/include/ext/vstring.h | 8 +- 
libstdc++-v3/include/std/atomic | 12 +- libstdc++-v3/include/std/bitset | 2 +- libstdc++-v3/include/std/regex | 9 +- libstdc++-v3/include/tr1/cmath | 19 +- libstdc++-v3/libsupc++/Makefile.am | 15 +- libstdc++-v3/libsupc++/Makefile.in | 24 +- libstdc++-v3/libsupc++/vtv_stubs.cc | 100 + libstdc++-v3/po/Makefile.in | 5 + libstdc++-v3/python/Makefile.in | 5 + libstdc++-v3/python/libstdcxx/v6/printers.py | 12 +- libstdc++-v3/scripts/testsuite_flags.in | 11 +- libstdc++-v3/src/Makefile.am | 5 +- libstdc++-v3/src/Makefile.in | 8 +- libstdc++-v3/src/c++11/Makefile.am | 7 +- libstdc++-v3/src/c++11/Makefile.in | 13 +- libstdc++-v3/src/c++11/debug.cc | 16 +- libstdc++-v3/src/c++11/functexcept.cc | 2 +- libstdc++-v3/src/c++11/hashtable_c++0x.cc | 1 + libstdc++-v3/src/c++11/regex.cc | 3 +- libstdc++-v3/src/c++98/Makefile.am | 7 +- libstdc++-v3/src/c++98/Makefile.in | 13 +- libstdc++-v3/src/c++98/compatibility.cc | 19 +- libstdc++-v3/testsuite/17_intro/freestanding.cc | 2 +- .../18_support/bad_exception/23591_thread-1.c | 2 +- .../basic_string/element_access/char/58163.cc | 39 + .../basic_string/element_access/wchar_t/58163.cc | 39 + .../23_containers/deque/debug/insert5_neg.cc | 33 + .../forward_list/debug/insert_after4_neg.cc | 35 + .../23_containers/list/debug/insert5_neg.cc | 34 + .../unordered_set/instantiation_neg.cc | 2 +- .../not_default_constructible_hash_neg.cc | 2 +- .../23_containers/vector/debug/57779_neg.cc | 38 + .../23_containers/vector/debug/insert5_neg.cc | 33 + .../23_containers/vector/debug/insert6_neg.cc | 48 + .../23_containers/vector/modifiers/insert/58148.cc | 35 + .../random/cauchy_distribution/cons/default.cc | 2 +- .../random/cauchy_distribution/cons/parms.cc | 2 +- .../exponential_distribution/cons/default.cc | 2 +- .../random/exponential_distribution/cons/parms.cc | 2 +- .../extreme_value_distribution/cons/default.cc | 2 +- .../extreme_value_distribution/cons/parms.cc | 2 +- .../operators/58302.cc | 34 + .../random/normal_distribution/cons/default.cc 
| 2 +- .../random/normal_distribution/cons/parms.cc | 2 +- .../random/student_t_distribution/cons/default.cc | 2 +- .../random/student_t_distribution/cons/parms.cc | 2 +- .../algorithms/regex_match/awk/cstring_01.cc | 50 + .../algorithms/regex_match/basic/empty_range.cc | 57 + .../regex_match/basic/string_range_02_03.cc | 1 - .../algorithms/regex_match/ecma/char/anymatcher.cc | 52 + .../algorithms/regex_match/ecma/char/backref.cc | 78 + .../regex_match/ecma/char/empty_range.cc | 47 + .../algorithms/regex_match/ecma/char/emptygroup.cc | 58 + .../algorithms/regex_match/ecma/char/hex.cc | 53 + .../regex_match/ecma/char/quoted_char.cc | 52 + .../regex_match/ecma/wchar_t/anymatcher.cc | 51 + .../algorithms/regex_match/ecma/wchar_t/hex.cc | 44 + .../algorithms/regex_match/extended/53622.cc | 35 +- .../algorithms/regex_match/extended/57173.cc | 23 +- .../regex_match/extended/cstring_bracket_01.cc | 66 + .../regex_match/extended/cstring_plus.cc | 43 +- .../regex_match/extended/cstring_questionmark.cc | 43 +- .../regex_match/extended/cstring_range.cc | 68 + .../regex_match/extended/string_dispatch_01.cc | 69 + .../regex_match/extended/string_range_00_03.cc | 34 +- .../regex_match/extended/string_range_02_03.cc | 1 - .../regex_match/extended/wstring_locale.cc | 48 + .../algorithms/regex_search/ecma/string_01.cc | 42 + .../iterators/regex_iterator/wchar_t/string_02.cc | 59 + .../regex_token_iterator/wchar_t/wstring_02.cc | 53 + .../requirements/compare_exchange_lowering.cc | 65 + libstdc++-v3/testsuite/Makefile.in | 5 + .../random/normal_mv_distribution/cons/default.cc | 4 +- .../random/normal_mv_distribution/cons/parms.cc | 4 +- .../random/triangular_distribution/cons/default.cc | 44 + .../random/triangular_distribution/cons/parms.cc | 44 + .../triangular_distribution/operators/equal.cc | 42 + .../triangular_distribution/operators/inequal.cc | 42 + .../triangular_distribution/operators/serialize.cc | 44 + .../requirements/explicit_instantiation/1.cc | 26 + 
.../requirements/typedefs.cc | 34 + .../random/von_mises_distribution/cons/default.cc | 43 + .../random/von_mises_distribution/cons/parms.cc | 43 + .../von_mises_distribution/operators/equal.cc | 42 + .../von_mises_distribution/operators/inequal.cc | 42 + .../von_mises_distribution/operators/serialize.cc | 44 + .../requirements/explicit_instantiation/1.cc | 26 + .../requirements/typedefs.cc | 34 + .../ext/triangular_distribution/cons/default.cc | 44 - .../ext/triangular_distribution/cons/parms.cc | 44 - .../ext/triangular_distribution/operators/equal.cc | 42 - .../triangular_distribution/operators/inequal.cc | 42 - .../triangular_distribution/operators/serialize.cc | 44 - .../requirements/explicit_instantiation/1.cc | 26 - .../requirements/typedefs.cc | 34 - .../ext/von_mises_distribution/cons/default.cc | 43 - .../ext/von_mises_distribution/cons/parms.cc | 43 - .../ext/von_mises_distribution/operators/equal.cc | 42 - .../von_mises_distribution/operators/inequal.cc | 42 - .../von_mises_distribution/operators/serialize.cc | 44 - .../requirements/explicit_instantiation/1.cc | 26 - .../requirements/typedefs.cc | 34 - .../ext/vstring/element_access/char/58163.cc | 40 + .../ext/vstring/element_access/wchar_t/58163.cc | 40 + libstdc++-v3/testsuite/lib/libstdc++.exp | 17 +- .../testsuite/libstdc++-prettyprinters/cxx11.cc | 36 +- .../tr1/8_c_compatibility/cmath/pow_cmath.cc | 33 + libstdc++-v3/testsuite/util/debug/checks.h | 30 +- .../testsuite/util/testsuite_common_types.h | 17 + libvtv/ChangeLog | 193 + libvtv/Makefile.am | 78 + libvtv/Makefile.in | 762 + libvtv/acinclude.m4 | 47 + libvtv/aclocal.m4 | 1016 ++ libvtv/configure | 17982 +++++++++++++++++++ libvtv/configure.ac | 143 + libvtv/configure.tgt | 37 + libvtv/scripts/run-testsuite.sh | 226 + libvtv/scripts/sum-vtv-counts.c | 150 + libvtv/testsuite/Makefile.am | 11 + libvtv/testsuite/Makefile.in | 400 + libvtv/testsuite/config/default.exp | 17 + libvtv/testsuite/lib/libvtv-dg.exp | 21 + 
libvtv/testsuite/lib/libvtv.exp | 220 + libvtv/testsuite/libvtv.cc/bb_tests.cc | 53 + libvtv/testsuite/libvtv.cc/const_vtable.cc | 83 + libvtv/testsuite/libvtv.cc/dataentry.cc | 39 + libvtv/testsuite/libvtv.cc/derived-lib.cpp | 18 + libvtv/testsuite/libvtv.cc/derived-main.cpp | 18 + libvtv/testsuite/libvtv.cc/derived.list | 1 + libvtv/testsuite/libvtv.cc/dup_name.cc | 62 + libvtv/testsuite/libvtv.cc/environment.cc | 38 + libvtv/testsuite/libvtv.cc/event-defintions.cpp | 10 + libvtv/testsuite/libvtv.cc/event-main.cpp | 15 + libvtv/testsuite/libvtv.cc/event-private.cpp | 10 + libvtv/testsuite/libvtv.cc/event-private.h | 7 + libvtv/testsuite/libvtv.cc/event.h | 29 + libvtv/testsuite/libvtv.cc/event.list | 1 + libvtv/testsuite/libvtv.cc/mul_inh.cc | 27 + libvtv/testsuite/libvtv.cc/nested_vcall_test.cc | 77 + .../libvtv.cc/parts-test-extra-parts-views.cpp | 16 + .../libvtv.cc/parts-test-extra-parts-views.h | 14 + .../testsuite/libvtv.cc/parts-test-extra-parts.cpp | 15 + .../testsuite/libvtv.cc/parts-test-extra-parts.h | 13 + libvtv/testsuite/libvtv.cc/parts-test-main.cpp | 39 + libvtv/testsuite/libvtv.cc/parts-test-main.h | 15 + libvtv/testsuite/libvtv.cc/parts-test.list | 1 + libvtv/testsuite/libvtv.cc/povray-derived.cc | 74 + libvtv/testsuite/libvtv.cc/register_set_pair.cc | 101 + .../libvtv.cc/register_set_pair_inserts.cc | 106 + .../testsuite/libvtv.cc/template-list-iostream.cc | 120 + libvtv/testsuite/libvtv.cc/template-list.cc | 94 + libvtv/testsuite/libvtv.cc/template-list2.cc | 46 + libvtv/testsuite/libvtv.cc/test1.cc | 74 + libvtv/testsuite/libvtv.cc/thunk.cc | 37 + .../testsuite/libvtv.cc/thunk_vtable_map_attack.cc | 113 + libvtv/testsuite/libvtv.cc/v8-test-2.cc | 97 + libvtv/testsuite/libvtv.cc/virtfunc-test.cc | 222 + libvtv/testsuite/libvtv.cc/virtual_inheritance.cc | 48 + libvtv/testsuite/libvtv.cc/vtv.exp | 83 + libvtv/testsuite/libvtv.cc/xlan-test.cc | 185 + libvtv/testsuite/libvtv.mempool.cc/mempool.exp | 68 + .../libvtv.mempool.cc/mempool_negative.cc | 
193 + .../libvtv.mempool.cc/mempool_positive.cc | 199 + libvtv/testsuite/libvtv.mt.cc/mt.exp | 68 + .../libvtv.mt.cc/register_set_pair_inserts_mt.cc | 156 + .../testsuite/libvtv.mt.cc/register_set_pair_mt.cc | 158 + libvtv/testsuite/other-tests/Makefile.am | 52 + libvtv/testsuite/other-tests/Makefile.in | 379 + libvtv/testsuite/other-tests/README | 8 + libvtv/testsuite/other-tests/dlopen.cc | 38 + libvtv/testsuite/other-tests/dlopen_mt.cc | 112 + libvtv/testsuite/other-tests/environment-fail-32.s | 514 + libvtv/testsuite/other-tests/environment-fail-64.s | 425 + libvtv/testsuite/other-tests/field-test.cc | 94 + libvtv/testsuite/other-tests/replace-fail.cc | 11 + libvtv/testsuite/other-tests/so.cc | 93 + libvtv/testsuite/other-tests/temp_deriv.cc | 67 + libvtv/testsuite/other-tests/temp_deriv2.cc | 69 + libvtv/testsuite/other-tests/temp_deriv3.cc | 79 + libvtv/vtv_fail.cc | 233 + libvtv/vtv_fail.h | 59 + libvtv/vtv_malloc.cc | 267 + libvtv/vtv_malloc.h | 98 + libvtv/vtv_map.h | 311 + libvtv/vtv_rts.cc | 1523 ++ libvtv/vtv_rts.h | 50 + libvtv/vtv_set.h | 653 + libvtv/vtv_utils.cc | 161 + libvtv/vtv_utils.h | 63 + 1312 files changed, 97758 insertions(+), 28290 deletions(-) create mode 100644 config/bootstrap-ubsan.mk create mode 100644 gcc/c-family/c-ubsan.c create mode 100644 gcc/c-family/c-ubsan.h create mode 100644 gcc/config/arm/aarch-common-protos.h create mode 100644 gcc/config/arm/aarch-common.c create mode 100644 gcc/config/arm/types.md create mode 100644 gcc/config/i386/stringop.def create mode 100644 gcc/config/i386/stringop.opt create mode 100644 gcc/config/i386/x86-tune.def create mode 100644 gcc/config/rs6000/t-linux64bele create mode 100644 gcc/config/rs6000/t-linux64le create mode 100644 gcc/config/rs6000/t-linux64lebe create mode 100644 gcc/cp/vtable-class-hierarchy.c create mode 100644 gcc/gdbhooks.py create mode 100644 gcc/gen-pass-instances.awk create mode 100644 gcc/ipa-devirt.c create mode 100644 gcc/ipa-profile.c delete mode 100644 
gcc/lto-symtab.c create mode 100644 gcc/lto/lto-symtab.c create mode 100644 gcc/pass_manager.h create mode 100644 gcc/testsuite/c-c++-common/cilk-plus/AN/pr57490.c create mode 100644 gcc/testsuite/c-c++-common/gomp/pr58257.c create mode 100644 gcc/testsuite/c-c++-common/opaque-vector.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/const-char-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/const-expr-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/div-by-zero-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/div-by-zero-2.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/div-by-zero-3.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/div-by-zero-4.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/save-expr-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/save-expr-2.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/save-expr-3.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/save-expr-4.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/shift-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/shift-2.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/shift-3.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/typedef-1.c create mode 100644 gcc/testsuite/c-c++-common/ubsan/unreachable-1.c create mode 100644 gcc/testsuite/g++.dg/conversion/ambig2.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-33.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-33a.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-37.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-dr1286.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-dr1286a.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-ice8.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-value4.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/dc7.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/dc8.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/enum28.C create mode 100644 
gcc/testsuite/g++.dg/cpp0x/lambda/lambda-defarg5.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi-sizeof.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr57416.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr58072.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr58080.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/ref-qual14.C create mode 100644 gcc/testsuite/g++.dg/debug/ra1.C create mode 100644 gcc/testsuite/g++.dg/ext/attr-alias-3.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-13.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-14.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-15.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-16.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-17.C create mode 100644 gcc/testsuite/g++.dg/ipa/devirt-18.C create mode 100644 gcc/testsuite/g++.dg/ipa/remref-1.C create mode 100644 gcc/testsuite/g++.dg/ipa/remref-2.C create mode 100644 gcc/testsuite/g++.dg/ipa/type-inheritance-1.C create mode 100644 gcc/testsuite/g++.dg/opt/pr57661.C create mode 100644 gcc/testsuite/g++.dg/opt/pr58006.C create mode 100644 gcc/testsuite/g++.dg/opt/pr58165.C create mode 100644 gcc/testsuite/g++.dg/overload/using3.C create mode 100644 gcc/testsuite/g++.dg/parse/access11.C create mode 100644 gcc/testsuite/g++.dg/parse/crash63.C create mode 100644 gcc/testsuite/g++.dg/template/abstract1.C create mode 100644 gcc/testsuite/g++.dg/template/delete2.C create mode 100644 gcc/testsuite/g++.dg/template/error54.C create mode 100644 gcc/testsuite/g++.dg/template/using24.C create mode 100644 gcc/testsuite/g++.dg/template/using25.C create mode 100644 gcc/testsuite/g++.dg/template/using26.C create mode 100644 gcc/testsuite/g++.dg/tm/noexcept-6.C create mode 100644 gcc/testsuite/g++.dg/torture/PR58294.C create mode 100644 gcc/testsuite/g++.dg/torture/pr58201.h create mode 100644 gcc/testsuite/g++.dg/torture/pr58201_0.C create mode 100644 gcc/testsuite/g++.dg/torture/pr58201_1.C create mode 100644 
gcc/testsuite/g++.dg/tree-prof/pr57451.C create mode 100644 gcc/testsuite/g++.dg/ubsan/cxx11-shift-1.C create mode 100644 gcc/testsuite/g++.dg/ubsan/cxx11-shift-2.C create mode 100644 gcc/testsuite/g++.dg/ubsan/div-by-zero-1.C create mode 100644 gcc/testsuite/g++.dg/ubsan/ubsan.exp create mode 100644 gcc/testsuite/g++.dg/warn/deprecated-7.C create mode 100644 gcc/testsuite/g++.dg/warn/deprecated-8.C create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr58164.c create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr58340.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr56799.x create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr57860.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr57861.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr57875.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr57876.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr57877.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr58209.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr58277-1.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr58277-2.c create mode 100644 gcc/testsuite/gcc.dg/asan/pr56417.c create mode 100644 gcc/testsuite/gcc.dg/fork-instrumentation.c create mode 100644 gcc/testsuite/gcc.dg/guality/param-1.c create mode 100644 gcc/testsuite/gcc.dg/guality/param-2.c create mode 100644 gcc/testsuite/gcc.dg/ipa/pr57539.c create mode 100644 gcc/testsuite/gcc.dg/ipa/pr58106.c create mode 100644 gcc/testsuite/gcc.dg/pr57287-2.c create mode 100644 gcc/testsuite/gcc.dg/pr57662.c create mode 100644 gcc/testsuite/gcc.dg/pr57980.c create mode 100644 gcc/testsuite/gcc.dg/pr58010.c create mode 100644 gcc/testsuite/gcc.dg/pr58145-1.c create mode 100644 gcc/testsuite/gcc.dg/pr58145-2.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr57521.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr57656.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr57685.c create mode 100644 
gcc/testsuite/gcc.dg/torture/pr57993-2.cpp create mode 100644 gcc/testsuite/gcc.dg/torture/pr57993.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr58041.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr58079.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr58223.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr58228.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr58246.c create mode 100644 gcc/testsuite/gcc.dg/tree-prof/crossmodule-indircall-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-prof/crossmodule-indircall-1a.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fnsplit-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/reassoc-31.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c create mode 100644 gcc/testsuite/gcc.dg/ubsan/c99-shift-1.c create mode 100644 gcc/testsuite/gcc.dg/ubsan/c99-shift-2.c create mode 100644 gcc/testsuite/gcc.dg/ubsan/ubsan.exp create mode 100644 gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c create mode 100644 gcc/testsuite/gcc.target/arc/arc.exp create mode 100644 gcc/testsuite/gcc.target/arc/builtin_arc_aligned-1.c create mode 100644 gcc/testsuite/gcc.target/arc/builtin_arc_aligned-2.c create mode 100644 gcc/testsuite/gcc.target/arc/builtin_arc_aligned-3.c create mode 100644 gcc/testsuite/gcc.target/arc/cond-set-use.c create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-1.c create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-2.c create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-3.c create mode 100644 gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c create mode 100644 gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c create mode 100644 gcc/testsuite/gcc.target/arc/nv-cache.c create mode 100644 gcc/testsuite/gcc.target/arc/sdata-1.c create mode 100644 gcc/testsuite/gcc.target/arc/sdata-2.c create mode 100644 gcc/testsuite/gcc.target/arc/v-cache.c create mode 100644 gcc/testsuite/gcc.target/arm/ivopts-orig_biv-inc.c 
create mode 100644 gcc/testsuite/gcc.target/arm/lp1189445.c delete mode 100644 gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c create mode 100644 gcc/testsuite/gcc.target/arm/pr46975-2.c create mode 100644 gcc/testsuite/gcc.target/arm/pr57637.c create mode 100644 gcc/testsuite/gcc.target/arm/pr58041.c create mode 100644 gcc/testsuite/gcc.target/i386/memcpy-strategy-1.c create mode 100644 gcc/testsuite/gcc.target/i386/memcpy-strategy-2.c create mode 100644 gcc/testsuite/gcc.target/i386/memcpy-strategy-3.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-1.c create mode 100644 gcc/testsuite/gcc.target/i386/movabs-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr58048.c create mode 100644 gcc/testsuite/gcc.target/i386/pr58137.c create mode 100644 gcc/testsuite/gcc.target/i386/pr58218.c create mode 100644 gcc/testsuite/gcc.target/mips/fabs-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/fabs-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/fabsf-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/fabsf-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/fneg-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/fneg-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/fnegf-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/fnegf-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/nan-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/nan-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/nanf-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/nanf-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/nans-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/nans-legacy.c create mode 100644 gcc/testsuite/gcc.target/mips/nansf-2008.c create mode 100644 gcc/testsuite/gcc.target/mips/nansf-legacy.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dfp-td-2.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/dfp-td-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr57949-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr57949-2.c create mode 100644 gcc/testsuite/gcc.target/s390/nearestint-1.c create mode 100644 gcc/testsuite/gfortran.dg/bounds_check_18.f90 create mode 100644 gcc/testsuite/gfortran.dg/do_check_10.f90 create mode 100644 gcc/testsuite/gfortran.dg/do_concurrent_3.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/proc_ptr_1.f90 create mode 100644 gcc/testsuite/gfortran.dg/inline_sum_5.f90 create mode 100644 gcc/testsuite/gfortran.dg/intent_out_8.f90 create mode 100644 gcc/testsuite/gfortran.dg/pointer_assign_10.f90 create mode 100644 gcc/testsuite/gfortran.dg/pointer_assign_11.f90 create mode 100644 gcc/testsuite/gfortran.dg/pointer_assign_8.f90 create mode 100644 gcc/testsuite/gfortran.dg/pointer_assign_9.f90 create mode 100644 gcc/testsuite/gfortran.dg/pointer_init_8.f90 create mode 100644 gcc/testsuite/gfortran.dg/pr57987.f90 create mode 100644 gcc/testsuite/gfortran.dg/realloc_on_assign_19.f90 create mode 100644 gcc/testsuite/gfortran.dg/reassoc_12.f90 create mode 100644 gcc/testsuite/gfortran.dg/select_type_34.f90 create mode 100644 gcc/testsuite/gfortran.dg/transfer_intrinsic_6.f90 create mode 100644 gcc/testsuite/gfortran.dg/typebound_assignment_7.f90 create mode 100644 gcc/testsuite/gnat.dg/loop_optimization16.adb create mode 100644 gcc/testsuite/gnat.dg/loop_optimization16_pkg.adb create mode 100644 gcc/testsuite/gnat.dg/loop_optimization16_pkg.ads create mode 100644 gcc/testsuite/gnat.dg/specs/linker_alias.ads create mode 100644 gcc/testsuite/gnat.dg/stack_usage2.adb create mode 100644 gcc/testsuite/gnat.dg/valued_proc.adb create mode 100644 gcc/testsuite/gnat.dg/valued_proc_pkg.ads create mode 100644 gcc/testsuite/gnat.dg/warn10.adb create mode 100644 gcc/testsuite/gnat.dg/warn10.ads create mode 100644 
gcc/testsuite/gnat.dg/warn10_pkg.ads create mode 100644 gcc/testsuite/gnat.dg/warn9.adb create mode 100644 gcc/testsuite/lib/ubsan-dg.exp create mode 100644 gcc/tree-core.h create mode 100644 gcc/ubsan.c create mode 100644 gcc/ubsan.h create mode 100644 gcc/vtable-verify.c create mode 100644 gcc/vtable-verify.h create mode 100644 include/vtv-change-permission.h create mode 100644 libgcc/vtv_end.c create mode 100644 libgcc/vtv_end_preinit.c create mode 100644 libgcc/vtv_start.c create mode 100644 libgcc/vtv_start_preinit.c create mode 100644 libsanitizer/ubsan/Makefile.am create mode 100644 libsanitizer/ubsan/Makefile.in create mode 100644 libsanitizer/ubsan/libtool-version create mode 100644 libsanitizer/ubsan/ubsan_diag.cc create mode 100644 libsanitizer/ubsan/ubsan_diag.h create mode 100644 libsanitizer/ubsan/ubsan_handlers.cc create mode 100644 libsanitizer/ubsan/ubsan_handlers.h create mode 100644 libsanitizer/ubsan/ubsan_handlers_cxx.cc create mode 100644 libsanitizer/ubsan/ubsan_handlers_cxx.h create mode 100644 libsanitizer/ubsan/ubsan_type_hash.cc create mode 100644 libsanitizer/ubsan/ubsan_type_hash.h create mode 100644 libsanitizer/ubsan/ubsan_value.cc create mode 100644 libsanitizer/ubsan/ubsan_value.h create mode 100644 libstdc++-v3/include/bits/regex_automaton.h create mode 100644 libstdc++-v3/include/bits/regex_automaton.tcc create mode 100644 libstdc++-v3/include/bits/regex_compiler.tcc delete mode 100644 libstdc++-v3/include/bits/regex_cursor.h create mode 100644 libstdc++-v3/include/bits/regex_executor.h create mode 100644 libstdc++-v3/include/bits/regex_executor.tcc delete mode 100644 libstdc++-v3/include/bits/regex_grep_matcher.h delete mode 100644 libstdc++-v3/include/bits/regex_grep_matcher.tcc delete mode 100644 libstdc++-v3/include/bits/regex_nfa.h delete mode 100644 libstdc++-v3/include/bits/regex_nfa.tcc create mode 100644 libstdc++-v3/include/bits/regex_scanner.h create mode 100644 libstdc++-v3/include/bits/regex_scanner.tcc create mode 
100644 libstdc++-v3/libsupc++/vtv_stubs.cc create mode 100644 libstdc++-v3/testsuite/21_strings/basic_string/element_access/char/58163.cc create mode 100644 libstdc++-v3/testsuite/21_strings/basic_string/element_access/wchar_t/58163.cc create mode 100644 libstdc++-v3/testsuite/23_containers/deque/debug/insert5_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/forward_list/debug/insert_after4_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/list/debug/insert5_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/vector/debug/57779_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/vector/debug/insert5_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/vector/debug/insert6_neg.cc create mode 100644 libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/58148.cc create mode 100644 libstdc++-v3/testsuite/26_numerics/random/negative_binomial_distribution/operators/58302.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc create mode 100644 
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_range.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc create mode 100644 libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc create mode 100644 libstdc++-v3/testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc create mode 100644 libstdc++-v3/testsuite/29_atomics/atomic/requirements/compare_exchange_lowering.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/cons/default.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/cons/parms.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/operators/equal.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/operators/inequal.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/operators/serialize.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/requirements/explicit_instantiation/1.cc create mode 100644 libstdc++-v3/testsuite/ext/random/triangular_distribution/requirements/typedefs.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/cons/default.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/cons/parms.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/operators/equal.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/operators/inequal.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/operators/serialize.cc create mode 100644 
libstdc++-v3/testsuite/ext/random/von_mises_distribution/requirements/explicit_instantiation/1.cc create mode 100644 libstdc++-v3/testsuite/ext/random/von_mises_distribution/requirements/typedefs.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/cons/default.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/cons/parms.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/operators/equal.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/operators/inequal.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/operators/serialize.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/requirements/explicit_instantiation/1.cc delete mode 100644 libstdc++-v3/testsuite/ext/triangular_distribution/requirements/typedefs.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/cons/default.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/cons/parms.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/operators/equal.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/operators/inequal.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/operators/serialize.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/requirements/explicit_instantiation/1.cc delete mode 100644 libstdc++-v3/testsuite/ext/von_mises_distribution/requirements/typedefs.cc create mode 100644 libstdc++-v3/testsuite/ext/vstring/element_access/char/58163.cc create mode 100644 libstdc++-v3/testsuite/ext/vstring/element_access/wchar_t/58163.cc create mode 100644 libstdc++-v3/testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc create mode 100644 libvtv/ChangeLog create mode 100644 libvtv/Makefile.am create mode 100644 libvtv/Makefile.in create mode 100644 libvtv/acinclude.m4 create mode 100644 libvtv/aclocal.m4 create mode 100755 libvtv/configure create mode 100644 
libvtv/configure.ac create mode 100644 libvtv/configure.tgt create mode 100644 libvtv/scripts/run-testsuite.sh create mode 100644 libvtv/scripts/sum-vtv-counts.c create mode 100644 libvtv/testsuite/Makefile.am create mode 100644 libvtv/testsuite/Makefile.in create mode 100644 libvtv/testsuite/config/default.exp create mode 100644 libvtv/testsuite/lib/libvtv-dg.exp create mode 100644 libvtv/testsuite/lib/libvtv.exp create mode 100644 libvtv/testsuite/libvtv.cc/bb_tests.cc create mode 100644 libvtv/testsuite/libvtv.cc/const_vtable.cc create mode 100644 libvtv/testsuite/libvtv.cc/dataentry.cc create mode 100644 libvtv/testsuite/libvtv.cc/derived-lib.cpp create mode 100644 libvtv/testsuite/libvtv.cc/derived-main.cpp create mode 100644 libvtv/testsuite/libvtv.cc/derived.list create mode 100644 libvtv/testsuite/libvtv.cc/dup_name.cc create mode 100644 libvtv/testsuite/libvtv.cc/environment.cc create mode 100644 libvtv/testsuite/libvtv.cc/event-defintions.cpp create mode 100644 libvtv/testsuite/libvtv.cc/event-main.cpp create mode 100644 libvtv/testsuite/libvtv.cc/event-private.cpp create mode 100644 libvtv/testsuite/libvtv.cc/event-private.h create mode 100644 libvtv/testsuite/libvtv.cc/event.h create mode 100644 libvtv/testsuite/libvtv.cc/event.list create mode 100644 libvtv/testsuite/libvtv.cc/mul_inh.cc create mode 100644 libvtv/testsuite/libvtv.cc/nested_vcall_test.cc create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-extra-parts-views.cpp create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-extra-parts-views.h create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-extra-parts.cpp create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-extra-parts.h create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-main.cpp create mode 100644 libvtv/testsuite/libvtv.cc/parts-test-main.h create mode 100644 libvtv/testsuite/libvtv.cc/parts-test.list create mode 100644 libvtv/testsuite/libvtv.cc/povray-derived.cc create mode 100644 
libvtv/testsuite/libvtv.cc/register_set_pair.cc create mode 100644 libvtv/testsuite/libvtv.cc/register_set_pair_inserts.cc create mode 100644 libvtv/testsuite/libvtv.cc/template-list-iostream.cc create mode 100644 libvtv/testsuite/libvtv.cc/template-list.cc create mode 100644 libvtv/testsuite/libvtv.cc/template-list2.cc create mode 100644 libvtv/testsuite/libvtv.cc/test1.cc create mode 100644 libvtv/testsuite/libvtv.cc/thunk.cc create mode 100644 libvtv/testsuite/libvtv.cc/thunk_vtable_map_attack.cc create mode 100644 libvtv/testsuite/libvtv.cc/v8-test-2.cc create mode 100644 libvtv/testsuite/libvtv.cc/virtfunc-test.cc create mode 100644 libvtv/testsuite/libvtv.cc/virtual_inheritance.cc create mode 100644 libvtv/testsuite/libvtv.cc/vtv.exp create mode 100644 libvtv/testsuite/libvtv.cc/xlan-test.cc create mode 100644 libvtv/testsuite/libvtv.mempool.cc/mempool.exp create mode 100644 libvtv/testsuite/libvtv.mempool.cc/mempool_negative.cc create mode 100644 libvtv/testsuite/libvtv.mempool.cc/mempool_positive.cc create mode 100644 libvtv/testsuite/libvtv.mt.cc/mt.exp create mode 100644 libvtv/testsuite/libvtv.mt.cc/register_set_pair_inserts_mt.cc create mode 100644 libvtv/testsuite/libvtv.mt.cc/register_set_pair_mt.cc create mode 100644 libvtv/testsuite/other-tests/Makefile.am create mode 100644 libvtv/testsuite/other-tests/Makefile.in create mode 100644 libvtv/testsuite/other-tests/README create mode 100644 libvtv/testsuite/other-tests/dlopen.cc create mode 100644 libvtv/testsuite/other-tests/dlopen_mt.cc create mode 100644 libvtv/testsuite/other-tests/environment-fail-32.s create mode 100644 libvtv/testsuite/other-tests/environment-fail-64.s create mode 100644 libvtv/testsuite/other-tests/field-test.cc create mode 100644 libvtv/testsuite/other-tests/replace-fail.cc create mode 100644 libvtv/testsuite/other-tests/so.cc create mode 100644 libvtv/testsuite/other-tests/temp_deriv.cc create mode 100644 libvtv/testsuite/other-tests/temp_deriv2.cc create mode 100644 
libvtv/testsuite/other-tests/temp_deriv3.cc create mode 100644 libvtv/vtv_fail.cc create mode 100644 libvtv/vtv_fail.h create mode 100644 libvtv/vtv_malloc.cc create mode 100644 libvtv/vtv_malloc.h create mode 100644 libvtv/vtv_map.h create mode 100644 libvtv/vtv_rts.cc create mode 100644 libvtv/vtv_rts.h create mode 100644 libvtv/vtv_set.h create mode 100644 libvtv/vtv_utils.cc create mode 100644 libvtv/vtv_utils.h diff --git a/ChangeLog b/ChangeLog index 712c2c2016d..1c74171eb2e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,57 @@ +2013-09-03 Richard Biener + + * configure.ac: Also allow ISL 0.12. + * configure: Regenerated. + +2013-08-30 Bernd Edlinger + + * MAINTAINERS (Write After Approval): Add myself. + +2013-08-27 David Malcolm + + * MAINTAINERS (gdbhooks.py): Add myself as maintainer. + +2013-08-26 Caroline Tice + + * MAINTAINERS: Correct earliers update: Move myself from libvtv + "Various Reviewers" to libvtv "Various Maintainers". + +2013-08-20 Steven Bosscher + + * MAINTAINERS: Add myself as RTL optimizers reviewer. + +2013-08-19 Benjamin De Kosnik + + * MAINTAINERS: Update name, email. + +2013-08-13 Adam Butcher + + * MAINTAINERS (Write After Approval): Add myself. + +2013-08-12 Caroline Tice + + * MAINTAINERS: Add myself as libvtv maintainer. Correct my email + address in the Write After Approval section. + +2013-08-09 Carlos O'Donell + + * MAINTAINERS (Write After Approval): Update email. + +2013-08-08 Benjamin Kosnik + + * configure.ac: Adjust to check VTV_SUPPORTED. + * configure: Regenerated. + +2013-08-02 Caroline Tice + + * configure.ac: Add target-libvtv to target_libraries; disable libvtv + on non-linux systems; add target-libvtv to noconfigdirs; add + libsupc++/.libs to C++ library search paths. + * configure: Regenerated. + * Makefile.def: Add libvtv to target_modules; make libvtv depend on + libstdc++ and libgcc. + * Makefile.in: Regenerated. + 2013-07-19 Yvan Roux * MAINTAINERS (Write After Approval): Add myself. 
@@ -17,12 +71,6 @@ * configure.ac: Sync from binutils. * configure: Regenerate. -2013-07-10 Jack Howarth - - PR target/57792 - * configure.ac: Use --with-sysroot=\"`xcrun --show-sdk-path`\" on darwin13 and later. - * configure: Regenerated. - 2013-06-14 Vidya Praveen * MAINTAINERS (Write After Approval): Add myself. diff --git a/ChangeLog.MELT b/ChangeLog.MELT index 72f26a4fcc5..f09fbf0569b 100644 --- a/ChangeLog.MELT +++ b/ChangeLog.MELT @@ -1,4 +1,10 @@ +2013-09-09 Basile Starynkevitch + + MELT branch merged with trunk rev 202389 using svnmerge.py; notice + that gcc/melt/xtramelt-ana-base.melt has been significantly + updated, but some updates are yet missing... + 2013-07-29 Basile Starynkevitch MELT branch merged with trunk rev 201298 using svnmerge.py diff --git a/MAINTAINERS b/MAINTAINERS index ba0234c4151..cc89c2ce01d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -178,6 +178,7 @@ libobjc Nicola Pero nicola.pero@meta-innovation.com libobjc Andrew Pinski pinskia@gmail.com libquadmath Tobias Burnus burnus@net-b.de libquadmath Jakub Jelinek jakub@redhat.com +libvtv Caroline Tice cmtice@google.com loop discovery Michael Hayes m.hayes@elec.canterbury.ac.nz soft-fp Joseph Myers joseph@codesourcery.com scheduler (+ haifa) Jim Wilson wilson@tuliptree.org @@ -194,7 +195,7 @@ dwarf debugging code Cary Coutant ccoutant@google.com c++ runtime libs Paolo Carlini paolo.carlini@oracle.com c++ runtime libs Gabriel Dos Reis gdr@integrable-solutions.net c++ runtime libs Ulrich Drepper drepper@gmail.com -c++ runtime libs Benjamin Kosnik bkoz@redhat.com +c++ runtime libs Benjamin De Kosnik bkoz@gnu.org c++ runtime libs Loren J. 
Rittle ljrittle@acm.org c++ runtime libs Jonathan Wakely redi@gcc.gnu.org *synthetic multiply Torbjorn Granlund tege@swox.com @@ -256,6 +257,7 @@ testsuite Rainer Orth ro@CeBiTec.Uni-Bielefeld.DE testsuite Mike Stump mikestump@comcast.net testsuite Janis Johnson janisjo@codesourcery.com register allocation Vladimir Makarov vmakarov@redhat.com +gdbhooks.py David Malcolm dmalcolm@redhat.com Note that individuals who maintain parts of the compiler need approval to check in changes outside of the parts of the compiler they maintain. @@ -298,6 +300,7 @@ Plugin Le-Chun Wu lcwu@google.com register allocation Peter Bergner bergner@vnet.ibm.com register allocation Kenneth Zadeck zadeck@naturalbridge.com register allocation Seongbae Park seongbae.park@gmail.com +RTL optimizers Steven Bosscher steven@gcc.gnu.org selective scheduling Andrey Belevantsev abel@ispras.ru Note that while reviewers can approve changes to parts of the compiler @@ -329,13 +332,13 @@ Ian Bolton ian.bolton@arm.com Andrea Bona andrea.bona@st.com Paolo Bonzini bonzini@gnu.org Neil Booth neil@daikokuya.co.uk -Steven Bosscher steven@gcc.gnu.org Robert Bowdidge bowdidge@apple.com Joel Brobecker brobecker@gnat.com Dave Brolley brolley@redhat.com Julian Brown julian@codesourcery.com Christian Bruel christian.bruel@st.com Kevin Buettner kevinb@redhat.com +Adam Butcher adam@jessamine.co.uk Andrew Cagney cagney@redhat.com Daniel Carrera dcarrera@gmail.com Stephane Carrez stcarrez@nerim.fr @@ -359,6 +362,7 @@ Sameera Deshpande sameera.deshpande@arm.com François Dumont fdumont@gcc.gnu.org Benoit Dupont de Dinechin benoit.dupont-de-dinechin@st.com Michael Eager eager@eagercon.com +Bernd Edlinger bernd.edlinger@hotmail.de Phil Edwards pme@gcc.gnu.org Mohan Embar gnustuff@thisiscool.com Oleg Endo olegendo@gcc.gnu.org @@ -478,7 +482,7 @@ Thomas Neumann tneumann@users.sourceforge.net Dan Nicolaescu dann@ics.uci.edu Dorit Nuzman dorit@il.ibm.com David O'Brien obrien@FreeBSD.org -Carlos O'Donell carlos@codesourcery.com 
+Carlos O'Donell carlos@redhat.com Peter O'Gorman pogma@thewrittenword.com Andrea Ornstein andrea.ornstein@st.com Seongbae Park seongbae.park@gmail.com @@ -537,7 +541,7 @@ Chung-Lin Tang cltang@codesourcery.com Samuel Tardieu sam@rfc1149.net Dinar Temirbulatov dinar@kugelworks.com Kresten Krab Thorup krab@gcc.gnu.org -Caroline Tice ctice@apple.com +Caroline Tice cmtice@google.com Kyrylo Tkachov kyrylo.tkachov@arm.com Konrad Trifunovic konrad.trifunovic@inria.fr David Ung davidu@mips.com diff --git a/Makefile.def b/Makefile.def index 90d9653d530..3ba1a5b9a3a 100644 --- a/Makefile.def +++ b/Makefile.def @@ -4,8 +4,7 @@ AutoGen definitions Makefile.tpl; // Makefile.in is generated from Makefile.tpl by 'autogen Makefile.def'. // This file was originally written by Nathanael Nerode. // -// Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 -// Free Software Foundation +// Copyright 2002-2013 Free Software Foundation // // This file is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -123,6 +122,10 @@ target_modules = { module= libsanitizer; bootstrap=true; lib_path=.libs; raw_cxx=true; }; +target_modules = { module= libvtv; + bootstrap=true; + lib_path=.libs; + raw_cxx=true; }; target_modules = { module= libssp; lib_path=.libs; }; target_modules = { module= newlib; }; target_modules = { module= libgcc; bootstrap=true; no_check=true; }; @@ -516,6 +519,7 @@ dependencies = { module=configure-target-libobjc; on=configure-target-boehm-gc; dependencies = { module=all-target-libobjc; on=all-target-boehm-gc; }; dependencies = { module=configure-target-libstdc++-v3; on=configure-target-libgomp; }; dependencies = { module=configure-target-libsanitizer; on=all-target-libstdc++-v3; }; +dependencies = { module=configure-target-libvtv; on=all-target-libstdc++-v3; }; // parallel_list.o and parallel_settings.o depend on omp.h, which is // generated by the libgomp configure. 
Unfortunately, due to the use of // recursive make, we can't be that specific. @@ -526,6 +530,8 @@ dependencies = { module=install-target-libgfortran; on=install-target-libquadmat dependencies = { module=install-target-libgfortran; on=install-target-libgcc; }; dependencies = { module=install-target-libsanitizer; on=install-target-libstdc++-v3; }; dependencies = { module=install-target-libsanitizer; on=install-target-libgcc; }; +dependencies = { module=install-target-libvtv; on=install-target-libstdc++-v3; }; +dependencies = { module=install-target-libvtv; on=install-target-libgcc; }; dependencies = { module=install-target-libjava; on=install-target-libgcc; }; dependencies = { module=install-target-libitm; on=install-target-libgcc; }; dependencies = { module=install-target-libobjc; on=install-target-libgcc; }; diff --git a/Makefile.in b/Makefile.in index bfbaf03417a..a13771d40eb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -575,7 +575,7 @@ all: # This is the list of directories that may be needed in RPATH_ENVVAR # so that programs built for the target machine work. 
-TARGET_LIB_PATH = $(TARGET_LIB_PATH_libstdc++-v3)$(TARGET_LIB_PATH_libmudflap)$(TARGET_LIB_PATH_libsanitizer)$(TARGET_LIB_PATH_libssp)$(TARGET_LIB_PATH_libgomp)$(TARGET_LIB_PATH_libitm)$(TARGET_LIB_PATH_libatomic)$(HOST_LIB_PATH_gcc) +TARGET_LIB_PATH = $(TARGET_LIB_PATH_libstdc++-v3)$(TARGET_LIB_PATH_libmudflap)$(TARGET_LIB_PATH_libsanitizer)$(TARGET_LIB_PATH_libvtv)$(TARGET_LIB_PATH_libssp)$(TARGET_LIB_PATH_libgomp)$(TARGET_LIB_PATH_libitm)$(TARGET_LIB_PATH_libatomic)$(HOST_LIB_PATH_gcc) @if target-libstdc++-v3 TARGET_LIB_PATH_libstdc++-v3 = $$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs: @@ -589,6 +589,10 @@ TARGET_LIB_PATH_libmudflap = $$r/$(TARGET_SUBDIR)/libmudflap/.libs: TARGET_LIB_PATH_libsanitizer = $$r/$(TARGET_SUBDIR)/libsanitizer/.libs: @endif target-libsanitizer +@if target-libvtv +TARGET_LIB_PATH_libvtv = $$r/$(TARGET_SUBDIR)/libvtv/.libs: +@endif target-libvtv + @if target-libssp TARGET_LIB_PATH_libssp = $$r/$(TARGET_SUBDIR)/libssp/.libs: @endif target-libssp @@ -925,6 +929,7 @@ configure-target: \ maybe-configure-target-libstdc++-v3 \ maybe-configure-target-libmudflap \ maybe-configure-target-libsanitizer \ + maybe-configure-target-libvtv \ maybe-configure-target-libssp \ maybe-configure-target-newlib \ maybe-configure-target-libgcc \ @@ -1076,6 +1081,9 @@ all-target: maybe-all-target-libmudflap @if target-libsanitizer-no-bootstrap all-target: maybe-all-target-libsanitizer @endif target-libsanitizer-no-bootstrap +@if target-libvtv-no-bootstrap +all-target: maybe-all-target-libvtv +@endif target-libvtv-no-bootstrap all-target: maybe-all-target-libssp all-target: maybe-all-target-newlib @if target-libgcc-no-bootstrap @@ -1167,6 +1175,7 @@ info-host: maybe-info-lto-plugin info-target: maybe-info-target-libstdc++-v3 info-target: maybe-info-target-libmudflap info-target: maybe-info-target-libsanitizer +info-target: maybe-info-target-libvtv info-target: maybe-info-target-libssp info-target: maybe-info-target-newlib info-target: maybe-info-target-libgcc @@ 
-1249,6 +1258,7 @@ dvi-host: maybe-dvi-lto-plugin dvi-target: maybe-dvi-target-libstdc++-v3 dvi-target: maybe-dvi-target-libmudflap dvi-target: maybe-dvi-target-libsanitizer +dvi-target: maybe-dvi-target-libvtv dvi-target: maybe-dvi-target-libssp dvi-target: maybe-dvi-target-newlib dvi-target: maybe-dvi-target-libgcc @@ -1331,6 +1341,7 @@ pdf-host: maybe-pdf-lto-plugin pdf-target: maybe-pdf-target-libstdc++-v3 pdf-target: maybe-pdf-target-libmudflap pdf-target: maybe-pdf-target-libsanitizer +pdf-target: maybe-pdf-target-libvtv pdf-target: maybe-pdf-target-libssp pdf-target: maybe-pdf-target-newlib pdf-target: maybe-pdf-target-libgcc @@ -1413,6 +1424,7 @@ html-host: maybe-html-lto-plugin html-target: maybe-html-target-libstdc++-v3 html-target: maybe-html-target-libmudflap html-target: maybe-html-target-libsanitizer +html-target: maybe-html-target-libvtv html-target: maybe-html-target-libssp html-target: maybe-html-target-newlib html-target: maybe-html-target-libgcc @@ -1495,6 +1507,7 @@ TAGS-host: maybe-TAGS-lto-plugin TAGS-target: maybe-TAGS-target-libstdc++-v3 TAGS-target: maybe-TAGS-target-libmudflap TAGS-target: maybe-TAGS-target-libsanitizer +TAGS-target: maybe-TAGS-target-libvtv TAGS-target: maybe-TAGS-target-libssp TAGS-target: maybe-TAGS-target-newlib TAGS-target: maybe-TAGS-target-libgcc @@ -1577,6 +1590,7 @@ install-info-host: maybe-install-info-lto-plugin install-info-target: maybe-install-info-target-libstdc++-v3 install-info-target: maybe-install-info-target-libmudflap install-info-target: maybe-install-info-target-libsanitizer +install-info-target: maybe-install-info-target-libvtv install-info-target: maybe-install-info-target-libssp install-info-target: maybe-install-info-target-newlib install-info-target: maybe-install-info-target-libgcc @@ -1659,6 +1673,7 @@ install-pdf-host: maybe-install-pdf-lto-plugin install-pdf-target: maybe-install-pdf-target-libstdc++-v3 install-pdf-target: maybe-install-pdf-target-libmudflap install-pdf-target: 
maybe-install-pdf-target-libsanitizer +install-pdf-target: maybe-install-pdf-target-libvtv install-pdf-target: maybe-install-pdf-target-libssp install-pdf-target: maybe-install-pdf-target-newlib install-pdf-target: maybe-install-pdf-target-libgcc @@ -1741,6 +1756,7 @@ install-html-host: maybe-install-html-lto-plugin install-html-target: maybe-install-html-target-libstdc++-v3 install-html-target: maybe-install-html-target-libmudflap install-html-target: maybe-install-html-target-libsanitizer +install-html-target: maybe-install-html-target-libvtv install-html-target: maybe-install-html-target-libssp install-html-target: maybe-install-html-target-newlib install-html-target: maybe-install-html-target-libgcc @@ -1823,6 +1839,7 @@ installcheck-host: maybe-installcheck-lto-plugin installcheck-target: maybe-installcheck-target-libstdc++-v3 installcheck-target: maybe-installcheck-target-libmudflap installcheck-target: maybe-installcheck-target-libsanitizer +installcheck-target: maybe-installcheck-target-libvtv installcheck-target: maybe-installcheck-target-libssp installcheck-target: maybe-installcheck-target-newlib installcheck-target: maybe-installcheck-target-libgcc @@ -1905,6 +1922,7 @@ mostlyclean-host: maybe-mostlyclean-lto-plugin mostlyclean-target: maybe-mostlyclean-target-libstdc++-v3 mostlyclean-target: maybe-mostlyclean-target-libmudflap mostlyclean-target: maybe-mostlyclean-target-libsanitizer +mostlyclean-target: maybe-mostlyclean-target-libvtv mostlyclean-target: maybe-mostlyclean-target-libssp mostlyclean-target: maybe-mostlyclean-target-newlib mostlyclean-target: maybe-mostlyclean-target-libgcc @@ -1987,6 +2005,7 @@ clean-host: maybe-clean-lto-plugin clean-target: maybe-clean-target-libstdc++-v3 clean-target: maybe-clean-target-libmudflap clean-target: maybe-clean-target-libsanitizer +clean-target: maybe-clean-target-libvtv clean-target: maybe-clean-target-libssp clean-target: maybe-clean-target-newlib clean-target: maybe-clean-target-libgcc @@ -2069,6 
+2088,7 @@ distclean-host: maybe-distclean-lto-plugin distclean-target: maybe-distclean-target-libstdc++-v3 distclean-target: maybe-distclean-target-libmudflap distclean-target: maybe-distclean-target-libsanitizer +distclean-target: maybe-distclean-target-libvtv distclean-target: maybe-distclean-target-libssp distclean-target: maybe-distclean-target-newlib distclean-target: maybe-distclean-target-libgcc @@ -2151,6 +2171,7 @@ maintainer-clean-host: maybe-maintainer-clean-lto-plugin maintainer-clean-target: maybe-maintainer-clean-target-libstdc++-v3 maintainer-clean-target: maybe-maintainer-clean-target-libmudflap maintainer-clean-target: maybe-maintainer-clean-target-libsanitizer +maintainer-clean-target: maybe-maintainer-clean-target-libvtv maintainer-clean-target: maybe-maintainer-clean-target-libssp maintainer-clean-target: maybe-maintainer-clean-target-newlib maintainer-clean-target: maybe-maintainer-clean-target-libgcc @@ -2288,6 +2309,7 @@ check-target: \ maybe-check-target-libstdc++-v3 \ maybe-check-target-libmudflap \ maybe-check-target-libsanitizer \ + maybe-check-target-libvtv \ maybe-check-target-libssp \ maybe-check-target-newlib \ maybe-check-target-libgcc \ @@ -2443,6 +2465,7 @@ install-target: \ maybe-install-target-libstdc++-v3 \ maybe-install-target-libmudflap \ maybe-install-target-libsanitizer \ + maybe-install-target-libvtv \ maybe-install-target-libssp \ maybe-install-target-newlib \ maybe-install-target-libgcc \ @@ -2545,6 +2568,7 @@ install-strip-target: \ maybe-install-strip-target-libstdc++-v3 \ maybe-install-strip-target-libmudflap \ maybe-install-strip-target-libsanitizer \ + maybe-install-strip-target-libvtv \ maybe-install-strip-target-libssp \ maybe-install-strip-target-newlib \ maybe-install-strip-target-libgcc \ @@ -33152,6 +33176,980 @@ maintainer-clean-target-libsanitizer: +.PHONY: configure-target-libvtv maybe-configure-target-libvtv +maybe-configure-target-libvtv: +@if gcc-bootstrap +configure-target-libvtv: stage_current +@endif 
gcc-bootstrap +@if target-libvtv +maybe-configure-target-libvtv: configure-target-libvtv +configure-target-libvtv: + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + echo "Checking multilib configuration for libvtv..."; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo Configuring in $(TARGET_SUBDIR)/libvtv; \ + cd "$(TARGET_SUBDIR)/libvtv" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + rm -f no-such-file || : ; \ + CONFIG_SITE=no-such-file $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + || exit 1 +@endif target-libvtv + + + +.PHONY: configure-stage1-target-libvtv maybe-configure-stage1-target-libvtv +maybe-configure-stage1-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stage1-target-libvtv: configure-stage1-target-libvtv +configure-stage1-target-libvtv: + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export 
r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage 1 in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + $(STAGE1_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + +.PHONY: configure-stage2-target-libvtv maybe-configure-stage2-target-libvtv +maybe-configure-stage2-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stage2-target-libvtv: configure-stage2-target-libvtv +configure-stage2-target-libvtv: + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); 
${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage 2 in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE2_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + +.PHONY: configure-stage3-target-libvtv maybe-configure-stage3-target-libvtv +maybe-configure-stage3-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stage3-target-libvtv: configure-stage3-target-libvtv +configure-stage3-target-libvtv: + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export r; \ + s=`cd 
$(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage 3 in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE3_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + +.PHONY: configure-stage4-target-libvtv maybe-configure-stage4-target-libvtv +maybe-configure-stage4-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stage4-target-libvtv: configure-stage4-target-libvtv +configure-stage4-target-libvtv: + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export r; \ + 
s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage 4 in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE4_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + +.PHONY: configure-stageprofile-target-libvtv maybe-configure-stageprofile-target-libvtv +maybe-configure-stageprofile-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stageprofile-target-libvtv: configure-stageprofile-target-libvtv +configure-stageprofile-target-libvtv: + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @$(SHELL) $(srcdir)/mkinstalldirs 
$(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage profile in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEprofile_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + +.PHONY: configure-stagefeedback-target-libvtv maybe-configure-stagefeedback-target-libvtv +maybe-configure-stagefeedback-target-libvtv: +@if target-libvtv-bootstrap +maybe-configure-stagefeedback-target-libvtv: configure-stagefeedback-target-libvtv +configure-stagefeedback-target-libvtv: + @[ $(current_stage) = 
stagefeedback ] || $(MAKE) stagefeedback-start + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ + echo "Checking multilib configuration for libvtv..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libvtv/multilib.tmp 2> /dev/null ; \ + if test -r $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libvtv/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libvtv/Makefile; \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libvtv/multilib.tmp $(TARGET_SUBDIR)/libvtv/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libvtv/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ + echo Configuring stage feedback in $(TARGET_SUBDIR)/libvtv ; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libvtv ; \ + cd $(TARGET_SUBDIR)/libvtv || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libvtv/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + srcdiroption="--srcdir=$${topdir}/libvtv"; \ + libsrcdir="$$s/libvtv"; \ + $(SHELL) $${libsrcdir}/configure \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} $${srcdiroption} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEfeedback_CONFIGURE_FLAGS) +@endif target-libvtv-bootstrap + + + + + +.PHONY: all-target-libvtv maybe-all-target-libvtv +maybe-all-target-libvtv: +@if gcc-bootstrap +all-target-libvtv: stage_current +@endif gcc-bootstrap +@if target-libvtv +TARGET-target-libvtv=all +maybe-all-target-libvtv: 
all-target-libvtv +all-target-libvtv: configure-target-libvtv + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(TARGET-target-libvtv)) +@endif target-libvtv + + + +.PHONY: all-stage1-target-libvtv maybe-all-stage1-target-libvtv +.PHONY: clean-stage1-target-libvtv maybe-clean-stage1-target-libvtv +maybe-all-stage1-target-libvtv: +maybe-clean-stage1-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stage1-target-libvtv: all-stage1-target-libvtv +all-stage1: all-stage1-target-libvtv +TARGET-stage1-target-libvtv = $(TARGET-target-libvtv) +all-stage1-target-libvtv: configure-stage1-target-libvtv + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + \ + TFLAGS="$(STAGE1_TFLAGS)" \ + $(TARGET-stage1-target-libvtv) + +maybe-clean-stage1-target-libvtv: clean-stage1-target-libvtv +clean-stage1: clean-stage1-target-libvtv +clean-stage1-target-libvtv: + @if [ $(current_stage) = stage1 ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stage1-libvtv/Makefile ] || exit 0; \ + $(MAKE) stage1-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + 
clean +@endif target-libvtv-bootstrap + + +.PHONY: all-stage2-target-libvtv maybe-all-stage2-target-libvtv +.PHONY: clean-stage2-target-libvtv maybe-clean-stage2-target-libvtv +maybe-all-stage2-target-libvtv: +maybe-clean-stage2-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stage2-target-libvtv: all-stage2-target-libvtv +all-stage2: all-stage2-target-libvtv +TARGET-stage2-target-libvtv = $(TARGET-target-libvtv) +all-stage2-target-libvtv: configure-stage2-target-libvtv + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + TFLAGS="$(STAGE2_TFLAGS)" \ + $(TARGET-stage2-target-libvtv) + +maybe-clean-stage2-target-libvtv: clean-stage2-target-libvtv +clean-stage2: clean-stage2-target-libvtv +clean-stage2-target-libvtv: + @if [ $(current_stage) = stage2 ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stage2-libvtv/Makefile ] || exit 0; \ + $(MAKE) stage2-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libvtv-bootstrap + + +.PHONY: all-stage3-target-libvtv maybe-all-stage3-target-libvtv +.PHONY: clean-stage3-target-libvtv maybe-clean-stage3-target-libvtv +maybe-all-stage3-target-libvtv: +maybe-clean-stage3-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stage3-target-libvtv: all-stage3-target-libvtv +all-stage3: 
all-stage3-target-libvtv +TARGET-stage3-target-libvtv = $(TARGET-target-libvtv) +all-stage3-target-libvtv: configure-stage3-target-libvtv + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + TFLAGS="$(STAGE3_TFLAGS)" \ + $(TARGET-stage3-target-libvtv) + +maybe-clean-stage3-target-libvtv: clean-stage3-target-libvtv +clean-stage3: clean-stage3-target-libvtv +clean-stage3-target-libvtv: + @if [ $(current_stage) = stage3 ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stage3-libvtv/Makefile ] || exit 0; \ + $(MAKE) stage3-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libvtv-bootstrap + + +.PHONY: all-stage4-target-libvtv maybe-all-stage4-target-libvtv +.PHONY: clean-stage4-target-libvtv maybe-clean-stage4-target-libvtv +maybe-all-stage4-target-libvtv: +maybe-clean-stage4-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stage4-target-libvtv: all-stage4-target-libvtv +all-stage4: all-stage4-target-libvtv +TARGET-stage4-target-libvtv = $(TARGET-target-libvtv) +all-stage4-target-libvtv: configure-stage4-target-libvtv + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + cd 
$(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + TFLAGS="$(STAGE4_TFLAGS)" \ + $(TARGET-stage4-target-libvtv) + +maybe-clean-stage4-target-libvtv: clean-stage4-target-libvtv +clean-stage4: clean-stage4-target-libvtv +clean-stage4-target-libvtv: + @if [ $(current_stage) = stage4 ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stage4-libvtv/Makefile ] || exit 0; \ + $(MAKE) stage4-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libvtv-bootstrap + + +.PHONY: all-stageprofile-target-libvtv maybe-all-stageprofile-target-libvtv +.PHONY: clean-stageprofile-target-libvtv maybe-clean-stageprofile-target-libvtv +maybe-all-stageprofile-target-libvtv: +maybe-clean-stageprofile-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stageprofile-target-libvtv: all-stageprofile-target-libvtv +all-stageprofile: all-stageprofile-target-libvtv +TARGET-stageprofile-target-libvtv = $(TARGET-target-libvtv) +all-stageprofile-target-libvtv: configure-stageprofile-target-libvtv + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + 
CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + TFLAGS="$(STAGEprofile_TFLAGS)" \ + $(TARGET-stageprofile-target-libvtv) + +maybe-clean-stageprofile-target-libvtv: clean-stageprofile-target-libvtv +clean-stageprofile: clean-stageprofile-target-libvtv +clean-stageprofile-target-libvtv: + @if [ $(current_stage) = stageprofile ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stageprofile-libvtv/Makefile ] || exit 0; \ + $(MAKE) stageprofile-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libvtv-bootstrap + + +.PHONY: all-stagefeedback-target-libvtv maybe-all-stagefeedback-target-libvtv +.PHONY: clean-stagefeedback-target-libvtv maybe-clean-stagefeedback-target-libvtv +maybe-all-stagefeedback-target-libvtv: +maybe-clean-stagefeedback-target-libvtv: +@if target-libvtv-bootstrap +maybe-all-stagefeedback-target-libvtv: all-stagefeedback-target-libvtv +all-stagefeedback: all-stagefeedback-target-libvtv +TARGET-stagefeedback-target-libvtv = $(TARGET-target-libvtv) +all-stagefeedback-target-libvtv: configure-stagefeedback-target-libvtv + @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 
'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + TFLAGS="$(STAGEfeedback_TFLAGS)" \ + $(TARGET-stagefeedback-target-libvtv) + +maybe-clean-stagefeedback-target-libvtv: clean-stagefeedback-target-libvtv +clean-stagefeedback: clean-stagefeedback-target-libvtv +clean-stagefeedback-target-libvtv: + @if [ $(current_stage) = stagefeedback ]; then \ + [ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0; \ + else \ + [ -f $(TARGET_SUBDIR)/stagefeedback-libvtv/Makefile ] || exit 0; \ + $(MAKE) stagefeedback-start; \ + fi; \ + cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libvtv-bootstrap + + + + + + +.PHONY: check-target-libvtv maybe-check-target-libvtv +maybe-check-target-libvtv: +@if target-libvtv +maybe-check-target-libvtv: check-target-libvtv + +check-target-libvtv: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' check) + +@endif target-libvtv + +.PHONY: install-target-libvtv maybe-install-target-libvtv +maybe-install-target-libvtv: +@if target-libvtv +maybe-install-target-libvtv: install-target-libvtv + +install-target-libvtv: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) install) + +@endif target-libvtv + +.PHONY: install-strip-target-libvtv maybe-install-strip-target-libvtv +maybe-install-strip-target-libvtv: +@if target-libvtv +maybe-install-strip-target-libvtv: install-strip-target-libvtv + +install-strip-target-libvtv: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd 
$(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) install-strip) + +@endif target-libvtv + +# Other targets (info, dvi, pdf, etc.) + +.PHONY: maybe-info-target-libvtv info-target-libvtv +maybe-info-target-libvtv: +@if target-libvtv +maybe-info-target-libvtv: info-target-libvtv + +info-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing info in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-dvi-target-libvtv dvi-target-libvtv +maybe-dvi-target-libvtv: +@if target-libvtv +maybe-dvi-target-libvtv: dvi-target-libvtv + +dvi-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing dvi in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-pdf-target-libvtv pdf-target-libvtv +maybe-pdf-target-libvtv: +@if target-libvtv +maybe-pdf-target-libvtv: pdf-target-libvtv + 
+pdf-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing pdf in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-html-target-libvtv html-target-libvtv +maybe-html-target-libvtv: +@if target-libvtv +maybe-html-target-libvtv: html-target-libvtv + +html-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing html in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-TAGS-target-libvtv TAGS-target-libvtv +maybe-TAGS-target-libvtv: +@if target-libvtv +maybe-TAGS-target-libvtv: TAGS-target-libvtv + +TAGS-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing TAGS in 
$(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-install-info-target-libvtv install-info-target-libvtv +maybe-install-info-target-libvtv: +@if target-libvtv +maybe-install-info-target-libvtv: install-info-target-libvtv + +install-info-target-libvtv: \ + configure-target-libvtv \ + info-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-info in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-install-pdf-target-libvtv install-pdf-target-libvtv +maybe-install-pdf-target-libvtv: +@if target-libvtv +maybe-install-pdf-target-libvtv: install-pdf-target-libvtv + +install-pdf-target-libvtv: \ + configure-target-libvtv \ + pdf-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-pdf in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e 
"s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-install-html-target-libvtv install-html-target-libvtv +maybe-install-html-target-libvtv: +@if target-libvtv +maybe-install-html-target-libvtv: install-html-target-libvtv + +install-html-target-libvtv: \ + configure-target-libvtv \ + html-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-html in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-installcheck-target-libvtv installcheck-target-libvtv +maybe-installcheck-target-libvtv: +@if target-libvtv +maybe-installcheck-target-libvtv: installcheck-target-libvtv + +installcheck-target-libvtv: \ + configure-target-libvtv + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing installcheck in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" 
"AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-mostlyclean-target-libvtv mostlyclean-target-libvtv +maybe-mostlyclean-target-libvtv: +@if target-libvtv +maybe-mostlyclean-target-libvtv: mostlyclean-target-libvtv + +mostlyclean-target-libvtv: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing mostlyclean in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-clean-target-libvtv clean-target-libvtv +maybe-clean-target-libvtv: +@if target-libvtv +maybe-clean-target-libvtv: clean-target-libvtv + +clean-target-libvtv: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing clean in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-distclean-target-libvtv 
distclean-target-libvtv +maybe-distclean-target-libvtv: +@if target-libvtv +maybe-distclean-target-libvtv: distclean-target-libvtv + +distclean-target-libvtv: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing distclean in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif target-libvtv + +.PHONY: maybe-maintainer-clean-target-libvtv maintainer-clean-target-libvtv +maybe-maintainer-clean-target-libvtv: +@if target-libvtv +maybe-maintainer-clean-target-libvtv: maintainer-clean-target-libvtv + +maintainer-clean-target-libvtv: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libvtv/Makefile ] || exit 0 ; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libvtv" ; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libvtv && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif target-libvtv + + + + + .PHONY: configure-target-libssp maybe-configure-target-libssp maybe-configure-target-libssp: @if gcc-bootstrap @@ -45384,6 +46382,12 @@ configure-stage3-target-libsanitizer: maybe-all-stage3-gcc configure-stage4-target-libsanitizer: 
maybe-all-stage4-gcc configure-stageprofile-target-libsanitizer: maybe-all-stageprofile-gcc configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-gcc +configure-stage1-target-libvtv: maybe-all-stage1-gcc +configure-stage2-target-libvtv: maybe-all-stage2-gcc +configure-stage3-target-libvtv: maybe-all-stage3-gcc +configure-stage4-target-libvtv: maybe-all-stage4-gcc +configure-stageprofile-target-libvtv: maybe-all-stageprofile-gcc +configure-stagefeedback-target-libvtv: maybe-all-stagefeedback-gcc configure-target-libssp: stage_last configure-target-newlib: stage_last configure-stage1-target-libgcc: maybe-all-stage1-gcc @@ -45420,6 +46424,7 @@ configure-target-libatomic: stage_last configure-target-libstdc++-v3: maybe-all-gcc configure-target-libmudflap: maybe-all-gcc configure-target-libsanitizer: maybe-all-gcc +configure-target-libvtv: maybe-all-gcc configure-target-libssp: maybe-all-gcc configure-target-newlib: maybe-all-gcc configure-target-libgcc: maybe-all-gcc @@ -46195,6 +47200,14 @@ configure-stage3-target-libsanitizer: maybe-all-stage3-target-libstdc++-v3 configure-stage4-target-libsanitizer: maybe-all-stage4-target-libstdc++-v3 configure-stageprofile-target-libsanitizer: maybe-all-stageprofile-target-libstdc++-v3 configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-target-libstdc++-v3 +configure-target-libvtv: maybe-all-target-libstdc++-v3 + +configure-stage1-target-libvtv: maybe-all-stage1-target-libstdc++-v3 +configure-stage2-target-libvtv: maybe-all-stage2-target-libstdc++-v3 +configure-stage3-target-libvtv: maybe-all-stage3-target-libstdc++-v3 +configure-stage4-target-libvtv: maybe-all-stage4-target-libstdc++-v3 +configure-stageprofile-target-libvtv: maybe-all-stageprofile-target-libstdc++-v3 +configure-stagefeedback-target-libvtv: maybe-all-stagefeedback-target-libstdc++-v3 all-target-libstdc++-v3: maybe-configure-target-libgomp all-stage1-target-libstdc++-v3: maybe-configure-stage1-target-libgomp @@ -46208,6 +47221,8 @@ 
install-target-libgfortran: maybe-install-target-libquadmath install-target-libgfortran: maybe-install-target-libgcc install-target-libsanitizer: maybe-install-target-libstdc++-v3 install-target-libsanitizer: maybe-install-target-libgcc +install-target-libvtv: maybe-install-target-libstdc++-v3 +install-target-libvtv: maybe-install-target-libgcc install-target-libjava: maybe-install-target-libgcc install-target-libitm: maybe-install-target-libgcc install-target-libobjc: maybe-install-target-libgcc @@ -46237,6 +47252,12 @@ configure-stage3-target-libsanitizer: maybe-all-stage3-target-libgcc configure-stage4-target-libsanitizer: maybe-all-stage4-target-libgcc configure-stageprofile-target-libsanitizer: maybe-all-stageprofile-target-libgcc configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-target-libgcc +configure-stage1-target-libvtv: maybe-all-stage1-target-libgcc +configure-stage2-target-libvtv: maybe-all-stage2-target-libgcc +configure-stage3-target-libvtv: maybe-all-stage3-target-libgcc +configure-stage4-target-libvtv: maybe-all-stage4-target-libgcc +configure-stageprofile-target-libvtv: maybe-all-stageprofile-target-libgcc +configure-stagefeedback-target-libvtv: maybe-all-stagefeedback-target-libgcc configure-stage1-target-libgomp: maybe-all-stage1-target-libgcc configure-stage2-target-libgomp: maybe-all-stage2-target-libgcc configure-stage3-target-libgomp: maybe-all-stage3-target-libgcc @@ -46249,6 +47270,7 @@ configure-stagefeedback-target-libgomp: maybe-all-stagefeedback-target-libgcc configure-target-libstdc++-v3: maybe-all-target-libgcc configure-target-libmudflap: maybe-all-target-libgcc configure-target-libsanitizer: maybe-all-target-libgcc +configure-target-libvtv: maybe-all-target-libgcc configure-target-libssp: maybe-all-target-libgcc configure-target-newlib: maybe-all-target-libgcc configure-target-libbacktrace: maybe-all-target-libgcc @@ -46277,6 +47299,8 @@ configure-target-libmudflap: maybe-all-target-newlib 
maybe-all-target-libgloss configure-target-libsanitizer: maybe-all-target-newlib maybe-all-target-libgloss +configure-target-libvtv: maybe-all-target-newlib maybe-all-target-libgloss + configure-target-libssp: maybe-all-target-newlib maybe-all-target-libgloss diff --git a/boehm-gc/ChangeLog b/boehm-gc/ChangeLog index e55cb37db73..0cd4dbec4d5 100644 --- a/boehm-gc/ChangeLog +++ b/boehm-gc/ChangeLog @@ -1,20 +1,25 @@ +2013-09-04 Matthias Klose + + * Makefile.am (libgcjgc_la_LIBADD): Add EXTRA_TEST_LIBS. + * Makefile.in: Regenerate. + 2013-03-16 Yvan Roux - * include/private/gcconfig.h (AARCH64): New macro (defined only if - __aarch64__). - (CPP_WORDSZ): Define for AArch64. - (MACH_TYPE): Likewise. - (ALIGNMENT): Likewise. - (HBLKSIZE): Likewise. - (OS_TYPE): Likewise. - (LINUX_STACKBOTTOM): Likewise. - (USE_GENERIC_PUSH_REGS): Likewise. - (DYNAMIC_LOADING): Likewise. - (DATASTART): Likewise. - (DATAEND): Likewise. - (STACKBOTTOM): Likewise. - (NOSYS): Likewise. - (mach_type_known): Define for AArch64 and comment update. + * include/private/gcconfig.h (AARCH64): New macro (defined only if + __aarch64__). + (CPP_WORDSZ): Define for AArch64. + (MACH_TYPE): Likewise. + (ALIGNMENT): Likewise. + (HBLKSIZE): Likewise. + (OS_TYPE): Likewise. + (LINUX_STACKBOTTOM): Likewise. + (USE_GENERIC_PUSH_REGS): Likewise. + (DYNAMIC_LOADING): Likewise. + (DATASTART): Likewise. + (DATAEND): Likewise. + (STACKBOTTOM): Likewise. + (NOSYS): Likewise. + (mach_type_known): Define for AArch64 and comment update. 2013-03-06 Rainer Orth @@ -432,7 +437,7 @@ * configure: Regenerate. 2008-09-26 Peter O'Gorman - Steve Ellcey + Steve Ellcey * configure: Regenerate for new libtool. * Makefile.in: Ditto. @@ -1073,7 +1078,7 @@ 2004-04-25 Andreas Jaeger - * mark.c (GC_mark_from): Use pointer as prefetch argument. + * mark.c (GC_mark_from): Use pointer as prefetch argument. 2004-04-06 H.J. 
Lu diff --git a/boehm-gc/Makefile.am b/boehm-gc/Makefile.am index 2b68938e18d..468e6ffc9b4 100644 --- a/boehm-gc/Makefile.am +++ b/boehm-gc/Makefile.am @@ -35,7 +35,7 @@ sparc_sunos4_mach_dep.s ia64_save_regs_in_stack.s # Include THREADLIBS here to ensure that the correct versions of # linuxthread semaphore functions get linked: -libgcjgc_la_LIBADD = $(addobjs) $(THREADLIBS) +libgcjgc_la_LIBADD = $(addobjs) $(THREADLIBS) $(EXTRA_TEST_LIBS) libgcjgc_la_DEPENDENCIES = $(addobjs) libgcjgc_la_LDFLAGS = $(extra_ldflags_libgc) -version-info 1:2:0 -rpath $(toolexeclibdir) libgcjgc_la_LINK = $(LINK) $(libgcjgc_la_LDFLAGS) diff --git a/boehm-gc/Makefile.in b/boehm-gc/Makefile.in index e1a3b70ca34..cd588103659 100644 --- a/boehm-gc/Makefile.in +++ b/boehm-gc/Makefile.in @@ -338,7 +338,7 @@ sparc_sunos4_mach_dep.s ia64_save_regs_in_stack.s # Include THREADLIBS here to ensure that the correct versions of # linuxthread semaphore functions get linked: -libgcjgc_la_LIBADD = $(addobjs) $(THREADLIBS) +libgcjgc_la_LIBADD = $(addobjs) $(THREADLIBS) $(EXTRA_TEST_LIBS) libgcjgc_la_DEPENDENCIES = $(addobjs) libgcjgc_la_LDFLAGS = $(extra_ldflags_libgc) -version-info 1:2:0 -rpath $(toolexeclibdir) libgcjgc_la_LINK = $(LINK) $(libgcjgc_la_LDFLAGS) diff --git a/config/ChangeLog b/config/ChangeLog index cdc733c705a..ab34cbcc224 100644 --- a/config/ChangeLog +++ b/config/ChangeLog @@ -1,3 +1,7 @@ +2013-08-30 Marek Polacek + + * bootstrap-ubsan.mk: New. + 2013-03-27 Kai Tietz * dfp.m4: Add support for cygwin x64 target. diff --git a/config/bootstrap-ubsan.mk b/config/bootstrap-ubsan.mk new file mode 100644 index 00000000000..2d21e832e21 --- /dev/null +++ b/config/bootstrap-ubsan.mk @@ -0,0 +1,7 @@ +# This option enables -fsanitize=undefined for stage2 and stage3. 
+ +STAGE2_CFLAGS += -fsanitize=undefined +STAGE3_CFLAGS += -fsanitize=undefined +POSTSTAGE1_LDFLAGS += -fsanitize=undefined -static-libubsan -lpthread \ + -B$$r/prev-$(TARGET_SUBDIR)/libsanitizer/ubsan/ \ + -B$$r/prev-$(TARGET_SUBDIR)/libsanitizer/ubsan/.libs diff --git a/configure b/configure index 551389bc7c9..a91689ff419 100755 --- a/configure +++ b/configure @@ -2774,6 +2774,7 @@ target_libraries="target-libgcc \ target-libstdc++-v3 \ target-libmudflap \ target-libsanitizer \ + target-libvtv \ target-libssp \ target-libquadmath \ target-libgfortran \ @@ -3215,6 +3216,25 @@ $as_echo "yes" >&6; } fi fi +# Disable libvtv on unsupported systems. +if test -d ${srcdir}/libvtv; then + if test x$enable_libvtv = x; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libvtv support" >&5 +$as_echo_n "checking for libvtv support... " >&6; } + if (srcdir=${srcdir}/libvtv; \ + . ${srcdir}/configure.tgt; \ + test "$VTV_SUPPORTED" != "yes") + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + noconfigdirs="$noconfigdirs target-libvtv" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + fi +fi + # Disable libquadmath for some systems. case "${target}" in avr-*-*) @@ -5945,6 +5965,55 @@ $as_echo "$gcc_cv_isl" >&6; } fi + if test "${gcc_cv_isl}" = no ; then + + if test "${ENABLE_ISL_CHECK}" = yes ; then + _isl_saved_CFLAGS=$CFLAGS + _isl_saved_LDFLAGS=$LDFLAGS + _isl_saved_LIBS=$LIBS + + CFLAGS="${_isl_saved_CFLAGS} ${islinc} ${gmpinc}" + LDFLAGS="${_isl_saved_LDFLAGS} ${isllibs}" + LIBS="${_isl_saved_LIBS} -lisl" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for version 0.12 of ISL" >&5 +$as_echo_n "checking for version 0.12 of ISL... " >&6; } + if test "$cross_compiling" = yes; then : + gcc_cv_isl=yes +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include +int +main () +{ +if (strncmp (isl_version (), "isl-0.12", strlen ("isl-0.12")) != 0) + return 1; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + gcc_cv_isl=yes +else + gcc_cv_isl=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_isl" >&5 +$as_echo "$gcc_cv_isl" >&6; } + + CFLAGS=$_isl_saved_CFLAGS + LDFLAGS=$_isl_saved_LDFLAGS + LIBS=$_isl_saved_LIBS + fi + + + fi fi @@ -6551,11 +6620,11 @@ case ,${enable_languages},:${enable_objc_gc} in ;; esac -# Disable libitm and libsanitizer if we're not building C++ +# Disable libitm, libsanitizer, libvtv if we're not building C++ case ,${enable_languages}, in *,c++,*) ;; *) - noconfigdirs="$noconfigdirs target-libitm target-libsanitizer" + noconfigdirs="$noconfigdirs target-libitm target-libsanitizer target-libvtv" ;; esac @@ -7032,6 +7101,11 @@ if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1 ; then bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer, fi +# If we are building libvtv, bootstrap it. +if echo " ${target_configdirs} " | grep " libvtv " > /dev/null 2>&1 ; then + bootstrap_target_libs=${bootstrap_target_libs}target-libvtv, +fi + # Determine whether gdb needs tk/tcl or not. # Use 'maybe' since enable_gdbtk might be true even if tk isn't available # and in that case we want gdb to be built without tk. Ugh! @@ -7420,13 +7494,6 @@ if test x${is_cross_compiler} = xyes ; then target_configargs="--with-cross-host=${host_noncanonical} ${target_configargs}" fi -# Pass --with-sysroot on darwin without SDK in / -case "${target}" in - x86_64-*-darwin1[3-9]*) - host_configargs="--with-sysroot=\"`xcrun --show-sdk-path`\" ${host_configargs}" - ;; -esac - # Default to --enable-multilib. 
if test x${enable_multilib} = x ; then target_configargs="--enable-multilib ${target_configargs}" @@ -13754,7 +13821,7 @@ else esac if test $ok = yes; then # An in-tree tool is available and we can use it - CXX_FOR_TARGET='$$r/$(HOST_SUBDIR)/gcc/xg++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; else echo -funconfigured-libstdc++-v3 ; fi` -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs' + CXX_FOR_TARGET='$$r/$(HOST_SUBDIR)/gcc/xg++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; else echo -funconfigured-libstdc++-v3 ; fi` -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/libsupc++/.libs' { $as_echo "$as_me:${as_lineno-$LINENO}: result: just compiled" >&5 $as_echo "just compiled" >&6; } elif expr "x$CXX_FOR_TARGET" : "x/" > /dev/null; then @@ -13799,7 +13866,7 @@ else esac if test $ok = yes; then # An in-tree tool is available and we can use it - RAW_CXX_FOR_TARGET='$$r/$(HOST_SUBDIR)/gcc/xgcc -shared-libgcc -B$$r/$(HOST_SUBDIR)/gcc -nostdinc++ -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs' + RAW_CXX_FOR_TARGET='$$r/$(HOST_SUBDIR)/gcc/xgcc -shared-libgcc -B$$r/$(HOST_SUBDIR)/gcc -nostdinc++ -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/libsupc++/.libs' { $as_echo "$as_me:${as_lineno-$LINENO}: result: just compiled" >&5 $as_echo "just compiled" >&6; } elif expr "x$RAW_CXX_FOR_TARGET" : "x/" > /dev/null; then diff --git a/configure.ac b/configure.ac index d4dc10bd725..d72b40a7af9 100644 --- a/configure.ac +++ b/configure.ac @@ -161,6 +161,7 @@ 
target_libraries="target-libgcc \ target-libstdc++-v3 \ target-libmudflap \ target-libsanitizer \ + target-libvtv \ target-libssp \ target-libquadmath \ target-libgfortran \ @@ -554,6 +555,22 @@ if test -d ${srcdir}/libsanitizer; then fi fi +# Disable libvtv on unsupported systems. +if test -d ${srcdir}/libvtv; then + if test x$enable_libvtv = x; then + AC_MSG_CHECKING([for libvtv support]) + if (srcdir=${srcdir}/libvtv; \ + . ${srcdir}/configure.tgt; \ + test "$VTV_SUPPORTED" != "yes") + then + AC_MSG_RESULT([no]) + noconfigdirs="$noconfigdirs target-libvtv" + else + AC_MSG_RESULT([yes]) + fi + fi +fi + # Disable libquadmath for some systems. case "${target}" in avr-*-*) @@ -1636,6 +1653,9 @@ if test "x$with_isl" != "xno" && ISL_CHECK_VERSION(0,10) if test "${gcc_cv_isl}" = no ; then ISL_CHECK_VERSION(0,11) + if test "${gcc_cv_isl}" = no ; then + ISL_CHECK_VERSION(0,12) + fi fi dnl Only execute fail-action, if ISL has been requested. ISL_IF_FAILED([ @@ -2037,11 +2057,11 @@ case ,${enable_languages},:${enable_objc_gc} in ;; esac -# Disable libitm and libsanitizer if we're not building C++ +# Disable libitm, libsanitizer, libvtv if we're not building C++ case ,${enable_languages}, in *,c++,*) ;; *) - noconfigdirs="$noconfigdirs target-libitm target-libsanitizer" + noconfigdirs="$noconfigdirs target-libitm target-libsanitizer target-libvtv" ;; esac @@ -2467,6 +2487,11 @@ if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1 ; then bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer, fi +# If we are building libvtv, bootstrap it. +if echo " ${target_configdirs} " | grep " libvtv " > /dev/null 2>&1 ; then + bootstrap_target_libs=${bootstrap_target_libs}target-libvtv, +fi + # Determine whether gdb needs tk/tcl or not. # Use 'maybe' since enable_gdbtk might be true even if tk isn't available # and in that case we want gdb to be built without tk. Ugh! 
@@ -2854,13 +2879,6 @@ if test x${is_cross_compiler} = xyes ; then target_configargs="--with-cross-host=${host_noncanonical} ${target_configargs}" fi -# Pass --with-sysroot on darwin without SDK in / -case "${target}" in - x86_64-*-darwin1[[3-9]]*) - host_configargs="--with-sysroot=\"`xcrun --show-sdk-path`\" ${host_configargs}" - ;; -esac - # Default to --enable-multilib. if test x${enable_multilib} = x ; then target_configargs="--enable-multilib ${target_configargs}" @@ -3170,10 +3188,10 @@ GCC_TARGET_TOOL(as, AS_FOR_TARGET, AS, [gas/as-new]) GCC_TARGET_TOOL(cc, CC_FOR_TARGET, CC, [gcc/xgcc -B$$r/$(HOST_SUBDIR)/gcc/]) dnl see comments for CXX_FOR_TARGET_FLAG_TO_PASS GCC_TARGET_TOOL(c++, CXX_FOR_TARGET, CXX, - [gcc/xg++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; else echo -funconfigured-libstdc++-v3 ; fi` -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs], + [gcc/xg++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; else echo -funconfigured-libstdc++-v3 ; fi` -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/libsupc++/.libs], c++) GCC_TARGET_TOOL(c++ for libstdc++, RAW_CXX_FOR_TARGET, CXX, - [gcc/xgcc -shared-libgcc -B$$r/$(HOST_SUBDIR)/gcc -nostdinc++ -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs], + [gcc/xgcc -shared-libgcc -B$$r/$(HOST_SUBDIR)/gcc -nostdinc++ -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/libsupc++/.libs], c++) GCC_TARGET_TOOL(dlltool, DLLTOOL_FOR_TARGET, DLLTOOL, [binutils/dlltool]) GCC_TARGET_TOOL(gcc, GCC_FOR_TARGET, , 
[gcc/xgcc -B$$r/$(HOST_SUBDIR)/gcc/]) diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 1233e6acaea..d99dc3417bb 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,15 @@ +2013-08-31 Diego Novillo + + * testsuite-management/x86_64-unknown-linux-gnu.xfail: Update. + +2013-08-29 Mike Stump + + * gcc_update (configure): Update to handle svn 1.8.1. + +2013-08-03 Caroline Tice4 + + * gcc_update: Add libvtv files. + 2013-06-06 Brooks Moses * testsuite-management/validate_failures.py: Fix handling of diff --git a/contrib/gcc_update b/contrib/gcc_update index 10a5970f621..bdf89c4d128 100755 --- a/contrib/gcc_update +++ b/contrib/gcc_update @@ -158,6 +158,9 @@ libsanitizer/configure: libsanitizer/configure.ac libsanitizer/aclocal.m4 libsanitizer/asan/Makefile.in: libsanitizer/asan/Makefile.am libsanitizer/aclocal.m4 libsanitizer/interception/Makefile.in: libsanitizer/interception/Makefile.am libsanitizer/aclocal.m4 libsanitizer/sanitizer_common/Makefile.in: libsanitizer/sanitizer_common/Makefile.am libsanitizer/aclocal.m4 +libvtv/aclocal.m4: libvtv/configure.ac libvtv/acinclude.m4 +libvtv/Makefile.in: libvtv/Makefile.am libvtv/aclocal.m4 +libvtv/configure: libvtv/configure.ac libvtv/aclocal.m4 # Top level Makefile.in: Makefile.tpl Makefile.def configure: configure.ac config/acx.m4 @@ -382,7 +385,7 @@ case $vcs_type in fi revision=`$GCC_SVN info | awk '/Revision:/ { print $2 }'` - branch=`$GCC_SVN info | sed -ne "/URL:/ { + branch=`$GCC_SVN info | sed -ne "/^URL:/ { s,.*/trunk,trunk, s,.*/branches/,, s,.*/tags/,, diff --git a/contrib/testsuite-management/x86_64-unknown-linux-gnu.xfail b/contrib/testsuite-management/x86_64-unknown-linux-gnu.xfail index 44460616914..32f2b0d04ce 100644 --- a/contrib/testsuite-management/x86_64-unknown-linux-gnu.xfail +++ b/contrib/testsuite-management/x86_64-unknown-linux-gnu.xfail @@ -1,135 +1,122 @@ -FAIL: g++.dg/other/anon5.C -std=gnu++98 (test for excess errors) -FAIL: g++.dg/other/anon5.C -std=gnu++11 (test for excess 
errors) -FAIL: gcc.c-torture/compile/pr44119.c -Os (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer -funroll-loops (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O2 -flto -flto-partition=none (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O2 -flto -flto-partition=none (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O2 (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O2 -flto (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -g (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O2 -flto (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O1 (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -g (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions (test for excess errors) -FAIL: gcc.c-torture/compile/pr44119.c -O2 (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -Os (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O1 (internal compiler error) -FAIL: gcc.c-torture/compile/pr44119.c -O3 -fomit-frame-pointer -funroll-loops (internal compiler error) -UNRESOLVED: gcc.dg/attr-weakref-1.c compilation failed to produce executable -FAIL: gcc.dg/attr-weakref-1.c (test for excess errors) -FAIL: gcc.dg/autopar/pr49960.c scan-tree-dump-times optimized "loopfn" 0 -FAIL: gcc.dg/autopar/pr49960.c scan-tree-dump-times parloops "SUCCESS: may be parallelized" 0 -FAIL: gcc.dg/builtin-object-size-8.c execution test -FAIL: gcc.dg/cproj-fails-with-broken-glibc.c execution test -XPASS: gcc.dg/guality/example.c -O2 
execution test -XPASS: gcc.dg/guality/example.c -O2 -flto execution test +FAIL: g++.dg/guality/pr55665.C -O2 line 23 p == 40 +FAIL: g++.dg/guality/pr55665.C -O3 -fomit-frame-pointer line 23 p == 40 +FAIL: g++.dg/guality/pr55665.C -O3 -g line 23 p == 40 +FAIL: gcc.dg/attr-ifunc-4.c execution test XPASS: gcc.dg/guality/example.c -O0 execution test -XPASS: gcc.dg/guality/example.c -O2 -flto -flto-partition=none execution test -XPASS: gcc.dg/guality/guality.c -O2 -flto -flto-partition=none execution test -XPASS: gcc.dg/guality/guality.c -O3 -fomit-frame-pointer execution test +XPASS: gcc.dg/guality/example.c -O2 execution test +XPASS: gcc.dg/guality/example.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test XPASS: gcc.dg/guality/guality.c -O0 execution test +XPASS: gcc.dg/guality/guality.c -O1 execution test +XPASS: gcc.dg/guality/guality.c -O2 execution test +XPASS: gcc.dg/guality/guality.c -O3 -fomit-frame-pointer execution test XPASS: gcc.dg/guality/guality.c -O3 -g execution test XPASS: gcc.dg/guality/guality.c -Os execution test -XPASS: gcc.dg/guality/guality.c -O2 -flto execution test -XPASS: gcc.dg/guality/guality.c -O2 execution test -XPASS: gcc.dg/guality/guality.c -O1 execution test +XPASS: gcc.dg/guality/guality.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +XPASS: gcc.dg/guality/guality.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test XPASS: gcc.dg/guality/inline-params.c -O2 execution test XPASS: gcc.dg/guality/inline-params.c -O3 -fomit-frame-pointer execution test -XPASS: gcc.dg/guality/inline-params.c -O2 -flto execution test -XPASS: gcc.dg/guality/inline-params.c -Os execution test XPASS: gcc.dg/guality/inline-params.c -O3 -g execution test -XPASS: gcc.dg/guality/inline-params.c -O2 -flto -flto-partition=none execution test -XPASS: gcc.dg/guality/pr41353-1.c -O3 -g line 28 j == 28 + 37 +XPASS: gcc.dg/guality/inline-params.c -Os execution test +XPASS: gcc.dg/guality/inline-params.c -O2 
-flto -fno-use-linker-plugin -flto-partition=none execution test +XPASS: gcc.dg/guality/pr41353-1.c -O0 line 28 j == 28 + 37 XPASS: gcc.dg/guality/pr41353-1.c -O1 line 28 j == 28 + 37 -XPASS: gcc.dg/guality/pr41353-1.c -O2 -flto -flto-partition=none line 28 j == 28 + 37 XPASS: gcc.dg/guality/pr41353-1.c -O2 line 28 j == 28 + 37 -XPASS: gcc.dg/guality/pr41353-1.c -O2 -flto line 28 j == 28 + 37 -XPASS: gcc.dg/guality/pr41353-1.c -Os line 28 j == 28 + 37 XPASS: gcc.dg/guality/pr41353-1.c -O3 -fomit-frame-pointer line 28 j == 28 + 37 -XPASS: gcc.dg/guality/pr41353-1.c -O0 line 28 j == 28 + 37 +XPASS: gcc.dg/guality/pr41353-1.c -O3 -g line 28 j == 28 + 37 +XPASS: gcc.dg/guality/pr41353-1.c -Os line 28 j == 28 + 37 +XPASS: gcc.dg/guality/pr41353-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 28 j == 28 + 37 +XPASS: gcc.dg/guality/pr41353-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 28 j == 28 + 37 +XPASS: gcc.dg/guality/pr41447-1.c -O0 execution test XPASS: gcc.dg/guality/pr41447-1.c -O1 execution test -XPASS: gcc.dg/guality/pr41447-1.c -Os execution test XPASS: gcc.dg/guality/pr41447-1.c -O2 execution test XPASS: gcc.dg/guality/pr41447-1.c -O3 -fomit-frame-pointer execution test -XPASS: gcc.dg/guality/pr41447-1.c -O0 execution test XPASS: gcc.dg/guality/pr41447-1.c -O3 -g execution test -XPASS: gcc.dg/guality/pr41616-1.c -O2 -flto -flto-partition=none execution test +XPASS: gcc.dg/guality/pr41447-1.c -Os execution test +XPASS: gcc.dg/guality/pr41447-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +XPASS: gcc.dg/guality/pr41447-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test XPASS: gcc.dg/guality/pr41616-1.c -O0 execution test +XPASS: gcc.dg/guality/pr41616-1.c -O1 execution test +XPASS: gcc.dg/guality/pr41616-1.c -O2 execution test XPASS: gcc.dg/guality/pr41616-1.c -O3 -fomit-frame-pointer execution test XPASS: gcc.dg/guality/pr41616-1.c -O3 -g execution test -XPASS: gcc.dg/guality/pr41616-1.c 
-O1 execution test XPASS: gcc.dg/guality/pr41616-1.c -Os execution test -XPASS: gcc.dg/guality/pr41616-1.c -O2 execution test -XPASS: gcc.dg/guality/pr41616-1.c -O2 -flto execution test +XPASS: gcc.dg/guality/pr41616-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.dg/guality/pr43051-1.c -O3 -fomit-frame-pointer -funroll-loops line 39 c == &a[0] +FAIL: gcc.dg/guality/pr43051-1.c -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions line 39 c == &a[0] +FAIL: gcc.dg/guality/pr48437.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 14 i == 0 +FAIL: gcc.dg/guality/pr48437.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 14 i == 0 FAIL: gcc.dg/guality/pr54200.c -Os line 20 z == 3 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -flto-partition=none line 23 y == 117 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto line 20 y == 25 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto line 23 z == 8 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto line 20 z == 6 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -flto-partition=none line 23 z == 8 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto line 23 y == 117 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -flto-partition=none line 20 z == 6 -FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -flto-partition=none line 20 y == 25 -FAIL: gcc.dg/guality/pr54519-2.c -O2 -flto -flto-partition=none line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-2.c -O2 -flto line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -O2 -flto line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -O2 -flto -flto-partition=none line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -Os line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -O3 -fomit-frame-pointer line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -O2 line 17 y == 25 -FAIL: gcc.dg/guality/pr54519-5.c -O3 -g line 17 y == 25 -FAIL: gcc.dg/guality/vla-1.c -O2 -flto line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-1.c -O2 -flto line 17 sizeof (a) == 6 -FAIL: 
gcc.dg/guality/vla-1.c -O3 -g line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-1.c -O3 -fomit-frame-pointer line 24 sizeof (a) == 17 * sizeof (short) +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 20 y == 25 +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 20 z == 6 +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 23 y == 117 +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 23 z == 8 +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 20 y == 25 +FAIL: gcc.dg/guality/pr54519-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 20 z == 6 +FAIL: gcc.dg/guality/pr54519-2.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 17 y == 25 +FAIL: gcc.dg/guality/pr54519-2.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 17 y == 25 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 20 y == 25 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 20 z == 6 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 23 y == 117 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 23 z == 8 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 20 y == 25 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 20 z == 6 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 23 y == 117 +FAIL: gcc.dg/guality/pr54519-3.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 23 z == 8 +FAIL: gcc.dg/guality/pr54519-4.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 17 y == 25 +FAIL: gcc.dg/guality/pr54519-4.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects 
line 17 y == 25 +FAIL: gcc.dg/guality/pr54519-5.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 17 y == 25 +FAIL: gcc.dg/guality/pr54519-5.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 17 y == 25 +FAIL: gcc.dg/guality/pr54693-2.c -Os line 21 x == 10 - i +FAIL: gcc.dg/guality/pr54693-2.c -Os line 21 y == 20 - 2 * i +FAIL: gcc.dg/guality/vla-1.c -O0 line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O0 line 24 sizeof (a) == 17 * sizeof (short) +FAIL: gcc.dg/guality/vla-1.c -O1 line 17 sizeof (a) == 6 FAIL: gcc.dg/guality/vla-1.c -O1 line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-1.c -Os line 17 sizeof (a) == 6 -FAIL: gcc.dg/guality/vla-1.c -O2 -flto -flto-partition=none line 17 sizeof (a) == 6 FAIL: gcc.dg/guality/vla-1.c -O2 line 17 sizeof (a) == 6 -FAIL: gcc.dg/guality/vla-1.c -O3 -fomit-frame-pointer line 17 sizeof (a) == 6 -FAIL: gcc.dg/guality/vla-1.c -O0 line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-1.c -O2 -flto -flto-partition=none line 24 sizeof (a) == 17 * sizeof (short) FAIL: gcc.dg/guality/vla-1.c -O2 line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-1.c -O1 line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O3 -fomit-frame-pointer line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O3 -fomit-frame-pointer line 24 sizeof (a) == 17 * sizeof (short) FAIL: gcc.dg/guality/vla-1.c -O3 -g line 17 sizeof (a) == 6 -FAIL: gcc.dg/guality/vla-1.c -O0 line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O3 -g line 24 sizeof (a) == 17 * sizeof (short) +FAIL: gcc.dg/guality/vla-1.c -Os line 17 sizeof (a) == 6 FAIL: gcc.dg/guality/vla-1.c -Os line 24 sizeof (a) == 17 * sizeof (short) -FAIL: gcc.dg/guality/vla-2.c -O3 -g line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O0 line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/guality/vla-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O2 
-flto -fno-use-linker-plugin -flto-partition=none line 24 sizeof (a) == 17 * sizeof (short) +FAIL: gcc.dg/guality/vla-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 17 sizeof (a) == 6 +FAIL: gcc.dg/guality/vla-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 24 sizeof (a) == 17 * sizeof (short) FAIL: gcc.dg/guality/vla-2.c -O0 line 16 sizeof (a) == 5 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O3 -fomit-frame-pointer line 16 sizeof (a) == 5 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O2 -flto line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -Os line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O2 line 16 sizeof (a) == 5 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O1 line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O2 -flto line 16 sizeof (a) == 5 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O3 -g line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O0 line 25 sizeof (a) == 6 * sizeof (int) FAIL: gcc.dg/guality/vla-2.c -O1 line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O1 line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O2 line 16 sizeof (a) == 5 * sizeof (int) FAIL: gcc.dg/guality/vla-2.c -O2 line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O2 -flto -flto-partition=none line 25 sizeof (a) == 6 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -O2 -flto -flto-partition=none line 16 sizeof (a) == 5 * sizeof (int) -FAIL: gcc.dg/guality/vla-2.c -Os line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O3 -fomit-frame-pointer line 16 sizeof (a) == 5 * sizeof (int) FAIL: gcc.dg/guality/vla-2.c -O3 -fomit-frame-pointer line 25 sizeof (a) == 6 * sizeof (int) -XPASS: gcc.dg/inline_3.c (test for excess errors) -XPASS: gcc.dg/inline_4.c (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O2 -flto (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O1 (test for excess errors) 
-FAIL: gcc.dg/torture/pr51106-2.c -O3 -g (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -Os (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O0 (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O3 -fomit-frame-pointer (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O2 -flto -flto-partition=none (test for excess errors) -FAIL: gcc.dg/torture/pr51106-2.c -O2 (test for excess errors) -XPASS: gcc.dg/unroll_2.c (test for excess errors) -XPASS: gcc.dg/unroll_3.c (test for excess errors) -XPASS: gcc.dg/unroll_4.c (test for excess errors) -FAIL: libmudflap.c++/pass55-frag.cxx (-O2) execution test -FAIL: libmudflap.c++/pass55-frag.cxx ( -O) execution test -FAIL: libmudflap.c++/pass55-frag.cxx (-O3) execution test -FAIL: libmudflap.c/fail37-frag.c (-O3) output pattern test -FAIL: libmudflap.c/fail37-frag.c (-O2) output pattern test -FAIL: libmudflap.c/fail37-frag.c (-O3) crash test -FAIL: libmudflap.c/fail37-frag.c (-O2) crash test +FAIL: gcc.dg/guality/vla-2.c -O3 -g line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O3 -g line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -Os line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -Os line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O2 -flto -fno-use-linker-plugin -flto-partition=none line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 16 sizeof (a) == 5 * sizeof (int) +FAIL: gcc.dg/guality/vla-2.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects line 25 sizeof (a) == 6 * sizeof (int) +FAIL: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o link, -O0 -flto -flto-partition=none -fuse-linker-plugin +UNRESOLVED: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o execute -O0 -flto 
-flto-partition=none -fuse-linker-plugin +FAIL: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o link, -O0 -flto -flto-partition=1to1 -fno-use-linker-plugin +UNRESOLVED: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o execute -O0 -flto -flto-partition=1to1 -fno-use-linker-plugin +FAIL: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o link, -O0 -flto -fuse-linker-plugin -fno-fat-lto-objects +UNRESOLVED: gcc.dg/lto/20090218-1 c_lto_20090218-1_0.o-c_lto_20090218-1_1.o execute -O0 -flto -fuse-linker-plugin -fno-fat-lto-objects +FAIL: gfortran.dg/round_4.f90 -O0 execution test +FAIL: gfortran.dg/round_4.f90 -O1 execution test +FAIL: gfortran.dg/round_4.f90 -O2 execution test +FAIL: gfortran.dg/round_4.f90 -O3 -fomit-frame-pointer execution test +FAIL: gfortran.dg/round_4.f90 -O3 -fomit-frame-pointer -funroll-loops execution test +FAIL: gfortran.dg/round_4.f90 -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions execution test +FAIL: gfortran.dg/round_4.f90 -O3 -g execution test +FAIL: gfortran.dg/round_4.f90 -Os execution test +FAIL: libmudflap.c++/pass41-frag.cxx ( -O) execution test +FAIL: libmudflap.c++/pass41-frag.cxx (-O2) execution test +FAIL: libmudflap.c++/pass41-frag.cxx (-O3) execution test +FAIL: sourcelocation output - source compiled test +FAIL: sourcelocation -findirect-dispatch output - source compiled test +FAIL: sourcelocation -O3 output - source compiled test diff --git a/fixincludes/ChangeLog b/fixincludes/ChangeLog index 8471e027f89..4a9d22997ff 100644 --- a/fixincludes/ChangeLog +++ b/fixincludes/ChangeLog @@ -1,3 +1,9 @@ +2013-09-02 David Edelsohn + + * inclhack.def (aix_assert): New fix. + * fixincl.x: Regenerate. + * tests/base/assert.h [AIX_ASSERT_CHECK]: New check. 
+ 2013-07-06 Bruce Korb * inclhack.def (cdef_cplusplus): removed, per Bug 51776 diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x index 52cbc11bf98..cf35c5a25a0 100644 --- a/fixincludes/fixincl.x +++ b/fixincludes/fixincl.x @@ -2,11 +2,11 @@ * * DO NOT EDIT THIS FILE (fixincl.x) * - * It has been AutoGen-ed July 12, 2013 at 10:18:23 AM by AutoGen 5.17.3 + * It has been AutoGen-ed September 9, 2013 at 03:29:05 PM by AutoGen 5.18 * From the definitions inclhack.def * and the template file fixincl */ -/* DO NOT SVN-MERGE THIS FILE, EITHER Fri Jul 12 10:18:23 CEST 2013 +/* DO NOT SVN-MERGE THIS FILE, EITHER Mon Sep 9 15:29:05 MEST 2013 * * You must regenerate it. Use the ./genfixes script. * @@ -15,7 +15,7 @@ * certain ANSI-incompatible system header files which are fixed to work * correctly with ANSI C and placed in a directory that GNU C will search. * - * This file contains 227 fixup descriptions. + * This file contains 228 fixup descriptions. * * See README for more information. 
* @@ -734,6 +734,45 @@ static const char* apzAab_Vxworks_UnistdPatch[] = { #endif /* _UNISTD_H */", (char*)NULL }; +/* * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Description of Aix_Assert fix + */ +tSCC zAix_AssertName[] = + "aix_assert"; + +/* + * File name selection pattern + */ +tSCC zAix_AssertList[] = + "assert.h\0"; +/* + * Machine/OS name selection pattern + */ +tSCC* apzAix_AssertMachs[] = { + "*-*-aix*", + (const char*)NULL }; + +/* + * content selection pattern - do fix if pattern found + */ +tSCC zAix_AssertSelect0[] = + "#define[ \t]static_assert[ \t]_Static_assert"; + +#define AIX_ASSERT_TEST_CT 1 +static tTestDesc aAix_AssertTests[] = { + { TT_EGREP, zAix_AssertSelect0, (regex_t*)NULL }, }; + +/* + * Fix Command Arguments for Aix_Assert + */ +static const char* apzAix_AssertPatch[] = { + "format", + "#ifndef __cplusplus\n\ +%0\n\ +#endif", + (char*)NULL }; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * Description of Aix_Complex fix @@ -9329,9 +9368,9 @@ static const char* apzComplier_H_TradcppPatch[] = { * * List of all fixes */ -#define REGEX_COUNT 266 +#define REGEX_COUNT 267 #define MACH_LIST_SIZE_LIMIT 187 -#define FIX_COUNT 227 +#define FIX_COUNT 228 /* * Enumerate the fixes @@ -9349,6 +9388,7 @@ typedef enum { AAB_VXWORKS_REGS_VXTYPES_FIXIDX, AAB_VXWORKS_STDINT_FIXIDX, AAB_VXWORKS_UNISTD_FIXIDX, + AIX_ASSERT_FIXIDX, AIX_COMPLEX_FIXIDX, AIX_MALLOC_FIXIDX, AIX_NET_IF_ARP_FIXIDX, @@ -9627,6 +9667,11 @@ tFixDesc fixDescList[ FIX_COUNT ] = { AAB_VXWORKS_UNISTD_TEST_CT, FD_MACH_ONLY | FD_REPLACEMENT, aAab_Vxworks_UnistdTests, apzAab_Vxworks_UnistdPatch, 0 }, + { zAix_AssertName, zAix_AssertList, + apzAix_AssertMachs, + AIX_ASSERT_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, + aAix_AssertTests, apzAix_AssertPatch, 0 }, + { zAix_ComplexName, zAix_ComplexList, apzAix_ComplexMachs, AIX_COMPLEX_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def index c92170f4cd9..996356a3930 100644 
--- a/fixincludes/inclhack.def +++ b/fixincludes/inclhack.def @@ -568,6 +568,20 @@ fix = { _EndOfHeader_; }; +/* + * assert.h on AIX 7 redefines static_assert as _Static_assert without + * protecting C++. + */ +fix = { + hackname = aix_assert; + mach = "*-*-aix*"; + files = assert.h; + select = "#define[ \t]static_assert[ \t]_Static_assert"; + c_fix = format; + c_fix_arg = "#ifndef __cplusplus\n%0\n#endif"; + test_text = "#define static_assert _Static_assert"; +}; + /* * complex.h on AIX 5 and AIX 6 define _Complex_I and I in terms of __I, * which only is provided by AIX xlc C99. diff --git a/fixincludes/tests/base/assert.h b/fixincludes/tests/base/assert.h index 2642cbe49c4..19dc52575ee 100644 --- a/fixincludes/tests/base/assert.h +++ b/fixincludes/tests/base/assert.h @@ -19,6 +19,13 @@ #include +#if defined( AIX_ASSERT_CHECK ) +#ifndef __cplusplus +#define static_assert _Static_assert +#endif +#endif /* AIX_ASSERT_CHECK */ + + #if defined( ALPHA___ASSERT_CHECK ) extern void __assert(const char *, const char *, int); #endif /* ALPHA___ASSERT_CHECK */ diff --git a/gcc/ChangeLog b/gcc/ChangeLog index db8db6f894a..ceaf0e50e55 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,5683 @@ +2013-09-09 Jan Hubicka + + PR middle-end/58294 + * value-prof.c (gimple_ic): Copy also abnormal edges. + +2013-09-09 Richard Sandiford + + * asan.c (asan_shadow_cst): Use gen_int_mode. + +2013-09-08 Jan Hubicka + + * ipa-profile.c: Add toplevel comment. + (ipa_propagate_frequency_1): Be more conservative when profile is read. + (contains_hot_call_p): New function. + (ipa_propagate_frequency): Set frequencies based on counts when + profile is read. + * predict.c (compute_function_frequency): Use PROFILE_READ gueard for + profile; do not tamper with profile after inlining if it is read. + +2013-09-08 Jan Hubicka + + * ipa-prop.c (try_make_edge_direct_simple_call): Do not special case + speculative edges. 
+ +2013-09-08 Jan Hubicka + + * ipa.c (walk_polymorphic_call_targets): Fix redirection before IPA + summary generation. + +2013-09-08 Jeff Law + + PR bootstrap/58340 + * tree-ssa-threadedge.c (thread_across_edge): Fix initialization + of 'found'. + +2013-09-08 Andi Kleen + + * tree-inline.c (estimate_num_insns): Limit asm cost to 1000. + +2013-09-08 Jan Hubicka + + * ipa.c (walk_polymorphic_call_targets): Fix inliner summary update. + +2013-09-08 Richard Sandiford + + * ira.c (update_equiv_regs): Only call set_paradoxical_subreg + for non-debug insns. + * lra.c (new_insn_reg): Take the containing insn as a parameter. + Only modify lra_reg_info[].biggest_mode if it's non-debug insn. + (collect_non_operand_hard_regs, add_regs_to_insn_regno_info): Update + accordingly. + +2013-09-08 Jan Hubicka + + * cgraphunit.c (walk_polymorphic_call_targets): Permit 0 possible + targets and devirtualize to BUILT_IN_UNREACHABLE. + * timevar.def (TV_IPA_UNREACHABLE): New timevar. + * ipa.c (walk_polymorphic_call_targets): New function. + (symtab_remove_unreachable_nodes): Use it; do not keep all virtual + functions; use the new timevar. + * ipa-devirt.c (maybe_record_node): Do not insert static nodes that + was removed from the program. + (record_binfo): If BINFO corresponds to an anonymous namespace, we may + not consider it in the walk when its vtable is dead. + (possible_polymorphic_call_targets_1): Pass anonymous flag to + record_binfo. + (devirt_variable_node_removal_hook): New function. + (possible_polymorphic_call_targets): Also register + devirt_variable_node_removal_hook. + (ipa_devirt): Do not do non-speculative devirtualization. + (gate_ipa_devirt): One execute if devirtualizing speculatively. + +2013-09-08 Jan Hubicka + + * cgraph.h (varpool_node_hook, varpool_node_hook_list, + varpool_add_node_removal_hook, varpool_add_variable_insertion_hook, + varpool_remove_variable_insertion_hook): Declare. + * varpool.c (varpool_node_hook_list): New structure. 
+ (first_varpool_node_removal_hook, + first_varpool_variable_insertion_hook): New variables. + (varpool_add_node_removal_hook, varpool_remove_node_removal_hook, + varpool_call_node_removal_hooks, varpool_add_variable_insertion_hook, + varpool_remove_variable_insertion_hook, + varpool_call_variable_insertion_hooks): New functions. + (varpool_remove_node): Use it. + +2013-09-08 Paolo Carlini + + PR c++/54941 + * diagnostic.c (diagnostic_build_prefix): When s.file is + "" don't output line and column numbers. + +2013-09-06 Jan Hubicka + + * cgraphunit.c (expand_thunk): Get body before touching arguments. + * lto-streamer-out.c: Stream thunks, too. + * lto-streamer-in.c (input_function): Pop cfun here + (lto_read_body): Instead of here. + +2013-09-06 Caroline Tice + + * doc/install.texi: Add documentation for the --enable-vtable-verify + and the --disable-libvtv configure options. + +2013-09-06 Jeff Law + + * tree-ssa-dom.c (cprop_into_successor_phis): Also propagate + edge implied equivalences into successor phis. + +2013-09-06 Joern Rennecke + + * resource.c (mark_referenced_resources): Handle COND_EXEC. + +2013-09-06 Claudiu Zissulescu + + * resource.c (mark_target_live_regs): Compute resources taking + into account if a call is predicated or not. + +2013-09-06 Eric Botcazou + + * toplev.c (output_stack_usage): Be prepared for suffixes created by + the compiler in the function names. + +2013-09-06 Jan Hubicka + + PR middle-end/58094 + * ipa-inline.c (has_caller_p): New function. + (want_inline_function_to_all_callers_p): Use it. + (sum_callers, inline_to_all_callers): Break out from ... + (ipa_inline): ... here. + +2013-09-06 Jan Hubicka + + * config/i386/i386.c (ix86_hard_regno_mode_ok): AVX modes are valid + only when AVX is enabled. + +2013-09-06 James Greenhalgh + + * config/aarch64/aarch64.md + (*movtf_aarch64): Use neon_dm_2 as type where v8type + is fpsimd_2. + (load_pair): Likewise. + (store_pair): Likewise. 
+ +2013-09-06 James Greenhalgh + + * config/arm/types.md (type): Add "mrs" type. + * config/aarch64/aarch64.md + (aarch64_load_tp_hard): Make type "mrs". + * config/arm/arm.md + (load_tp_hard): Make type "mrs". + * config/arm/cortex-a15.md: Update with new attributes. + * config/arm/cortex-a5.md: Update with new attributes. + * config/arm/cortex-a53.md: Update with new attributes. + * config/arm/cortex-a7.md: Update with new attributes. + * config/arm/cortex-a8.md: Update with new attributes. + * config/arm/cortex-a9.md: Update with new attributes. + * config/arm/cortex-m4.md: Update with new attributes. + * config/arm/cortex-r4.md: Update with new attributes. + * config/arm/fa526.md: Update with new attributes. + * config/arm/fa606te.md: Update with new attributes. + * config/arm/fa626te.md: Update with new attributes. + * config/arm/fa726te.md: Update with new attributes. + +2013-09-06 James Greenhalgh + + * config/aarch64/aarch64.md + (*movti_aarch64): Use "multiple" for type where v8type is "move2". + (*movtf_aarch64): Likewise. + * config/arm/arm.md + (thumb1_movdi_insn): Use "multiple" for type where more than one + instruction is used for a move. + (*arm32_movhf): Likewise. + (*thumb_movdf_insn): Likewise. + +2013-09-06 James Greenhalgh + + * config/arm/types.md (type): Rename fcpys to fmov. + * config/arm/vfp.md + (*arm_movsi_vfp): Rename type fcpys as fmov. + (*thumb2_movsi_vfp): Likewise + (*movhf_vfp_neon): Likewise + (*movhf_vfp): Likewise + (*movsf_vfp): Likewise + (*thumb2_movsf_vfp): Likewise + (*movsfcc_vfp): Likewise + (*thumb2_movsfcc_vfp): Likewise + * config/aarch64/aarch64-simd.md + (move_lo_quad_): Replace type mov_reg with fmovs. + * config/aarch64/aarch64.md + (*movsi_aarch64): Replace type mov_reg with fmovs. + (*movdi_aarch64): Likewise + (*movsf_aarch64): Likewise + (*movdf_aarch64): Likewise + * config/arm/arm.c + (cortexa7_older_only): Rename TYPE_FCPYS to TYPE_FMOV. 
+ * config/arm/iwmmxt.md + (*iwmmxt_movsi_insn): Rename type fcpys as fmov. + * config/arm/arm1020e.md: Update with new attributes. + * config/arm/cortex-a15-neon.md: Update with new attributes. + * config/arm/cortex-a5.md: Update with new attributes. + * config/arm/cortex-a53.md: Update with new attributes. + * config/arm/cortex-a7.md: Update with new attributes. + * config/arm/cortex-a8-neon.md: Update with new attributes. + * config/arm/cortex-a9.md: Update with new attributes. + * config/arm/cortex-m4-fpu.md: Update with new attributes. + * config/arm/cortex-r4f.md: Update with new attributes. + * config/arm/marvell-pj4.md: Update with new attributes. + * config/arm/vfp11.md: Update with new attributes. + +2013-09-06 James Greenhalgh + + * config/aarch64/aarch64.md + (*madd): Fix type attribute. + (*maddsi_uxtw): Likewise. + (*msub): Likewise. + (*msubsi_uxtw): Likewise. + (maddsidi4): Likewise. + (msubsidi4): Likewise. + +2013-09-06 James Greenhalgh + + * config/arm/types.md: Split fdiv as fsqrt, fdiv. + * config/arm/arm.md (core_cycles): Remove fdiv. + * config/arm/vfp.md: + (*sqrtsf2_vfp): Update for attribute changes. + (*sqrtdf2_vfp): Likewise. + * config/aarch64/aarch64.md: + (sqrt2): Update for attribute changes. + * config/arm/arm1020e.md: Update with new attributes. + * config/arm/cortex-a15-neon.md: Update with new attributes. + * config/arm/cortex-a5.md: Update with new attributes. + * config/arm/cortex-a53.md: Update with new attributes. + * config/arm/cortex-a7.md: Update with new attributes. + * config/arm/cortex-a8-neon.md: Update with new attributes. + * config/arm/cortex-a9.md: Update with new attributes. + * config/arm/cortex-m4-fpu.md: Update with new attributes. + * config/arm/cortex-r4f.md: Update with new attributes. + * config/arm/marvell-pj4.md: Update with new attributes. + * config/arm/vfp11.md: Update with new attributes. + +2013-09-06 James Greenhalgh + + * config/arm/types.md + (type): Split f_cvt as f_cvt, f_cvtf2i, f_cvti2f. 
+ * config/aarch64/aarch64.md + (l2): Update with + new attributes. + (fix_trunc2): Likewise. + (fixuns_trunc2): Likewise. + (float2): Likewise. + * config/arm/vfp.md + (*truncsisf2_vfp): Update with new attributes. + (*truncsidf2_vfp): Likewise. + (fixuns_truncsfsi2): Likewise. + (fixuns_truncdfsi2): Likewise. + (*floatsisf2_vfp): Likewise. + (*floatsidf2_vfp): Likewise. + (floatunssisf2): Likewise. + (floatunssidf2): Likewise. + (*combine_vcvt_f32_): Likewise. + (*combine_vcvt_f64_): Likewise. + * config/arm/arm1020e.md: Update with new attributes. + * config/arm/cortex-a15-neon.md: Update with new attributes. + * config/arm/cortex-a5.md: Update with new attributes. + * config/arm/cortex-a53.md: Update with new attributes. + * config/arm/cortex-a7.md: Update with new attributes. + * config/arm/cortex-a8-neon.md: Update with new attributes. + * config/arm/cortex-a9.md: Update with new attributes. + * config/arm/cortex-m4-fpu.md: Update with new attributes. + * config/arm/cortex-r4f.md: Update with new attributes. + * config/arm/marvell-pj4.md: Update with new attributes. + * config/arm/vfp11.md: Update with new attributes. + +2013-09-06 James Greenhalgh + + * config/aarch64/arm_neon.h + (vqtbl<1,2,3,4>_s8): Fix control vector parameter type. + (vqtbx<1,2,3,4>_s8): Likewise. + +2013-09-06 James Greenhalgh + + * config/arm/types.md: Add "no_insn", "multiple" and "untyped" + types. + * config/arm/arm-fixed.md: Add type attribute to all insn + patterns. + (add3): Add type attribute. + (add3): Likewise. + (usadd3): Likewise. + (ssadd3): Likewise. + (sub3): Likewise. + (sub3): Likewise. + (ussub3): Likewise. + (sssub3): Likewise. + (ssmulsa3): Likewise. + (usmulusa3): Likewise. + (arm_usatsihi): Likewise. + * config/arm/vfp.md + (*movdi_vfp): Add types for all instructions. + (*movdi_vfp_cortexa8): Likewise. + (*movhf_vfp_neon): Likewise. + (*movhf_vfp): Likewise. + (*movdf_vfp): Likewise. + (*thumb2_movdf_vfp): Likewise. + (*thumb2_movdfcc_vfp): Likewise. 
+ * config/arm/arm.md: Add type attribute to all insn patterns. + (*thumb1_adddi3): Add type attribute. + (*arm_adddi3): Likewise. + (*adddi_sesidi_di): Likewise. + (*adddi_zesidi_di): Likewise. + (*thumb1_addsi3): Likewise. + (addsi3_compare0): Likewise. + (*addsi3_compare0_scratch): Likewise. + (*compare_negsi_si): Likewise. + (cmpsi2_addneg): Likewise. + (*addsi3_carryin_): Likewise. + (*addsi3_carryin_alt2_): Likewise. + (*addsi3_carryin_clobercc_): Likewise. + (*subsi3_carryin): Likewise. + (*subsi3_carryin_const): Likewise. + (*subsi3_carryin_compare): Likewise. + (*subsi3_carryin_compare_const): Likewise. + (*arm_subdi3): Likewise. + (*thumb_subdi3): Likewise. + (*subdi_di_zesidi): Likewise. + (*subdi_di_sesidi): Likewise. + (*subdi_zesidi_di): Likewise. + (*subdi_sesidi_di): Likewise. + (*subdi_zesidi_ze): Likewise. + (thumb1_subsi3_insn): Likewise. + (*arm_subsi3_insn): Likewise. + (*anddi3_insn): Likewise. + (*anddi_zesidi_di): Likewise. + (*anddi_sesdi_di): Likewise. + (*ne_zeroextracts): Likewise. + (*ne_zeroextracts): Likewise. + (*ite_ne_zeroextr): Likewise. + (*ite_ne_zeroextr): Likewise. + (*anddi_notdi_di): Likewise. + (*anddi_notzesidi): Likewise. + (*anddi_notsesidi): Likewise. + (andsi_notsi_si): Likewise. + (thumb1_bicsi3): Likewise. + (*iordi3_insn): Likewise. + (*iordi_zesidi_di): Likewise. + (*iordi_sesidi_di): Likewise. + (*thumb1_iorsi3_insn): Likewise. + (*xordi3_insn): Likewise. + (*xordi_zesidi_di): Likewise. + (*xordi_sesidi_di): Likewise. + (*arm_xorsi3): Likewise. + (*andsi_iorsi3_no): Likewise. + (*smax_0): Likewise. + (*smax_m1): Likewise. + (*arm_smax_insn): Likewise. + (*smin_0): Likewise. + (*arm_smin_insn): Likewise. + (*arm_umaxsi3): Likewise. + (*arm_uminsi3): Likewise. + (*minmax_arithsi): Likewise. + (*minmax_arithsi_): Likewise. + (*satsi_): Likewise. + (arm_ashldi3_1bit): Likewise. + (arm_ashrdi3_1bit): Likewise. + (arm_lshrdi3_1bit): Likewise. + (*arm_negdi2): Likewise. + (*thumb1_negdi2): Likewise. 
+ (*arm_negsi2): Likewise. + (*thumb1_negsi2): Likewise. + (*negdi_extendsid): Likewise. + (*negdi_zero_extend): Likewise. + (*arm_abssi2): Likewise. + (*thumb1_abssi2): Likewise. + (*arm_neg_abssi2): Likewise. + (*thumb1_neg_abss): Likewise. + (one_cmpldi2): Likewise. + (extenddi2): Likewise. + (*compareqi_eq0): Likewise. + (*arm_extendhisi2addsi): Likewise. + (*arm_movdi): Likewise. + (*thumb1_movdi_insn): Likewise. + (*arm_movt): Likewise. + (*thumb1_movsi_insn): Likewise. + (pic_add_dot_plus_four): Likewise. + (pic_add_dot_plus_eight): Likewise. + (tls_load_dot_plus_eight): Likewise. + (*thumb1_movhi_insn): Likewise. + (*thumb1_movsf_insn): Likewise. + (*movdf_soft_insn): Likewise. + (*thumb_movdf_insn): Likewise. + (cbranchsi4_insn): Likewise. + (cbranchsi4_scratch): Likewise. + (*negated_cbranchsi4): Likewise. + (*tbit_cbranch): Likewise. + (*tlobits_cbranch): Likewise. + (*tstsi3_cbranch): Likewise. + (*cbranchne_decr1): Likewise. + (*addsi3_cbranch): Likewise. + (*addsi3_cbranch_scratch): Likewise. + (*arm_cmpdi_insn): Likewise. + (*arm_cmpdi_unsig): Likewise. + (*arm_cmpdi_zero): Likewise. + (*thumb_cmpdi_zero): Likewise. + (*deleted_compare): Likewise. + (*mov_scc): Likewise. + (*mov_negscc): Likewise. + (*mov_notscc): Likewise. + (*cstoresi_eq0_thumb1_insn): Likewise. + (cstoresi_nltu_thumb1): Likewise. + (cstoresi_ltu_thu): Likewise. + (thumb1_addsi3_addgeu): Likewise. + (*arm_jump): Likewise. + (*thumb_jump): Likewise. + (*check_arch2): Likewise. + (arm_casesi_internal): Likewise. + (thumb1_casesi_dispatch): Likewise. + (*arm_indirect_jump): Likewise. + (*thumb1_indirect_jump): Likewise. + (nop): Likewise. + (*and_scc): Likewise. + (*ior_scc): Likewise. + (*compare_scc): Likewise. + (*cond_move): Likewise. + (*cond_arith): Likewise. + (*cond_sub): Likewise. + (*cmp_ite0): Likewise. + (*cmp_ite1): Likewise. + (*cmp_and): Likewise. + (*cmp_ior): Likewise. + (*ior_scc_scc): Likewise. + (*ior_scc_scc_cmp): Likewise. + (*and_scc_scc): Likewise. 
+ (*and_scc_scc_cmp): Likewise. + (*and_scc_scc_nod): Likewise. + (*negscc): Likewise. + (movcond_addsi): Likewise. + (movcond): Likewise. + (*ifcompare_plus_move): Likewise. + (*if_plus_move): Likewise. + (*ifcompare_move_plus): Likewise. + (*if_move_plus): Likewise. + (*ifcompare_arith_arith): Likewise. + (*if_arith_arith): Likewise. + (*ifcompare_arith_move): Likewise. + (*if_arith_move): Likewise. + (*ifcompare_move_arith): Likewise. + (*if_move_arith): Likewise. + (*ifcompare_move_not): Likewise. + (*if_move_not): Likewise. + (*ifcompare_not_move): Likewise. + (*if_not_move): Likewise. + (*ifcompare_shift_move): Likewise. + (*if_shift_move): Likewise. + (*ifcompare_move_shift): Likewise. + (*if_move_shift): Likewise. + (*ifcompare_shift_shift): Likewise. + (*ifcompare_not_arith): Likewise. + (*ifcompare_arith_not): Likewise. + (*if_arith_not): Likewise. + (*ifcompare_neg_move): Likewise. + (*if_neg_move): Likewise. + (*ifcompare_move_neg): Likewise. + (*if_move_neg): Likewise. + (prologue_thumb1_interwork): Likewise. + (*cond_move_not): Likewise. + (*sign_extract_onebit): Likewise. + (*not_signextract_onebit): Likewise. + (stack_tie): Likewise. + (align_4): Likewise. + (align_8): Likewise. + (consttable_end): Likewise. + (consttable_1): Likewise. + (consttable_2): Likewise. + (consttable_4): Likewise. + (consttable_8): Likewise. + (consttable_16): Likewise. + (*thumb1_tablejump): Likewise. + (prefetch): Likewise. + (force_register_use): Likewise. + (thumb_eh_return): Likewise. + (load_tp_hard): Likewise. + (load_tp_soft): Likewise. + (tlscall): Likewise. + (*arm_movtas_ze): Likewise. + (*arm_rev): Likewise. + (*arm_revsh): Likewise. + (*arm_rev16): Likewise. + * config/arm/thumb2.md + (*thumb2_smaxsi3): Likewise. + (*thumb2_sminsi3): Likewise. + (*thumb32_umaxsi3): Likewise. + (*thumb2_uminsi3): Likewise. + (*thumb2_negdi2): Likewise. + (*thumb2_abssi2): Likewise. + (*thumb2_neg_abss): Likewise. + (*thumb2_movsi_insn): Likewise. 
+ (tls_load_dot_plus_four): Likewise. + (*thumb2_movhi_insn): Likewise. + (*thumb2_mov_scc): Likewise. + (*thumb2_mov_negs): Likewise. + (*thumb2_mov_negs): Likewise. + (*thumb2_mov_nots): Likewise. + (*thumb2_mov_nots): Likewise. + (*thumb2_movsicc_): Likewise. + (*thumb2_movsfcc_soft_insn): Likewise. + (*thumb2_indirect_jump): Likewise. + (*thumb2_and_scc): Likewise. + (*thumb2_ior_scc): Likewise. + (*thumb2_ior_scc_strict_it): Likewise. + (*thumb2_cond_move): Likewise. + (*thumb2_cond_arith): Likewise. + (*thumb2_cond_ari): Likewise. + (*thumb2_cond_sub): Likewise. + (*thumb2_negscc): Likewise. + (*thumb2_movcond): Likewise. + (thumb2_casesi_internal): Likewise. + (thumb2_casesi_internal_pic): Likewise. + (*thumb2_alusi3_short): Likewise. + (*thumb2_mov_shortim): Likewise. + (*thumb2_addsi_short): Likewise. + (*thumb2_subsi_short): Likewise. + (thumb2_addsi3_compare0): Likewise. + (*thumb2_cbz): Likewise. + (*thumb2_cbnz): Likewise. + (*thumb2_one_cmplsi2_short): Likewise. + (*thumb2_negsi2_short): Likewise. + (*orsi_notsi_si): Likewise. + * config/arm/arm1020e.md: Update with new attributes. + * config/arm/arm1026ejs.md: Update with new attributes. + * config/arm/arm1136jfs.md: Update with new attributes. + * config/arm/arm926ejs.md: Update with new attributes. + * config/arm/cortex-a15.md: Update with new attributes. + * config/arm/cortex-a5.md: Update with new attributes. + * config/arm/cortex-a53.md: Update with new attributes. + * config/arm/cortex-a7.md: Update with new attributes. + * config/arm/cortex-a8.md: Update with new attributes. + * config/arm/cortex-a9.md: Update with new attributes. + * config/arm/cortex-m4.md: Update with new attributes. + * config/arm/cortex-r4.md: Update with new attributes. + * config/arm/fa526.md: Update with new attributes. + * config/arm/fa606te.md: Update with new attributes. + * config/arm/fa626te.md: Update with new attributes. + * config/arm/fa726te.md: Update with new attributes. 
+ +2013-09-06 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_sqdmll_n_internal): Use + iterator to ensure correct register choice. + (aarch64_sqdmll2_n_internal): Likewise. + (aarch64_sqdmull_n): Likewise. + (aarch64_sqdmull2_n_internal): Likewise. + * config/aarch64/arm_neon.h + (vml_lane_16): Use 'x' constraint for element vector. + (vml_n_16): Likewise. + (vmll_high_lane_16): Likewise. + (vmll_high_n_16): Likewise. + (vmll_lane_16): Likewise. + (vmll_n_16): Likewise. + (vmul_lane_16): Likewise. + (vmul_n_16): Likewise. + (vmull_lane_16): Likewise. + (vmull_n_16): Likewise. + (vmull_high_lane_16): Likewise. + (vmull_high_n_16): Likewise. + (vqrdmulh_n_s16): Likewise. + +2013-09-06 Tejas Belagod + + * config/aarch64/arm_neon.h: Fix all vdup intrinsics to + have the correct lane parameter. + +2013-09-06 Richard Biener + + * cfganal.c (control_dependences::~control_dependences): + Properly free all of the vector. + +2013-09-06 Kirill Yukhin + + PR target/58269 + * config/i386/i386.c (ix86_conditional_register_usage): + Proper initialize extended SSE registers. + +2013-09-06 Jan Hubicka + + PR tree-optimization/58311 + * ipa-devirt.c (gate_ipa_devirt): Only execute when optimizing. + +2013-09-06 Jan Hubicka + + * Makefile.in (tree-sra.o): Update dependencies. + * tree-sra.c: Include ipa-utils.h + (scan_function): Use recursive_call_p. + (has_caller_p): New function. + (cgraph_for_node_and_aliases): Count also callers of aliases. + +2013-09-06 Jan Hubicka + + PR middle-end/58094 + * cgraph.h (symtab_semantically_equivalent_p): Declare. + * tree-tailcall.c: Include ipa-utils.h. + (find_tail_calls): Use it. + * ipa-pure-const.c (check_call): Likewise. + * ipa-utils.c (recursive_call_p): New function. + * ipa-utils.h (recursive_call_p): Dclare. + * symtab.c (symtab_nonoverwritable_alias): Fix formatting. + (symtab_semantically_equivalent_p): New function. + * Makefile.in (tree-tailcall.o): Update dependencies. 
+ +2013-09-06 Eric Botcazou + + * ipa-split.c (split_function): Set DECL_NO_INLINE_WARNING_P on the + non-inlinable part. + +2013-09-06 Richard Biener + + * lto-streamer.h (lto_global_var_decls): Remove. + * Makefile.in (OBJS): Remove lto-symtab.o. + (lto-symtab.o): Remove. + (GTFILES): Remove lto-symtab.c + * lto-symtab.c: Move to lto/ + +2013-09-06 Andreas Krebbel + + * config/s390/s390.md (UNSPEC_FPINT_FLOOR, UNSPEC_FPINT_BTRUNC) + (UNSPEC_FPINT_ROUND, UNSPEC_FPINT_CEIL, UNSPEC_FPINT_NEARBYINT) + (UNSPEC_FPINT_RINT): New constant definitions. + (FPINT, fpint_name, fpint_roundingmode): New integer iterator + definition with 2 attributes. + ("2", "rint2") + ("2", "rint2"): New pattern + definitions. + +2013-09-06 Andreas Krebbel + + * config/s390/s390.md: Add "bcr_flush" value to mnemonic + attribute. + ("mem_thread_fence_1"): Use bcr 14,0 for z196 and later. + Set the mnemonic attribute to "bcr_flush". Set the "z196prop" + attribute to "z196_alone". + * config/s390/2827.md: Add "bcr_flush" to "ooo_groupalone" and + "zEC12_simple". + +2013-09-06 Richard Biener + + * basic-block.h (class control_dependences): New. + * tree-ssa-dce.c (control_dependence_map): Remove. + (cd): New global. + (EXECUTE_IF_CONTROL_DEPENDENT): Remove. + (set_control_dependence_map_bit, clear_control_dependence_bitmap, + find_pdom, find_control_dependence, find_all_control_dependences): + Move to cfganal.c. + (mark_control_dependent_edges_necessary, find_obviously_necessary_stmts, + propagate_necessity, tree_dce_init, tree_dce_done, + perform_tree_ssa_dce): Adjust. + * cfganal.c (set_control_dependence_map_bit, + clear_control_dependence_bitmap, find_pdom, find_control_dependence, + find_all_control_dependences): Move from tree-ssa-dce.c and + implement as methods of control_dependences class. + (control_dependences::control_dependences): New. + (control_dependences::~control_dependences): Likewise. + (control_dependences::get_edges_dependent_on): Likewise. 
+ (control_dependences::get_edge): Likewise. + +2013-09-04 Jan Hubicka + + * tree.c (types_same_for_odr): Drop overactive check. + * ipa-devirt.c (hash_type_name): Likewise. + +2013-09-04 Jan Hubicka + + * cgraphunit.c (walk_polymorphic_call_targets): Break out from ... + (analyze_functions): ... here. + +2013-09-04 Jan Hubicka + + PR middle-end/58201 + * cgraphunit.c (analyze_functions): Clear AUX fields + after processing; initialize assembler name has. + +2013-09-05 Jeff Law + + * tree-ssa-threadedge.c (thread_around_empty_blocks): Renamed + from thread_around_empty_block. Record threading path into PATH. + Recurse if threading through the initial block is successful. + (thread_across_edge): Corresponding changes to slightly simplify. + +2013-09-05 James Greenhalgh + + * config/aarch64/aarch64.md + (type): Remove frecpe, frecps, frecpx. + (aarch64_frecp): Move to aarch64-simd.md, + fix to be a TARGET_SIMD instruction. + (aarch64_frecps): Remove. + * config/aarch64/aarch64-simd.md + (aarch64_frecp): New, moved from aarch64.md + (aarch64_frecps): Handle all float/vector of float modes. + +2013-09-05 James Greenhalgh + Sofiane Naci + + * config/arm/types.md (define_attr "type"): + Expand "arlo_imm" + into "adr", "alu_imm", "alus_imm", "logic_imm", "logics_imm". + Expand "arlo_reg" + into "adc_reg", "adc_imm", "adcs_reg", "adcs_imm", "alu_ext", + "alu_reg", "alus_ext", "alus_reg", "bfm", "csel", "logic_reg", + "logics_reg", "rev". + Expand "arlo_shift" + into "alu_shift_imm", "alus_shift_imm", "logic_shift_imm", + "logics_shift_imm". + Expand "arlo_shift_reg" + into "alu_shift_reg", "alus_shift_reg", "logic_shift_reg", + "logics_shift_reg". + Expand "clz" into "clz, "rbit". + Rename "shift" to "shift_imm". + * config/arm/arm.md (define_attr "core_cycles"): Update for attribute + changes. + Update for attribute changes all occurrences of arlo_* and + shift* types. + * config/arm/arm-fixed.md: Update for attribute changes + all occurrences of arlo_* types. 
+ * config/arm/thumb2.md: Update for attribute changes all occurrences + of arlo_* types. + * config/arm/arm.c (xscale_sched_adjust_cost): (rtx insn, rtx + (cortexa7_older_only): Likewise. + (cortexa7_younger): Likewise. + * config/arm/arm1020e.md (1020alu_op): Update for attribute changes. + (1020alu_shift_op): Likewise. + (1020alu_shift_reg_op): Likewise. + * config/arm/arm1026ejs.md (alu_op): Update for attribute changes. + (alu_shift_op): Likewise. + (alu_shift_reg_op): Likewise. + * config/arm/arm1136jfs.md (11_alu_op): Update for + attribute changes. + (11_alu_shift_op): Likewise. + (11_alu_shift_reg_op): Likewise. + * config/arm/arm926ejs.md (9_alu_op): Update for attribute changes. + (9_alu_shift_reg_op): Likewise. + * config/arm/cortex-a15.md (cortex_a15_alu): Update for + attribute changes. + (cortex_a15_alu_shift): Likewise. + (cortex_a15_alu_shift_reg): Likewise. + * config/arm/cortex-a5.md (cortex_a5_alu): Update for + attribute changes. + (cortex_a5_alu_shift): Likewise. + * config/arm/cortex-a53.md + (cortex_a53_alu): Update for attribute changes. + (cortex_a53_alu_shift): Likewise. + * config/arm/cortex-a7.md + (cortex_a7_alu_imm): Update for attribute changes. + (cortex_a7_alu_reg): Likewise. + (cortex_a7_alu_shift): Likewise. + * config/arm/cortex-a8.md + (cortex_a8_alu): Update for attribute changes. + (cortex_a8_alu_shift): Likewise. + (cortex_a8_alu_shift_reg): Likewise. + * config/arm/cortex-a9.md + (cortex_a9_dp): Update for attribute changes. + (cortex_a9_dp_shift): Likewise. + * config/arm/cortex-m4.md + (cortex_m4_alu): Update for attribute changes. + * config/arm/cortex-r4.md + (cortex_r4_alu): Update for attribute changes. + (cortex_r4_mov): Likewise. + (cortex_r4_alu_shift_reg): Likewise. + * config/arm/fa526.md + (526_alu_op): Update for attribute changes. + (526_alu_shift_op): Likewise. + * config/arm/fa606te.md + (606te_alu_op): Update for attribute changes. + * config/arm/fa626te.md + (626te_alu_op): Update for attribute changes. 
+ (626te_alu_shift_op): Likewise. + * config/arm/fa726te.md + (726te_alu_op): Update for attribute changes. + (726te_alu_shift_op): Likewise. + (726te_alu_shift_reg_op): Likewise. + * config/arm/fmp626.md (mp626_alu_op): Update for attribute changes. + (mp626_alu_shift_op): Likewise. + * config/arm/marvell-pj4.md (pj4_alu): Update for attribute changes. + (pj4_alu_conds): Likewise. + (pj4_shift): Likewise. + (pj4_shift_conds): Likewise. + (pj4_alu_shift): Likewise. + (pj4_alu_shift_conds): Likewise. + * config/aarch64/aarch64.md: Update for attribute change + all occurrences of arlo_* and shift* types. + +2013-09-05 Mike Stump + + * tree.h: Move documentation for tree_function_decl to tree-core.h + with the declaration. + +2013-09-05 Peter Bergner + + PR target/58139 + * reginfo.c (choose_hard_reg_mode): Scan through all mode classes + looking for widest mode. + +2013-09-05 Eric Botcazou + + * config.gcc (*-*-vxworks*): Do not override an existing extra_objs. + +2013-09-05 Richard Biener + + PR tree-optimization/58137 + * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): + Do not create vectors of pointers. + * tree-vect-loop.c (get_initial_def_for_induction): Use proper + types for the components of the vector initializer. + * tree-cfg.c (verify_gimple_assign_binary): Remove special-casing + allowing pointer vectors with PLUS_EXPR/MINUS_EXPR. + +2013-09-05 Martin Jambor + + * ipa-prop.c (remove_described_reference): Accept missing references, + return false if that happens, otherwise return true. + (cgraph_node_for_jfunc): New function. + (try_decrement_rdesc_refcount): Likewise. + (try_make_edge_direct_simple_call): Use them. + (ipa_edge_removal_hook): Remove references from rdescs. + (ipa_edge_duplication_hook): Clone rdescs and their references + when the new edge has the same caller as the old one. + * cgraph.c (cgraph_resolve_speculation): Remove speculative + reference before removing any edges.
+ +2013-09-05 Richard Earnshaw + + * arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on + initial store. + * thumb2.md (thumb2_storewb_parisi): New pattern. + +2013-09-05 Yufeng Zhang + + * config/aarch64/aarch64-option-extensions.def: Add + AARCH64_OPT_EXTENSION of 'crc'. + * config/aarch64/aarch64.h (AARCH64_FL_CRC): New define. + (AARCH64_ISA_CRC): Ditto. + * doc/invoke.texi (-march and -mcpu feature modifiers): Add + description of the CRC extension. + +2013-09-05 Alexander Ivchenko + + * config/rs6000/linux64.h: Define OPTION_BIONIC and OPTION_UCLIBC. + * config/rs6000/linux.h: Ditto. + * alpha/linux.h: Ditto. + * config/bfin/uclinux.h: Define TARGET_LIBC_HAS_FUNCTION as + no_c99_libc_has_function. + * config/c6x/uclinux-elf.h: Ditto. + * config/lm32/uclinux-elf.h: Ditto. + * config/m68k/uclinux.h: Ditto. + * config/moxie/uclinux.h: Ditto. + * config.gcc (bfin*-linux-uclibc*): Add t-linux-android to tmake_file. + (crisv32-*-linux*, cris-*-linux*): Ditto. + * config/bfin/bfin.c: Include "tm_p.h". + +2013-09-05 Richard Biener + + * tree-vect-loop.c (vect_analyze_loop_operations): Properly + check for a definition without a basic-block. + +2013-09-05 James Greenhalgh + Sofiane Naci + + * config/aarch64/aarch64.md + (*movti_aarch64): Rename r_2_f and f_2_r. + (*movsf_aarch64): Likewise. + (*movdf_aarch64): Likewise. + (*movtf_aarch64): Likewise. + (aarch64_movdi_low): Likewise. + (aarch64_movdi_high): Likewise. + (aarch64_movhigh_di): Likewise. + (aarch64_movlow_di): Likewise. + (aarch64_movtilow_tilow): Likewise. + * config/arm/arm.md (attribute "neon_type"): Delete. Move attribute + values to config/arm/types.md + (attribute "conds"): Update for attribute change. + (anddi3_insn): Likewise. + (iordi3_insn): Likewise. + (xordi3_insn): Likewise. + (one_cmpldi2): Likewise. + * config/arm/types.md (type): Add Neon types. + * config/arm/neon.md (neon_mov): Remove "neon_type" attribute, + use "type" attribute. + (movmisalign_neon_store): Likewise. 
+ (movmisalign_neon_load): Likewise. + (vec_set_internal): Likewise. + (vec_setv2di_internal): Likewise. + (vec_extract): Likewise. + (vec_extractv2di): Likewise. + (add3_neon): Likewise. + (adddi3_neon): Likewise. + (sub3_neon): Likewise. + (subdi3_neon): Likewise. + (mul3_neon): Likewise. + (mul3add_neon): Likewise. + (mul3negadd_neon): Likewise. + (fma4)): Likewise. + (fma4_intrinsic): Likewise. + (fmsub4)): Likewise. + (fmsub4_intrinsic): Likewise. + (neon_vrint): Likewise. + (ior3): Likewise. + (and3): Likewise. + (anddi3_neon): Likewise. + (orn3_neon): Likewise. + (orndi3_neon): Likewise. + (bic3_neon): Likewise. + (bicdi3_neon): Likewise. + (xor3): Likewise. + (one_cmpl2): Likewise. + (abs2): Likewise. + (neg2): Likewise. + (umin3_neon): Likewise. + (umax3_neon): Likewise. + (smin3_neon): Likewise. + (smax3_neon): Likewise. + (vashl3): Likewise. + (vashr3_imm): Likewise. + (vlshr3_imm): Likewise. + (ashl3_signed): Likewise. + (ashl3_unsigned): Likewise. + (neon_load_count): Likewise. + (ashldi3_neon_noclobber): Likewise. + (signed_shift_di3_neon): Likewise. + (unsigned_shift_di3_neon): Likewise. + (ashrdi3_neon_imm_noclobber): Likewise. + (lshrdi3_neon_imm_noclobber): Likewise. + (widen_ssum3): Likewise. + (widen_usum3): Likewise. + (quad_halves_v4si): Likewise. + (quad_halves_v4sf): Likewise. + (quad_halves_v8hi): Likewise. + (quad_halves_v16qi): Likewise. + (reduc_splus_v2di): Likewise. + (neon_vpadd_internal): Likewise. + (neon_vpsmin): Likewise. + (neon_vpsmax): Likewise. + (neon_vpumin): Likewise. + (neon_vpumax): Likewise. + (ss_add_neon): Likewise. + (us_add_neon): Likewise. + (ss_sub_neon): Likewise. + (us_sub_neon): Likewise. + (neon_vadd_unspec): Likewise. + (neon_vaddl): Likewise. + (neon_vaddw): Likewise. + (neon_vhadd): Likewise. + (neon_vqadd): Likewise. + (neon_vaddhn): Likewise. + (neon_vmul): Likewise. + (neon_vmla): Likewise. + (neon_vmlal): Likewise. + (neon_vmls): Likewise. + (neon_vmlsl): Likewise. + (neon_vqdmulh): Likewise. 
+ (neon_vqdmlal): Likewise. + (neon_vqdmlsl): Likewise. + (neon_vmull): Likewise. + (neon_vqdmull): Likewise. + (neon_vsub_unspec): Likewise. + (neon_vsubl): Likewise. + (neon_vsubw): Likewise. + (neon_vqsub): Likewise. + (neon_vhsub): Likewise. + (neon_vsubhn): Likewise. + (neon_vceq): Likewise. + (neon_vcge): Likewise. + (neon_vcgeu): Likewise. + (neon_vcgt): Likewise. + (neon_vcgtu): Likewise. + (neon_vcle): Likewise. + (neon_vclt): Likewise. + (neon_vcage): Likewise. + (neon_vcagt): Likewise. + (neon_vtst): Likewise. + (neon_vabd): Likewise. + (neon_vabdl): Likewise. + (neon_vaba): Likewise. + (neon_vabal): Likewise. + (neon_vmax): Likewise. + (neon_vmin): Likewise. + (neon_vpaddl): Likewise. + (neon_vpadal): Likewise. + (neon_vpmax): Likewise. + (neon_vpmin): Likewise. + (neon_vrecps): Likewise. + (neon_vrsqrts): Likewise. + (neon_vqabs): Likewise. + (neon_vqneg): Likewise. + (neon_vcls): Likewise. + (clz2): Likewise. + (popcount2): Likewise. + (neon_vrecpe): Likewise. + (neon_vrsqrte): Likewise. + (neon_vget_lane_sext_internal): Likewise. + (neon_vget_lane_zext_internal): Likewise. + (neon_vdup_n): Likewise. + (neon_vdup_nv2di): Likewise. + (neon_vdpu_lane_internal): Likewise. + (neon_vswp): Likewise. + (float2): Likewise. + (floatuns2): Likewise. + (fix_trunc)2): Likewise + (fixuns_trunc): Likewise. + (neon_vcvtv4sfv4hf): Likewise. + (neon_vcvtv4hfv4sf): Likewise. + (neon_vcvt_n): Likewise. + (neon_vmovn): Likewise. + (neon_vqmovn): Likewise. + (neon_vqmovun): Likewise. + (neon_vmovl): Likewise. + (neon_vmul_lane): Likewise. + (neon_vmull_lane): Likewise. + (neon_vqdmull_lane): Likewise. + (neon_vqdmulh_lane): Likewise. + (neon_vmla_lane): Likewise. + (neon_vmlal_lane): Likewise. + (neon_vqdmlal_lane): Likewise. + (neon_vmls_lane): Likewise. + (neon_vmlsl_lane): Likewise. + (neon_vqdmlsl_lane): Likewise. + (neon_vext): Likewise. + (neon_vrev64): Likewise. + (neon_vrev32): Likewise. + (neon_vrev16): Likewise. + (neon_vbsl_internal): Likewise. 
+ (neon_vshl): Likewise. + (neon_vqshl): Likewise. + (neon_vshr_n): Likewise. + (neon_vshrn_n): Likewise. + (neon_vqshrn_n): Likewise. + (neon_vqshrun_n): Likewise. + (neon_vshl_n): Likewise. + (neon_vqshl_n): Likewise. + (neon_vqshlu_n): Likewise. + (neon_vshll_n): Likewise. + (neon_vsra_n): Likewise. + (neon_vsri_n): Likewise. + (neon_vsli_n): Likewise. + (neon_vtbl1v8qi): Likewise. + (neon_vtbl2v8qi): Likewise. + (neon_vtbl3v8qi): Likewise. + (neon_vtbl4v8qi): Likewise. + (neon_vtbx1v8qi): Likewise. + (neon_vtbx2v8qi): Likewise. + (neon_vtbx3v8qi): Likewise. + (neon_vtbx4v8qi): Likewise. + (neon_vtrn_internal): Likewise. + (neon_vzip_internal): Likewise. + (neon_vuzp_internal): Likewise. + (neon_vld1): Likewise. + (neon_vld1_lane): Likewise. + (neon_vld1_dup): Likewise. + (neon_vld1_dupv2di): Likewise. + (neon_vst1): Likewise. + (neon_vst1_lane): Likewise. + (neon_vld2): Likewise. + (neon_vld2_lane): Likewise. + (neon_vld2_dup): Likewise. + (neon_vst2): Likewise. + (neon_vst2_lane): Likewise. + (neon_vld3): Likewise. + (neon_vld3qa): Likewise. + (neon_vld3qb): Likewise. + (neon_vld3_lane): Likewise. + (neon_vld3_dup): Likewise. + (neon_vst3): Likewise. + (neon_vst3qa): Likewise. + (neon_vst3qb): Likewise. + (neon_vst3_lane): Likewise. + (neon_vld4): Likewise. + (neon_vld4qa): Likewise. + (neon_vld4qb): Likewise. + (neon_vld4_lane): Likewise. + (neon_vld4_dup): Likewise. + (neon_vst4): Likewise. + (neon_vst4qa): Likewise. + (neon_vst4qb): Likewise. + (neon_vst4_lane): Likewise. + (neon_vec_unpack_lo_): Likewise. + (neon_vec_unpack_hi_): Likewise. + (neon_vec_mult_lo_): Likewise. + (neon_vec_mult_hi_): Likewise. + (neon_vec_shiftl_): Likewise. + (neon_unpack_): Likewise. + (neon_vec_mult_): Likewise. + (vec_pack_trunc_): Likewise. + (neon_vec_pack_trunk_): Likewise. + (neon_vabd_2): Likewise. + (neon_vabd_3): Likewise. + * config/arm/vfp.md (arm_movsi_vfp): Update for attribute changes. + (thumb2_movsi_vfp): Likewise. + (movdi_vfp): Likewise. 
+ (movdi_vfp_cortexa8): Likewise. + (movhf_vfp_neon): Likewise. + (movhf_vfp): Likewise. + (movsf_vfp): Likewise. + (thumb2_movsf_vfp): Likewise. + (movdf_vfp): Likewise. + (thumb2_movdf_vfp): Likewise. + (movsfcc_vfp): Likewise. + (thumb2_movsfcc_vfp): Likewise. + (movdfcc_vfp): Likewise. + (thumb2_movdfcc_vfp): Likewise. + * config/arm/arm.c (cortexa7_older_only): Update for attribute change. + * config/arm/arm1020e.md (v10_c2v): Update for attribute change. + (v10_v2c): Likewise. + * config/arm/cortex-a15-neon.md (cortex_a15_neon_int_1): Update for + attribute change. + (cortex_a15_neon_int_2): Likewise. + (cortex_a15_neon_int_3): Likewise. + (cortex_a15_neon_int_4): Likewise. + (cortex_a15_neon_int_5): Likewise. + (cortex_a15_neon_vqneg_vqabs): Likewise. + (cortex_a15_neon_vmov): Likewise. + (cortex_a15_neon_vaba): Likewise. + (cortex_a15_neon_vaba_qqq): Likewise. + (cortex_a15_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a15_neon_mul_qqq_8_16_32_ddd_32): Likewise. + (cortex_a15_neon_mul_qdd_64_32_long_qqd_16_ddd_32_\ + scalar_64_32_long_scalar): Likewise. + (cortex_a15_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a15_neon_mla_qqq_8_16): Likewise. + (cortex_a15_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_\ + lotype_qdd_64_32_long): Likewise. + (cortex_a15_neon_mla_qqq_32_qqd_32_scalar): Likewise. + (cortex_a15_neon_mul_ddd_16_scalar_32_16_long_scalar): Likewise. + (cortex_a15_neon_mul_qqd_32_scalar): Likewise. + (cortex_a15_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar): Likewise. + (cortex_a15_neon_shift_1): Likewise. + (cortex_a15_neon_shift_2): Likewise. + (cortex_a15_neon_shift_3): Likewise. + (cortex_a15_neon_vshl_ddd): Likewise. + (cortex_a15_neon_vqshl_vrshl_vqrshl_qqq): Likewise. + (cortex_a15_neon_vsra_vrsra): Likewise. + (cortex_a15_neon_fp_vadd_ddd_vabs_dd): Likewise. + (cortex_a15_neon_fp_vadd_qqq_vabs_qq): Likewise. + (cortex_a15_neon_fp_vmul_ddd): Likewise. + (cortex_a15_neon_fp_vmul_qqd): Likewise.
+ (cortex_a15_neon_fp_vmla_ddd): Likewise. + (cortex_a15_neon_fp_vmla_qqq): Likewise. + (cortex_a15_neon_fp_vmla_ddd_scalar): Likewise. + (cortex_a15_neon_fp_vmla_qqq_scalar): Likewise. + (cortex_a15_neon_fp_vrecps_vrsqrts_ddd): Likewise. + (cortex_a15_neon_fp_vrecps_vrsqrts_qqq): Likewise. + (cortex_a15_neon_bp_simple): Likewise. + (cortex_a15_neon_bp_2cycle): Likewise. + (cortex_a15_neon_bp_3cycle): Likewise. + (cortex_a15_neon_vld1_1_2_regs): Likewise. + (cortex_a15_neon_vld1_3_4_regs): Likewise. + (cortex_a15_neon_vld2_2_regs_vld1_vld2_all_lanes): Likewise. + (cortex_a15_neon_vld2_4_regs): Likewise. + (cortex_a15_neon_vld3_vld4): Likewise. + (cortex_a15_neon_vst1_1_2_regs_vst2_2_regs): Likewise. + (cortex_a15_neon_vst1_3_4_regs): Likewise. + (cortex_a15_neon_vst2_4_regs_vst3_vst4): Likewise. + (cortex_a15_neon_vst3_vst4): Likewise. + (cortex_a15_neon_vld1_vld2_lane): Likewise. + (cortex_a15_neon_vld3_vld4_lane): Likewise. + (cortex_a15_neon_vst1_vst2_lane): Likewise. + (cortex_a15_neon_vst3_vst4_lane): Likewise. + (cortex_a15_neon_vld3_vld4_all_lanes): Likewise. + (cortex_a15_neon_ldm_2): Likewise. + (cortex_a15_neon_stm_2): Likewise. + (cortex_a15_neon_mcr): Likewise. + (cortex_a15_neon_mcr_2_mcrr): Likewise. + (cortex_a15_neon_mrc): Likewise. + (cortex_a15_neon_mrrc): Likewise. + * config/arm/cortex-a15.md (cortex_a15_alu): Update for attribute + change. + (cortex_a15_alu_shift): Likewise. + (cortex_a15_alu_shift_reg): Likewise. + (cortex_a15_mult32): Likewise. + (cortex_a15_mult64): Likewise. + (cortex_a15_block): Likewise. + (cortex_a15_branch): Likewise. + (cortex_a15_load1): Likewise. + (cortex_a15_load3): Likewise. + (cortex_a15_store1): Likewise. + (cortex_a15_store3): Likewise. + (cortex_a15_call): Likewise. + * config/arm/cortex-a5.md (cortex_a5_r2f): Update for attribute + change. + (cortex_a5_f2r): Likewise. + * config/arm/cortex-a53.md (cortex_a53_r2f): Update for attribute + change. + (cortex_a53_f2r): Likewise.
+ * config/arm/cortex-a7.md + (cortex_a7_branch): Update for attribute change. + (cortex_a7_call): Likewise. + (cortex_a7_alu_imm): Likewise. + (cortex_a7_alu_reg): Likewise. + (cortex_a7_alu_shift): Likewise. + (cortex_a7_mul): Likewise. + (cortex_a7_load1): Likewise. + (cortex_a7_store1): Likewise. + (cortex_a7_load2): Likewise. + (cortex_a7_store2): Likewise. + (cortex_a7_load3): Likewise. + (cortex_a7_store3): Likewise. + (cortex_a7_load4): Likewise. + (cortex_a7_store4): Likewise. + (cortex_a7_fpalu): Likewise. + (cortex_a7_fconst): Likewise. + (cortex_a7_fpmuls): Likewise. + (cortex_a7_neon_mul): Likewise. + (cortex_a7_fpmacs): Likewise. + (cortex_a7_neon_mla): Likewise. + (cortex_a7_fpmuld): Likewise. + (cortex_a7_fpmacd): Likewise. + (cortex_a7_fpfmad): Likewise. + (cortex_a7_fdivs): Likewise. + (cortex_a7_fdivd): Likewise. + (cortex_a7_r2f): Likewise. + (cortex_a7_f2r): Likewise. + (cortex_a7_f_flags): Likewise. + (cortex_a7_f_loads): Likewise. + (cortex_a7_f_loadd): Likewise. + (cortex_a7_f_stores): Likewise. + (cortex_a7_f_stored): Likewise. + (cortex_a7_neon): Likewise. + * config/arm/cortex-a8-neon.md + (cortex_a8_neon_mrc): Update for attribute change. + (cortex_a8_neon_mrrc): Likewise. + (cortex_a8_neon_int_1): Likewise. + (cortex_a8_neon_int_2): Likewise. + (cortex_a8_neon_int_3): Likewise. + (cortex_a8_neon_int_4): Likewise. + (cortex_a8_neon_int_5): Likewise. + (cortex_a8_neon_vqneg_vqabs): Likewise. + (cortex_a8_neon_vmov): Likewise. + (cortex_a8_neon_vaba): Likewise. + (cortex_a8_neon_vaba_qqq): Likewise. + (cortex_a8_neon_vsma): Likewise. + (cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a8_neon_mul_qqq_8_16_32_ddd_32): Likewise. + (cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar): + Likewise. + (cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a8_neon_mla_qqq_8_16): Likewise. + (cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_\ + long_scalar_qdd_64_32_long): Likewise.
+ (cortex_a8_neon_mla_qqq_32_qqd_32_scalar): Likewise. + (cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar): Likewise. + (cortex_a8_neon_mul_qqd_32_scalar): Likewise. + (cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar): Likewise. + (cortex_a8_neon_shift_1): Likewise. + (cortex_a8_neon_shift_2): Likewise. + (cortex_a8_neon_shift_3): Likewise. + (cortex_a8_neon_vshl_ddd): Likewise. + (cortex_a8_neon_vqshl_vrshl_vqrshl_qqq): Likewise. + (cortex_a8_neon_vsra_vrsra): Likewise. + (cortex_a8_neon_fp_vadd_ddd_vabs_dd): Likewise. + (cortex_a8_neon_fp_vadd_qqq_vabs_qq): Likewise. + (cortex_a8_neon_fp_vsum): Likewise. + (cortex_a8_neon_fp_vmul_ddd): Likewise. + (cortex_a8_neon_fp_vmul_qqd): Likewise. + (cortex_a8_neon_fp_vmla_ddd): Likewise. + (cortex_a8_neon_fp_vmla_qqq): Likewise. + (cortex_a8_neon_fp_vmla_ddd_scalar): Likewise. + (cortex_a8_neon_fp_vmla_qqq_scalar): Likewise. + (cortex_a8_neon_fp_vrecps_vrsqrts_ddd): Likewise. + (cortex_a8_neon_fp_vrecps_vrsqrts_qqq): Likewise. + (cortex_a8_neon_bp_simple): Likewise. + (cortex_a8_neon_bp_2cycle): Likewise. + (cortex_a8_neon_bp_3cycle): Likewise. + (cortex_a8_neon_ldr): Likewise. + (cortex_a8_neon_str): Likewise. + (cortex_a8_neon_vld1_1_2_regs): Likewise. + (cortex_a8_neon_vld1_3_4_regs): Likewise. + (cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes): Likewise. + (cortex_a8_neon_vld2_4_regs): Likewise. + (cortex_a8_neon_vld3_vld4): Likewise. + (cortex_a8_neon_vst1_1_2_regs_vst2_2_regs): Likewise. + (cortex_a8_neon_vst1_3_4_regs): Likewise. + (cortex_a8_neon_vst2_4_regs_vst3_vst4): Likewise. + (cortex_a8_neon_vst3_vst4): Likewise. + (cortex_a8_neon_vld1_vld2_lane): Likewise. + (cortex_a8_neon_vld3_vld4_lane): Likewise. + (cortex_a8_neon_vst1_vst2_lane): Likewise. + (cortex_a8_neon_vst3_vst4_lane): Likewise. + (cortex_a8_neon_vld3_vld4_all_lanes): Likewise. + (cortex_a8_neon_mcr): Likewise. + (cortex_a8_neon_mcr_2_mcrr): Likewise. + * config/arm/cortex-a8.md (cortex_a8_alu): Update for attribute + change. 
+ * config/arm/cortex-a9-neon.md (ca9_neon_mrc): Update for attribute + change. + (ca9_neon_mrrc): Likewise. + (cortex_a9_neon_int_1): Likewise. + (cortex_a9_neon_int_2): Likewise. + (cortex_a9_neon_int_3): Likewise. + (cortex_a9_neon_int_4): Likewise. + (cortex_a9_neon_int_5): Likewise. + (cortex_a9_neon_vqneg_vqabs): Likewise. + (cortex_a9_neon_vmov): Likewise. + (cortex_a9_neon_vaba): Likewise. + (cortex_a9_neon_vaba_qqq): Likewise. + (cortex_a9_neon_vsma): Likewise. + (cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a9_neon_mul_qqq_8_16_32_ddd_32): Likewise. + (cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar): + Likewise. + (cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long): Likewise. + (cortex_a9_neon_mla_qqq_8_16): Likewise. + (cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_\ + long_scalar_qdd_64_32_long): Likewise. + (cortex_a9_neon_mla_qqq_32_qqd_32_scalar): Likewise. + (cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar): Likewise. + (cortex_a9_neon_mul_qqd_32_scalar): Likewise. + (cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar): Likewise. + (cortex_a9_neon_shift_1): Likewise. + (cortex_a9_neon_shift_2): Likewise. + (cortex_a9_neon_shift_3): Likewise. + (cortex_a9_neon_vshl_ddd): Likewise. + (cortex_a9_neon_vqshl_vrshl_vqrshl_qqq): Likewise. + (cortex_a9_neon_vsra_vrsra): Likewise. + (cortex_a9_neon_fp_vadd_ddd_vabs_dd): Likewise. + (cortex_a9_neon_fp_vadd_qqq_vabs_qq): Likewise. + (cortex_a9_neon_fp_vsum): Likewise. + (cortex_a9_neon_fp_vmul_ddd): Likewise. + (cortex_a9_neon_fp_vmul_qqd): Likewise. + (cortex_a9_neon_fp_vmla_ddd): Likewise. + (cortex_a9_neon_fp_vmla_qqq): Likewise. + (cortex_a9_neon_fp_vmla_ddd_scalar): Likewise. + (cortex_a9_neon_fp_vmla_qqq_scalar): Likewise. + (cortex_a9_neon_fp_vrecps_vrsqrts_ddd): Likewise. + (cortex_a9_neon_fp_vrecps_vrsqrts_qqq): Likewise. + (cortex_a9_neon_bp_simple): Likewise. + (cortex_a9_neon_bp_2cycle): Likewise. 
+ (cortex_a9_neon_bp_3cycle): Likewise. + (cortex_a9_neon_ldr): Likewise. + (cortex_a9_neon_str): Likewise. + (cortex_a9_neon_vld1_1_2_regs): Likewise. + (cortex_a9_neon_vld1_3_4_regs): Likewise. + (cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes): Likewise. + (cortex_a9_neon_vld2_4_regs): Likewise. + (cortex_a9_neon_vld3_vld4): Likewise. + (cortex_a9_neon_vst1_1_2_regs_vst2_2_regs): Likewise. + (cortex_a9_neon_vst1_3_4_regs): Likewise. + (cortex_a9_neon_vst2_4_regs_vst3_vst4): Likewise. + (cortex_a9_neon_vst3_vst4): Likewise. + (cortex_a9_neon_vld1_vld2_lane): Likewise. + (cortex_a9_neon_vld3_vld4_lane): Likewise. + (cortex_a9_neon_vst1_vst2_lane): Likewise. + (cortex_a9_neon_vst3_vst4_lane): Likewise. + (cortex_a9_neon_vld3_vld4_all_lanes): Likewise. + (cortex_a9_neon_mcr): Likewise. + (cortex_a9_neon_mcr_2_mcrr): Likewise. + * config/arm/cortex-a9.md (cortex_a9_dp): Update for attribute change. + (cortex_a9_fps): Likewise. + * config/arm/cortex-m4-fpu.md (cortex_m4_vmov_2): Update for attribute + change. + (cortex_m4_fmuls): Likewise. + * config/arm/cortex-r4f.md (cortex_r4_mcr): Update for attribute + change. + (cortex_r4_mrc): Likewise. + * config/arm/iterators.md: Update comment referring to neon_type. + * config/arm/iwmmxt.md + (iwmmxt_arm_movdi): Update for attribute change. + (iwmmxt_movsi_insn): Likewise. + * config/arm/marvell-pj4.md + (pj4_vfp_to_core): Update for attribute change. + (pj4_core_to_vfp): Likewise. + * config/arm/neon-schedgen.ml (emit_insn_reservations): Update for + attribute change. + * config/arm/vfp11.md (vfp_fload): Update for attribute change. + (vfp_fstore): Likewise. + * doc/md.texi: Change references to neon_type to refer to type. + +2013-09-04 Dodji Seketeli + + * tree.h (DECL_BUILT_IN): Fix typo in comment. + +2013-09-04 David Edelsohn + + * config/rs6000/rs6000.h (ASM_OUTPUT_DEF_FROM_DECLS): Only emit + lglobl if not weak. 
+ +2013-09-04 Easwaran Raman + + PR middle-end/57370 + * tree-ssa-reassoc.c (get_stmt_uid_with_default): New function. + (build_and_add_sum): Use it. + (appears_later_in_bb): Simplify code. + +2013-09-04 Teresa Johnson + + * dumpfile.c (dump_finish): Don't close stderr/stdout. + +2013-09-04 James Greenhalgh + + * config/aarch64/arm_neon.h (vaddvq_64): Fix return types. + +2013-09-04 Jan Hubicka + + * Makefile.in (ipa-devirt.o): Add dependency on diagnostic.h + * ipa-devirt.c: Include diagnostic.h + (odr_type_d): Add types and types_set. + (hash_type_name): Work for types with vtables during LTO. + (odr_hasher::remove): Fix comment; destroy types_set. + (add_type_duplicate): New function. + (get_odr_type): Use it. + (dump_type_inheritance_graph): Dump type duplicates. + * ipa.c (symtab_remove_unreachable_nodes): Build type inheritance + graph. + * tree.c (types_same_for_odr): Give exact answers on types with + virtual tables. + +2013-09-04 Dodji Seketeli + + * tree.h (DECL_BUILT_IN, DECL_IS_BUILTIN): Add more comments + explaining their differences. + +2013-09-04 Sandeep Kumar Singh + + * config/rx/rx.h: Add option -mcpu for target variants RX100 and RX200. + +2013-09-03 Jeff Law + + * tree-ssa-threadedge.c (thread_across_edge): Record entire path + when not threading through a joiner block. Pass joiner/no joiner + state to register_jump_thread. + * tree-ssa-threadupdate.c (register_jump_thread): Get joiner/no joiner + state from argument rather than implying on path length. + Dump the entire jump thread path into debugging dump. + * tree-flow.h (register_jump_thread): Update prototype. + +2013-08-29 Xinliang David Li + + * tree-vect-data-refs.c (vect_compute_data_ref_alignment): + Remove a trivial gcc_assert. + +2013-08-29 Xinliang David Li + + * tree-vect-slp.c (destroy_bb_vec_info): Data ref cleanup. + * tree-vect-loop.c (destroy_bb_vec_info): Ditto. + * tree-vect-data-refs.c (vect_compute_data_ref_alignment): + Delay base decl alignment adjustment.
+ * tree-vectorizer.c (vect_destroy_datarefs): New function. + * tree-vectorizer.h: New data structure. + (set_dr_misalignment): New function. + (dr_misalignment): Ditto. + * tree-vect-stmts.c (vectorizable_store): Ensure alignment. + (vectorizable_load): Ditto. + (ensure_base_align): New function. + (vectorize_loops): Add dbg_cnt support. + (execute_vect_slp): Ditto. + * dbgcnt.def: New debug counter. + * Makefile: New dependency. + +2013-09-03 Meador Inge + + Revert: + + 2013-08-30 Meador Inge + + * tree-vrp.c (check_array_ref): Bail out on zero-length arrays. + +2013-09-03 David Edelsohn + + * config/rs6000/rs6000.h (ASM_OUTPUT_DEF_FROM_DECLS): Emit lglobl for + function descriptor. + +2013-09-03 Richard Biener + + * tree-affine.c (add_elt_to_tree): Fix association issue, + avoid useless converts and make sure to always return a + properly typed result. + +2013-09-03 Richard Biener + + PR middle-end/57656 + * fold-const.c (negate_expr_p): Fix division case. + (negate_expr): Likewise. + +2013-09-03 Richard Biener + + PR lto/58285 + * tree-streamer-out.c: Include tm.h. + * Makefile.in (tree-streamer-out.o): Depend on $(TM_H). + +2013-09-03 Jan Hubicka + + * tree-profile.c (tree_profiling): Cleanup CFG when done. + +2013-09-03 Alan Modra + + * config.gcc (powerpc*-*-linux*): Add support for little-endian + multilibs to big-endian target and vice versa. + * config/rs6000/t-linux64: Use := assignment on all vars. + (MULTILIB_EXTRA_OPTS): Remove fPIC. + (MULTILIB_OSDIRNAMES): Specify using mapping from multilib_options. + * config/rs6000/t-linux64le: New file. + * config/rs6000/t-linux64bele: New file. + * config/rs6000/t-linux64lebe: New file. + +2013-09-02 Jan Hubicka + + * ipa-inline-transform.c (inline_transform): Do not + optimize_inline_calls when not optimizing. + +2013-09-02 Jan Hubicka + + * lto-symtab.c (lto_symtab_merge_symbols): Add comments; merge + duplicated nodes for assembler names. 
+ * symtab.c (symtab_unregister_node): Do not attempt to unlink + hard registers from assembler name hash. + +2013-09-02 Jan Hubicka + + * ipa-split.c (execute_split_functions): Split externally visible + functions called once. + +2013-09-02 Martin Jambor + + PR ipa/58106 + * ipa-prop.c (ipa_edge_duplication_hook): Always put new rdesc to the + linked list. When finding the correct duplicate, also consider + the caller in addition to its inlined_to node. + +2013-09-02 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def + (dup_lane_scalar): Remove. + * config/aarch64/aarch64-simd.md + (aarch64_simd_dup): Add 'w->w' alternative. + (aarch64_dup_lane): Allow for VALL. + (aarch64_dup_lane_scalar): Remove. + (aarch64_dup_lane_): New. + (aarch64_get_lane_signed): Add w->w alternative. + (aarch64_get_lane_unsigned): Likewise. + (aarch64_get_lane): Likewise. + * config/aarch64/aarch64.c (aarch64_evpc_dup): New. + (aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup. + * config/aarch64/iterators.md (VSWAP_WIDTH): New. + (VCON): Change container of V2SF. + (vswap_width_name): Likewise. + * config/aarch64/arm_neon.h + (__aarch64_vdup_lane_any): New. + (__aarch64_vdup_lane_<8,16,32,64>): Likewise. + (vdup_n_<8,16,32,64>): Convert to C implementation. + (vdup_lane_<8,16,32,64>): Likewise. + +2013-09-02 Eric Botcazou + + PR middle-end/56382 + * expr.c (emit_move_complex): Do not move complex FP values as parts if + the source or the destination is a single hard register. + +2013-09-02 Richard Biener + + PR middle-end/57511 + * tree-scalar-evolution.c (instantiate_scev_name): Allow + non-linear SCEVs. + +2013-09-02 Richard Biener + + * tree-affine.c (add_elt_to_tree): Avoid converting all pointer + arithmetic to sizetype. + +2013-09-02 Bin Cheng + + * tree-ssa-loop-ivopts.c (set_autoinc_for_original_candidates): + Find auto-increment use both before and after candidate. + +2013-09-02 Marek Polacek + + * Makefile.in (ubsan.o): Add $(TM_P_H) dependency.
+ +2013-09-01 Jan Hubicka + + * Makefile.in: Add ipa-profile.o + (ipa.o, ipa-devirt.o, ipa-inline-analysis.o): Adjust dependencies. + * cgraph.c (struct cgraph_propagate_frequency_data, + cgraph_propagate_frequency_1, cgraph_propagate_frequency): Move to + ipa-profile.c; replace cgraph_ by ipa_ prefix. + * cgraph.h (cgraph_propagate_frequency): Remove. + * ipa-inline-analysis.c: Include ipa-utils.h; drop duplicated cfgloop.h. + (inline_update_callee_summaries): Update. + * ipa-profile.c: New file. + * ipa-utils.h (ipa_propagate_frequency): Declare. + * ipa.c: Do not include pointer-set.h, hash-table.h, lto-streamer.h, + data-streamer.h, value-prof.h + (symtab_remove_unreachable_nodes): Update profile. + (struct histogram_entry, histogram, histogram_pool, histogram_hash, + account_time_size, cmp_counts, dump_histogram, + ipa_profile_generate_summary, ipa_profile_write_summary, + ipa_profile_read_summary, ipa_profile, gate_ipa_profile, + pass_data_ipa_profile, pass_ipa_profile, make_pass_ipa_profile): + Move to ipa-profile.c + +2013-09-01 John David Anglin + + * config/pa/pa.md: Allow "const 0" operand 1 in "scc" insns. + +2013-09-01 Jan Hubicka + + * common.opt (fdevirtualize-speculatively): New option. + * invoke.texi (fdevirtualize-speculatively): Document. + * ipa-devirt.c: Include ipa-inline.h + (likely_target_p): New function. + (ipa_devirt): New function. + (gate_ipa_devirt): New function. + (pass_data_ipa_devirt): New static var. + (pass_ipa_devirt): Likewise. + (make_pass_ipa_devirt): New function. + * opts.c (default_options): Add OPT_fdevirtualize_speculatively. + (common_handle_option): Disable devirtualization when + value range profiling is available. + * passes.def (pass_ipa_devirt): Add. + * timevar.def (TV_IPA_DEVIRT): New timevar. + * tree-pass.h (make_pass_ipa_devirt): Declare. + +2013-09-01 Iain Sandoe + + * config/darwin.h (LINK_COMMAND_SPEC_A): Revise sanitizer specs to + include sanitize(undefined).
+ +2013-08-31 Diego Novillo + + * Makefile.in (TREE_CORE_H): Define. + (TREE_H): Use. + (GTFILES): Add tree-core.h. + * builtins.c (built_in_class_names): Use BUILT_IN_LAST to + size the array. + * tree-core.h: New file. + Move all data structures, enum, typedefs, global + declarations and constants from ... + * tree.h: ... here. + +2013-08-31 Jan Hubicka + + * builtins.c (expand_builtin): Do not early exit for gcov + instrumented functions. + +2013-08-31 Marek Polacek + + * ubsan.c: Include tm_p.h. + +2013-08-31 Jan Hubicka + + * gimple-streamer-in.c (input_gimple_stmt): Silence parameter unused + warning. + + * cgraph.c (cgraph_get_body): Update call of lto_input_function_body. + * gimple-streamer-in.c (input_gimple_stmt): Move sanity check to ... + * tree-cfg.c (verify_gimple_label): ... here. + * ipa-utils.c: Include lto-streamer.h, ipa-inline.h + (ipa_merge_profiles): New function. + * lto-streamer-in.c (lto_read_body): Take node instead of fn_decl. + (lto_input_function_body): Likewise. + * ipa-utils.h (ipa_merge_profiles): Declare. + * lto-streamer.h (lto_input_function_body): Update prototype. + (emit_label_in_global_context_p): Remove. + * lto-symtab.c: Include ipa-utils.h + (lto_cgraph_replace_node): Use ipa_merge_profiles. + +2013-08-31 Jan Hubicka + + * cgraph.c (cgraph_speculative_call_info): Fix ref lookup. + +2013-08-31 Jan Hubicka + + * basic-block.h (apply_scale): Make scale parameter gcov_type. + +2013-08-31 Uros Bizjak + + * config/alpha/alpha.c (alpha_emit_conditional_move): Update + "cmp" RTX before signed_comparison_operator check to account + for "code" changes. + +2013-08-30 Jan Hubicka + + * ipa-prop.c (ipa_set_jf_known_type): Check that we add only records. + (detect_type_change_1): Rename to ... + (detect_type_change): ... this one; early return on non-polymorphic + types. + (detect_type_change_ssa): Add comp_type parameter; update + use of detect_type_change.
+ (compute_complex_assign_jump_func): Add param_type parameter; + update use of detect_type_change_ssa. + (compute_complex_ancestor_jump_func): Likewise. + (ipa_get_callee_param_type): New function. + (ipa_compute_jump_functions_for_edge): Compute parameter type; + update calls to the jump function computation functions. + +2013-08-30 Teresa Johnson + Steven Bosscher + + * cfgrtl.c (fixup_new_cold_bb): New routine. + (commit_edge_insertions): Invoke fixup_partitions. + (find_partition_fixes): New routine. + (fixup_partitions): Ditto. + (verify_hot_cold_block_grouping): Update comments. + (rtl_verify_edges): Invoke find_partition_fixes. + (rtl_verify_bb_pointers): Update comments. + (rtl_verify_bb_layout): Ditto. + * basic-block.h (probably_never_executed_edge_p): Declare. + (fixup_partitions): Ditto. + * cfgcleanup.c (try_optimize_cfg): Invoke fixup_partitions. + * bb-reorder.c (sanitize_hot_paths): New function. + (find_rarely_executed_basic_blocks_and_crossing_edges): Invoke + sanitize_hot_paths. + * predict.c (probably_never_executed_edge_p): New routine. + * cfg.c (check_bb_profile): Add partition insanity warnings. + +2013-08-30 Meador Inge + + * tree-vrp.c (check_array_ref): Bail out on zero-length arrays. + +2013-08-30 Marek Polacek + + * Makefile.in (ubsan.o): Add. + (c-family/c-ubsan.o): Add. + (builtins.o): Add ubsan.h dependency. + * ubsan.h: New file. + * ubsan.c: New file. + * common.opt: Add -fsanitize=undefined option. + (flag_sanitize): Add variable. + (fsanitize=): Add option. Add Driver. + (fsanitize=thread): Remove option. + (fsanitize=address): Likewise. + (static-libubsan): New option. + * doc/invoke.texi: Document the new flag and -static-libubsan. + * sanitizer.def (DEF_SANITIZER_BUILTIN): Define. + (BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE): Define. + * builtin-attrs.def (ATTR_COLD): Define. + (ATTR_COLD_NOTHROW_LEAF_LIST): Define. + * builtins.def (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW, + BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS): Define. 
+ * flag-types.h (sanitize_code): New enum. + * opts.c (common_handle_option): Parse command line arguments + of -fsanitize=. Add -fsanitize=unreachable option. + * varasm.c (get_variable_section): Adjust. + (assemble_noswitch_variable): Likewise. + (assemble_variable): Likewise. + (output_constant_def_contents): Likewise. + (categorize_decl_for_section): Likewise. + (place_block_symbol): Likewise. + (output_object_block): Likewise. + * builtins.def: Likewise. + * toplev.c (compile_file): Likewise. + (process_options): Likewise. + * cppbuiltin.c: Likewise. + * tsan.c (tsan_pass): Likewise. + (tsan_gate): Likewise. + (tsan_gate_O0): Likewise. + * cfgexpand.c (partition_stack_vars): Likewise. + (expand_stack_vars): Likewise. + (defer_stack_allocation): Likewise. + (expand_used_vars): Likewise. + * cfgcleanup.c (old_insns_match_p): Likewise. + * asan.c (asan_finish_file): Likewise. + (asan_instrument): Likewise. + (gate_asan): Likewise. + (initialize_sanitizer_builtins): Build BT_FN_VOID_PTR_PTR_PTR. + (ATTR_COLD_NOTHROW_LEAF_LIST): Define. + (asan_global_struct): Use pointer_sized_int_node instead + calling build_nonstandard_integer_type. + (initialize_sanitizer_builtins): Likewise. + (asan_finish_file): Likewise. + * gcc.c: Document %{%:function(args):X}. + (static_spec_functions): Add sanitize. + (handle_spec_function): Add retval_nonnull argument and if non-NULL, + store funcval != NULL there. + (do_spec_1): Adjust handle_spec_function caller. + (handle_braces): Allow %:function(args) as condition. + (sanitize_spec_function): New function. + (ADD_STATIC_LIBUBSAN_LIBS): Define. + (LIBUBSAN_SPEC): Likewise. + (LIBUBSAN_EARLY_SPEC): Likewise. + (SANITIZER_SPEC): Handle libubsan. + (SANITIZER_EARLY_SPEC): Likewise. + * config/darwin.h (LINK_COMMAND_SPEC_A): Use %:sanitize(address) + instead of fsanitize=address. + * config/arm/linux-eabi.h (ASAN_CC1_SPEC): Use %:sanitize(address) + instead of fsanitize=address*. + * builtins.c: Include ubsan.h. 
+ (fold_builtin_0): Instrument __builtin_unreachable. + * config/rs6000/rs6000.h (FRAME_GROWS_DOWNWARD): Use flag_sanitize + instead of flag_asan. + * tree.h (enum tree_index): Add TI_POINTER_SIZED_TYPE. + (pointer_sized_int_node): Define. + * tree.c (build_common_tree_nodes): Initialize pointer_sized_int_node. + +2013-08-30 Mike Stump + + * doc/install.texi (Prerequisites): Note regression in Tcl 8.6 + with RE patterns. + +2013-08-29 Jan Hubicka + + * cgraph.c (cgraph_function_body_availability): Handle weakref + correctly. + * passes.def: Remove pass_fixup_cfg. + * ipa-inline.c (ipa_inline): When not optimizing, do not inline; + track when we need to remove functions. + (gate_ipa_inline): Execute inlining always; add comment why. + (pass_data_ipa_inline): Remove TODO_remove_functions. + * ipa-inline-analysis.c (inline_generate_summary): When not optimizing + do not produce summaries. + * symtab.c (change_decl_assembler_name): Handle renaming of weakrefs. + (symtab_nonoverwritable_alias): Assert we are not called on weakref. + * varpool.c (cgraph_variable_initializer_availability): Fix weakrefs, + constant pool and vtable. + +2013-08-30 Tejas Belagod + + * config/aarch64/arm_neon.h (__AARCH64_UINT64_C, __AARCH64_INT64_C): + New arm_neon.h's internal macros to specify 64-bit constants. + Avoid using stdint.h's macros. + +2013-08-30 Joern Rennecke + + * recog.c (verify_changes): Verify that changes[i].old is non-zero + before applying REG_P. + +2013-08-30 Jakub Jelinek + + PR tree-optimization/58277 + * tree-ssa-strlen.c (strlen_enter_block): If do_invalidate gave up + after seeing too many stmts with vdef in between dombb and current + bb, invalidate everything. + +2013-08-30 Richard Biener + + * fold-const.c (fold_single_bit_test): Fix overflow test. + +2013-08-30 Eric Botcazou + + * function.c (assign_parm_setup_reg): For a parameter passed by pointer + and which can live in a register, always retrieve the value on entry. 
+ * var-tracking.c (add_stores): Treat the copy on entry for a parameter + passed by invisible reference specially. + (emit_notes_in_bb) : Emit notes before the instruction. + (vt_add_function_parameter): Correctly deal with a parameter passed by + invisible reference. + +2013-08-30 Jan Hubicka + + * tree.c (set_call_expr_flags): Fix handling of TM_PURE. + +2013-08-30 Richard Biener + + PR tree-optimization/58228 + * tree-vect-data-refs.c (vect_analyze_data_ref_access): Do not + allow invariant loads in nested loop vectorization. + +2013-08-30 Richard Biener + + PR tree-optimization/58223 + * tree-loop-distribution.c (has_anti_dependence): Rename to ... + (has_anti_or_output_dependence): ... this and adjust to also + look for output dependences. + (mark_nodes_having_upstream_mem_writes): Adjust. + (rdg_flag_uses): Likewise. + +2013-08-30 Richard Biener + + PR tree-optimization/58010 + * tree-vect-loop.c (vect_create_epilog_for_reduction): Remove + assert that we have a loop-closed PHI. + +2013-08-29 Jan Hubicka + + * lto-symtab.c (lto_cgraph_replace_node): Free decl_in_state. + * cgraph.c (cgraph_release_function_body): Free decl_in_state. + * lto-section-in.c (lto_free_function_in_decl_state): New function. + (lto_free_function_in_decl_state_for_node): New function. + +2013-08-29 Xinliang David Li + + * loop-unroll.c (report_unroll_peel): Minor message change. + * tree-vect-loop-manip.c (vect_do_peeling_for_alignment): + Emit alignment peeling message with default -fopt-info. + (vect_loop_versioning): Emit loop version info message. + * tree-vectorizer.c (vectorize_loops): Minor message change. + (execute_vect_slp): Ditto. + +2013-08-29 Eric Botcazou + + * cgraphclones.c (cgraph_create_virtual_clone): Compute the DECL_NAME + of the clone from the DECL_NAME of the original function. + +2013-08-29 Oleg Endo + + * passes.c (register_pass): Add overload. + * tree-pass.h (register_pass): Forward declare it. Add comment. 
+ +2013-08-29 Jan Hubicka + + * lto-streamer-out.c (hash_tree): Stream DECL_FINAL_P, + DECL_CXX_CONSTRUCTOR_P, DECL_CXX_DESTRUCTOR_P and TYPE_FINAL_P. + * lto-streamer-in.c (unpack_ts_decl_with_vis_value_fields): Stream + DECL_FINAL_P, DECL_CXX_CONSTRUCTOR_P and DECL_CXX_DESTRUCTOR_P. + (unpack_ts_type_common_value_fields): Stream TYPE_FINAL_P. + * tree-streamer-out.c (pack_ts_decl_with_vis_value_fields): + Add DECL_FINAL_P, DECL_CXX_CONSTRUCTOR_P and DECL_CXX_DESTRUCTOR_P. + (pack_ts_type_common_value_fields): Add TYPE_FINAL_P. + +2013-08-29 Teresa Johnson + + * dumpfile.c (dump_loc): Output column number. + * dumpfile.h (OPTGROUP_OTHER): Add and enable under OPTGROUP_ALL. + * doc/invoke.texi: Document optall -fopt-info flag. + * profile.c (read_profile_edge_counts): Use new dump framework. + (compute_branch_probabilities): Ditto. + * passes.c (pass_manager::register_one_dump_file): Use OPTGROUP_OTHER + when pass not in any opt group. + * pass_manager.h (pass_manager::get_pass_profile): New method. + * value-prof.c (check_counter): Use new dump framework. + (check_ic_target): Ditto. + * coverage.c (get_coverage_counts): Ditto. + (coverage_init): Setup new dump framework. + +2013-08-29 Richard Biener + + PR tree-optimization/58246 + * tree-ssa-dce.c (mark_aliased_reaching_defs_necessary_1): Properly + handle the dominance check inside a basic-block. + +2013-08-29 Richard Biener + + PR middle-end/57287 + * tree-ssa-copy.c (may_propagate_copy): Allow propagating + of default defs that appear in abnormal PHI nodes. + +2013-08-29 Richard Biener + + PR tree-optimization/57685 + * tree-vrp.c (register_edge_assert_for_1): Recurse only for + single-use operands to avoid exponential complexity. + +2013-08-28 Dehao Chen + + * ipa-inline.c (edge_badness): Fix integer underflow. + +2013-08-28 Uros Bizjak + + * gtm-builtins.def (_ITM_free): Declare leaf. + +2013-08-28 Jakub Jelinek + + PR target/58067 + * config/i386/i386.md (*tls_global_dynamic_64_largepic): New insn. 
+ (*tls_local_dynamic_base_64_largepic): Likewise. + (tls_global_dynamic_64_, tls_local_dynamic_base_64_): + Remove predicate from call operand. + * config/i386/i386.c (ix86_tls_get_addr): For -mcmodel=large -fpic + return sum of pic_offset_table_rtx and UNSPEC_PLTOFF of the symbol. + +2013-08-28 Jeff Law + + * tree-ssa-threadedge.c (thread_around_empty_block): Remove + checks for the number of predecessors and successors allowed. + * tree-ssa-threadupdate.c (mark_threaded_blocks): Ignore requests + which require copying a joiner block if there is a request which + is a subpath that requires no joiner block copying. + +2013-08-28 Jan Hubicka + + * lto-streamer-out.c (DFS_write_tree_body): Drop + BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX and BINFO_VPTR_INDEX. + (hash_tree): Do not hash DECL_DEFER_OUTPUT, BINFO_INHERITANCE_CHAIN, + BINFO_SUBVTT_INDEX, BINFO_VPTR_INDEX, DECL_IN_TEXT_SECTION. + * tree-streamer-in.c (unpack_ts_decl_common_value_fields): + Do not read DECL_ERROR_ISSUED. + (unpack_ts_decl_with_vis_value_fields): Do not read + DECL_DEFER_OUTPUT. + (lto_input_ts_binfo_tree_pointers): Do not read + BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX, BINFO_VPTR_INDEX + * tree-streamer-out.c (pack_ts_decl_common_value_fields): Do not + write DECL_ERROR_ISSUED.. + (pack_ts_decl_with_vis_value_fields): Do not write + DECL_DEFER_OUTPUT. + (write_ts_binfo_tree_pointers): Do not read BINFO_INHERITANCE_CHAIN, + BINFO_SUBVTT_INDEX, BINFO_VPTR_INDEX. + * print-tree.c (print_node): Do not print DECL_ERROR_ISSUED. + * tree.h (tree_decl_common): Update comment. + (DECL_ERROR_ISSUED): Remove. + +2013-08-28 Jakub Jelinek + + PR middle-end/58257 + * omp-low.c (copy_var_decl): Copy over TREE_NO_WARNING flag. + +2013-08-28 Jan Hubicka + + * builtins.def (free): Declare leaf. + +2013-08-27 David Malcolm + + * gdbhooks.py: New. + * configure.ac (gdbinit.in): Add import of gcc/gdbhooks.py. + * configure: Regenerate. 
+ +2013-08-27 Martin Jambor + + * ipa-prop.h (ipa_pass_through_data): New field type_preserved. + (ipa_ancestor_jf_data): Likewise. + (ipa_get_jf_pass_through_agg_preserved): Fix comment typo. + (ipa_get_jf_pass_through_type_preserved): New function. + (ipa_get_jf_ancestor_agg_preserved): Fix comment typo. + (ipa_get_jf_ancestor_type_preserved): New function. + * ipa-cp.c (ipa_get_jf_pass_through_result): Honor type_preserved flag. + (ipa_get_jf_ancestor_result): Likewise. + (propagate_vals_accross_pass_through): Use + ipa_get_jf_pass_through_result to do all the value mappings. + * ipa-prop.c (ipa_print_node_jump_functions_for_edge): Dump the + type_preserved flag. + (ipa_set_jf_cst_copy): New function. + (ipa_set_jf_simple_pass_through): Set the type_preserved flag. + (ipa_set_jf_arith_pass_through): Likewise. + (ipa_set_ancestor_jf): Likewise. + (compute_complex_assign_jump_func): Set type_preserved instead of + punting. + (ipa_compute_jump_functions_for_edge): Likewise. + (combine_known_type_and_ancestor_jfs): Honor type_preserved. + (update_jump_functions_after_inlining): Update type_preserved. + Explicitly create jump functions when combining one with pass_through. + (ipa_write_jump_function): Stream the type_preserved flags. + (ipa_read_jump_function): Likewise. + +2013-08-27 Jakub Jelinek + Aldy Hernandez + + * Makefile.in (omp-low.o): Depend on $(TARGET_H). + * cfgloop.h (struct loop): Add safelen, force_vect, simduid. + * function.h (struct function): Add has_force_vect_loops and + has_simduid_loops. + * gimple-pretty-print.c (dump_gimple_omp_for): Handle GF_OMP_FOR_KIND*. + * gimple.c (gimple_build_omp_critical): Add KIND argument and + handle it. + * gimple.def: Update CLAUSES comments. + * gimple.h (enum gf_mask): Add GF_OMP_FOR_KIND_{FOR,SIMD}. + (gimple_build_omp_for): Add argument to prototype. + (gimple_omp_for_kind): New. + (gimple_omp_for_set_kind): New. + * gimplify.c (enum gimplify_omp_var_data): Add GOVD_LINEAR to + GOVD_DATA_SHARE_CLASS. 
+ (enum omp_region_type): Add ORT_SIMD. + (gimple_add_tmp_var): Handle ORT_SIMD. + (gimplify_var_or_parm_decl): Same. + (is_gimple_stmt): Same. + (omp_firstprivatize_variable): Same. + (omp_add_variable): Only use splay_tree_insert if lookup failed. + (omp_notice_variable): Handle ORT_SIMD. + (omp_is_private): Add SIMD argument and handle it as well as ORT_SIMD. + (omp_check_private): Handle ORT_SIMD. + (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_LINEAR and + OMP_CLAUSE_SAFELEN. + (gimplify_adjust_omp_clauses_1): Handle GOVD_LINEAR. + Handle OMP_CLAUSE_LASTPRIVATE. + (gimplify_adjust_omp_clauses): Handle OMP_CLAUSE_LINEAR and + OMP_CLAUSE_SAFELEN. + (gimplify_omp_for): Handle OMP_SIMD and OMP_CLAUSE_LINEAR. + (gimplify_expr): Handle OMP_SIMD. + * internal-fn.c (expand_GOMP_SIMD_LANE): New. + (expand_GOMP_SIMD_VF): New. + (expand_GOMP_SIMD_LAST_LANE): New. + * internal-fn.def (GOMP_SIMD_LANE): New. + (GOMP_SIMD_VF): New. + (GOMP_SIMD_LAST_LANE): New. + * omp-low.c: Include target.h. + (extract_omp_for_data): Handle OMP_SIMD, OMP_CLAUSE_LINEAR, + OMP_CLAUSE_SAFELEN. + (check_omp_nesting_restrictions): Same. + (omp_max_vf): New. + (lower_rec_simd_input_clauses): New. + (lower_rec_input_clauses): Handle OMP_SIMD, GF_OMP_FOR_KIND_SIMD, + OMP_CLAUSE_LINEAR. + (lower_lastprivate_clauses): Handle OMP_CLAUSE_LINEAR, + GF_OMP_FOR_KIND_SIMD, OMP_SIMD. + (expand_omp_build_assign): New. + (expand_omp_for_init_counts): New. + (expand_omp_for_init_vars): New. + (extract_omp_for_update_vars): New. + (expand_omp_for_generic): Use expand_omp_for_{init,update}_vars + and rewrite accordingly. + (expand_omp_simd): New. + (expand_omp_for): Use expand_omp_simd. + (lower_omp_for_lastprivate): Unshare vinit when appropriate. + (lower_omp_for): Do not lower the body. + * tree-data-ref (get_references_in_stmt): Allow IFN_GOMP_SIMD_LANE + in their own loops. + * tree-flow.h (find_omp_clause): Remove prototype. 
+ * tree-if-conv.c (main_tree_if_conversion): Run if doing if conversion, + forcing vectorization of the loop, or if flag_tree_vectorize. + (gate_tree_if_conversion): Similarly. + * tree-inline.c (remap_gimple_stmt): Pass for kind argument to + gimple_build_omp_for. + (copy_cfg_body): set has_force_vect_loops and has_simduid_loops. + * tree-parloops (create_parallel_loop): Pass kind argument to + gimple_build_omp_for. + * tree-pretty-print.c (dump_omp_clause): Add cases for + OMP_CLAUSE_UNIFORM, OMP_CLAUSE_LINEAR, OMP_CLAUSE_SAFELEN, + OMP_CLAUSE__SIMDUID_. + (dump_generic_node): Handle OMP_SIMD. + * tree-ssa-ccp.c (likely_value): Handle IFN_GOMP_SIMD*. + * tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1): Do not + unroll OMP_SIMD loops here. + * tree-ssa-loop.c (gate_tree_vectorize): Run if has_force_vect_loops. + * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Handle + loop->safelen. + (vect_analyze_data_refs): Handle simd loops. + * tree-vect-loop.c (vectorizable_live_operation): Handle + IFN_GOMP_SIMD*. + * tree-vect-stmts.c (vectorizable_call): Handle IFN_GOMP_SIMD_LANE. + (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P. + (vectorizable_load): Same. + * tree-vectorizer.c: Include hash-table.h and tree-ssa-propagate.h. + (struct simduid_to_vf): New. + (simduid_to_vf::hash): New. + (simduid_to-vf::equal): New. + (struct simd_array_to_simduid): New. + (simd_array_to_simduid::hash): New. + (simd_array_to_simduid::equal): New. + (adjust_simduid_builtins): New. + (struct note_simd_array_uses_struct): New. + (note_simd_array_uses_cb): New. + (note_simd_array_uses): New. + (vectorize_loops): Handle simd hints and adjust simd builtins + accordingly. + * tree-vectorizer.h (struct _stmt_vec_info): Add + simd_lane_access_p field. + (STMT_VINFO_SIMD_LANE_ACCESS_P): New macro. + * tree.c (omp_clause_num_ops): Add entries for OMP_CLAUSE_LINEAR, + OMP_CLAUSE_SAFELEN, OMP_CLAUSE__SIMDUID_, OMP_CLAUSE_UNIFORM. + (omp_clause_code_name): Same. 
+ (walk_tree_1): Handle OMP_CLAUSE_UNIFORM, OMP_CLAUSE_SAFELEN, + OMP_CLAUSE__SIMDUID_, OMP_CLAUSE_LINEAR. + * tree.def (OMP_SIMD): New entry. + * tree.h (enum omp_clause_code): Add entries for OMP_CLAUSE_LINEAR, + OMP_CLAUSE_UNIFORM, OMP_CLAUSE_SAFELEN, OMP_CLAUSE__SIMDUID_. + (OMP_CLAUSE_DECL): Adjust range for new clauses. + (OMP_CLAUSE_LINEAR_NO_COPYIN): New. + (OMP_CLAUSE_LINEAR_NO_COPYOUT): New. + (OMP_CLAUSE_LINEAR_STEP): New. + (OMP_CLAUSE_SAFELEN_EXPR): New. + (OMP_CLAUSE__SIMDUID__DECL): New. + (find_omp_clause): New prototype. + +2013-08-27 H.J. Lu + + * config/i386/driver-i386.c (host_detect_local_cpu): Update + Haswell processor detection. + +2013-08-27 Christian Widmer + + PR target/57927 + * config/i386/driver-i386.c (host_detect_local_cpu): Add detection + of Ivy Bridge and Haswell processors. Assume core-avx2 for unknown + AVX2 capable processors. + +2013-08-27 Tejas Belagod + + * config/aarch64/arm_neon.h: Replace all inline asm implementations + of vget_low_* with implementations in terms of other intrinsics. + +2013-08-27 Marc Glisse + + PR middle-end/57219 + * doc/extend.texi (__builtin_isinf_sign): Restrict the return + values to -1, 0 and 1. + +2013-08-27 Vidya Praveen + + * config/aarch64/aarch64.md (unspec): Add UNSPEC_SISD_SSHL, + UNSPEC_SISD_USHL, UNSPEC_USHL_2S, UNSPEC_SSHL_2S, UNSPEC_SISD_NEG. + (3_insn): Remove. + (aarch64_ashl_sisd_or_int_3): New Pattern. + (aarch64_lshr_sisd_or_int_3): Likewise. + (aarch64_ashr_sisd_or_int_3): Likewise. + (define_split for aarch64_lshr_sisd_or_int_di3): Likewise. + (define_split for aarch64_lshr_sisd_or_int_si3): Likewise. + (define_split for aarch64_ashr_sisd_or_int_di3): Likewise. + (define_split for aarch64_ashr_sisd_or_int_si3): Likewise. + (aarch64_sisd_ushl, aarch64_sisd_sshl): Likewise. + (aarch64_ushl_2s, aarch64_sshl_2s, aarch64_sisd_neg_qi): Likewise. + (ror3_insn): Likewise. + * config/aarch64/predicates.md (aarch64_simd_register): New. 
+ +2013-08-27 Richard Biener + + PR tree-optimization/57521 + * tree-if-conv.c (if_convertible_bb_p): Verify that at least + one edge is non-critical. + (find_phi_replacement_condition): Make sure to use a non-critical + edge. Cleanup and remove old bug workarounds. + (bb_postdominates_preds): Remove. + (if_convertible_loop_p_1): Do not compute post-dominators. + (combine_blocks): Do not free post-dominators. + (main_tree_if_conversion): Likewise. + (pass_data_if_conversion): Add TODO_verify_ssa. + +2013-08-27 DJ Delorie + + * config/i386/djgpp.h (ASM_DECLARE_FUNCTION_NAME): New. + +2013-08-27 Yufeng Zhang + + * function.c (assign_parm_find_data_types): Set passed_mode and + nominal_mode to the TYPE_MODE of nominal_type for the built + pointer type in case of the struct-pass-by-reference. + +2013-08-26 Joern Rennecke + + * config/avr/avr-stdint.h (INT16_TYPE): Change default to "int". + (UINT16_TYPE): Change default to "unsigned int". + + * config/avr/avr.opt (mfract-convert-truncate): New option. + * config/avr/avr.c (avr_out_fract): Unless TARGET_FRACT_CONV_TRUNC + is set, round negative fractional integers according to n1169 + when converting to integer types. + +2013-08-26 Jan Hubicka + + * cgraph.c (cgraph_propagate_frequency): Do not assume that virtual + methods can not be called indirectly when their address is not taken. + +2013-08-26 Jan Hubicka + + * gimple-fold.c (gimple_get_virt_method_for_binfo): Use + ctor_for_folding. + +2013-08-26 Jan Hubicka + + * ipa.c (comdat_can_be_unshared_p_1): C++ constructors and destructors + can be unshared. + +2013-08-26 Joern Rennecke + + * reload.c (find_valid_class): Allow classes that do not include + FIRST_PSEUDO_REGISTER - 1. + +2013-08-26 Jan Hubicka + + * cgraph.c (cgraph_redirect_edge_call_stmt_to_callee): Fix formatting; + fix edge count/frequency when speculation failed; fix type check + for the direct call. + +2013-08-26 Jan Hubicka + + * ipa-prop.c (ipa_print_node_params): Do not ICE during WPA. 
+ +2013-08-26 Jan Hubicka + + * ipa-inline-transform.c (inline_transform): Be ready for basic block + to be changed by edge redirection. + +2013-08-26 Jan Hubicka + + * cgraph.c (cgraph_speculative_call_info): Fix parameter order and + formatting; add sanity check. + (cgraph_resolve_speculation): Add FIXME about scaling profiles. + (cgraph_redirect_edge_call_stmt_to_callee): Fix ICE in debug dump. + * ipa-inline.c (heap_edge_removal_hook): Reset node growth cache. + (resolve_noninline_speculation): Update callee keys, too. + +2013-08-26 Jan Hubicka + + * tree.h (tree_decl_with_vis): Add cxx_constructor, cxx_destructor. + (DECL_CXX_CONSTRUCTOR_P, DECL_CXX_DESTRUCTOR_P): New macros. + +2013-08-26 Joern Rennecke + + * config/i386/i386.c (x86_64_elf_select_section): Put ATTRIBUTE_UNUSED + into proper place. + +2013-08-26 Uros Bizjak + + * config/i386/i386.c (ix86_debug_options): Remove prototype. + (x86_64_elf_select_section): Ditto. + (ix86_handle_tm_regparm_attribute): Remove ATTRIBUTE_UNUSED on used + arguments. + (ix86_pass_by_reference): Ditto. + (output_set_got): Ditto. + (ix86_unary_operator_ok): Ditto. + (ix86_expand_builtin): Ditto. + +2013-08-23 Jan Hubicka + + * cgraph.c (cgraph_turn_edge_to_speculative): Fix debug output. + +2013-08-23 Jan Hubicka + + * tree.h (TYPE_FINAL_P, DECL_FINAL_P): New macros. + (tree_decl_with_vis): Add FINAL field. + +2013-08-23 Jeff Law + + * tree-ssa-pre.c (do_regular_insertion): Include the expression in + the debugging dump when the expression is fully redundant. + +2013-08-23 Gabriel Dos Reis + + * diagnostic.c (diagnostic_set_caret_max_width): Use pp_buffer. + * gimple-pretty-print.c (gimple_dump_bb_buff): Likewise. + * pretty-print.c (pp_formatted_text_data): Likewise. + (pp_write_text_to_stream): Likewise. + (pp_write_text_as_dot_label_to_stream): Likewise. + (pp_append_r): Likewise. + (pp_format): Likewise. + (pp_flush): Likewise. + (pp_clear_output_area): Likewise. + (pp_append_text): Likewise. 
+ (pp_formatted_text): Likewise. + (pp_remaining_character_count_for_line): Likewise. + (pp_newline): Likewise. + (pp_character): Likewise. + (output_buffer::~output_buffer): Define. + (pretty_printer::~pretty_printer): Destruct output buffer. + * pretty-print.h (output_buffer::~output_buffer): Declare. + (pretty_printer::~pretty_printer): Declare virtual. + +2013-08-24 Marc Glisse + + PR other/57324 + * hwint.h (HOST_WIDE_INT_UC, HOST_WIDE_INT_1U, HOST_WIDE_INT_M1, + HOST_WIDE_INT_M1U): New macros. + * fold-const.c (sign_bit_p, build_range_check, fold_unary_loc, + fold_binary_loc, fold_ternary_loc): Use the new macros. Use an + unsigned -1 for lshift. + * cse.c (cse_insn): Likewise. + * double-int.c (rshift_double, lshift_double): Likewise. + * builtins.c (fold_builtin_bitop): Likewise. + * combine.c (force_to_mode): Likewise. + * tree.c (integer_pow2p, tree_log2, tree_floor_log2): Likewise. + * simplify-rtx.c (simplify_const_unary_operation, + simplify_const_binary_operation): Likewise. + * tree-stdarg.c (va_list_counter_bump, va_list_ptr_read, + check_va_list_escapes): Likewise. + * rtlanal.c (nonzero_bits1): Likewise. + * expmed.c (expand_smod_pow2): Likewise. + * tree-ssa-structalias.c (UNKNOWN_OFFSET): Use HOST_WIDE_INT_MIN. + +2013-08-23 Jan Hubicka + + * cgraph.c (cgraph_turn_edge_to_speculative): Mark target node + as having address taken. + +2013-08-23 Jan Hubicka + + * ipa-utils.h (method_class_type): Declare. + * ipa-devirt.c (method_class_type): Export. + + * cgraphunit.c (analyze_functions): Do basic devirtualization; + do not walk base classes of anonymous types. + +2013-08-23 Kaz Kojima + + PR rtl-optimization/58220 + PR regression/58221 + * final.c (reemit_insn_block_notes): Use NEXT_INSN to + handle SEQUENCE insns properly. + +2013-08-23 Gabriel Dos Reis + + * pretty-print.h (pp_newline_and_flush): Declare. Remove macro + definition. + (pp_newline_and_indent): Likewise. + (pp_separate_with): Likewise. 
+ * pretty-print.c (pp_newline_and_flush): Define. + (pp_newline_and_indent): Likewise. + (pp_separate_with): Likewise. + +2013-08-23 Jakub Jelinek + + PR target/58218 + * config/i386/x86-64.h (TARGET_SECTION_TYPE_FLAGS): Define. + * config/i386/i386.c (x86_64_elf_section_type_flags): New function. + +2013-08-23 Kirill Yukhin + + * gcc/config/i386/predicates.md (ext_sse_reg_operand): New. + * gcc/config/i386/i386.md (*movti_internal): Use + predicate to determine if EVEX is needed. + (*movsi_internal): Ditto. + (*movdf_internal): Ditto. + (*movsf_internal): Ditto. + * gcc/config/i386/mmx.md (*mov_internal): Ditto. + +2013-08-23 Jakub Jelinek + + PR tree-optimization/58209 + * tree-tailcall.c (process_assignment): Handle POINTER_PLUS_EXPR. + (find_tail_calls): Give up for pointer result types if m is non-NULL. + (adjust_return_value_with_ops): For PLUS_EXPR and pointer result type + emit POINTER_PLUS_EXPR. + (create_tailcall_accumulator): For pointer result type accumulate in + sizetype type. + +2013-08-22 Paolo Carlini + + * configure.ac: Add backslashes missing from the last change. + * configure: Regenerate. + +2013-08-22 Jan Hubicka + + * ipa.c (function_and_variable_visibility): First remember function + was global and then make it local. + +2013-08-22 Julian Brown + + * configure.ac: Add aarch64 to list of arches which use "nop" in + debug_line test. + * configure: Regenerate. + +2013-08-22 Andreas Krebbel + + * config/s390/linux.h (TARGET_LIBC_HAS_FUNCTION): Define as + gnu_libc_has_function. + * config/s390/tpf.h: Likewise. + +2013-08-22 Jan Hubicka + + * timevar.c (validate_phases): Add cast. + +2013-08-22 Jan Hubicka + + * timevar.c (validate_phases): Use size_t for memory. + * timevar.h (struct timevar_time_def): Use size_t for ggc_mem. + +2013-08-22 Gabriel Dos Reis + + * pretty-print.h (output_buffer::output_buffer): Declare. + (pretty_printer::pretty_printer): Likewise. + (pp_construct): Remove. 
+ * pretty-print.c (output_buffer::output_buffer): Define. + (pretty_printer::pretty_printer): Rename from pp_construct. Simplify. + * gimple-pretty-print.c (print_gimple_stmt): Do not call pp_construct. + (print_gimple_expr): Likewise. + (print_gimple_seq): Likewise. + (gimple_dump_bb): Likewise. + * sched-vis.c (dump_value_slim): Likewise. + (dump_insn_slim): Likewise. + (dump_rtl_slim): Likewise. + (str_pattern_slim): Likewise. + * tree-mudflap.c (mf_varname_tree): Likewise. + * graph.c (print_graph_cfg): Likewise. + (start_graph_dump): Likewise. + * tree-pretty-print.c (maybe_init_pretty_print): Likewise. Use + placement-new. + * diagnostic.c (diagnostic_initialize): Simplify early diagnostic + pretty printer initialization. + * coretypes.h (diagnostic_context): Remove superfluous type alias + declaration. + (pretty_printer): Likewise. Declare directly as a class. + (pretty_print_info): Remove declaration as class. + * asan.c (asan_emit_stack_protection): Remove call to pp_construct + and pp_clear_output_area. + (asan_add_global): Likewise. + +2013-08-22 Jan Hubicka + + * cgraphunit.c (analyze_functions): Use update_type_inheritance_graph. + * ipa-utils.h (update_type_inheritance_graph): Declare. + (possible_polymorphic_call_target_p): Declare. + (possible_polymorphic_call_target_p): New. + * ipa-devirt.c: Update toplevel comments. + (cached_polymorphic_call_targets): Move up. + (odr_type_d): Move ID down. + (polymorphic_type_binfo_p): Update comment. + (odr_hasher::remove): Likewise. + (get_odr_type): Set anonymous_namespace. + (dump_odr_type): Dump it. + (dump_type_inheritance_graph): Do not ICE when there are no ODR types. + (maybe_record_node): Record node in cached_polymorphic_call_targets. + (record_binfo): Add comment. + (free_polymorphic_call_targets_hash): Do not ICE when cache is not + built. + (devirt_node_removal_hook): Do not ICE when cache is freed. + (possible_polymorphic_call_target_p): New predicate. 
+ (update_type_inheritance_graph): New function. + +2013-08-22 Alexander Ivchenko + Maxim Kuznetsov + Sergey Lega + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512F_SET): New. + (OPTION_MASK_ISA_AVX512CD_SET): Ditto. + (OPTION_MASK_ISA_AVX512PF_SET): Ditto. + (OPTION_MASK_ISA_AVX512ER_SET): Ditto. + (OPTION_MASK_ISA_AVX2_UNSET): Update. + (OPTION_MASK_ISA_AVX512F_UNSET): New. + (OPTION_MASK_ISA_AVX512CD_UNSET): Ditto. + (OPTION_MASK_ISA_AVX512PF_UNSET): Ditto. + (OPTION_MASK_ISA_AVX512ER_UNSET): Ditto. + (ix86_handle_option): Handle OPT_mavx512f, OPT_mavx512cd, + OPT_mavx512pf, OPT_mavx512er cases. + * config/i386/constraints.md (v): New constraint. + (Yi, Yj): Replace SSE_REGS with ALL_SSE_REGS. + * config/i386/cpuid.h (bit_AVX512F, bit_AVX512PF, bit_AVX512ER) + (bit_AVX512CD): New. + * config/i386/driver-i386.c (host_detect_local_cpu): Detect + AVX512F, AVX512ER, AVX512PF, AVX512CD features. + * config/i386/i386-c.c (ix86_target_macros_internal): + Conditionally define __AVX512F__, __AVX512ER__, __AVX512CD__, + __AVX512PF__. + * config/i386/i386-modes.def (VECTOR_MODES (INT, 128)) + (VECTOR_MODES (FLOAT, 128), INT_MODE (XI, 64)): New modes. + * config/i386/i386.c (regclass_map, dbx_register_map) + (dbx64_register_map, svr4_dbx_register_map): Add new SSE registers. + (gate_insert_vzeroupper): Disable vzeroupper for TARGET_AVX512F. + (ix86_target_string): Define -mavx512f, -mavx512er, -mavx512cd, + -mavx512pf options. + (ix86_option_override_internal): Define PTA_AVX512F, PTA_AVX512ER, + PTA_AVX512PF, PTA_AVX512CD. Handle -mavx512f, -mavx512er, -mavx512cd, + -mavx512pf options. Fix formatting. + (ix86_conditional_register_usage): Squash EXT_REX_SSE_REGs for 32-bit + targets. Squash EVEX_SSE_REGS if AVX512F is disabled. + (ix86_valid_target_attribute_inner_p): Handle -mavx512f, -mavx512er, + -mavx512cd, -mavx512pf options. 
+ (standard_sse_constant_opcode): Add vpternlogd for 512-bit modes. + (print_reg, ix86_print_operand): Handle 'g' to output 512-bit operands. + (ix86_preferred_output_reload_class): Replace SSE_REGS with + ALL_SSE_REGS. + (ix86_hard_regno_mode_ok): Support 512-bit registers. + (ix86_set_reg_reg_cost): Ditto. + (x86_order_regs_for_local_alloc): Ditto. + (MAX_VECT_LEN): Extend to 64-byte. + (ix86_spill_class): Replace SSE_REGS with ALL_SSE_REGS. + * config/i386/i386.h (TARGET_AVX512F, TARGET_AVX512PF) + (TARGET_AVX512ER, TARGET_AVX512CD): New. + (BIGGEST_ALIGNMENT): Extend to 512-bits. + (FIRST_PSEUDO_REGISTER, FIXED_REGISTERS): Add new registers. + (CALL_USED_REGISTERS, REG_ALLOC_ORDER): Likewise. + (VALID_AVX512F_SCALAR_MODE, VALID_AVX512F_REG_MODE): New. + (SSE_REG_MODE_P): Support new modes. + (FIRST_MMX_REG, FIRST_REX_INT_REG, FIRST_REX_SSE_REG): Add comments. + (FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG): New. + (reg_class, REG_CLASS_NAMES): Add EVEX_SSE_REGS, ALL_SSE_REGS. + (SSE_CLASS_P, MAYBE_SSE_CLASS_P): Replace SSE_REGS with ALL_SSE_REGS. + (REG_CLASS_CONTENTS): Add new registers. + (SSE_REGNO_P, SSE_REGNO, HARD_REGNO_RENAME_OK): Support new registers. + (EXT_REX_SSE_REGNO_P): New. + (HI_REGISTER_NAMES): Add new registers. + * config/i386/i386.md: Define constants for new registers. + (mode): Add new 512-bit modes. + (prefix): Support evex prefix. + (isa): Support avx512f, noavx512f, fma_avx512f. + (ssemodesuffix): Add new 512-bit modes. + (movxi): New. + (*movxi_internal_avx512f): Ditto. + (*movdi_internal): Replace constraint "x" with the new constraint "v". + Support MODE_XI. + (*movsi_internal): Likewise. + (*movdf_internal): Likewise. + (*movsf_internal): Likewise. + (*fop__comm_sse): Replace constraint "x" with new constraint "v". + (3): Likewise. + * config/i386/i386.opt (mavx512f, mavx512pf, mavx512er, mavx512cd): + New. + * config/i386/mmx.md (*mov_internal): Replace constraint "x" + with the new constraint "v". 
+ * config/i386/sse.md (*mov_internal): Support new registers and + modes. + (_loadu): Replace constraint "x" + with the new constraint "v". + (_loaddqu): Likewise. + (_storedqu): Likewise. + (*3): Likewise. + (_vm3): Likewise. + (*mul3): Likewise. + (_vmmul3): Likewise. + (_div3): Likewise. + (_vmdiv3): Likewise. + (_sqrt2): Likewise. + (_vmsqrt2): Likewise. + (*3_finite): Likewise. + (*3) : Likewise. + (_vm3): Likewise. + (*3) : Likewise. + (*fma_fmadd_): Likewise. + (*fma_fmsub_): Likewise. + (*fma_fnmadd_): Likewise. + (*fma_fnmsub_): Likewise. + (*fma_fmaddsub_): Likewise. + (*fma_fmsubadd_): Likewise. + (*fmai_fmadd_): Likewise. + (*fmai_fmsub_): Likewise. + (*fmai_fnmadd_): Likewise. + (*fmai_fnmsub_): Likewise. + (sse_cvtsi2ss): Likewise. + (sse_cvtsi2ssq): Likewise. + (sse_cvtss2si): Likewise. + (sse_cvtss2si_2): Likewise. + (sse_cvtss2siq): Likewise. + (sse_cvtss2siq_2): Likewise. + (sse_cvttss2si): Likewise. + (sse_cvtss2siq_2): Likewise. + (float2): Likewise. + (sse2_cvtsd2si_2): Likewise. + (sse2_cvtsd2siq_2): Likewise. + (*3): Likewise. + (*_3): Likewise. + (*_mul3): Likewise. + (ashr3): Likewise. + (3): Likewise. + (avx2_3): Likewise. + (*avx2_3): Likewise. + (*andnot3): Likewise. + (*3) : Likewise. + (abs2): Likewise. + (avx2_permvar): Likewise. + (avx2_perm_1): Likewise. + (*avx_vpermilp): Likewise. + (avx_vpermilvar3): Likewise. + (avx2_ashrv): Likewise. + (avx2_v): Likewise. + * doc/invoke.texi: Document -mavx512f, -mavx512pf, -mavx512er, + -mavx512cd. + * doc/rtl.texi: Document XImode. + +2013-08-21 Jeff Law + + * tree-flow.h (register_jump_thread): Pass vector of edges + instead of each important edge. + * tree-ssa-threadedge.c (thread_across_edge): Build the jump + thread path into a vector and pass that to register_jump_thread. + * tree-ssa-threadupdate.c (register_jump_thread): Convert the + passed in edge vector to the current 3-edge form. 
+ + Revert: + 2013-08-20 Alexey Makhalov + + * dce.c (fini_dce): Call df_analyze again just in case + delete_unmarked_insns removed anything. + +2013-08-21 Joern Rennecke + + * reload.h (struct reg_equivs): Rename to .. + (struct reg_equivs_s): .. this. + +2013-08-20 Martin Liska + + * ipa.c (ipa_profile_read_summary): Fix buffer overflow. + +2013-08-21 Rainer Orth + + * config/sol2-10.h (TARGET_LIBC_HAS_FUNCTION): Don't nest comment. + +2013-08-21 Jeff Law + + * tree-vrp.c (simplify_stmt_for_jump_threading): Try to + simplify assignments too. If the RHS collapses to a singleton + range, then return the value for the range. + +2013-08-21 Kirill Yukhin + + * config/i386/sse.md (V16): Rename to... + (VMOVE): this. + (mov): Update iterator name. + (*mov_internal): Ditto. + (push1): Ditto. + (movmisalign): Ditto. + +2013-08-20 Jan Hubicka + + PR bootstrap/58186 + * cgraph.c (cgraph_add_edge_to_call_site_hash): Overwrite hash + entry for direct edges. + (cgraph_turn_edge_to_speculative): Fix setting of can_throw_external. + +2013-08-20 David Malcolm + + Revert my last two changes, r201865 and r201864: + + Revert r201865: + 2013-08-20 David Malcolm + + Make opt_pass and gcc::pass_manager be GC-managed, so that pass + instances can own GC refs. + + * Makefile.in (GTFILES): Add pass_manager.h and tree-pass.h. + * context.c (gcc::context::gt_ggc_mx): Traverse passes_. + (gcc::context::gt_pch_nx): Likewise. + (gcc::context::gt_pch_nx): Likewise. + * ggc.h (gt_ggc_mx ): New. + (gt_pch_nx_with_op ): New. + (gt_pch_nx ): New. + * passes.c (opt_pass::gt_ggc_mx): New. + (opt_pass::gt_pch_nx): New. + (opt_pass::gt_pch_nx_with_op): New. + (pass_manager::gt_ggc_mx): New. + (pass_manager::gt_pch_nx): New. + (pass_manager::gt_pch_nx_with_op): New. + (pass_manager::operator new): Use + ggc_internal_cleared_alloc_stat rather than xcalloc. + * pass_manager.h (class pass_manager): Add GTY((user)) marking. + (pass_manager::gt_ggc_mx): New. + (pass_manager::gt_pch_nx): New. 
+ (pass_manager::gt_pch_nx_with_op): New. + * tree-pass.h (class opt_pass): Add GTY((user)) marking. + (opt_pass::operator new): New. + (opt_pass::gt_ggc_mx): New. + (opt_pass::gt_pch_nx): New. + (opt_pass::gt_pch_nx_with_op): New. + + Revert r201864: + 2013-08-20 David Malcolm + + * Makefile.in (GTFILES): Add context.h. + * context.c (gcc::context::operator new): New. + (gcc::context::gt_ggc_mx): New. + (gcc::context::gt_pch_nx): New. + (gcc::context::gt_pch_nx): New. + * context.h (gcc::context): Add GTY((user)) marking. + (gcc::context::operator new): New. + (gcc::context::gt_ggc_mx): New. + (gcc::context::gt_pch_nx): New. + (gcc::context::gt_pch_nx): New. + (g): Add GTY marking. + (gt_ggc_mx (gcc::context *)): New. + (gt_pch_nx (gcc::context *)): New. + (gt_pch_nx (gcc::context *ctxt, gt_pointer_operator op, + void *cookie)): New. + * gengtype.c (open_base_files) : Add context.h. + +2013-08-20 Alexey Makhalov + + * dce.c (fini_dce): Call df_analyze again just in case + delete_unmarked_insns removed anything. + +2013-08-20 Teresa Johnson + + PR rtl-optimizations/57451 + * final.c (reemit_insn_block_notes): Prevent lexical blocks + from crossing split section boundaries. + +2013-08-20 Matthew Gretton-Dann + + * config/arm/linux-elf.h (MULTILIB_DEFAULTS): Remove definition. + * config/arm/t-linux-eabi (MULTILIB_OPTIONS): Document association + with MULTLIB_DEFAULTS. + +2013-08-20 Nick Clifton + + * target.def (narrow_volatile_bitfield): Note that the default + value is false, not !TARGET_STRICT_ALIGN. + * doc/tm.texi: Regenerate. + +2013-08-20 Pavel Chupin + + Fix LIB_SPEC for systems without libpthread. + + * config/gnu-user.h: Introduce GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC. + * config/arm/linux-eabi.h: Use GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + for Android. + * config/i386/linux-common.h: Likewise. + * config/mips/linux-common.h: Likewise. + +2013-08-20 Zhouyi Zhou + + * tree-ssa-ccp.c (get_default_value): Remove redundant condition + checks. 
+ +2013-08-20 David Malcolm + + Make opt_pass and gcc::pass_manager be GC-managed, so that pass + instances can own GC refs. + + * Makefile.in (GTFILES): Add pass_manager.h and tree-pass.h. + * context.c (gcc::context::gt_ggc_mx): Traverse passes_. + (gcc::context::gt_pch_nx): Likewise. + (gcc::context::gt_pch_nx): Likewise. + * ggc.h (gt_ggc_mx ): New. + (gt_pch_nx_with_op ): New. + (gt_pch_nx ): New. + * passes.c (opt_pass::gt_ggc_mx): New. + (opt_pass::gt_pch_nx): New. + (opt_pass::gt_pch_nx_with_op): New. + (pass_manager::gt_ggc_mx): New. + (pass_manager::gt_pch_nx): New. + (pass_manager::gt_pch_nx_with_op): New. + (pass_manager::operator new): Use + ggc_internal_cleared_alloc_stat rather than xcalloc. + * pass_manager.h (class pass_manager): Add GTY((user)) marking. + (pass_manager::gt_ggc_mx): New. + (pass_manager::gt_pch_nx): New. + (pass_manager::gt_pch_nx_with_op): New. + * tree-pass.h (class opt_pass): Add GTY((user)) marking. + (opt_pass::operator new): New. + (opt_pass::gt_ggc_mx): New. + (opt_pass::gt_pch_nx): New. + (opt_pass::gt_pch_nx_with_op): New. + +2013-08-20 David Malcolm + + * Makefile.in (GTFILES): Add context.h. + * context.c (gcc::context::operator new): New. + (gcc::context::gt_ggc_mx): New. + (gcc::context::gt_pch_nx): New. + (gcc::context::gt_pch_nx): New. + * context.h (gcc::context): Add GTY((user)) marking. + (gcc::context::operator new): New. + (gcc::context::gt_ggc_mx): New. + (gcc::context::gt_pch_nx): New. + (gcc::context::gt_pch_nx): New. + (g): Add GTY marking. + (gt_ggc_mx (gcc::context *)): New. + (gt_pch_nx (gcc::context *)): New. + (gt_pch_nx (gcc::context *ctxt, gt_pointer_operator op, + void *cookie)): New. + * gengtype.c (open_base_files) : Add context.h. + +2013-08-20 Alan Modra + + PR target/57865 + * config/rs6000/rs6000.c (rs6000_emit_prologue): Correct ool_adjust. + (rs6000_emit_epilogue): Likewise. + +2013-08-19 Dehao Chen + + * value-prof.c (gimple_ic): Fix the bug of adding EH edge. 
+ +2013-08-19 Peter Bergner + Jakub Jelinek + + * builtins.def (BUILT_IN_FABSD32): New DFP ABS builtin. + (BUILT_IN_FABSD64): Likewise. + (BUILT_IN_FABSD128): Likewise. + * builtins.c (expand_builtin): Add support for new DFP ABS builtins. + (fold_builtin_1): Likewise. + * config/rs6000/dfp.md (*negtd2_fpr): Handle non-overlapping + destination and source operands. + (*abstd2_fpr): Likewise. + (*nabstd2_fpr): Likewise. + +2013-08-19 Richard Sandiford + + * config/mips/mips.c (mips_adjust_insn_length): Add checks for + JUMP_P and INSN_P. + +2013-08-19 Aldy Hernandez + + * doc/invoke.texi (-fcilkplus): Clarify that implementation is + incomplete. + +2013-08-19 Alexander Ivchenko + + * target.def (TARGET_LIBC_HAS_FUNCTION): New target hook. + * builtins.c (default_libc_has_function): New. + (gnu_libc_has_function): Ditto. + (no_c99_libc_has_function): Ditto. + (expand_builtin_cexpi): Using new target hook TARGET_LIBC_HAS_FUNCTION + instead of TARGET_HAS_SINCOS and TARGET_C99_FUNCTIONS. + (fold_builtin_sincos): Likewise. + (fold_builtin_cexp): Likewise. + * builtins.def (DEF_C94_BUILTIN): Likewise. + (DEF_C99_BUILTIN): Likewise. + (DEF_C99_C90RES_BUILTIN): Likewise. + (DEF_C99_COMPL_BUILTIN): New define. Change all complex c99 builtin + definitions to using this define. + * config/darwin-protos.h (darwin_libc_has_function): New. + * config/darwin.c (darwin_libc_has_function): Ditto. + * config/alpha/linux.h: Remove TARGET_C99_FUNCTIONS and + TARGET_HAS_SINCOS. Redefine TARGET_LIBC_HAS_FUNCTION. + * config/darwin.h: Ditto. + * config/elfos.h: Ditto. + * config/freebsd.h: Ditto. + * config/i386/cygming.h: Ditto. + * config/i386/djgpp.h: Ditto. + * config/i386/i386-interix.h: Ditto. + * config/microblaze/microblaze.h: Ditto. + * config/mmix/mmix.h: Ditto. + * config/gnu-user.h: Ditto. + * config/ia64/hpux.h: Ditto. + * config/pa/pa-hpux.h: Ditto. + * config/pdp11/pdp11.h: Ditto. + * config/picochip/picochip.h: Ditto. + * config/linux.h: Ditto. + * config/netbsd.h: Ditto. 
+ * config/openbsd.h: Ditto. + * config/rs6000/aix43.h: Ditto. + * config/rs6000/aix51.h: Ditto. + * config/rs6000/aix52.h: Ditto. + * config/rs6000/aix53.h: Ditto. + * config/rs6000/aix61.h: Ditto. + * config/rs6000/darwin.h: Ditto. + * config/rs6000/linux.h: Ditto. + * config/rs6000/linux64.h: Ditto. + * config/s390/tpf.h: Ditto. + * config/sol2-10.h: Ditto. + * config/sol2.h: Ditto. + * config/vms/vms.h: Ditto. + * config/vxworks.h: Ditto. + * config/linux-android.c (linux_android_libc_has_function): + New linux-specific implementation of TARGET_LIBC_HAS_FUNCTION. + * config/linux-protos.h (linux_android_libc_has_function): + New declaration. + * config/i386/i386.c (ix86_libc_has_function): New. + * config/i386/i386-protos.h + (ix86_libc_has_function): New declaration. + * config/i386/i386.md + ("isinfxf2"): Change condition for TARGET_LIBC_HAS_FUNCTION. + ("isinf2): Likewise. + * convert.c (convert_to_integer): Using new target hook + TARGET_LIBC_HAS_FUNCTION instead of TARGET_HAS_SINCOS and + TARGET_C99_FUNCTIONS. + * fortran/f95-lang.c (gfc_init_builtin_functions): Ditto. + * tree-ssa-math-opts.c (execute_cse_sincos): Ditto. + * coretypes.h (function_class): New enum for different + classes of functions. + * defaults.h: Remove TARGET_C99_FUNCTIONS and TARGET_HAS_SINCOS. + * doc/tm.texi.in (TARGET_C99_FUNCTIONS): Remove documentation. + (TARGET_HAS_SINCOS): Likewise. + (TARGET_LIBC_HAS_FUNCTION): New. + * doc/tm.texi: Regenerated. + * targhooks.h (default_libc_has_function): New declaration. + (no_c99_libc_has_function): Ditto. + (gnu_libc_has_function): Ditto. + * system.h: Add the poisoning of TARGET_C99_FUNCTIONS + and TARGET_HAS_SINCOS. + +2013-08-18 Jan Hubicka + + * Makefile.in (ipa-devirt.o): New. + (GTFILES): Add ipa-utils.h and ipa-devirt.c + * cgraphunit.c (decide_is_symbol_needed): Do not care about virtuals. + (analyze_functions): Look into possible targets of polymorphic call. + * dumpfile.c (dump_files): Add type-inheritance dump. 
+ * dumpfile.h (TDI_inheritance): New. + * ipa-devirt.c: New file. + * ipa-utils.h (odr_type_d): Forward declare. + (odr_type): New type. + (build_type_inheritance_graph): Declare. + (possible_polymorphic_call_targets): Declare and introduce inline + variant when only edge is passed. + (dump_possible_polymorphic_call_targets): Likewise. + * timevar.def (TV_IPA_INHERITANCE, TV_IPA_VIRTUAL_CALL): New. + * tree.c (type_in_anonymous_namespace_p): Break out from ... + (types_same_for_odr): ... here. + * tree.h (type_in_anonymous_namespace_p): Declare. + +2013-08-18 Jakub Jelinek + + PR tree-optimization/58006 + * tree-parloops.c (take_address_of): Don't ICE if get_name + returns NULL. + (eliminate_local_variables_stmt): Remove clobber stmts. + +2013-08-18 Eric Botcazou + + * cgraphunit.c (handle_alias_pairs): Reset the alias flag after the + error message is issued for an alias to undefined symbol. + +2013-08-18 Jan Hubicka + + * cgraph.c (cgraph_create_indirect_edge): Discover + polymorphic calls and record basic info into indirect_info. + * gimple-fold.c (gimple_fold_call): When doing BINFO based + devirtualization, ignore objc function calls. + * ipa-cp.c (initialize_node_lattices): Be ready for polymorphic + call with no parm index info. + * ipa-prop.c (ipa_analyze_call_uses): Likewise. + * tree.c (virtual_method_call_p): New function. + * tree.h (virtual_method_call_p): Declare. + +2013-08-16 Jan Hubicka + + PR middle-end/58179 + * tree.c (obj_type_ref_class): Do not ICE on non-method calls. + +2013-08-16 David Edelsohn + + * config/rs6000/rs6000.md (rs6000_get_timebase_ppc32): Add length + attribute. + +2013-08-16 David Malcolm + + * gengtype.c (type_for_name): Add special-case support for + locating types within the "gcc::" namespace. + (open_base_files): Emit a "using namespace gcc" directive. + +2013-08-16 Michael Meissner + + PR target/58160 + * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the + memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. 
+ + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands + array instead of each individual operand as a separate argument. + (emit_fusion_gpr_load): Likewise. + (expand_fusion_gpr_load): Add new function declaration. + + * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling + signature to have the operands passed as an array, instead of as + separate arguments. Allow ZERO_EXTEND to be in the memory + address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not + depend on the register live/dead flags when peepholes are run. + (expand_fusion_gpr_load): New function to be called from the + peephole2 pass, to change the register that addis sets to be the + target register. + (emit_fusion_gpr_load): Change the calling signature to have the + operands passed as an array, instead of as separate arguments. + Allow ZERO_EXTEND to be in the memory address, and also + SIGN_EXTEND if -mpower8-fusion-sign. + + * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused + unspec enumeration. + (power8 fusion peephole/peephole2): Rework the fusion peepholes to + adjust the register addis loads up in the peephole2 pass. Do not + depend on the register live/dead state when the peephole pass is done. + +2013-08-16 David Malcolm + + * gengtype.c (create_user_defined_type): Ensure that the kind + is set to TYPE_USER_STRUCT, fixing a bug seen when an incomplete + declaration is seen before the GTY((user)) marking. + +2013-08-16 Bernd Edlinger + + PR target/58105 + * config/i386/i386.c (make_resolver_func): Set DECL_UNINLINABLE. + +2013-08-16 Jan Hubicka + + * gimple-fold.c (gimple_extract_devirt_binfo_from_cst): Add new + argument expected_type. + (gimple_fold_call): Use it. + * gimple.h (gimple_extract_devirt_binfo_from_cst): Update prototype. + * ipa-cp.c (ipa_get_indirect_edge_target_1): Update. + * ipa-prop.c (ipa_analyze_virtual_call_uses): Use obj_type_ref_class. + (try_make_edge_direct_virtual_call): Likewise. + * tree.c (obj_type_ref_class): New. 
+ * tree.h (obj_type_ref_class): Use it. + +2013-08-16 Gabriel Dos Reis + + * sched-vis.c (rtl_slim_pp_initialized): Remove. + (rtl_slim_pp): Likewise. + (init_rtl_slim_pretty_print): Likewise. + (dump_value_slim): Don't call it. Use local pretty printer. + (dump_insn_slim): Likewise. + (dump_rtl_slim): Likewise. + (str_pattern_slim): Likewise. + * tree-mudflap.c (mf_varname_tree): Use local pretty printer. + Simplify. + +2013-08-16 Jakub Jelinek + + PR tree-optimization/58164 + * gimple.c (walk_stmt_load_store_addr_ops): For visit_addr + walk gimple_goto_dest of GIMPLE_GOTO. + + PR tree-optimization/58165 + * tree-call-cdce.c (shrink_wrap_one_built_in_call): If + bi_call must be the last stmt in a bb, don't split_block, instead + use fallthru edge from it and give up if there is none. + Release conds vector when returning early. + +2013-08-14 Xinliang David Li + + * config/i386/i386.c (ix86_option_override_internal): + Remove unused variable and field. + +2013-08-14 Bill Schmidt + + PR target/57949 + * doc/invoke.texi: Add documentation of mcompat-align-parm option. + * config/rs6000/rs6000.opt: Add mcompat-align-parm option. + * config/rs6000/rs6000.c (rs6000_function_arg_boundary): For AIX + and Linux, correct BLKmode alignment when 128-bit alignment is + required and compatibility flag is not set. + (rs6000_gimplify_va_arg): For AIX and Linux, honor specified alignment + for zero-size arguments when compatibility flag is not set. + +2013-08-14 Jakub Jelinek + + PR tree-optimization/58145 + * tree-sra.c (build_ref_for_offset): If prev_base has + TREE_THIS_VOLATILE or TREE_SIDE_EFFECTS, propagate it to MEM_REF. + +2013-08-14 Xinliang David Li + + * config/i386/i386.c (ix86_option_override_internal): + Fix uninitialized variable error. + +2013-08-14 Xinliang David Li + + * config/i386/i386.opt: Define two new options. + * config/i386/x86-tune.def: Add arch selector field in macros. + * config/i386/i386.h: Adjust macro definition. 
+ * config/i386/i386.c (ix86_option_override_internal): + Refactor the code. + (parse_mtune_ctrl_str): New function. + (set_ix86_tune_features): New function. + (ix86_function_specific_restore): Call the new helper function. + +2013-08-14 Andrey Belevantsev + + PR rtl-optimization/57662 + * sel-sched.c (code_motion_process_successors): When the current insn + is removed after the recursive traversal, break from the loop. + Add comments and debug printouts. + +2013-08-14 Jakub Jelinek + Alexandre Oliva + + PR target/58067 + * config/i386/i386.c (ix86_delegitimize_address): For CM_MEDIUM_PIC + and CM_LARGE_PIC ix86_cmodel fall thru into the -m32 code, handle + there also UNSPEC_PLTOFF. + +2013-08-14 Marek Polacek + + * ipa-inline-analysis.c (add_clause): Avoid shifting integer + NUM_CONDITIONS bit positions. + +2013-08-13 Cary Coutant + + * dwarf2out.c (CHECKSUM_BLOCK): New macro. + (attr_checksum): Hash vector contents instead of pointer. + (attr_checksum_ordered): Likewise. + +2013-08-13 Uros Bizjak + + * config/i386/sse.md (*sse2_maskmovdqu): Emit addr32 prefix + when Pmode != word_mode. Add length_address attribute. + (sse3_monitor_): Merge from sse3_monitor and + sse3_monitor64_ insn patterns. Emit addr32 prefix when + Pmode != word_mode. Update insn length attribute. + * config/i386/i386.c (ix86_option_override_internal): Update + ix86_gen_monitor selection for merged sse3_monitor insn. + +2013-08-13 Julian Brown + + * config/rs6000/rs6000.c (rs6000_legitimize_reload_address): Don't + perform invalid legitimization on greater-than-word-size modes for + TARGET_E500_DOUBLE. + +2013-08-13 Vladimir Makarov + + * ira.c (setup_class_translate_array): Use aclass instead of cl + for classes not fully covered by allocno classes. + +2013-08-13 Jakub Jelinek + + PR tree-optimization/57661 + * tree-inline.h (struct copy_body_data): Add blocks_to_copy field. + * tree-inline.c (tree_function_versioning): Initialize it. 
+ (remap_gimple_stmt): Return GIMPLE_NOP for MEM_REF lhs clobber stmts + if id->blocks_to_copy and MEM_REF's SSA_NAME is defined in a block + that is not being copied. + + PR sanitizer/56417 + * asan.c (instrument_strlen_call): Fix typo in comment. + Use char * type even for the lhs of POINTER_PLUS_EXPR. + +2013-08-13 Steve Ellcey + + * config/mips/mips.md (prefetch): Use lw instead of ld on + loongson in 32bit mode. + +2013-08-13 Nick Clifton + + * config.gcc: (avr-linux): Allow for tmake_file not being empty. + +2013-08-13 Jan Hubicka + + * cgraph.c (cgraph_turn_edge_to_speculative): Return newly + introduced edge; fix typo in sanity check. + (cgraph_resolve_speculation): Export; improve diagnostic. + (cgraph_redirect_edge_call_stmt_to_callee): Better diagnostic; cancel + speculation at type mismatch. + * cgraph.h (cgraph_turn_edge_to_speculative): Update. + (cgraph_resolve_speculation): Declare. + (symtab_can_be_discarded): New function. + * value-prof.c (gimple_ic_transform): Remove actual transform code. + * ipa-inline-transform.c (speculation_removed): New global var. + (clone_inlined_nodes): See if speculation can be removed. + (inline_call): If speculation was removed, the growths may not match. + * ipa-inline.c (can_inline_edge_p): Add DISREGARD_LIMITS parameter. + (speculation_useful_p): New function. + (resolve_noninline_speculation): New function. + (inline_small_functions): Resolve useless speculations. + * ipa-inline.h (speculation_useful_p): Declare + * ipa.c (can_replace_by_local_alias): Simplify. + (ipa_profile): Produce speculative calls in non-lto, too; + add simple cost model; produce local aliases. + +2013-08-13 David Malcolm + + * config/i386/t-i386 (i386.o): Rename stray PIPELINE_H to + PASS_MANAGER_H. + +2013-08-12 Paolo Carlini + + * config/i386/i386.c (ix86_function_versions): Use error + inform. + +2013-08-12 Uros Bizjak + + * config/i386/i386.md (floatunssi2 expand): Use MODEF mode + iterator instead of X87MODEF. 
+ +2013-08-12 Perez Read + + PR target/58132 + * config/i386/i386.md (*movabs_1): Add PTR before + operand 0 for intel asm alternative. + (*movabs_2): Ditto for operand 1. + +2013-08-12 James Greenhalgh + + * config/aarch64/arm_neon.h + (vdup_lane_<8,16,32,64>): Fix macro call. + +2013-08-12 Nick Clifton + + * config.gcc (m32r-linux): Allow for tmake_file not being empty. + +2013-08-12 Yuri Rumyantsev + + * config/i386/i386.md (floatunssi2 expand): Add new + expand for QI/HImode operand to produce more effective code for + unsigned char(short) --> float(double) conversion. + +2013-08-12 Alexander Monakov + + * doc/invoke.texi: Mention that -ftls-model does not force the final + model. + +2013-08-12 Marek Polacek + Marc Glisse + + PR tree-optimization/57980 + * tree-tailcall.c (process_assignment): Call build_minus_one_cst + when creating -1 constant. + +2013-08-10 Jan Hubicka + + Workaround binutils PR14342. + * tree-profile.c (init_ic_make_global_vars): Add LTO path. + (gimple_init_edge_profiler): Likewise. + (gimple_gen_ic_func_profiler): Likewise. + +2013-08-09 Jan Hubicka + + * cgraph.c (cgraph_create_edge_1): Clear speculative flag. + +2013-08-09 Xinliang David Li + + * config/i386/stringop.def: New file. + * config/i386/stringop.opt: New file. + * config/i386/i386-opts.h: Include stringopt.def. + * config/i386/i386.opt: Include stringopt.opt. + * config/i386/i386.c (ix86_option_override_internal): + Override default size based stringop inline strategies with options. + * config/i386/i386.c (ix86_parse_stringop_strategy_string): + New function. + +2013-08-09 Jan Hubicka + + * ipa-ref.c (ipa_clear_stmts_in_references): Clear lto_stmt_uid, too. + +2013-08-09 Jan Hubicka + + * cgraph.c (cgraph_resolve_speculation): Cut frequency to + CGRAPH_FREQ_MAX. + (dump_cgraph_node): Dump profile-id. + * cgraph.h (cgraph_indirect_call_info): Add common_target_id + and common_target_probability. + * lto-cgraph.c (lto_output_edge): Stream common targets. 
+ (lto_output_node): Stream profile ids. + (input_node): Stream profile ids. + (input_edge): Stream common targets. + * lto-streamer-in.c (fixup_call_stmt_edges_1): Fix formatting. + * ipa.c: Include value-prof.h + (ipa_profile_generate_summary): Turn indirect call statement histograms + into common targets. + (ipa_profile): Turn common targets into speculative edges. + +2013-08-09 Jan Hubicka + + * cgraph.h (cgraph_node): Add profile_id. + * value-prof.c (cgraph_node_map): Turn into pointer_map. + (init_node_map): Rewrite to handle hashes instead of incremental IDs. + (del_node_map): Update. + (find_func_by_funcdef_no): Replace by ... + (find_func_by_profile_id): ... this one. + (gimple_ic_transform): Do not remove useful histograms when + speculation is not done; dump info when indirect call removal + can happen at LTO. + * value-prof.h (find_func_by_profile_id, gimple_ic): Declare. + * gcov-io.h (__gcov_indirect_call_profiler): Replace by ... + (__gcov_indirect_call_profiler_v2): .. this one. + * profile.h (init_node_map): Update. + * coverage.c (coverage_compute_profile_id): New function. + * coverage.h (coverage_compute_profile_id): Declare. + * tree-profile.c (init_ic_make_global_vars): Make + __gcov_indirect_call_callee and __gcov_indirect_call_counters global. + (gimple_init_edge_profiler): Update prototype of + __gcov_indirect_call_profiler. + (gimple_gen_ic_func_profiler): Simplify. + (tree_profiling): Use init_node_map + +2013-08-09 Jan Hubicka + + * cgraphbuild.c (cgraph_rebuild_references): Rebuild only + non-speculative refs. + * cgraph.c (cgraph_update_edge_in_call_site_hash): New function. + (cgraph_add_edge_to_call_site_hash): Deal with speculative calls. + (cgraph_set_call_stmt): Likewise. + (cgraph_create_edge_1): Fix release checking compilation; + clear lto_stmt_uid. + (cgraph_free_edge): Free indirect info. + (cgraph_turn_edge_to_speculative): New function. + (cgraph_speculative_call_info): New function. 
+ (cgraph_make_edge_direct): Return direct edge; handle speculation. + (cgraph_redirect_edge_call_stmt_to_callee): Expand speculative edges. + (dump_cgraph_node): Dump speculation. + (verify_edge_count_and_frequency): Accept speculative edges. + (verify_edge_corresponds_to_fndecl): Handle partitioned cgraph. + (verify_cgraph_node): Handle speculation. + * cgraph.h (cgraph_edge): Add SPECULATIVE flag. + (cgraph_set_call_stmt): Update prototype. + (cgraph_make_edge_direct): Update prototype. + (cgraph_speculative_call_info): Declare. + * ipa-cp.c (ipcp_discover_new_direct_edges): Be ready for edge + to change; update call of ipa_find_references. + * ipa-ref.c (ipa_record_reference): Fix return value; clear + lto_stmt_uid and speculative flags. + (ipa_dump_references): Dump speculation. + (ipa_clone_references): Clone speculative flag. + (ipa_clone_referring): Likewise. + (ipa_clone_ref): New function. + (ipa_find_reference): Look into lto_stmt_uids + (ipa_clear_stmts_in_references): Do not clear speculative calls. + * ipa-ref.h (ipa_ref): Add lto_stmt_uid and speculative flags. + (ipa_find_reference): Update declaration. + (ipa_clone_ref): Declare. + * lto-cgraph.c (lto_output_edge): Make lto_stmt_uids start from 0; + stream speculative flag. + (lto_output_ref): Stream statements uids and speculation. + (input_ref): Likewise. + (input_edge): Stream speculation. + * cgraphclones.c (cgraph_clone_edge): Clone speculation. + (cgraph_set_call_stmt_including_clones): Handle speculation. + * ipa-inline.c (heap_edge_removal_hook): New function. + (inline_small_functions): Register it. + * lto-streamer-in.c (fixup_call_stmt_edges_1): Bounds checking; + also initialize refs. + * ipa-prop.c (ipa_make_edge_direct_to_target): Be ready for + edge to change. + (try_make_edge_direct_simple_call): Likewise. + (try_make_edge_direct_simple_call): Likewise. + (update_indirect_edges_after_inlining): Likewise. + (remove_described_reference): Look proper lto_stmt_uid. 
+ (propagate_controlled_uses): Likewise. + (propagate_controlled_uses): Likewise. + * tree-inline.c (copy_bb): Copy speculative edges. + (redirect_all_calls): New function. + (copy_cfg_body): Do redirection after loop info is updated. + (delete_unreachable_blocks_update_callgraph): Update speculation. + +2013-08-09 Jan Hubicka + + * lto-streamer-out.c (output_function): Renumber PHIs. + * lto-streamer-in.c (input_function): Likewise. + +2013-08-09 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove. + (get_lane_unsigned): Likewise. + (dup_lane_scalar): Likewise. + (get_lane): enable for VALL. + * config/aarch64/aarch64-simd.md + (aarch64_dup_lane_scalar): Remove. + (aarch64_get_lane_signed): Likewise. + (aarch64_get_lane_unsigned): Likewise. + (aarch64_get_lane_extend): New. + (aarch64_get_lane_zero_extendsi): Likewise. + (aarch64_get_lane): Enable for all vector modes. + (aarch64_get_lanedi): Remove misleading constraints. + * config/aarch64/arm_neon.h + (__aarch64_vget_lane_any): Define. + (__aarch64_vget_lane_<8,16,32,64>): Likewise. + (vget_lane_<8,16,32,64>): Use __aarch64_vget_lane macros. + (vdup_lane_<8,16,32,64>): Likewise. + * config/aarch64/iterators.md (VDQQH): New. + (VDQQHS): Likewise. + (vwcore): Likewise. + +2013-08-09 Eric Botcazou + + * configure.ac: Add GAS check for LEON instructions on SPARC. + * configure: Regenerate. + * config.in: Likewise. + * config.gcc (with_cpu): Remove sparc-leon*-* and deal with LEON in the + sparc*-*-* block. + * config/sparc/sparc.opt (LEON, LEON3): New masks. + * config/sparc/sparc.h (ASM_CPU32_DEFAULT_SPEC): Set to AS_LEON_FLAG + for LEON or LEON3. + (ASM_CPU_SPEC): Pass AS_LEON_FLAG if -mcpu=leon or -mcpu=leon3. + (AS_LEON_FLAG): New macro. + * config/sparc/sparc.c (sparc_option_override): Set MASK_LEON for leon + and MASK_LEON3 for leon3 and unset them if HAVE_AS_LEON is not defined. + Deal with LEON and LEON3 for the memory model. 
+ * config/sparc/sync.md (atomic_compare_and_swap): Enable if LEON3 + (atomic_compare_and_swap_1): Likewise. + (*atomic_compare_and_swap_1): Likewise. + +2013-08-09 Zhenqiang Chen + + * config/arm/neon.md (vcond): Fix floating-point vector + comparisons against 0. + +2013-08-08 Vladimir Makarov + + * lra-constraints.c (emit_spill_move): Remove assert. + (process_alt_operands): Add more debugging + output. Increase reject for spilling into memory. Decrease + reject for reloading scratch. + (split_reg): Use HARD_REGNO_CALLER_SAVE_MODE. + +2013-08-08 Steve Ellcey + + * config/mips/mti-linux.h (SYSROOT_SUFFIX_SPEC): Add nan2008. + * config/mips/t-mti-elf (MULTILIB_OPTIONS): Make mips16 and + micromips incompatible. Add nan2008. + (MULTILIB_DIRNAMES): Add nan2008. + (MULTILIB_EXCEPTIONS): Remove mips16/micromips entry. + * config/mips/t-mti-linux (MULTILIB_OPTIONS): Make mips16 + and micromips incompatible. Add nan2008. + (MULTILIB_DIRNAMES): Add nan2008. + (MULTILIB_EXCEPTIONS): Remove mips16/micromips entry. + +2013-08-08 Richard Sandiford + + PR rtl-optimization/58079 + * combine.c (combine_simplify_rtx): Avoid using SUBST if + simplify_comparison has widened a comparison with an integer. + +2013-08-08 Kyrylo Tkachov + + * config/arm/neon.md (movmisalign): Disable when we + don't allow unaligned accesses. + (*movmisalign_neon_store): Likewise. + (*movmisalign_neon_load): Likewise. + (*movmisalign_neon_store): Likewise. + (*movmisalign_neon_load): Likewise. + +2013-08-08 Jan Hubicka + + * cgraphbuild.c (build_cgraph_edges): Do not walk into debugs. + (make_pass_rebuild_cgraph_edges): Also clear references. + * cgraph.c (verify_cgraph_node): Add basic ipa-ref verifier. + * ipa-inline-transform.c (inline_transform): Remove all references + after inlining. + * cgraphunit.c (expand_function): Remove all references after + expansion. + * ipa-ref.c (ipa_ref_has_aliases_p): Fix formatting. + (ipa_find_reference): Rewrite to iterator. + (remove_stmt_references): Likewise. 
+ (ipa_clear_stmts_in_references): New function. + * ipa-ref.h (ipa_clear_stmts_in_references): Declare. + * cgraphclones.c (cgraph_materialize_all_clones): Remove or + clear references. + * ipa-split.c (split_function): Remove references in split function. + +2013-08-08 Richard Earnshaw + + PR target/57431 + * config/arm/arm/neon.md (neon_vld1_dupdi): New expand pattern. + (neon_vld1_dup VD iterator): Iterate over VD not VDX. + +2013-08-08 Richard Earnshaw + + PR target/56979 + * config/arm/arm.c (aapcs_vfp_allocate): Decompose the argument if the + suggested mode for the assignment isn't compatible with the + registers required. + +2013-08-08 Bernd Edlinger + + PR target/58065 + * config/arm/arm.h (MALLOC_ABI_ALIGNMENT): Define. + +2013-08-07 Xinliang David Li + + * config/i386/i386.opt: New option -mtune-ctrl=. + * config/i386/x86-tune.def: New file. + * config/i386/i386.h: include x86-tune.def. + * config/i386/i386.c (ix86_option_override_internal): + Parsing -mtune-ctrl= option and set tune features. + +2013-08-07 Oleg Endo + + PR other/12081 + * config/rs6000/rs6000.c (gen_2arg_fn_t): Remove typedef. + (rs6000_emit_swdiv, rs6000_emit_swrsqrt): Don't cast result of GEN_FCN + to gen_2arg_fn_t. + +2013-08-07 Eric Botcazou + + * rtl.h (update_alignments): Declare. + * final.c (grow_label_align): New function extracted from... + (shorten_branches): ...here. Call it. + (update_alignments): New function. + * reorg.c (sibling_labels): New variable. + (get_label_before): Add SIBLING parameter. If it is non-zero, push + the new label along with it onto the sibling_labels vector. + (fill_simple_delay_slots): Adjust call to get_label_before. + (fill_slots_from_thread): Likewise. + (relax_delay_slots): Likewise. + (make_return_insns): Likewise. + (dbr_schedule): Invoke update_alignment on the sibling_labels vector. + +2013-08-07 Eric Botcazou + + * diagnostic.c (diagnostic_classify_diagnostic): Accept zero index and + document its semantics. 
+ (diagnostic_report_diagnostic): Adjust accordingly. + +2013-08-07 David Malcolm + + * config/sparc/sparc.c (insert_pass_work_around_errata): Move into... + (sparc_option_override): ...and port to new C++ pass API. + * config/sparc/t-sparc (sparc.o): Add dep on CONTEXT_H + +2013-08-07 Peter Bergner + + * config/rs6000/rs6000.c (htm_expand_builtin) : Remove. + +2013-08-06 Caroline Tice + + * gcc.c (VTABLE_VERIFICATION_SPEC): New definition. + (LINK_COMMAND_SPEC): Add VTABLE_VERIFICATION_SPEC. + * tree-pass.h: Add pass_vtable_verify. + * varasm.c (assemble_variable): Add code to properly set the comdat + section and name for the .vtable_map_vars section. + (assemble_vtv_preinit_initializer): New function. + (default_section_type_flags): Make sure .vtable_map_vars section has + LINK_ONCE flag. + * output.h: Add function decl for assemble_vtv_preinit_initializer. + * vtable-verify.c: New file. + * vtable-verify.h: New file. + * flag-types.h (enum vtv_priority): Definitions for flag_vtable_verify + initialization levels. + * timevar.def (TV_VTABLE_VERIFICATION): New definition. + * passes.def: Insert pass_vtable_verify. + * aclocal.m4: Reorder includes. + * doc/invoke.texi: Document the -fvtable-verify=, -fvtv-debug, and + -fvtv-counts options. + * config/gnu-user.h (GNU_USER_TARGET_STARTFILE_SPEC): Add vtv_start*.o, + as appropriate, if -fvtable-verify=... is used. + (GNU_USER_TARGET_ENDFILE_SPEC): Add vtv_end*.o as appropriate, if + -fvtable-verify=... is used. + * Makefile.in (OBJS): Add vtable-verify.o to list. + (vtable-verify.o): Add new build rule. + (GTFILES): Add vtable-verify.c to list. + * common.opt (fvtable-verify=): New flag. + (vtv_priority): Values for fvtable-verify= flag. + (fvtv-counts): New flag. + (fvtv-debug): New flag. + * tree.h (save_vtable_map_decl): New extern function decl. + +2013-08-07 David Malcolm + + * config/rl78/rl78.c (rl78_devirt_pass): Convert from a struct to... + (pass_rl78_devirt): ...new subclass of rtl_opt_pass along with... 
+ (pass_data_rl78_devirt): ...new pass_data instance and... + (make_pass_rl78_devirt): ...new function. + (rl78_asm_file_start): Port pass registration to new C++ API. + +2013-08-07 David Malcolm + + * coretypes.h (rtl_opt_pass): Add. + (gcc::context): Add. + * config/epiphany/epiphany.c (pass_mode_switch_use): New. + (epiphany_init): Port to new C++ pass API. + (epiphany_optimize_mode_switching): Likewise. + * pass_manager.h (pass_manager::get_pass_split_all_insns): New. + (pass_manager::get_pass_mode_switching): New. + (pass_manager::get_pass_peephole2): New. + * mode-switching.c (pass_mode_switching): Add clone method. + * recog.c (pass_peephole2): Add clone method. + (pass_split_all_insns): Add clone method. + +2013-08-06 David Malcolm + + * config/mips/mips.c (insert_pass_mips_machine_reorg2): Move into... + (mips_option_override): ...here, porting to new C++ API for passes. + +2013-08-06 Jan Hubicka + + * cgraph.c (cgraph_get_body): New function based on lto.c + implementation. + * cgraph.h (cgraph_get_body): Declare. + * cgraphclones.c (cgraph_create_virtual_clone): Commonize WPA and + LTO paths. + * cgraphunit.c (expand_function): Get body prior to expanding. + * ipa.c (function_and_variable_visibility): Use gimple_has_body_p test. + * lto-cgraph.c (lto_output_node): Do not stream bodies we don't + really need. + * passes.c (do_per_function_toporder): Get body. + * tree-inline.c (expand_call_inline): Get body prior to inlining it. + * tree-ssa-structalias.c (ipa_pta_execute): Get body; skip clones. + +2013-08-06 Martin Jambor + + PR fortran/57987 + * cgraphunit.c (cgraph_finalize_function): Assert that nested function + is not re-finalized. Rename second parameter to no_collect. + +2013-08-06 Martin Jambor + + PR middle-end/58041 + * gimple-ssa-strength-reduction.c (replace_ref): Make sure built + MEM_REF has proper alignment information.
+ +2013-08-05 Oleg Endo + + PR other/12081 + * recog.h (rtx (*insn_gen_fn) (rtx, ...)): Replace typedef with new + class insn_gen_fn. + * expr.c (move_by_pieces_1, store_by_pieces_2): Replace argument + rtx (*) (rtx, ...) with insn_gen_fn. + * genoutput.c (output_insn_data): Cast gen_? function pointers to + insn_gen_fn::stored_funcptr. Add initializer braces. + +2013-08-05 David Malcolm + + Rewrite how instances of passes are cloned to remove assumptions + about their sizes (thus allowing pass subclasses to have + additional data fields, albeit non-GC-managed ones at this point). + + * passes.c (make_pass_instance): Now that passes have clone + methods, rewrite this function to eliminate XNEW and memcpy + calls that used hardcoded sizes. Since this function no longer + creates pass instances, rename it to... + (add_pass_instance): ...this. Document the old way that passes were + numbered and flagged, and rework this function to continue using it. + (next_pass_1): Add an initial_pass argument for use by + add_pass_instance. + (position_pass): When adding multiple instances of a pass, use + the pass's clone method, rather than relying on the XNEW/memcpy + within the former make_pass_instance (now add_pass_instance). + (pass_manager::pass_manager): When invoking next_pass_1, also supply + the initial instance of the current pass within the pass manager. + +2013-08-05 David Malcolm + + This is the automated part of the conversion of passes from C + structs to C++ classes. + + Patch autogenerated by refactor_passes.py from + https://github.com/davidmalcolm/gcc-refactoring-scripts + revision 03fe39476a4c4ea450b49e087cfa817b5f92021e + + * asan.c (pass_asan): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_asan): ...new pass_data instance and... + (make_pass_asan): ...new function. + (pass_asan_O0): Convert from a global struct to a subclass of + gimple_opt_pass along with... 
+ (pass_data_asan_O0): ...new pass_data instance and... + (make_pass_asan_O0): ...new function. + * auto-inc-dec.c (pass_inc_dec): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_inc_dec): ...new pass_data instance and... + (make_pass_inc_dec): ...new function. + * bb-reorder.c (pass_reorder_blocks): Convert from a global struct to + a subclass of rtl_opt_pass along with... + (pass_data_reorder_blocks): ...new pass_data instance and... + (make_pass_reorder_blocks): ...new function. + (pass_duplicate_computed_gotos): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_duplicate_computed_gotos): ...new pass_data instance and... + (make_pass_duplicate_computed_gotos): ...new function. + (pass_partition_blocks): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_partition_blocks): ...new pass_data instance and... + (make_pass_partition_blocks): ...new function. + * bt-load.c (pass_branch_target_load_optimize1): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_branch_target_load_optimize1): ...new pass_data instance + and... + (make_pass_branch_target_load_optimize1): ...new function. + (pass_branch_target_load_optimize2): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_branch_target_load_optimize2): ...new pass_data instance + and... + (make_pass_branch_target_load_optimize2): ...new function. + * cfgcleanup.c (pass_jump): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_jump): ...new pass_data instance and... + (make_pass_jump): ...new function. + (pass_jump2): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_jump2): ...new pass_data instance and... + (make_pass_jump2): ...new function. + * cfgexpand.c (pass_expand): Convert from a global struct to a + subclass of rtl_opt_pass along with... 
+ (pass_data_expand): ...new pass_data instance and... + (make_pass_expand): ...new function. + * cfgrtl.c (pass_free_cfg): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_free_cfg): ...new pass_data instance and... + (make_pass_free_cfg): ...new function. + (pass_into_cfg_layout_mode): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_into_cfg_layout_mode): ...new pass_data instance and... + (make_pass_into_cfg_layout_mode): ...new function. + (pass_outof_cfg_layout_mode): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_outof_cfg_layout_mode): ...new pass_data instance and... + (make_pass_outof_cfg_layout_mode): ...new function. + * cgraphbuild.c (pass_build_cgraph_edges): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_build_cgraph_edges): ...new pass_data instance and... + (make_pass_build_cgraph_edges): ...new function. + (pass_rebuild_cgraph_edges): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_rebuild_cgraph_edges): ...new pass_data instance and... + (make_pass_rebuild_cgraph_edges): ...new function. + (pass_remove_cgraph_callee_edges): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_remove_cgraph_callee_edges): ...new pass_data instance + and... + (make_pass_remove_cgraph_callee_edges): ...new function. + * combine-stack-adj.c (pass_stack_adjustments): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_stack_adjustments): ...new pass_data instance and... + (make_pass_stack_adjustments): ...new function. + * combine.c (pass_combine): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_combine): ...new pass_data instance and... + (make_pass_combine): ...new function. 
+ * compare-elim.c (pass_compare_elim_after_reload): Convert from a + global struct to a subclass of rtl_opt_pass along with... + (pass_data_compare_elim_after_reload): ...new pass_data instance + and... + (make_pass_compare_elim_after_reload): ...new function. + * cprop.c (pass_rtl_cprop): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_rtl_cprop): ...new pass_data instance and... + (make_pass_rtl_cprop): ...new function. + * cse.c (pass_cse): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_cse): ...new pass_data instance and... + (make_pass_cse): ...new function. + (pass_cse2): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_cse2): ...new pass_data instance and... + (make_pass_cse2): ...new function. + (pass_cse_after_global_opts): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_cse_after_global_opts): ...new pass_data instance and... + (make_pass_cse_after_global_opts): ...new function. + * dce.c (pass_ud_rtl_dce): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_ud_rtl_dce): ...new pass_data instance and... + (make_pass_ud_rtl_dce): ...new function. + (pass_fast_rtl_dce): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_fast_rtl_dce): ...new pass_data instance and... + (make_pass_fast_rtl_dce): ...new function. + * df-core.c (pass_df_initialize_opt): Convert from a global struct to + a subclass of rtl_opt_pass along with... + (pass_data_df_initialize_opt): ...new pass_data instance and... + (make_pass_df_initialize_opt): ...new function. + (pass_df_initialize_no_opt): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_df_initialize_no_opt): ...new pass_data instance and... + (make_pass_df_initialize_no_opt): ...new function. 
+ (pass_df_finish): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_df_finish): ...new pass_data instance and... + (make_pass_df_finish): ...new function. + * dse.c (pass_rtl_dse1): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_dse1): ...new pass_data instance and... + (make_pass_rtl_dse1): ...new function. + (pass_rtl_dse2): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_dse2): ...new pass_data instance and... + (make_pass_rtl_dse2): ...new function. + * dwarf2cfi.c (pass_dwarf2_frame): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_dwarf2_frame): ...new pass_data instance and... + (make_pass_dwarf2_frame): ...new function. + * except.c (pass_set_nothrow_function_flags): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_set_nothrow_function_flags): ...new pass_data instance + and... + (make_pass_set_nothrow_function_flags): ...new function. + (pass_convert_to_eh_region_ranges): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_convert_to_eh_region_ranges): ...new pass_data instance + and... + (make_pass_convert_to_eh_region_ranges): ...new function. + * final.c (pass_compute_alignments): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_compute_alignments): ...new pass_data instance and... + (make_pass_compute_alignments): ...new function. + (pass_final): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_final): ...new pass_data instance and... + (make_pass_final): ...new function. + (pass_shorten_branches): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_shorten_branches): ...new pass_data instance and... + (make_pass_shorten_branches): ...new function. 
+ (pass_clean_state): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_clean_state): ...new pass_data instance and... + (make_pass_clean_state): ...new function. + * function.c (pass_instantiate_virtual_regs): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_instantiate_virtual_regs): ...new pass_data instance and... + (make_pass_instantiate_virtual_regs): ...new function. + (pass_leaf_regs): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_leaf_regs): ...new pass_data instance and... + (make_pass_leaf_regs): ...new function. + (pass_thread_prologue_and_epilogue): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_thread_prologue_and_epilogue): ...new pass_data instance + and... + (make_pass_thread_prologue_and_epilogue): ...new function. + (pass_match_asm_constraints): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_match_asm_constraints): ...new pass_data instance and... + (make_pass_match_asm_constraints): ...new function. + * fwprop.c (pass_rtl_fwprop): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_rtl_fwprop): ...new pass_data instance and... + (make_pass_rtl_fwprop): ...new function. + (pass_rtl_fwprop_addr): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_fwprop_addr): ...new pass_data instance and... + (make_pass_rtl_fwprop_addr): ...new function. + * gcse.c (pass_rtl_pre): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_pre): ...new pass_data instance and... + (make_pass_rtl_pre): ...new function. + (pass_rtl_hoist): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_hoist): ...new pass_data instance and... + (make_pass_rtl_hoist): ...new function. 
+ * gimple-low.c (pass_lower_cf): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_lower_cf): ...new pass_data instance and... + (make_pass_lower_cf): ...new function. + * gimple-ssa-strength-reduction.c (pass_strength_reduction): Convert + from a global struct to a subclass of gimple_opt_pass along with... + (pass_data_strength_reduction): ...new pass_data instance and... + (make_pass_strength_reduction): ...new function. + * ifcvt.c (pass_rtl_ifcvt): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_rtl_ifcvt): ...new pass_data instance and... + (make_pass_rtl_ifcvt): ...new function. + (pass_if_after_combine): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_if_after_combine): ...new pass_data instance and... + (make_pass_if_after_combine): ...new function. + (pass_if_after_reload): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_if_after_reload): ...new pass_data instance and... + (make_pass_if_after_reload): ...new function. + * init-regs.c (pass_initialize_regs): Convert from a global struct to + a subclass of rtl_opt_pass along with... + (pass_data_initialize_regs): ...new pass_data instance and... + (make_pass_initialize_regs): ...new function. + * ipa-cp.c (pass_ipa_cp): Convert from a global struct to a subclass + of ipa_opt_pass_d along with... + (pass_data_ipa_cp): ...new pass_data instance and... + (make_pass_ipa_cp): ...new function. + * ipa-inline-analysis.c (pass_inline_parameters): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_inline_parameters): ...new pass_data instance and... + (make_pass_inline_parameters): ...new function. + * ipa-inline.c (pass_early_inline): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_early_inline): ...new pass_data instance and... + (make_pass_early_inline): ...new function. 
+ (pass_ipa_inline): Convert from a global struct to a subclass of + ipa_opt_pass_d along with... + (pass_data_ipa_inline): ...new pass_data instance and... + (make_pass_ipa_inline): ...new function. + * ipa-pure-const.c (pass_local_pure_const): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_local_pure_const): ...new pass_data instance and... + (make_pass_local_pure_const): ...new function. + (pass_ipa_pure_const): Convert from a global struct to a subclass of + ipa_opt_pass_d along with... + (pass_data_ipa_pure_const): ...new pass_data instance and... + (make_pass_ipa_pure_const): ...new function. + * ipa-reference.c (pass_ipa_reference): Convert from a global struct + to a subclass of ipa_opt_pass_d along with... + (pass_data_ipa_reference): ...new pass_data instance and... + (make_pass_ipa_reference): ...new function. + * ipa-split.c (pass_split_functions): Convert from a global struct to + a subclass of gimple_opt_pass along with... + (pass_data_split_functions): ...new pass_data instance and... + (make_pass_split_functions): ...new function. + (pass_feedback_split_functions): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_feedback_split_functions): ...new pass_data instance and... + (make_pass_feedback_split_functions): ...new function. + * ipa.c (pass_ipa_function_and_variable_visibility): Convert from a + global struct to a subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_function_and_variable_visibility): ...new pass_data + instance and... + (make_pass_ipa_function_and_variable_visibility): ...new function. + (pass_ipa_free_inline_summary): Convert from a global struct to a + subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_free_inline_summary): ...new pass_data instance and... + (make_pass_ipa_free_inline_summary): ...new function. 
+ (pass_ipa_whole_program_visibility): Convert from a global struct to a + subclass of ipa_opt_pass_d along with... + (pass_data_ipa_whole_program_visibility): ...new pass_data instance + and... + (make_pass_ipa_whole_program_visibility): ...new function. + (pass_ipa_profile): Convert from a global struct to a subclass of + ipa_opt_pass_d along with... + (pass_data_ipa_profile): ...new pass_data instance and... + (make_pass_ipa_profile): ...new function. + (pass_ipa_cdtor_merge): Convert from a global struct to a subclass of + ipa_opt_pass_d along with... + (pass_data_ipa_cdtor_merge): ...new pass_data instance and... + (make_pass_ipa_cdtor_merge): ...new function. + * ira.c (pass_ira): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_ira): ...new pass_data instance and... + (make_pass_ira): ...new function. + (pass_reload): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_reload): ...new pass_data instance and... + (make_pass_reload): ...new function. + * jump.c (pass_cleanup_barriers): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_cleanup_barriers): ...new pass_data instance and... + (make_pass_cleanup_barriers): ...new function. + * loop-init.c (pass_loop2): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_loop2): ...new pass_data instance and... + (make_pass_loop2): ...new function. + (pass_rtl_loop_init): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_loop_init): ...new pass_data instance and... + (make_pass_rtl_loop_init): ...new function. + (pass_rtl_loop_done): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_loop_done): ...new pass_data instance and... + (make_pass_rtl_loop_done): ...new function. + (pass_rtl_move_loop_invariants): Convert from a global struct to a + subclass of rtl_opt_pass along with... 
+ (pass_data_rtl_move_loop_invariants): ...new pass_data instance and... + (make_pass_rtl_move_loop_invariants): ...new function. + (pass_rtl_unswitch): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_unswitch): ...new pass_data instance and... + (make_pass_rtl_unswitch): ...new function. + (pass_rtl_unroll_and_peel_loops): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_rtl_unroll_and_peel_loops): ...new pass_data instance + and... + (make_pass_rtl_unroll_and_peel_loops): ...new function. + (pass_rtl_doloop): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_rtl_doloop): ...new pass_data instance and... + (make_pass_rtl_doloop): ...new function. + * lower-subreg.c (pass_lower_subreg): Convert from a global struct to + a subclass of rtl_opt_pass along with... + (pass_data_lower_subreg): ...new pass_data instance and... + (make_pass_lower_subreg): ...new function. + (pass_lower_subreg2): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_lower_subreg2): ...new pass_data instance and... + (make_pass_lower_subreg2): ...new function. + * lto-streamer-out.c (pass_ipa_lto_gimple_out): Convert from a global + struct to a subclass of ipa_opt_pass_d along with... + (pass_data_ipa_lto_gimple_out): ...new pass_data instance and... + (make_pass_ipa_lto_gimple_out): ...new function. + (pass_ipa_lto_finish_out): Convert from a global struct to a subclass + of ipa_opt_pass_d along with... + (pass_data_ipa_lto_finish_out): ...new pass_data instance and... + (make_pass_ipa_lto_finish_out): ...new function. + * mode-switching.c (pass_mode_switching): Convert from a global struct + to a subclass of rtl_opt_pass along with... + (pass_data_mode_switching): ...new pass_data instance and... + (make_pass_mode_switching): ...new function. 
+ * modulo-sched.c (pass_sms): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_sms): ...new pass_data instance and... + (make_pass_sms): ...new function. + * omp-low.c (pass_expand_omp): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_expand_omp): ...new pass_data instance and... + (make_pass_expand_omp): ...new function. + (pass_lower_omp): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lower_omp): ...new pass_data instance and... + (make_pass_lower_omp): ...new function. + (pass_diagnose_omp_blocks): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_diagnose_omp_blocks): ...new pass_data instance and... + (make_pass_diagnose_omp_blocks): ...new function. + * passes.c (pass_early_local_passes): Convert from a global struct to + a subclass of simple_ipa_opt_pass along with... + (pass_data_early_local_passes): ...new pass_data instance and... + (make_pass_early_local_passes): ...new function. + (pass_all_early_optimizations): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_all_early_optimizations): ...new pass_data instance and... + (make_pass_all_early_optimizations): ...new function. + (pass_all_optimizations): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_all_optimizations): ...new pass_data instance and... + (make_pass_all_optimizations): ...new function. + (pass_all_optimizations_g): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_all_optimizations_g): ...new pass_data instance and... + (make_pass_all_optimizations_g): ...new function. + (pass_rest_of_compilation): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_rest_of_compilation): ...new pass_data instance and... + (make_pass_rest_of_compilation): ...new function. 
+ (pass_postreload): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_postreload): ...new pass_data instance and... + (make_pass_postreload): ...new function. + * postreload-gcse.c (pass_gcse2): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_gcse2): ...new pass_data instance and... + (make_pass_gcse2): ...new function. + * postreload.c (pass_postreload_cse): Convert from a global struct to + a subclass of rtl_opt_pass along with... + (pass_data_postreload_cse): ...new pass_data instance and... + (make_pass_postreload_cse): ...new function. + * predict.c (pass_profile): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_profile): ...new pass_data instance and... + (make_pass_profile): ...new function. + (pass_strip_predict_hints): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_strip_predict_hints): ...new pass_data instance and... + (make_pass_strip_predict_hints): ...new function. + * recog.c (pass_peephole2): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_peephole2): ...new pass_data instance and... + (make_pass_peephole2): ...new function. + (pass_split_all_insns): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_split_all_insns): ...new pass_data instance and... + (make_pass_split_all_insns): ...new function. + (pass_split_after_reload): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_split_after_reload): ...new pass_data instance and... + (make_pass_split_after_reload): ...new function. + (pass_split_before_regstack): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_split_before_regstack): ...new pass_data instance and... + (make_pass_split_before_regstack): ...new function. 
+ (pass_split_before_sched2): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_split_before_sched2): ...new pass_data instance and... + (make_pass_split_before_sched2): ...new function. + (pass_split_for_shorten_branches): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_split_for_shorten_branches): ...new pass_data instance + and... + (make_pass_split_for_shorten_branches): ...new function. + * ree.c (pass_ree): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_ree): ...new pass_data instance and... + (make_pass_ree): ...new function. + * reg-stack.c (pass_stack_regs): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_stack_regs): ...new pass_data instance and... + (make_pass_stack_regs): ...new function. + (pass_stack_regs_run): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_stack_regs_run): ...new pass_data instance and... + (make_pass_stack_regs_run): ...new function. + * regcprop.c (pass_cprop_hardreg): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_cprop_hardreg): ...new pass_data instance and... + (make_pass_cprop_hardreg): ...new function. + * reginfo.c (pass_reginfo_init): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_reginfo_init): ...new pass_data instance and... + (make_pass_reginfo_init): ...new function. + * regmove.c (pass_regmove): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_regmove): ...new pass_data instance and... + (make_pass_regmove): ...new function. + * regrename.c (pass_regrename): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_regrename): ...new pass_data instance and... + (make_pass_regrename): ...new function. 
+ * reorg.c (pass_delay_slots): Convert from a global struct to a + subclass of rtl_opt_pass along with... + (pass_data_delay_slots): ...new pass_data instance and... + (make_pass_delay_slots): ...new function. + (pass_machine_reorg): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_machine_reorg): ...new pass_data instance and... + (make_pass_machine_reorg): ...new function. + * sched-rgn.c (pass_sched): Convert from a global struct to a subclass + of rtl_opt_pass along with... + (pass_data_sched): ...new pass_data instance and... + (make_pass_sched): ...new function. + (pass_sched2): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_sched2): ...new pass_data instance and... + (make_pass_sched2): ...new function. + * stack-ptr-mod.c (pass_stack_ptr_mod): Convert from a global struct + to a subclass of rtl_opt_pass along with... + (pass_data_stack_ptr_mod): ...new pass_data instance and... + (make_pass_stack_ptr_mod): ...new function. + * store-motion.c (pass_rtl_store_motion): Convert from a global struct + to a subclass of rtl_opt_pass along with... + (pass_data_rtl_store_motion): ...new pass_data instance and... + (make_pass_rtl_store_motion): ...new function. + * tracer.c (pass_tracer): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_tracer): ...new pass_data instance and... + (make_pass_tracer): ...new function. + * trans-mem.c (pass_diagnose_tm_blocks): Convert from a global struct + to a subclass of gimple_opt_pass along with... + (pass_data_diagnose_tm_blocks): ...new pass_data instance and... + (make_pass_diagnose_tm_blocks): ...new function. + (pass_lower_tm): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lower_tm): ...new pass_data instance and... + (make_pass_lower_tm): ...new function. + (pass_tm_init): Convert from a global struct to a subclass of + gimple_opt_pass along with... 
+ (pass_data_tm_init): ...new pass_data instance and... + (make_pass_tm_init): ...new function. + (pass_tm_mark): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tm_mark): ...new pass_data instance and... + (make_pass_tm_mark): ...new function. + (pass_tm_edges): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tm_edges): ...new pass_data instance and... + (make_pass_tm_edges): ...new function. + (pass_tm_memopt): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tm_memopt): ...new pass_data instance and... + (make_pass_tm_memopt): ...new function. + (pass_ipa_tm): Convert from a global struct to a subclass of + simple_ipa_opt_pass along with... + (pass_data_ipa_tm): ...new pass_data instance and... + (make_pass_ipa_tm): ...new function. + * tree-call-cdce.c (pass_call_cdce): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_call_cdce): ...new pass_data instance and... + (make_pass_call_cdce): ...new function. + * tree-cfg.c (pass_build_cfg): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_build_cfg): ...new pass_data instance and... + (make_pass_build_cfg): ...new function. + (pass_split_crit_edges): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_split_crit_edges): ...new pass_data instance and... + (make_pass_split_crit_edges): ...new function. + (pass_warn_function_return): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_warn_function_return): ...new pass_data instance and... + (make_pass_warn_function_return): ...new function. + (pass_warn_function_noreturn): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_warn_function_noreturn): ...new pass_data instance and... + (make_pass_warn_function_noreturn): ...new function. 
+ (pass_warn_unused_result): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_warn_unused_result): ...new pass_data instance and... + (make_pass_warn_unused_result): ...new function. + * tree-cfgcleanup.c (pass_merge_phi): Convert from a global struct to + a subclass of gimple_opt_pass along with... + (pass_data_merge_phi): ...new pass_data instance and... + (make_pass_merge_phi): ...new function. + * tree-complex.c (pass_lower_complex): Convert from a global struct to + a subclass of gimple_opt_pass along with... + (pass_data_lower_complex): ...new pass_data instance and... + (make_pass_lower_complex): ...new function. + (pass_lower_complex_O0): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lower_complex_O0): ...new pass_data instance and... + (make_pass_lower_complex_O0): ...new function. + * tree-eh.c (pass_lower_eh): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_lower_eh): ...new pass_data instance and... + (make_pass_lower_eh): ...new function. + (pass_refactor_eh): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_refactor_eh): ...new pass_data instance and... + (make_pass_refactor_eh): ...new function. + (pass_lower_resx): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lower_resx): ...new pass_data instance and... + (make_pass_lower_resx): ...new function. + (pass_lower_eh_dispatch): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_lower_eh_dispatch): ...new pass_data instance and... + (make_pass_lower_eh_dispatch): ...new function. + (pass_cleanup_eh): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_cleanup_eh): ...new pass_data instance and... + (make_pass_cleanup_eh): ...new function. 
+ * tree-emutls.c (pass_ipa_lower_emutls): Convert from a global struct + to a subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_lower_emutls): ...new pass_data instance and... + (make_pass_ipa_lower_emutls): ...new function. + * tree-if-conv.c (pass_if_conversion): Convert from a global struct to + a subclass of gimple_opt_pass along with... + (pass_data_if_conversion): ...new pass_data instance and... + (make_pass_if_conversion): ...new function. + * tree-into-ssa.c (pass_build_ssa): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_build_ssa): ...new pass_data instance and... + (make_pass_build_ssa): ...new function. + * tree-loop-distribution.c (pass_loop_distribution): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_loop_distribution): ...new pass_data instance and... + (make_pass_loop_distribution): ...new function. + * tree-mudflap.c (pass_mudflap_1): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_mudflap_1): ...new pass_data instance and... + (make_pass_mudflap_1): ...new function. + (pass_mudflap_2): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_mudflap_2): ...new pass_data instance and... + (make_pass_mudflap_2): ...new function. + * tree-nomudflap.c (pass_mudflap_1): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_mudflap_1): ...new pass_data instance and... + (make_pass_mudflap_1): ...new function. + (pass_mudflap_2): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_mudflap_2): ...new pass_data instance and... + (make_pass_mudflap_2): ...new function. + * tree-nrv.c (pass_nrv): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_nrv): ...new pass_data instance and... + (make_pass_nrv): ...new function. 
+ (pass_return_slot): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_return_slot): ...new pass_data instance and... + (make_pass_return_slot): ...new function. + * tree-object-size.c (pass_object_sizes): Convert from a global struct + to a subclass of gimple_opt_pass along with... + (pass_data_object_sizes): ...new pass_data instance and... + (make_pass_object_sizes): ...new function. + * tree-optimize.c (pass_cleanup_cfg_post_optimizing): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_cleanup_cfg_post_optimizing): ...new pass_data instance + and... + (make_pass_cleanup_cfg_post_optimizing): ...new function. + (pass_fixup_cfg): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_fixup_cfg): ...new pass_data instance and... + (make_pass_fixup_cfg): ...new function. + * tree-pass.h (pass_mudflap_1): Replace declaration with that of... + (make_pass_mudflap_1): ...new function. + (pass_mudflap_2): Replace declaration with that of... + (make_pass_mudflap_2): ...new function. + (pass_asan): Replace declaration with that of... + (make_pass_asan): ...new function. + (pass_asan_O0): Replace declaration with that of... + (make_pass_asan_O0): ...new function. + (pass_tsan): Replace declaration with that of... + (make_pass_tsan): ...new function. + (pass_tsan_O0): Replace declaration with that of... + (make_pass_tsan_O0): ...new function. + (pass_lower_cf): Replace declaration with that of... + (make_pass_lower_cf): ...new function. + (pass_refactor_eh): Replace declaration with that of... + (make_pass_refactor_eh): ...new function. + (pass_lower_eh): Replace declaration with that of... + (make_pass_lower_eh): ...new function. + (pass_lower_eh_dispatch): Replace declaration with that of... + (make_pass_lower_eh_dispatch): ...new function. + (pass_lower_resx): Replace declaration with that of... + (make_pass_lower_resx): ...new function. 
+ (pass_build_cfg): Replace declaration with that of... + (make_pass_build_cfg): ...new function. + (pass_early_tree_profile): Replace declaration with that of... + (make_pass_early_tree_profile): ...new function. + (pass_cleanup_eh): Replace declaration with that of... + (make_pass_cleanup_eh): ...new function. + (pass_sra): Replace declaration with that of... + (make_pass_sra): ...new function. + (pass_sra_early): Replace declaration with that of... + (make_pass_sra_early): ...new function. + (pass_early_ipa_sra): Replace declaration with that of... + (make_pass_early_ipa_sra): ...new function. + (pass_tail_recursion): Replace declaration with that of... + (make_pass_tail_recursion): ...new function. + (pass_tail_calls): Replace declaration with that of... + (make_pass_tail_calls): ...new function. + (pass_tree_loop): Replace declaration with that of... + (make_pass_tree_loop): ...new function. + (pass_tree_loop_init): Replace declaration with that of... + (make_pass_tree_loop_init): ...new function. + (pass_lim): Replace declaration with that of... + (make_pass_lim): ...new function. + (pass_tree_unswitch): Replace declaration with that of... + (make_pass_tree_unswitch): ...new function. + (pass_predcom): Replace declaration with that of... + (make_pass_predcom): ...new function. + (pass_iv_canon): Replace declaration with that of... + (make_pass_iv_canon): ...new function. + (pass_scev_cprop): Replace declaration with that of... + (make_pass_scev_cprop): ...new function. + (pass_empty_loop): Replace declaration with that of... + (make_pass_empty_loop): ...new function. + (pass_record_bounds): Replace declaration with that of... + (make_pass_record_bounds): ...new function. + (pass_graphite): Replace declaration with that of... + (make_pass_graphite): ...new function. + (pass_graphite_transforms): Replace declaration with that of... + (make_pass_graphite_transforms): ...new function. + (pass_if_conversion): Replace declaration with that of... 
+ (make_pass_if_conversion): ...new function. + (pass_loop_distribution): Replace declaration with that of... + (make_pass_loop_distribution): ...new function. + (pass_vectorize): Replace declaration with that of... + (make_pass_vectorize): ...new function. + (pass_slp_vectorize): Replace declaration with that of... + (make_pass_slp_vectorize): ...new function. + (pass_complete_unroll): Replace declaration with that of... + (make_pass_complete_unroll): ...new function. + (pass_complete_unrolli): Replace declaration with that of... + (make_pass_complete_unrolli): ...new function. + (pass_parallelize_loops): Replace declaration with that of... + (make_pass_parallelize_loops): ...new function. + (pass_loop_prefetch): Replace declaration with that of... + (make_pass_loop_prefetch): ...new function. + (pass_iv_optimize): Replace declaration with that of... + (make_pass_iv_optimize): ...new function. + (pass_tree_loop_done): Replace declaration with that of... + (make_pass_tree_loop_done): ...new function. + (pass_ch): Replace declaration with that of... + (make_pass_ch): ...new function. + (pass_ccp): Replace declaration with that of... + (make_pass_ccp): ...new function. + (pass_phi_only_cprop): Replace declaration with that of... + (make_pass_phi_only_cprop): ...new function. + (pass_build_ssa): Replace declaration with that of... + (make_pass_build_ssa): ...new function. + (pass_build_alias): Replace declaration with that of... + (make_pass_build_alias): ...new function. + (pass_build_ealias): Replace declaration with that of... + (make_pass_build_ealias): ...new function. + (pass_dominator): Replace declaration with that of... + (make_pass_dominator): ...new function. + (pass_dce): Replace declaration with that of... + (make_pass_dce): ...new function. + (pass_dce_loop): Replace declaration with that of... + (make_pass_dce_loop): ...new function. + (pass_cd_dce): Replace declaration with that of... + (make_pass_cd_dce): ...new function. 
+ (pass_call_cdce): Replace declaration with that of... + (make_pass_call_cdce): ...new function. + (pass_merge_phi): Replace declaration with that of... + (make_pass_merge_phi): ...new function. + (pass_split_crit_edges): Replace declaration with that of... + (make_pass_split_crit_edges): ...new function. + (pass_pre): Replace declaration with that of... + (make_pass_pre): ...new function. + (pass_profile): Replace declaration with that of... + (make_pass_profile): ...new function. + (pass_strip_predict_hints): Replace declaration with that of... + (make_pass_strip_predict_hints): ...new function. + (pass_lower_complex_O0): Replace declaration with that of... + (make_pass_lower_complex_O0): ...new function. + (pass_lower_complex): Replace declaration with that of... + (make_pass_lower_complex): ...new function. + (pass_lower_vector): Replace declaration with that of... + (make_pass_lower_vector): ...new function. + (pass_lower_vector_ssa): Replace declaration with that of... + (make_pass_lower_vector_ssa): ...new function. + (pass_lower_omp): Replace declaration with that of... + (make_pass_lower_omp): ...new function. + (pass_diagnose_omp_blocks): Replace declaration with that of... + (make_pass_diagnose_omp_blocks): ...new function. + (pass_expand_omp): Replace declaration with that of... + (make_pass_expand_omp): ...new function. + (pass_expand_omp_ssa): Replace declaration with that of... + (make_pass_expand_omp_ssa): ...new function. + (pass_object_sizes): Replace declaration with that of... + (make_pass_object_sizes): ...new function. + (pass_strlen): Replace declaration with that of... + (make_pass_strlen): ...new function. + (pass_fold_builtins): Replace declaration with that of... + (make_pass_fold_builtins): ...new function. + (pass_stdarg): Replace declaration with that of... + (make_pass_stdarg): ...new function. + (pass_early_warn_uninitialized): Replace declaration with that of... + (make_pass_early_warn_uninitialized): ...new function. 
+ (pass_late_warn_uninitialized): Replace declaration with that of... + (make_pass_late_warn_uninitialized): ...new function. + (pass_cse_reciprocals): Replace declaration with that of... + (make_pass_cse_reciprocals): ...new function. + (pass_cse_sincos): Replace declaration with that of... + (make_pass_cse_sincos): ...new function. + (pass_optimize_bswap): Replace declaration with that of... + (make_pass_optimize_bswap): ...new function. + (pass_optimize_widening_mul): Replace declaration with that of... + (make_pass_optimize_widening_mul): ...new function. + (pass_warn_function_return): Replace declaration with that of... + (make_pass_warn_function_return): ...new function. + (pass_warn_function_noreturn): Replace declaration with that of... + (make_pass_warn_function_noreturn): ...new function. + (pass_cselim): Replace declaration with that of... + (make_pass_cselim): ...new function. + (pass_phiopt): Replace declaration with that of... + (make_pass_phiopt): ...new function. + (pass_forwprop): Replace declaration with that of... + (make_pass_forwprop): ...new function. + (pass_phiprop): Replace declaration with that of... + (make_pass_phiprop): ...new function. + (pass_tree_ifcombine): Replace declaration with that of... + (make_pass_tree_ifcombine): ...new function. + (pass_dse): Replace declaration with that of... + (make_pass_dse): ...new function. + (pass_nrv): Replace declaration with that of... + (make_pass_nrv): ...new function. + (pass_rename_ssa_copies): Replace declaration with that of... + (make_pass_rename_ssa_copies): ...new function. + (pass_sink_code): Replace declaration with that of... + (make_pass_sink_code): ...new function. + (pass_fre): Replace declaration with that of... + (make_pass_fre): ...new function. + (pass_check_data_deps): Replace declaration with that of... + (make_pass_check_data_deps): ...new function. + (pass_copy_prop): Replace declaration with that of... + (make_pass_copy_prop): ...new function. 
+ (pass_vrp): Replace declaration with that of... + (make_pass_vrp): ...new function. + (pass_uncprop): Replace declaration with that of... + (make_pass_uncprop): ...new function. + (pass_return_slot): Replace declaration with that of... + (make_pass_return_slot): ...new function. + (pass_reassoc): Replace declaration with that of... + (make_pass_reassoc): ...new function. + (pass_rebuild_cgraph_edges): Replace declaration with that of... + (make_pass_rebuild_cgraph_edges): ...new function. + (pass_remove_cgraph_callee_edges): Replace declaration with that of... + (make_pass_remove_cgraph_callee_edges): ...new function. + (pass_build_cgraph_edges): Replace declaration with that of... + (make_pass_build_cgraph_edges): ...new function. + (pass_local_pure_const): Replace declaration with that of... + (make_pass_local_pure_const): ...new function. + (pass_tracer): Replace declaration with that of... + (make_pass_tracer): ...new function. + (pass_warn_unused_result): Replace declaration with that of... + (make_pass_warn_unused_result): ...new function. + (pass_diagnose_tm_blocks): Replace declaration with that of... + (make_pass_diagnose_tm_blocks): ...new function. + (pass_lower_tm): Replace declaration with that of... + (make_pass_lower_tm): ...new function. + (pass_tm_init): Replace declaration with that of... + (make_pass_tm_init): ...new function. + (pass_tm_mark): Replace declaration with that of... + (make_pass_tm_mark): ...new function. + (pass_tm_memopt): Replace declaration with that of... + (make_pass_tm_memopt): ...new function. + (pass_tm_edges): Replace declaration with that of... + (make_pass_tm_edges): ...new function. + (pass_split_functions): Replace declaration with that of... + (make_pass_split_functions): ...new function. + (pass_feedback_split_functions): Replace declaration with that of... + (make_pass_feedback_split_functions): ...new function. + (pass_strength_reduction): Replace declaration with that of... 
+ (make_pass_strength_reduction): ...new function. + (pass_ipa_lower_emutls): Replace declaration with that of... + (make_pass_ipa_lower_emutls): ...new function. + (pass_ipa_function_and_variable_visibility): Replace declaration with + that of... + (make_pass_ipa_function_and_variable_visibility): ...new function. + (pass_ipa_tree_profile): Replace declaration with that of... + (make_pass_ipa_tree_profile): ...new function. + (pass_early_local_passes): Replace declaration with that of... + (make_pass_early_local_passes): ...new function. + (pass_ipa_whole_program_visibility): Replace declaration with that + of... + (make_pass_ipa_whole_program_visibility): ...new function. + (pass_ipa_lto_gimple_out): Replace declaration with that of... + (make_pass_ipa_lto_gimple_out): ...new function. + (pass_ipa_increase_alignment): Replace declaration with that of... + (make_pass_ipa_increase_alignment): ...new function. + (pass_ipa_inline): Replace declaration with that of... + (make_pass_ipa_inline): ...new function. + (pass_ipa_free_lang_data): Replace declaration with that of... + (make_pass_ipa_free_lang_data): ...new function. + (pass_ipa_free_inline_summary): Replace declaration with that of... + (make_pass_ipa_free_inline_summary): ...new function. + (pass_ipa_cp): Replace declaration with that of... + (make_pass_ipa_cp): ...new function. + (pass_ipa_reference): Replace declaration with that of... + (make_pass_ipa_reference): ...new function. + (pass_ipa_pure_const): Replace declaration with that of... + (make_pass_ipa_pure_const): ...new function. + (pass_ipa_pta): Replace declaration with that of... + (make_pass_ipa_pta): ...new function. + (pass_ipa_lto_finish_out): Replace declaration with that of... + (make_pass_ipa_lto_finish_out): ...new function. + (pass_ipa_tm): Replace declaration with that of... + (make_pass_ipa_tm): ...new function. + (pass_ipa_profile): Replace declaration with that of... + (make_pass_ipa_profile): ...new function. 
+ (pass_ipa_cdtor_merge): Replace declaration with that of... + (make_pass_ipa_cdtor_merge): ...new function. + (pass_cleanup_cfg_post_optimizing): Replace declaration with that + of... + (make_pass_cleanup_cfg_post_optimizing): ...new function. + (pass_init_datastructures): Replace declaration with that of... + (make_pass_init_datastructures): ...new function. + (pass_fixup_cfg): Replace declaration with that of... + (make_pass_fixup_cfg): ...new function. + (pass_expand): Replace declaration with that of... + (make_pass_expand): ...new function. + (pass_instantiate_virtual_regs): Replace declaration with that of... + (make_pass_instantiate_virtual_regs): ...new function. + (pass_rtl_fwprop): Replace declaration with that of... + (make_pass_rtl_fwprop): ...new function. + (pass_rtl_fwprop_addr): Replace declaration with that of... + (make_pass_rtl_fwprop_addr): ...new function. + (pass_jump): Replace declaration with that of... + (make_pass_jump): ...new function. + (pass_jump2): Replace declaration with that of... + (make_pass_jump2): ...new function. + (pass_lower_subreg): Replace declaration with that of... + (make_pass_lower_subreg): ...new function. + (pass_cse): Replace declaration with that of... + (make_pass_cse): ...new function. + (pass_fast_rtl_dce): Replace declaration with that of... + (make_pass_fast_rtl_dce): ...new function. + (pass_ud_rtl_dce): Replace declaration with that of... + (make_pass_ud_rtl_dce): ...new function. + (pass_rtl_dce): Replace declaration with that of... + (make_pass_rtl_dce): ...new function. + (pass_rtl_dse1): Replace declaration with that of... + (make_pass_rtl_dse1): ...new function. + (pass_rtl_dse2): Replace declaration with that of... + (make_pass_rtl_dse2): ...new function. + (pass_rtl_dse3): Replace declaration with that of... + (make_pass_rtl_dse3): ...new function. + (pass_rtl_cprop): Replace declaration with that of... + (make_pass_rtl_cprop): ...new function. + (pass_rtl_pre): Replace declaration with that of... 
+ (make_pass_rtl_pre): ...new function. + (pass_rtl_hoist): Replace declaration with that of... + (make_pass_rtl_hoist): ...new function. + (pass_rtl_store_motion): Replace declaration with that of... + (make_pass_rtl_store_motion): ...new function. + (pass_cse_after_global_opts): Replace declaration with that of... + (make_pass_cse_after_global_opts): ...new function. + (pass_rtl_ifcvt): Replace declaration with that of... + (make_pass_rtl_ifcvt): ...new function. + (pass_into_cfg_layout_mode): Replace declaration with that of... + (make_pass_into_cfg_layout_mode): ...new function. + (pass_outof_cfg_layout_mode): Replace declaration with that of... + (make_pass_outof_cfg_layout_mode): ...new function. + (pass_loop2): Replace declaration with that of... + (make_pass_loop2): ...new function. + (pass_rtl_loop_init): Replace declaration with that of... + (make_pass_rtl_loop_init): ...new function. + (pass_rtl_move_loop_invariants): Replace declaration with that of... + (make_pass_rtl_move_loop_invariants): ...new function. + (pass_rtl_unswitch): Replace declaration with that of... + (make_pass_rtl_unswitch): ...new function. + (pass_rtl_unroll_and_peel_loops): Replace declaration with that of... + (make_pass_rtl_unroll_and_peel_loops): ...new function. + (pass_rtl_doloop): Replace declaration with that of... + (make_pass_rtl_doloop): ...new function. + (pass_rtl_loop_done): Replace declaration with that of... + (make_pass_rtl_loop_done): ...new function. + (pass_web): Replace declaration with that of... + (make_pass_web): ...new function. + (pass_cse2): Replace declaration with that of... + (make_pass_cse2): ...new function. + (pass_df_initialize_opt): Replace declaration with that of... + (make_pass_df_initialize_opt): ...new function. + (pass_df_initialize_no_opt): Replace declaration with that of... + (make_pass_df_initialize_no_opt): ...new function. + (pass_reginfo_init): Replace declaration with that of... + (make_pass_reginfo_init): ...new function. 
+ (pass_inc_dec): Replace declaration with that of... + (make_pass_inc_dec): ...new function. + (pass_stack_ptr_mod): Replace declaration with that of... + (make_pass_stack_ptr_mod): ...new function. + (pass_initialize_regs): Replace declaration with that of... + (make_pass_initialize_regs): ...new function. + (pass_combine): Replace declaration with that of... + (make_pass_combine): ...new function. + (pass_if_after_combine): Replace declaration with that of... + (make_pass_if_after_combine): ...new function. + (pass_ree): Replace declaration with that of... + (make_pass_ree): ...new function. + (pass_partition_blocks): Replace declaration with that of... + (make_pass_partition_blocks): ...new function. + (pass_match_asm_constraints): Replace declaration with that of... + (make_pass_match_asm_constraints): ...new function. + (pass_regmove): Replace declaration with that of... + (make_pass_regmove): ...new function. + (pass_split_all_insns): Replace declaration with that of... + (make_pass_split_all_insns): ...new function. + (pass_fast_rtl_byte_dce): Replace declaration with that of... + (make_pass_fast_rtl_byte_dce): ...new function. + (pass_lower_subreg2): Replace declaration with that of... + (make_pass_lower_subreg2): ...new function. + (pass_mode_switching): Replace declaration with that of... + (make_pass_mode_switching): ...new function. + (pass_sms): Replace declaration with that of... + (make_pass_sms): ...new function. + (pass_sched): Replace declaration with that of... + (make_pass_sched): ...new function. + (pass_ira): Replace declaration with that of... + (make_pass_ira): ...new function. + (pass_reload): Replace declaration with that of... + (make_pass_reload): ...new function. + (pass_clean_state): Replace declaration with that of... + (make_pass_clean_state): ...new function. + (pass_branch_prob): Replace declaration with that of... + (make_pass_branch_prob): ...new function. 
+ (pass_value_profile_transformations): Replace declaration with that + of... + (make_pass_value_profile_transformations): ...new function. + (pass_postreload_cse): Replace declaration with that of... + (make_pass_postreload_cse): ...new function. + (pass_gcse2): Replace declaration with that of... + (make_pass_gcse2): ...new function. + (pass_split_after_reload): Replace declaration with that of... + (make_pass_split_after_reload): ...new function. + (pass_branch_target_load_optimize1): Replace declaration with that + of... + (make_pass_branch_target_load_optimize1): ...new function. + (pass_thread_prologue_and_epilogue): Replace declaration with that + of... + (make_pass_thread_prologue_and_epilogue): ...new function. + (pass_stack_adjustments): Replace declaration with that of... + (make_pass_stack_adjustments): ...new function. + (pass_peephole2): Replace declaration with that of... + (make_pass_peephole2): ...new function. + (pass_if_after_reload): Replace declaration with that of... + (make_pass_if_after_reload): ...new function. + (pass_regrename): Replace declaration with that of... + (make_pass_regrename): ...new function. + (pass_cprop_hardreg): Replace declaration with that of... + (make_pass_cprop_hardreg): ...new function. + (pass_reorder_blocks): Replace declaration with that of... + (make_pass_reorder_blocks): ...new function. + (pass_branch_target_load_optimize2): Replace declaration with that + of... + (make_pass_branch_target_load_optimize2): ...new function. + (pass_leaf_regs): Replace declaration with that of... + (make_pass_leaf_regs): ...new function. + (pass_split_before_sched2): Replace declaration with that of... + (make_pass_split_before_sched2): ...new function. + (pass_compare_elim_after_reload): Replace declaration with that of... + (make_pass_compare_elim_after_reload): ...new function. + (pass_sched2): Replace declaration with that of... + (make_pass_sched2): ...new function. + (pass_stack_regs): Replace declaration with that of... 
+ (make_pass_stack_regs): ...new function. + (pass_stack_regs_run): Replace declaration with that of... + (make_pass_stack_regs_run): ...new function. + (pass_df_finish): Replace declaration with that of... + (make_pass_df_finish): ...new function. + (pass_compute_alignments): Replace declaration with that of... + (make_pass_compute_alignments): ...new function. + (pass_duplicate_computed_gotos): Replace declaration with that of... + (make_pass_duplicate_computed_gotos): ...new function. + (pass_variable_tracking): Replace declaration with that of... + (make_pass_variable_tracking): ...new function. + (pass_free_cfg): Replace declaration with that of... + (make_pass_free_cfg): ...new function. + (pass_machine_reorg): Replace declaration with that of... + (make_pass_machine_reorg): ...new function. + (pass_cleanup_barriers): Replace declaration with that of... + (make_pass_cleanup_barriers): ...new function. + (pass_delay_slots): Replace declaration with that of... + (make_pass_delay_slots): ...new function. + (pass_split_for_shorten_branches): Replace declaration with that of... + (make_pass_split_for_shorten_branches): ...new function. + (pass_split_before_regstack): Replace declaration with that of... + (make_pass_split_before_regstack): ...new function. + (pass_convert_to_eh_region_ranges): Replace declaration with that + of... + (make_pass_convert_to_eh_region_ranges): ...new function. + (pass_shorten_branches): Replace declaration with that of... + (make_pass_shorten_branches): ...new function. + (pass_set_nothrow_function_flags): Replace declaration with that of... + (make_pass_set_nothrow_function_flags): ...new function. + (pass_dwarf2_frame): Replace declaration with that of... + (make_pass_dwarf2_frame): ...new function. + (pass_final): Replace declaration with that of... + (make_pass_final): ...new function. + (pass_rtl_seqabstr): Replace declaration with that of... + (make_pass_rtl_seqabstr): ...new function. 
+ (pass_release_ssa_names): Replace declaration with that of... + (make_pass_release_ssa_names): ...new function. + (pass_early_inline): Replace declaration with that of... + (make_pass_early_inline): ...new function. + (pass_inline_parameters): Replace declaration with that of... + (make_pass_inline_parameters): ...new function. + (pass_update_address_taken): Replace declaration with that of... + (make_pass_update_address_taken): ...new function. + (pass_convert_switch): Replace declaration with that of... + (make_pass_convert_switch): ...new function. + * tree-profile.c (pass_ipa_tree_profile): Convert from a global struct + to a subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_tree_profile): ...new pass_data instance and... + (make_pass_ipa_tree_profile): ...new function. + * tree-sra.c (pass_sra_early): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_sra_early): ...new pass_data instance and... + (make_pass_sra_early): ...new function. + (pass_sra): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_sra): ...new pass_data instance and... + (make_pass_sra): ...new function. + (pass_early_ipa_sra): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_early_ipa_sra): ...new pass_data instance and... + (make_pass_early_ipa_sra): ...new function. + * tree-ssa-ccp.c (pass_ccp): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_ccp): ...new pass_data instance and... + (make_pass_ccp): ...new function. + (pass_fold_builtins): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_fold_builtins): ...new pass_data instance and... + (make_pass_fold_builtins): ...new function. + * tree-ssa-copy.c (pass_copy_prop): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_copy_prop): ...new pass_data instance and... 
+ (make_pass_copy_prop): ...new function. + * tree-ssa-copyrename.c (pass_rename_ssa_copies): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_rename_ssa_copies): ...new pass_data instance and... + (make_pass_rename_ssa_copies): ...new function. + * tree-ssa-dce.c (pass_dce): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_dce): ...new pass_data instance and... + (make_pass_dce): ...new function. + (pass_dce_loop): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_dce_loop): ...new pass_data instance and... + (make_pass_dce_loop): ...new function. + (pass_cd_dce): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_cd_dce): ...new pass_data instance and... + (make_pass_cd_dce): ...new function. + * tree-ssa-dom.c (pass_dominator): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_dominator): ...new pass_data instance and... + (make_pass_dominator): ...new function. + (pass_phi_only_cprop): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_phi_only_cprop): ...new pass_data instance and... + (make_pass_phi_only_cprop): ...new function. + * tree-ssa-dse.c (pass_dse): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_dse): ...new pass_data instance and... + (make_pass_dse): ...new function. + * tree-ssa-forwprop.c (pass_forwprop): Convert from a global struct to + a subclass of gimple_opt_pass along with... + (pass_data_forwprop): ...new pass_data instance and... + (make_pass_forwprop): ...new function. + * tree-ssa-ifcombine.c (pass_tree_ifcombine): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_tree_ifcombine): ...new pass_data instance and... + (make_pass_tree_ifcombine): ...new function. 
+ * tree-ssa-loop-ch.c (pass_ch): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_ch): ...new pass_data instance and... + (make_pass_ch): ...new function. + * tree-ssa-loop.c (pass_tree_loop): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_tree_loop): ...new pass_data instance and... + (make_pass_tree_loop): ...new function. + (pass_tree_loop_init): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tree_loop_init): ...new pass_data instance and... + (make_pass_tree_loop_init): ...new function. + (pass_lim): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lim): ...new pass_data instance and... + (make_pass_lim): ...new function. + (pass_tree_unswitch): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tree_unswitch): ...new pass_data instance and... + (make_pass_tree_unswitch): ...new function. + (pass_predcom): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_predcom): ...new pass_data instance and... + (make_pass_predcom): ...new function. + (pass_vectorize): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_vectorize): ...new pass_data instance and... + (make_pass_vectorize): ...new function. + (pass_graphite): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_graphite): ...new pass_data instance and... + (make_pass_graphite): ...new function. + (pass_graphite_transforms): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_graphite_transforms): ...new pass_data instance and... + (make_pass_graphite_transforms): ...new function. + (pass_check_data_deps): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_check_data_deps): ...new pass_data instance and... 
+ (make_pass_check_data_deps): ...new function. + (pass_iv_canon): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_iv_canon): ...new pass_data instance and... + (make_pass_iv_canon): ...new function. + (pass_scev_cprop): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_scev_cprop): ...new pass_data instance and... + (make_pass_scev_cprop): ...new function. + (pass_record_bounds): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_record_bounds): ...new pass_data instance and... + (make_pass_record_bounds): ...new function. + (pass_complete_unroll): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_complete_unroll): ...new pass_data instance and... + (make_pass_complete_unroll): ...new function. + (pass_complete_unrolli): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_complete_unrolli): ...new pass_data instance and... + (make_pass_complete_unrolli): ...new function. + (pass_parallelize_loops): Convert from a global struct to a subclass + of gimple_opt_pass along with... + (pass_data_parallelize_loops): ...new pass_data instance and... + (make_pass_parallelize_loops): ...new function. + (pass_loop_prefetch): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_loop_prefetch): ...new pass_data instance and... + (make_pass_loop_prefetch): ...new function. + (pass_iv_optimize): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_iv_optimize): ...new pass_data instance and... + (make_pass_iv_optimize): ...new function. + (pass_tree_loop_done): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tree_loop_done): ...new pass_data instance and... + (make_pass_tree_loop_done): ...new function. 
+ * tree-ssa-math-opts.c (pass_cse_reciprocals): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_cse_reciprocals): ...new pass_data instance and... + (make_pass_cse_reciprocals): ...new function. + (pass_cse_sincos): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_cse_sincos): ...new pass_data instance and... + (make_pass_cse_sincos): ...new function. + (pass_optimize_bswap): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_optimize_bswap): ...new pass_data instance and... + (make_pass_optimize_bswap): ...new function. + (pass_optimize_widening_mul): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_optimize_widening_mul): ...new pass_data instance and... + (make_pass_optimize_widening_mul): ...new function. + * tree-ssa-phiopt.c (pass_phiopt): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_phiopt): ...new pass_data instance and... + (make_pass_phiopt): ...new function. + (pass_cselim): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_cselim): ...new pass_data instance and... + (make_pass_cselim): ...new function. + * tree-ssa-phiprop.c (pass_phiprop): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_phiprop): ...new pass_data instance and... + (make_pass_phiprop): ...new function. + * tree-ssa-pre.c (pass_pre): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_pre): ...new pass_data instance and... + (make_pass_pre): ...new function. + (pass_fre): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_fre): ...new pass_data instance and... + (make_pass_fre): ...new function. + * tree-ssa-reassoc.c (pass_reassoc): Convert from a global struct to a + subclass of gimple_opt_pass along with... 
+ (pass_data_reassoc): ...new pass_data instance and... + (make_pass_reassoc): ...new function. + * tree-ssa-sink.c (pass_sink_code): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_sink_code): ...new pass_data instance and... + (make_pass_sink_code): ...new function. + * tree-ssa-strlen.c (pass_strlen): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_strlen): ...new pass_data instance and... + (make_pass_strlen): ...new function. + * tree-ssa-structalias.c (pass_build_alias): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_build_alias): ...new pass_data instance and... + (make_pass_build_alias): ...new function. + (pass_build_ealias): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_build_ealias): ...new pass_data instance and... + (make_pass_build_ealias): ...new function. + (pass_ipa_pta): Convert from a global struct to a subclass of + simple_ipa_opt_pass along with... + (pass_data_ipa_pta): ...new pass_data instance and... + (make_pass_ipa_pta): ...new function. + * tree-ssa-uncprop.c (pass_uncprop): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_uncprop): ...new pass_data instance and... + (make_pass_uncprop): ...new function. + * tree-ssa-uninit.c (pass_late_warn_uninitialized): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_late_warn_uninitialized): ...new pass_data instance and... + (make_pass_late_warn_uninitialized): ...new function. + * tree-ssa.c (pass_init_datastructures): Convert from a global struct + to a subclass of gimple_opt_pass along with... + (pass_data_init_datastructures): ...new pass_data instance and... + (make_pass_init_datastructures): ...new function. + (pass_early_warn_uninitialized): Convert from a global struct to a + subclass of gimple_opt_pass along with... 
+ (pass_data_early_warn_uninitialized): ...new pass_data instance and... + (make_pass_early_warn_uninitialized): ...new function. + (pass_update_address_taken): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_update_address_taken): ...new pass_data instance and... + (make_pass_update_address_taken): ...new function. + * tree-ssanames.c (pass_release_ssa_names): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_release_ssa_names): ...new pass_data instance and... + (make_pass_release_ssa_names): ...new function. + * tree-stdarg.c (pass_stdarg): Convert from a global struct to a + subclass of gimple_opt_pass along with... + (pass_data_stdarg): ...new pass_data instance and... + (make_pass_stdarg): ...new function. + * tree-switch-conversion.c (pass_convert_switch): Convert from a + global struct to a subclass of gimple_opt_pass along with... + (pass_data_convert_switch): ...new pass_data instance and... + (make_pass_convert_switch): ...new function. + * tree-tailcall.c (pass_tail_recursion): Convert from a global struct + to a subclass of gimple_opt_pass along with... + (pass_data_tail_recursion): ...new pass_data instance and... + (make_pass_tail_recursion): ...new function. + (pass_tail_calls): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tail_calls): ...new pass_data instance and... + (make_pass_tail_calls): ...new function. + * tree-vect-generic.c (pass_lower_vector): Convert from a global + struct to a subclass of gimple_opt_pass along with... + (pass_data_lower_vector): ...new pass_data instance and... + (make_pass_lower_vector): ...new function. + (pass_lower_vector_ssa): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_lower_vector_ssa): ...new pass_data instance and... + (make_pass_lower_vector_ssa): ...new function. 
+ * tree-vectorizer.c (pass_slp_vectorize): Convert from a global struct + to a subclass of gimple_opt_pass along with... + (pass_data_slp_vectorize): ...new pass_data instance and... + (make_pass_slp_vectorize): ...new function. + (pass_ipa_increase_alignment): Convert from a global struct to a + subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_increase_alignment): ...new pass_data instance and... + (make_pass_ipa_increase_alignment): ...new function. + * tree-vrp.c (pass_vrp): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_vrp): ...new pass_data instance and... + (make_pass_vrp): ...new function. + * tree.c (pass_ipa_free_lang_data): Convert from a global struct to a + subclass of simple_ipa_opt_pass along with... + (pass_data_ipa_free_lang_data): ...new pass_data instance and... + (make_pass_ipa_free_lang_data): ...new function. + * tsan.c (pass_tsan): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tsan): ...new pass_data instance and... + (make_pass_tsan): ...new function. + (pass_tsan_O0): Convert from a global struct to a subclass of + gimple_opt_pass along with... + (pass_data_tsan_O0): ...new pass_data instance and... + (make_pass_tsan_O0): ...new function. + * var-tracking.c (pass_variable_tracking): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_variable_tracking): ...new pass_data instance and... + (make_pass_variable_tracking): ...new function. + * web.c (pass_web): Convert from a global struct to a subclass of + rtl_opt_pass along with... + (pass_data_web): ...new pass_data instance and... + (make_pass_web): ...new function. + * config/epiphany/epiphany.h (pass_mode_switch_use): Replace + declaration with that of... + (make_pass_mode_switch_use): ...new function. + (pass_resolve_sw_modes): Replace declaration with that of... + (make_pass_resolve_sw_modes): ...new function. 
+ * config/epiphany/mode-switch-use.c (pass_mode_switch_use): Convert + from a global struct to a subclass of rtl_opt_pass along with... + (pass_data_mode_switch_use): ...new pass_data instance and... + (make_pass_mode_switch_use): ...new function. + * config/epiphany/resolve-sw-modes.c (pass_resolve_sw_modes): Convert + from a global struct to a subclass of rtl_opt_pass along with... + (pass_data_resolve_sw_modes): ...new pass_data instance and... + (make_pass_resolve_sw_modes): ...new function. + * config/i386/i386.c (pass_insert_vzeroupper): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_insert_vzeroupper): ...new pass_data instance and... + (make_pass_insert_vzeroupper): ...new function. + * config/sparc/sparc.c (pass_work_around_errata): Convert from a + global struct to a subclass of rtl_opt_pass along with... + (pass_data_work_around_errata): ...new pass_data instance and... + (make_pass_work_around_errata): ...new function. + * config/mips/mips.c (pass_mips_machine_reorg2): Convert from a global + struct to a subclass of rtl_opt_pass along with... + (pass_data_mips_machine_reorg2): ...new pass_data instance and... + (make_pass_mips_machine_reorg2): ...new function. + +2013-08-05 David Malcolm + + * passes.c (pass_manager::operator new): New. + +2013-08-05 David Malcolm + + Handwritten part of conversion of passes to C++ classes. + + * Makefile.in (PASS_MANAGER_H): Add dep on pass-instances.def. + (toplev.o): Add dep on PASS_MANAGER_H. + * cgraphunit.c (cgraph_process_new_functions): Rework invocation + of early local passes to reflect this moving from a global to a + member of gcc::pass_manager. + (cgraph_add_new_function): Likewise. + * lto-cgraph.c (lto_output_node): Update for conversion of + struct ipa_opt_pass_d to a C++ subclass of opt_pass. + * passes.c (opt_pass::clone): New. + (opt_pass::gate): New. + (opt_pass::execute): New. + (opt_pass::opt_pass): New. + (pass_manager::execute_early_local_passes): New. 
+ (pass_manager::execute_pass_mode_switching): new. + (finish_optimization_passes): Convert to... + (pass_manager::finish_optimization_passes): ...this. + (finish_optimization_passes): Update for conversion of passes to + C++ classes. + (register_dump_files_1): Use has_gate since we cannot portably + check a vtable entry against NULL. + (dump_one_pass): Likewise. + (ipa_write_summaries_2): Likewise. + (ipa_write_optimization_summaries_1): Likewise. + (ipa_read_summaries_1): Likewise. + (ipa_read_optimization_summaries_1): Likewise. + (execute_ipa_stmt_fixups): Likewise. + (pass_manager::pass_manager): Rewrite pass-creation, invoking + pass-creation functions rather than wiring up globals, and + storing the results in fields of pass_manager generated using + pass-instances.def. + (pass_manager::dump_profile_report): Update for conversion of + passes to C++ classes. + (pass_manager::execute_ipa_summary_passes): Likewise. + (execute_one_ipa_transform_pass): Likewise. + (execute_one_pass): Use has_gate and has_execute since we cannot + portably check a vtable entry against NULL. + * pass_manager.h (pass_manager::finish_optimization_passes): New. + (pass_manager): Use pass-instances.def to add fields for the + various pass instances. + * toplev.c (finalize): Update for move of + finish_optimization_passes to a method of gcc::pass_manager. + * toplev.h (finish_optimization_passes): Move to method of class + pass_manager. + * tree-pass.h (struct pass_data): New. + (opt_pass): Convert to C++ class, make it a subclass of pass_data. + (opt_pass::gate): Convert to virtual function. + (opt_pass::~opt_pass): New. + (opt_pass::clone): New. + (opt_pass::execute): Convert to virtual function. + (opt_pass::opt_pass): New. + (opt_pass::ctxt_): new. + (gimple_opt_pass): Convert to subclass of opt_pass. + (gimple_opt_pass::gimple_opt_pass): New. + (rtl_opt_pass): Convert to subclass of opt_pass. + (rtl_opt_pass::rtl_opt_pass): New. + (ipa_opt_pass_d): Convert to subclass of opt_pass. 
+ (ipa_opt_pass_d::ipa_opt_pass_d): New. + (simple_ipa_opt_pass): Convert to subclass of opt_pass. + (simple_ipa_opt_pass::simple_ipa_opt_pass): New. + * config/i386/i386.c (rest_of_handle_insert_vzeroupper): Rework + invocation of pass_mode_switching to reflect this moving from a + global to a member of gcc::pass_manager. + (ix86_option_override): Rework how pass_insert_vzeroupper is + added to the pass_manager to reflect autogenerated changes. + * config/i386/t-i386 (i386.o) Add deps on CONTEXT_H and PASS_MANAGER_H. + +2013-08-05 Richard Earnshaw + + PR rtl-optimization/57708 + * recog.c (peep2_find_free_register): Validate all regs in a + multi-reg mode. + +2013-08-05 Jan Hubicka + + PR lto/57602 + * cgraph.c (verify_cgraph_node): Accept local flags from other + partitions. + * ipa.c (symtab_remove_unreachable_nodes): Do not clear local flag. + (function_and_variable_visibility): Likewise. + * trans-mem.c (ipa_tm_create_version): TM versions are not local. + +2013-08-05 Gabriel Dos Reis + + * graph.c (init_graph_slim_pretty_print): Remove. + (print_graph_cfg): Do not call it. Use local pretty printer. + (start_graph_dump): Likewise. + +2013-08-05 Gabriel Dos Reis + + * gimple-pretty-print.c (buffer): Remove. + (initialized): Likewise. + (maybe_init_pretty_print): Likewise. + (print_gimple_stmt): Do not call it. Use non-static local + pretty_printer variable. + (print_gimple_expr): Likewise. + (print_gimple_seq): Likewise. + (gimple_dump_bb): Likewise. + +2013-08-05 Gabriel Dos Reis + + * asan.c (asan_pp): Remove. + (asan_pp_initialized): Likewise. + (asan_pp_initialize): Likewise. + (asan_pp_string): Take a pretty_printer parameter. Adjust callers. + (asan_emit_stack_protection): Tidy. Use local pretty printer. + (asan_add_global): Likewise. + +2013-08-04 Gabriel Dos Reis + + * pretty-print.h (pp_base): Remove. Adjust dependent macros. + * diagnostic.h (diagnostic_flush_buffer): Adjust. + * pretty-print.c (pp_formatted_text_data): Likewise. 
+ (pp_indent): Rename from pp_base_indent. + (pp_format): Rename from pp_base_format. + (pp_output_formatted_text): Rename from pp_base_output_formatted_text. + (pp_format_verbatim): Rename from pp_base_format_verbatim. + (pp_flush): Rename from pp_base_flush. + (pp_set_line_maximum_length): Rename from + pp_base_set_line_maximum_length. + (pp_clear_output_area): Rename from pp_base_clear_output_area. + (pp_set_prefix): Rename from pp_base_set_prefix. + (pp_destroy_prefix): Rename from pp_base_destroy_prefix. + (pp_emit_prefix): Rename from pp_base_emit_prefix. + (pp_append_text): Rename from pp_base_append_text. + (pp_formatted_text): Rename from pp_base_formatted_text. + (pp_last_position_in_text): Rename from pp_base_last_position_in_text. + (pp_remaining_character_count_for_line): Rename from + pp_base_remaining_character_count_for_line. + (pp_newline): Rename from pp_base_newline. + (pp_character): Rename from pp_base_character. + (pp_string): Rename from pp_base_string. + (pp_maybe_space): Rename from pp_base_maybe_space. + * asan.c (asan_pp_string): Adjust. + (asan_emit_stack_protection): Likewise. + (asan_add_global): Likewise. + * sched-vis.c (str_pattern_slim): Adjust pretty printer function call. + * tree-mudflap.c (mf_varname_tree): Likewise. + * tree-pretty-print.c (pp_tree_identifier): Rename from + pp_base_tree_identifier. + * tree-pretty-print.h (pp_tree_identifier): Remove macro definition. + Declare as function. + +2013-08-03 Gabriel Dos Reis + + * pretty-print.h (pp_bar_bar): New. + (pp_ampersand_ampersand): Likewise. + (pp_less_equal): Likewise. + (pp_greater_equal): Likewise. + * gimple-pretty-print.c (dump_ternary_rhs): Use specialized pretty + printer functions instead of pp_string or operators and punctuators. + (dump_gimple_call): Likewise. + (dump_gimple_omp_for): Likewise. + (dump_gimple_transaction): Likewise. + (dump_gimple_phi): Likewise. + (pp_gimple_stmt_1): Likewise. + * sched-vis.c (print_insn): Likewise. 
+ * tree-mudflap.c (mf_varname_tree): Likewise. + * tree-pretty-print.c (dump_block_node): Likewise. + (dump_generic_node): Likewise. + +2013-08-02 Jan Hubicka + + * lto-cgraph.c (compute_ltrans_boundary): Add abstract origins into + boundaries. + * lto-streamer-out.c (tree_is_indexable): Results decls and + parm decls are not indexable. + (DFS_write_tree_body): Do not follow args and results. + (hash_tree): Likewise. + (output_functions): Rearrange so struct function is needed + only when real body is output; be able to also output abstract + functions; output DECL_ARGUMENTS and DECL_RESULT. + (lto_output): When not in WPA, also store abstract functions. + (write_symbol): Do not care about RESULT_DECL. + (output_symbol_p): Handle correctly abstract decls. + * lto-streamer-in.c (input_function): Rearrange so struct + function can be NULL at entry; allow streaming of + functions w/o body; store DECL_ARGUMENTS and DECL_RESULT. + * ipa.c (symtab_remove_unreachable_nodes): Silence confused + sanity check during LTO. + * tree-streamer-out.c (write_ts_decl_non_common_tree_pointers): Skip + RESULT_DECL and DECL_ARGUMENTS. + * tree-streamer-in.c (lto_input_ts_decl_non_common_tree_pointers): + Likewise. + +2013-08-03 Gabriel Dos Reis + + * pretty-print.h (pp_underscore): New. + (pp_comma): Tidy. + * gimple-pretty-print.c (dump_unary_rhs): Use specialized pretty + printer functions instead of pp_character. + (dump_binary_rhs): Likewise. + (dump_ternary_rhs): Likewise. + (dump_gimple_call_args): Likewise. + (pp_points_to_solution): Likewise. + (dump_gimple_call): Likewise. + (dump_gimple_switch): Likewise. + (dump_gimple_cond): Likewise. + (dump_gimple_bind): Likewise. + (dump_gimple_try): Likewise. + (dump_gimple_omp_for): Likewise. + (dump_gimple_omp_continue): Likewise. + (dump_gimple_omp_single): Likewise. + (dump_gimple_omp_sections): Likewise. + (dump_gimple_omp_block): Likewise. + (dump_gimple_omp_critical): Likewise. + (dump_gimple_transaction): Likewise. 
+ (dump_gimple_asm): Likewise. + (dump_gimple_phi): Likewise. + (dump_gimple_omp_parallel): Likewise. + (dump_gimple_omp_task): Likewise. + (dump_gimple_omp_atomic_load): Likewise. + (dump_gimple_omp_atomic_store): Likewise. + (dump_gimple_mem_ops): Likewise. + (pp_gimple_stmt_1): Likewise. + (pp_cfg_jump): Likewise. + (dump_implicit_edges): Likewise. + (gimple_dump_bb_for_graph): Likewise. + * graph.c (draw_cfg_node): Likewise. + * langhooks.c (lhd_print_error_function): Likewise. + * sched-vis.c (print_exp): Likewise. + (print_value): Likewise. + (print_pattern): Likewise. + (print_insn): Likewise. + (rtl_dump_bb_for_graph): Likewise. + * tree-pretty-print.c (dump_function_declaration): Likewise. + (dump_array_domain): Likewise. + (dump_omp_clause): Likewise. + (dump_location): Likewise. + (dump_generic_node): Likewise. + (print_struct_decl): Likewise. + * diagnostic.c (diagnostic_show_locus): Use pp_space. + +2013-08-03 Bill Schmidt + + * gimple-ssa-strength-reduction.c (replace_mult_candidate): Update + candidate table when replacing a candidate statement. + (replace_rhs_if_not_dup): Likewise. + (replace_one_candidate): Likewise. + +2013-08-02 Jan Hubicka + Martin Liska + + * cgraphunit.c (add_new_function): Fix logic when adding from + late IPA pass. + (assemble_thunk): Rename to ... + (expand_thunk): ... this one; export; get it working with + general functions; make produced gimple valid. + * cgraph.h (expand_thunk): Declare. + +2013-08-02 Jan Hubicka + + * ipa-cp.c (gather_context_independent_values): Use + ipa_get_param_move_cost. + (get_replacement_map): Remove PARAM; move parameter folding + into tree-inline.c. + (create_specialized_node): Update. + * ipa-prop.c (ipa_populate_param_decls): Do not look for origins; + assert that we have gimple body; update move_cost. + (count_formal_params): Assert that we have gimple body. + (ipa_dump_param): New function. + (ipa_alloc_node_params): Break out from ... + (ipa_initialize_node_params): ... here. 
+ (ipa_get_vector_of_formal_parms): ICE when used in WPA. + (ipa_write_node_info): Stream move costs. + (ipa_read_node_info): Read move costs. + (ipa_update_after_lto_read): Do not recompute node params. + * ipa-prop.h (ipa_param_descriptor): Add move_cost. + (ipa_get_param): Check we are not in WPA. + (ipa_get_param_move_cost): New. + * tree-inline.c (tree_function_versioning): Fold replacement as needed. + * ipa-inline-analysis.c (inline_node_duplication_hook): Expect only + parm numbers to be present. + +2013-08-02 Vladimir Makarov + + PR rtl-optimization/58048 + * lra-constraints.c (process_alt_operands): Don't check asm + operand on register. + +2013-08-02 Eric Botcazou + + * config/sparc/sparc.c (sparc_emit_membar_for_model) : Add + the implied StoreLoad barrier for atomic operations if before. + +2013-08-02 Jan Hubicka + Martin Liska + + * cgraph.c (cgraph_function_body_availability): Do not check + cgraph flags. + * cgraph.h (symtab_for_node_and_aliases, symtab_nonoverwritable_alias, + symtab_node_availability): Declare. + * ipa.c (can_replace_by_local_alias): New. + (function_and_variable_visibility): Use it. + * symtab.c (symtab_for_node_and_aliases, + symtab_nonoverwritable_alias_1, symtab_nonoverwritable_alias): New. + +2013-08-02 Vladimir Makarov + + PR rtl-optimization/57963 + * lra-constraints.c (reverse_equiv_p, contains_reloaded_insn_p): New. + (lra_constraints): Use them. + +2013-08-02 Sofiane Naci + + * config/arm/types.md (define_attr "type"): Add "load_acq" + and "store_rel". + * config/arm/cortex-a53.md (cortex_a53_load1): Update for attribute + changes. + (cortex_a53_store1): Likewise. + +2013-08-01 Jan Hubicka + + * ipa.c (symtab_remove_unreachable_nodes): Nodes in other + partitions are not needed. + +2013-08-01 Uros Bizjak + + * config/i386/i386.h (MAYBE_NON_Q_CLASS_P): New. + * config/i386/i386.c (ix86_secondary_reload): Use INTEGER_CLASS_P and + MAYBE_NON_Q_CLASS_P where appropriate. 
+ +2013-08-01 Jan Hubicka + + * cgraph.h (release_function_body): Declare. + * tree.c (free_lang_data_in_decl): Free parameters and return values + of unused declarations. + +2013-08-01 Kyrylo Tkachov + + * config/arm/arm.md (minmax_arithsi_non_canon): Emit canonical + RTL form when subtracting a constant. + +2013-08-01 Kyrylo Tkachov + + * config/arm/arm.md (peepholes for eq (reg1) (reg2/imm)): + Generate canonical plus rtx with negated immediate instead of minus + where appropriate. + * config/arm/arm.c (thumb2_reorg): Handle ADCS , case. + +2013-08-01 Jan Hubicka + + * cgraph.c (cgraph_release_function_body): Use used_as_abstract_origin. + (cgraph_release_function_body): Likewise. + (cgraph_can_remove_if_no_direct_calls_p): Likewise. + * cgraph.h (cgraph_node): Rename abstract_and_needed + to used_as_abstract_origin. + * ipa-inline-transform.c (can_remove_node_now_p_1): Do not remove + symbols used as abstract origins. + * cgraphunit.c (analyze_functions): Update. + * ipa.c (symtab_remove_unreachable_nodes): Recompute + used_as_abstract_origin. + * tree-inline.c (tree_function_versioning): Update + used_as_abstract_origin; be ready for DECL_RESULT and + DECL_ARGUMENTS to be NULL. + + * lto-symtab.c (lto_symtab_merge_symbols): Merge duplicated nodes + for abstract functions. + * cgraph.h (symtab_real_symbol_p): Abstract declarations are not + real symbols. + +2013-08-01 Jan Hubicka + + * profile.c (compute_value_histograms): Fix thinko. + +2013-08-01 Sofiane Naci + + * config.gcc (aarch64*-*-*): Add aarch-common.o to extra_objs. Add + aarch-common-protos.h to extra_headers. + (aarch64*-*-*): Add arm/aarch-common-protos.h to tm_p_file. + * config/aarch64/aarch64.md: Include "../arm/cortex-a53.md". + * config/aarch64/t-aarch64 (aarch-common.o): Define. + +2013-08-01 Sofiane Naci + + * config/aarch64/aarch64.md (define_attr "type"): Delete. + Include "../arm/types.md". Define "type" attribute for all patterns. 
+ * config/aarch64/aarch64-simd.md (move_lo_quad_): Update for + attribute changes. + +2013-07-31 Michael Meissner + + * config/rs6000/predicates.md (fusion_gpr_addis): New predicates + to support power8 load fusion. + (fusion_gpr_mem_load): Likewise. + + * config/rs6000/rs6000-modes.def (PTImode): Update a comment. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New + declarations for power8 load fusion. + (emit_fusion_gpr_load): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): If + tuning for power8, turn on fusion mode by default. Turn on sign + extending fusion mode if normal fusion mode is on, and we are at + -O2 or -O3. + (fusion_gpr_load_p): New function, return true if we can fuse an + addis instruction with a dependent load to a GPR. + (emit_fusion_gpr_load): Emit the instructions for power8 load + fusion to GPRs. + + * config/rs6000/vsx.md (VSX_M2): New iterator for fusion peepholes. + (VSX load fusion peepholes): New peepholes to fuse together an + addi instruction with a VSX load instruction. + + * config/rs6000/rs6000.md (GPR load fusion peepholes): New + peepholes to fuse an addis instruction with a load to a GPR base + register. If we are supporting sign extending fusions, convert + sign extending loads to zero extending loads and add an explicit + sign extension. + +2013-07-31 Sofiane Naci + + * config.gcc (arm*-*-*): Add aarch-common.o to extra_objs. Add + aarch-common-protos.h to extra_headers. + (arm*-*-*): Add arm/aarch-common-protos.h to tm_p_file. + * config/arm/arm.c (arm_early_load_addr_dep): Move from here to ... + (arm_early_store_addr_dep): Likewise. + (arm_no_early_alu_shift_dep): Likewise. + (arm_no_early_alu_shift_value_dep): Likewise. + (arm_no_early_mul_dep): Likewise. + (arm_no_early_store_addr_dep): Likewise. + (arm_mac_accumulator_is_mul_result): Likewise. + (arm_mac_accumulator_is_result): Likewise. + * config/arm/aarch-common.c: ... here. New file. 
+ * config/arm/arm-protos.h (arm_early_load_addr_dep): Move from + here to ... + (arm_early_store_addr_dep): Likewise. + (arm_no_early_alu_shift_dep): Likewise. + (arm_no_early_alu_shift_value_dep): Likewise. + (arm_no_early_mul_dep): Likewise. + (arm_no_early_store_addr_dep): Likewise. + (arm_mac_accumulator_is_mul_result): Likewise. + (arm_mac_accumulator_is_result): Likewise. + * config/arm/aarch-common-protos.h: ... here. New file. + * config/arm/t-arm (aarch-common.o): Define. + +2013-07-31 Sofiane Naci + + * config/arm/arm.md: Include new file "types.md". + (define_attr "type"): Move from here to ... + (define_attr "mul32"): Likewise. + (define_attr "mul64"): Likewise. + * config/arm/types.md: ... here. New file. + +2013-07-31 Sebastian Huber + + * config.gcc (*-*-rtems*): Use __cxa_atexit by default. + * config/rs6000/rtems.h (TARGET_LIBGCC_SDATA_SECTION): Define. + +2013-07-31 Jan-Benedict Glaw + + * gen-pass-instances.awk: Fix offset of substr(). + +2013-07-31 David Malcolm + + * Makefile.in (pass-instances.def): New. + (passes.o): Replace dependency on passes.def with one on + pass-instances.def + + * gen-pass-instances.awk: New. + + * passes.c (pass_manager::pass_manager): Use pass-instances.def + rather than passes.def, updating local definition of NEXT_PASS + macro to add an extra NUM parameter (currently unused). + +2013-07-30 David Malcolm + + * Makefile.in (PASS_MANAGER_H): New. + (lto-cgraph.o): Depend on CONTEXT_H and PASS_MANAGER_H. + (passes.o): Likewise. + (statistics.o): Likewise. + (cgraphunit.o): Likewise. + (context.o): Depend on PASS_MANAGER_H. + + * pass_manager.h: New. + + * cgraphunit.c (cgraph_add_new_function): Update for moves + of globals to fields of pass_manager. + (analyze_function): Likewise. + (expand_function): Likewise. + (ipa_passes): Likewise. + (compile): Likewise. + + * context.c (context::context): New. + * context.h (context::context): New. + (context::get_passes): New. + (context::passes_): New. 
+ + * lto-cgraph.c (input_node): Update for moves of globals to + fields of pass_manager. + + * passes.c (all_passes): Remove, in favor of a field of the + same name within the new class pass_manager. + (all_small_ipa_passes): Likewise. + (all_lowering_passes): Likewise. + (all_regular_ipa_passes): Likewise. + (all_late_ipa_passes): Likewise. + (all_lto_gen_passes): Likewise. + (passes_by_id): Likewise. + (passes_by_id_size): Likewise. + (gcc_pass_lists): Remove, in favor of "pass_lists" field within + the new class pass_manager. + (set_pass_for_id): Convert to... + (pass_manager::set_pass_for_id): ...method. + (get_pass_for_id): Convert to... + (pass_manager::get_pass_for_id): ...method. + (register_one_dump_file): Move body of implementation into... + (pass_manager::register_one_dump_file): ...here. + (register_dump_files_1): Convert to... + (pass_manager::register_dump_files_1): ...method. + (register_dump_files): Convert to... + (pass_manager::register_dump_files): ...method. + (create_pass_tab): Update for moves of globals to fields of + pass_manager. + (dump_passes): Move body of implementation into... + (pass_manager::dump_passes): ...here. + (register_pass): Move body of implementation into... + (pass_manager::register_pass): ...here. + (init_optimization_passes): Convert into... + (pass_manager::pass_manager): ...constructor for new + pass_manager class, and initialize the pass_lists array. + (check_profile_consistency): Update for moves of globals to + fields of pass_manager. + (dump_profile_report): Move body of implementation into... + (pass_manager::dump_profile_report): ...here. + (ipa_write_summaries_1): Update for moves of pass lists from + being globals to fields of pass_manager. + (ipa_write_optimization_summaries): Likewise. + (ipa_read_summaries): Likewise. + (ipa_read_optimization_summaries): Likewise. + (execute_all_ipa_stmt_fixups): Likewise. + + * statistics.c (statistics_fini): Update for moves of globals to + fields of pass_manager. 
+ + * toplev.c (general_init): Replace call to + init_optimization_passes with construction of the pass_manager + instance. + + * tree-pass.h (all_passes): Remove, in favor of a field of the + same name within the new class pass_manager. + (all_small_ipa_passes): Likewise. + (all_lowering_passes): Likewise. + (all_regular_ipa_passes): Likewise. + (all_lto_gen_passes): Likewise. + (all_late_ipa_passes): Likewise. + (passes_by_id): Likewise. + (passes_by_id_size): Likewise. + (gcc_pass_lists): Remove, in favor of "pass_lists" field within + the new class pass_manager. + (get_pass_for_id): Remove. + +2013-07-30 Richard Earnshaw + + * config.gcc (arm): Require 64-bit host-wide-int for all ARM target + configs. + +2013-07-30 Richard Earnshaw + + * arm.md (mulhi3): New expand pattern. + +2013-07-30 Jan Hubicka + Martin Liska + + * profile.c (compute_value_histograms): Do not ICE when + there is mismatch only on some counters. + +2013-07-30 Zhenqiang Chen + + PR rtl-optimization/57637 + * function.c (move_insn_for_shrink_wrap): Also check the + GEN set of the LIVE problem for the liveness analysis + if it exists, otherwise give up. + +2013-07-29 Bill Schmidt + + PR tree-optimization/57993 + * gimple-ssa-strength-reduction.c (replace_mult_candidate): Record + replaced statement in the candidate table. + (phi_add_costs): Return infinite cost when the hidden basis does + not dominate all phis on which the candidate is dependent. + (replace_one_candidate): Record replaced statement in the + candidate table. + +2013-07-29 Joern Rennecke + + * config/epiphany/epiphany.md (*isub_i+2): New peephole. + (ashlv2si3): New expander. + (*ashlv2si3_i): New define_insn_and_split. + * predicates.md (float_operation): Allow patterns with three + basic sub-patterns. + + PR rtl-optimization/58021 + * mode-switching.c (create_pre_exit): Always split off preceding + insns if we are not at the basic block head. + +2013-07-29 Maciej W. 
Rozycki + + * config/mips/linux.h (GLIBC_DYNAMIC_LINKER): Handle `-mnan=2008'. + (UCLIBC_DYNAMIC_LINKER): New macro. + * config/mips/linux64.h (GLIBC_DYNAMIC_LINKER32): Handle + `-mnan=2008'. + (GLIBC_DYNAMIC_LINKER64, GLIBC_DYNAMIC_LINKERN32): Likewise. + (UCLIBC_DYNAMIC_LINKER32): Undefine macro first. Handle + `-mnan=2008'. + (UCLIBC_DYNAMIC_LINKER64): Redefine macro. + (UCLIBC_DYNAMIC_LINKERN32): Likewise. + * config/mips/mips-modes.def: Remove RESET_FLOAT_FORMAT calls + for SF and DF modes. Use ieee_quad_format for TF mode. + * config/mips/mips-opts.h (mips_ieee_754_setting): New enum. + * config/mips/mips.c (mips_file_start): Output a `.nan' directive. + (mips_option_override): Handle `-mnan=legacy'. + * config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Handle + `-mabs=2008' and `-mnan=2008'. + (OPTION_DEFAULT_SPECS): Add "nan" default. + (ASM_SPEC): Handle `-mnan='. + [!HAVE_AS_NAN] (HAVE_AS_NAN): New macro. + * config/mips/mips.md (abs2): Handle `-mabs=2008', update + comment accordingly. + (neg2): Likewise. + * config/mips/mips.opt (mabs, mnan): New options. + * doc/install.texi (Configuration): Document `--with-nan=' option. + * doc/invoke.texi (Option Summary): List MIPS `-mabs=' and + `-mnan=' options. + (MIPS Options): Document them. + * config.gcc : Handle `--with-nan='. + * configure.ac : Check for GAS `-mnan=2008' support. + * configure: Regenerate. + * config.in: Regenerate. + +2013-07-29 Uros Bizjak + + * config/i386/i386.md (float post-reload splitters): Do not check + for subregs of SSE registers. + +2013-07-29 Uros Bizjak + H.J. Lu + + PR target/57954 + PR target/57988 + * config/i386/i386.md (post-reload splitter + to avoid partial SSE reg dependency stalls): New pattern. + +2013-07-29 Dominik Vogt + + * config/s390/s390.md ("movcc"): Swap load and store instructions. + 2013-07-27 Joern Rennecke * config/epiphany/epiphany.c (epiphany_compute_frame_size): @@ -24,10 +5704,10 @@ * config/aarch64/iterators.md: Add attributes rtn and vas. 
2013-07-26 Kyrylo Tkachov - Richard Earnshaw + Richard Earnshaw * combine.c (simplify_comparison): Re-canonicalize operands - where appropriate. + where appropriate. * config/arm/arm.md (movcond_addsi): New splitter. 2013-07-25 Sterling Augustine @@ -104,7 +5784,7 @@ PR target/19599 PR target/57731 - PR target/57748 + PR target/57837 * config/arm/arm.md ("*sibcall_insn): Replace use of Ss with US. Adjust output for v5 and v4t. (*sibcall_value_insn): Likewise and loosen predicate on operand0. @@ -117,7 +5797,7 @@ shift_add/shift_sub0/shift_sub1 RTXs. 2013-07-24 Bill Schmidt - Anton Blanchard + Anton Blanchard * config/rs6000/altivec.md (altivec_vpkpx): Handle little endian. (altivec_vpksss): Likewise. @@ -147,7 +5827,7 @@ don't set a return register to need a non-exit mode. 2013-07-24 Bill Schmidt - Anton Blanchard + Anton Blanchard * config/rs6000/vector.md (vec_realign_load_): Reorder input operands to vperm for little endian. @@ -155,7 +5835,7 @@ of lvsl to create the control mask for a vperm for little endian. 2013-07-23 Bill Schmidt - Anton Blanchard + Anton Blanchard * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Reverse two operands for little-endian. @@ -166,7 +5846,7 @@ (TARGET_CASE_VALUES_THRESHOLD): Define. 2013-07-23 Bill Schmidt - Anton Blanchard + Anton Blanchard * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Correct selection of field for vector splat in little endian mode. @@ -261,12 +5941,12 @@ (eqv3_internal2): Likewise. (one_cmpl13_internal): Likewise. -2013-07-23 David Holsgrove +2013-07-23 David Holsgrove * config/microblaze/microblaze.c (microblaze_expand_prologue): Rename flag_stack_usage to flag_stack_usage_info. -2013-07-23 David Holsgrove +2013-07-23 David Holsgrove * config/microblaze/sync.md: New file. * config/microblaze/microblaze.md: Include sync.md @@ -403,7 +6083,7 @@ * config/avr/avr.md: Explain asm print modifier 'r' for REG. 
2013-07-22 Bill Schmidt - Anton Blanchard + Anton Blanchard * config/rs6000/rs6000.c (rs6000_expand_vector_init): Fix endianness when selecting field to splat. @@ -686,12 +6366,12 @@ (avr_out_round): New function. (avr_adjust_insn_length): Handle ADJUST_LEN_ROUND. -2013-07-18 David Holsgrove +2013-07-18 David Holsgrove * config/microblaze/microblaze.c (microblaze_expand_prologue): Add check for flag_stack_usage to handle -fstack-usage support -2013-07-18 Pat Haugen +2013-07-18 Pat Haugen * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag interaction for new Power8 flags and VSX. @@ -1856,7 +7536,7 @@ * config/i386/i386.c (enum ix86_builtins, bdesc_args): Remove IX86_BUILTIN_CMPNGTSS and IX86_BUILTIN_CMPNGESS. -2013-06-27 Catherine Moore +2013-06-27 Catherine Moore * config/mips/mips-tables.opt: Regenerate. * config/mips/mips-cpus.def: Add m14ke and m14kec. @@ -1969,8 +7649,8 @@ (TARGET_CAN_SPLIT_STACK, TARGET_THREAD_SPLIT_STACK_OFFSET): Undefine. 2013-06-26 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * config/rs6000/power8.md: New. * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor @@ -2099,7 +7779,7 @@ * common/config/i386/i386-common.c (ix86_handle_option): For OPT_mlzcnt add missing return true. -2013-06-23 Oleg Endo +2013-06-23 Oleg Endo PR target/52483 * config/sh/predicates.md (general_extend_operand): Invoke @@ -2156,7 +7836,7 @@ * doc/extend.texi: Use __atomic_store_n instead of __atomic_store in HLE example. -2013-06-22 Oleg Endo +2013-06-22 Oleg Endo * config/sh/sh.c: Remove workaround. @@ -2179,7 +7859,7 @@ (get_binfo_at_offset): Use it. * tree.h (types_same_for_odr): Declare. -2013-06-20 Oleg Endo +2013-06-20 Oleg Endo Jason Merrill * system.h: Include as well as . @@ -2194,7 +7874,7 @@ * lto-cgraph.c (input_symtab): Do not set cgraph state. -2013-06-20 Joern Rennecke +2013-06-20 Joern Rennecke PR rtl-optimization/57425 PR rtl-optimization/57569 @@ -2672,7 +8352,7 @@ Likewise. 
Remove default with_tune setting. Move default float setting to its own block. Handle with_llsc in the same block as above. -2013-06-16 Joern Rennecke +2013-06-16 Joern Rennecke PR rtl-optimization/57425 PR rtl-optimization/57569 @@ -2708,7 +8388,7 @@ rs6000_output_move_128bit to handle emitting quad memory operations. Set attribute length to 8 bytes. -2013-06-14 Vidya Praveen +2013-06-14 Vidya Praveen * config/aarch64/aarch64-simd.md (aarch64_mlal_lo): New pattern. @@ -2777,8 +8457,8 @@ * config/rs6000/spe.md (spe_abstf2_cmp, spe_abstf2_tst): Likewise. 2013-06-12 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * config/rs6000/rs6000.c (emit_load_locked): Add support for power8 byte, half-word, and quad-word atomic instructions. @@ -2865,7 +8545,7 @@ * lto-symtab.c (lto_symtab_merge_symbols): Likewise. * cgraph.h (cgraph_state): Add CGRAPH_LTO_STREAMING. -2013-06-12 Roland Stigge +2013-06-12 Roland Stigge PR target/57578 * config/rs6000/t-linux (MULTIARCH_DIRNAME): Fix SPE version detection. @@ -2951,6 +8631,11 @@ (symtab_alias_ultimate_target): Simplify. * varpool.c (varpool_create_variable_alias): Set weakref flag. +2013-06-11 Tom de Vries + + * genautomata.c (gen_regexp_sequence): Handle els_num == -1. Handle + sequence_vect == NULL. + 2013-06-11 DJ Delorie * config/rl78/rl78.c (TARGET_UNWIND_WORD_MODE): Define. @@ -3005,8 +8690,8 @@ for hash so that hash table traversal order is deterministic. 2013-06-10 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * config/rs6000/vector.md (GPR move splitter): Do not split moves of vectors in GPRS if they are direct moves or quad word load or @@ -3234,8 +8919,8 @@ TARGET_VALID_POINTER_MODE. 2013-06-06 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Document new power8 builtins. @@ -3986,7 +9671,7 @@ * config/aarch64/aarch64.md (insv): New define_expand. (*insv_reg): New define_insn. 
-2013-05-30 Joern Rennecke +2013-05-30 Joern Rennecke PR rtl-optimization/57439 * postreload.c (move2add_valid_value_p): Check that we have @@ -4079,8 +9764,8 @@ functions are not yet marked as defined. 2013-05-29 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI instructions. @@ -4396,7 +10081,7 @@ (MULTILIB_DIRNAMES): Ditto. (MULTILIB_EXCEPTIONS): Add new exceptions. -2012-05-29 Chris Schlumberger-Socha +2012-05-29 Chris Schlumberger-Socha Marcus Shawcroft * config/aarch64/aarch64-protos.h (aarch64_symbol_type): Define @@ -4409,7 +10094,7 @@ Permit SYMBOL_TINY_ABSOLUTE. * config/aarch64/predicates.md (aarch64_mov_operand): Permit CONST. -2013-05-29 Chris Schlumberger-Socha +2013-05-29 Chris Schlumberger-Socha Marcus Shawcroft * config/aarch64/aarch64.c (aarch64_classify_symbol): Remove comment. @@ -4459,7 +10144,7 @@ * builtin-types.def: Define BT_FN_INT_PTR_PTR_PTR. * cilkplus.def: New file. -2013-05-28 Joern Rennecke +2013-05-28 Joern Rennecke PR rtl-optimization/57439 * postreload.c (move2add_use_add2_insn): Use gen_lowpart_common. @@ -4543,7 +10228,7 @@ (set_ssa_val_to): Compare addresses using get_addr_base_and_unit_offset. -2013-05-27 Joern Rennecke +2013-05-27 Joern Rennecke PR rtl-optimization/56833 * postreload.c (move2add_record_mode): New function. @@ -4709,14 +10394,14 @@ PR debug/57351 * config/arm/arm.c (arm_dwarf_register_span): Do not use dbx number. -2013-05-23 Chris Schlumberger-Socha +2013-05-23 Chris Schlumberger-Socha Marcus Shawcroft * config/aarch64/aarch64.md (*movdi_aarch64): Replace Usa with S. * config/aarch64/constraints.md (Usa): Remove. * doc/md.texi (AArch64 Usa): Remove. -2013-05-23 Chris Schlumberger-Socha +2013-05-23 Chris Schlumberger-Socha Marcus Shawcroft * config/aarch64/aarch64-protos.h (aarch64_mov_operand_p): Define. @@ -4724,7 +10409,7 @@ * config/aarch64/predicates.md (aarch64_const_address): Remove. 
(aarch64_mov_operand): Use aarch64_mov_operand_p. -2013-05-23 Vidya Praveen +2013-05-23 Vidya Praveen * config/aarch64/aarch64-simd.md (clzv4si2): Support for CLZ instruction (AdvSIMD). @@ -4779,8 +10464,8 @@ (exec_threshold): Ditto. 2013-05-22 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add documentation for the power8 crypto builtins. @@ -4832,8 +10517,8 @@ instructions. 2013-05-22 Michael Meissner - Pat Haugen - Peter Bergner + Pat Haugen + Peter Bergner * doc/invoke.texi (Option Summary): Add power8 options. (RS/6000 and PowerPC Options): Likewise. @@ -5593,7 +11278,7 @@ * config/arm/arm.h (EPILOGUE_USES): Only return true for LR_REGNUM after epilogue_completed. -2013-05-14 Joern Rennecke +2013-05-14 Joern Rennecke * config/avr/avr.c (avr_encode_section_info): Bail out if the type is error_mark_node. @@ -6476,7 +12161,7 @@ *vec_concatv2si_sse2 and vec_concatv2si_sse. (vec_concatv2di): Merge with *vec_concatv2di_rex64. -2013-05-03 Joern Rennecke +2013-05-03 Joern Rennecke PR tree-optimization/57027 * tree-ssa-math-opts.c (convert_mult_to_fma): When checking @@ -7843,7 +13528,7 @@ (scev_analysis): Likewise. 2013-04-02 Catherine Moore - Chao-ying Fu + Chao-ying Fu * config/mips/micromips.md (jraddiusp): New pattern. * config/mips/mips.c (mips_expand_epilogue): Use the JRADDIUSP @@ -8758,7 +14443,7 @@ (vect_get_constant_vectors): Handle mixed vect_external_def, vect_constant_def types. -2013-04-10 Joern Rennecke +2013-04-10 Joern Rennecke PR tree-optimization/55524 * tree-ssa-math-opts.c @@ -8766,7 +14451,7 @@ when we don't have an fms operation, but fnma, and it looks likely that we'll be able to use the latter. -2013-04-10 Zhouyi Zhou +2013-04-10 Zhouyi Zhou * cif-code.def (OVERWRITABLE): Correct the comment for overwritable function. @@ -9244,7 +14929,7 @@ * basic-block.h (gcov_working_set_t): Moved to gcov-io.h. * gcov-dump.c (dump_working_sets): New function. 
-2013-04-03 Kenneth Zadeck +2013-04-03 Kenneth Zadeck * hwint.c (sext_hwi, zext_hwi): New functions. * hwint.h (HOST_BITS_PER_HALF_WIDE_INT, HOST_HALF_WIDE_INT, @@ -10005,21 +15690,21 @@ 2013-03-27 Alexander Ivchenko - * target.def (TARGET_HAS_IFUNC_P): New target hook. - * doc/tm.texi.in (TARGET_HAS_IFUNC_P): New. - * doc/tm.texi: Regenerate. - * targhooks.h (default_has_ifunc_p): New. - * targhooks.c (default_has_ifunc_p): Ditto. - * config/linux-protos.h: New file. - * config/linux-android.h (TARGET_HAS_IFUNC_P): Using version of + * gcc/target.def (TARGET_HAS_IFUNC_P): New target hook. + * gcc/doc/tm.texi.in (TARGET_HAS_IFUNC_P): New. + * gcc/doc/tm.texi: Regenerate. + * gcc/targhooks.h (default_has_ifunc_p): New. + * gcc/targhooks.c (default_has_ifunc_p): Ditto. + * gcc/config/linux-protos.h: New file. + * gcc/config/linux-android.h (TARGET_HAS_IFUNC_P): Using version of this hook for linux which disables support of indirect functions in android. - * config/linux-android.c: New file. - * config/t-linux-android.c: Ditto. - * config.gcc: Added new object file linux-android.o. - * config/i386/i386.c (ix86_get_function_versions_dispatcher): + * gcc/config/linux-android.c: New file. + * gcc/config/t-linux-android.c: Ditto. + * gcc/config.gcc: Added new object file linux-android.o. + * gcc/config/i386/i386.c (ix86_get_function_versions_dispatcher): Using TARGET_HAS_IFUNC hook instead of HAVE_GNU_INDIRECT_FUNCTION. - * varasm.c (do_assemble_alias): Likewise. + * gcc/varasm.c (do_assemble_alias): Likewise. * configure.ac: Define HAVE_GNU_INDIRECT_FUNCTION as zero if the target doesn't support indirect functions. * configure: Regenerate. @@ -10772,7 +16457,7 @@ * params.def (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS): New parameter. * ipa-cp.c (hint_time_bonus): Add abonus for known array indices. -2013-03-20 Pat Haugen +2013-03-20 Pat Haugen * config/rs6000/predicates.md (indexed_address, update_address_mem update_indexed_address_mem): New predicates. 
@@ -11079,10 +16764,10 @@ 2013-03-20 Catherine Moore Maciej W. Rozycki Tom de Vries - Nathan Sidwell + Nathan Sidwell Iain Sandoe Nathan Froyd - Chao-ying Fu + Chao-ying Fu * doc/extend.texi: (micromips, nomicromips, nocompression): Document new function attributes. @@ -11214,7 +16899,7 @@ remap_gimple_op_r. 2013-03-20 Bill Schmidt - Steven Bosscher + Steven Bosscher PR rtl-optimization/56605 * loop-iv.c (implies_p): Handle equal RTXs and subregs. diff --git a/gcc/ChangeLog.MELT b/gcc/ChangeLog.MELT index a7c92c3e315..393ab46a14d 100644 --- a/gcc/ChangeLog.MELT +++ b/gcc/ChangeLog.MELT @@ -1,4 +1,9 @@ +2013-09-09 Basile Starynkevitch + {{When merging trunk GCC 4.9 with C++ passes}} + * melt/xtramelt-ana-base.melt: Add GCC 4.9 specific code, still + incomplete, for classy passes.... Only Gimple passes are yet possible... + 2013-09-06 Basile Starynkevitch * melt/warmelt-macro.melt: Replaced all error_... with diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index a7af27726c6..6ee74275c1d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20130729 +20130909 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 0b52ea14fd3..ac695aedc7a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -872,11 +872,13 @@ BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def \ gtm-builtins.def sanitizer.def cilkplus.def INTERNAL_FN_DEF = internal-fn.def INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) -TREE_H = coretypes.h tree.h all-tree.def tree.def c-family/c-common.def \ - $(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ - $(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \ +TREE_CORE_H = tree-core.h coretypes.h all-tree.def tree.def \ + c-family/c-common.def $(lang_tree_files) $(MACHMODE_H) \ + $(BUILTINS_DEF) $(INPUT_H) statistics.h \ + $(VEC_H) treestruct.def $(HASHTAB_H) \ double-int.h alias.h $(SYMTAB_H) $(FLAGS_H) \ $(REAL_H) $(FIXED_VALUE_H) +TREE_H = tree.h $(TREE_CORE_H) tree-check.h REGSET_H = regset.h $(BITMAP_H) hard-reg-set.h 
BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) \ cfg-flags.def cfghooks.h @@ -987,6 +989,7 @@ PLUGIN_VERSION_H = plugin-version.h configargs.h LIBFUNCS_H = libfuncs.h $(HASHTAB_H) GRAPHITE_HTAB_H = graphite-htab.h graphite-clast-to-gimple.h $(HASH_TABLE_H) CONTEXT_H = context.h +PASS_MANAGER_H = pass_manager.h pass-instances.def # # Now figure out from those variables how to compile and link. @@ -1154,7 +1157,7 @@ C_COMMON_OBJS = c-family/c-common.o c-family/c-cppbuiltin.o c-family/c-dump.o \ c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \ c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \ c-family/c-semantics.o c-family/c-ada-spec.o tree-mudflap.o \ - c-family/array-notation-common.o + c-family/array-notation-common.o c-family/c-ubsan.o # Language-independent object files. # We put the insn-*.o files first so that a parallel make will build @@ -1276,10 +1279,12 @@ OBJS = \ init-regs.o \ internal-fn.o \ ipa-cp.o \ + ipa-devirt.o \ ipa-split.o \ ipa-inline.o \ ipa-inline-analysis.o \ ipa-inline-transform.o \ + ipa-profile.o \ ipa-prop.o \ ipa-pure-const.o \ ipa-reference.o \ @@ -1317,7 +1322,6 @@ OBJS = \ lto-streamer-out.o \ lto-section-in.o \ lto-section-out.o \ - lto-symtab.o \ lto-opts.o \ lto-compress.o \ mcf.o \ @@ -1383,6 +1387,7 @@ OBJS = \ tree-affine.o \ asan.o \ tsan.o \ + ubsan.o \ tree-call-cdce.o \ tree-cfg.o \ tree-cfgcleanup.o \ @@ -1474,6 +1479,7 @@ OBJS = \ varasm.o \ varpool.o \ vmsdbgout.o \ + vtable-verify.o \ web.o \ xcoffout.o \ $(out_object_file) \ @@ -2036,6 +2042,10 @@ c-family/array-notation-common.o : c-family/array-notation-common.c $(TREE_H) \ c-family/stub-objc.o : c-family/stub-objc.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-objc.h +c-family/c-ubsan.o : c-family/c-ubsan.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-ubsan.h \ + alloc-pool.h $(CGRAPH_H) $(GIMPLE_H) $(HASH_TABLE_H) output.h \ + toplev.h ubsan.h default-c.o: 
config/default-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(C_TARGET_H) $(C_TARGET_DEF_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ @@ -2186,7 +2196,7 @@ tree-streamer-in.o: tree-streamer-in.c $(CONFIG_H) $(SYSTEM_H) \ $(DATA_STREAMER_H) $(STREAMER_HOOKS_H) $(LTO_STREAMER_H) tree-streamer-out.o: tree-streamer-out.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(DIAGNOSTIC_H) $(TREE_STREAMER_H) $(DATA_STREAMER_H) \ - $(STREAMER_HOOKS_H) + $(STREAMER_HOOKS_H) $(TM_H) streamer-hooks.o: streamer-hooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(STREAMER_HOOKS_H) lto-cgraph.o: lto-cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ @@ -2194,7 +2204,8 @@ lto-cgraph.o: lto-cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(HASHTAB_H) langhooks.h $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(CGRAPH_H) $(FUNCTION_H) $(GGC_H) $(DIAGNOSTIC_CORE_H) \ $(EXCEPT_H) $(TIMEVAR_H) pointer-set.h $(LTO_STREAMER_H) \ - $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H) profile.h + $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H) \ + profile.h $(CONTEXT_H) $(PASS_MANAGER_H) lto-streamer-in.o: lto-streamer-in.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) toplev.h $(DIAGNOSTIC_CORE_H) $(EXPR_H) $(FLAGS_H) $(PARAMS_H) \ input.h $(HASHTAB_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) $(TREE_PASS_H) \ @@ -2219,9 +2230,6 @@ lto-section-out.o : lto-section-out.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(CGRAPH_H) $(FUNCTION_H) $(GGC_H) $(EXCEPT_H) pointer-set.h \ $(BITMAP_H) langhooks.h $(LTO_STREAMER_H) lto-compress.h \ $(DATA_STREAMER_H) -lto-symtab.o: lto-symtab.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TREE_H) $(GIMPLE_H) $(GGC_H) $(HASHTAB_H) \ - $(LTO_STREAMER_H) $(LINKER_PLUGIN_API_H) lto-opts.o: lto-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \ $(HASHTAB_H) $(GGC_H) $(BITMAP_H) $(FLAGS_H) $(OPTS_H) $(OPTIONS_H) \ $(COMMON_TARGET_H) $(DIAGNOSTIC_H) $(LTO_STREAMER_H) @@ -2272,8 +2280,11 @@ tsan.o : $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TREE_INLINE_H) \ $(TM_H) coretypes.h 
$(TREE_DUMP_H) $(TREE_PASS_H) $(CGRAPH_H) $(GGC_H) \ $(BASIC_BLOCK_H) $(FLAGS_H) $(FUNCTION_H) \ $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) $(GIMPLE_H) tree-iterator.h \ - intl.h cfghooks.h output.h options.h c-family/c-common.h tsan.h asan.h \ + intl.h cfghooks.h output.h options.h $(C_COMMON_H) tsan.h asan.h \ tree-ssa-propagate.h +ubsan.o : ubsan.c ubsan.h $(CONFIG_H) $(SYSTEM_H) $(GIMPLE_H) \ + output.h coretypes.h $(TREE_H) $(CGRAPH_H) $(HASHTAB_H) gt-ubsan.h \ + toplev.h $(C_COMMON_H) $(TM_P_H) tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ @@ -2432,7 +2443,7 @@ tree-tailcall.o : tree-tailcall.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(FUNCTION_H) $(TM_H) coretypes.h \ $(EXCEPT_H) $(TREE_PASS_H) $(FLAGS_H) langhooks.h \ $(BASIC_BLOCK_H) $(DBGCNT_H) $(GIMPLE_PRETTY_PRINT_H) $(TARGET_H) \ - $(COMMON_TARGET_H) $(CFGLOOP_H) + $(COMMON_TARGET_H) $(CFGLOOP_H) ipa-utils.h tree-ssa-sink.o : tree-ssa-sink.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(TREE_H) $(DIAGNOSTIC_H) \ $(TM_H) coretypes.h $(TREE_PASS_H) $(FLAGS_H) alloc-pool.h \ @@ -2572,7 +2583,7 @@ omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_CORE_H) \ $(TREE_FLOW_H) $(FLAGS_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) \ $(TREE_PASS_H) $(GGC_H) $(EXCEPT_H) $(SPLAY_TREE_H) $(OPTABS_H) \ - $(CFGLOOP_H) tree-iterator.h gt-omp-low.h + $(CFGLOOP_H) tree-iterator.h $(TARGET_H) gt-omp-low.h tree-browser.o : tree-browser.c tree-browser.def $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(HASH_TABLE_H) $(TREE_H) $(TREE_PRETTY_PRINT_H) omega.o : omega.c $(OMEGA_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DUMPFILE_H) \ @@ -2652,7 +2663,13 @@ tree-vect-data-refs.o: tree-vect-data-refs.c $(CONFIG_H) $(SYSTEM_H) \ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(DUMPFILE_H) $(TM_H) $(GGC_H) 
$(TREE_H) $(TREE_FLOW_H) \ $(CFGLOOP_H) $(TREE_PASS_H) $(TREE_VECTORIZER_H) \ - $(TREE_PRETTY_PRINT_H) + $(TREE_PRETTY_PRINT_H) $(DBGCNT_H) +vtable-verify.o: vtable-verify.c vtable-verify.h $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) cp/cp-tree.h $(TM_P_H) \ + $(BASIC_BLOCK_H) output.h $(TREE_FLOW_H) $(TREE_DUMP_H) $(TREE_PASS_H) \ + $(TIMEVAR_H) $(CFGLOOP_H) $(FLAGS_H) $(TREE_INLINE_H) $(SCEV_H) \ + $(DIAGNOSTIC_CORE_H) $(GIMPLE_PRETTY_PRINT_H) toplev.h langhooks.h \ + gt-vtable-verify.h tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) $(TREE_PASS_H) tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ @@ -2836,10 +2853,14 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(OPTS_H) params.def tree-mudflap.h $(TREE_PASS_H) $(GIMPLE_H) \ tree-ssa-alias.h $(PLUGIN_H) realmpfr.h tree-diagnostic.h \ $(TREE_PRETTY_PRINT_H) opts-diagnostic.h $(COMMON_TARGET_H) \ - tsan.h diagnostic-color.h $(CONTEXT_H) + tsan.h diagnostic-color.h $(CONTEXT_H) $(PASS_MANAGER_H) hwint.o : hwint.c $(CONFIG_H) $(SYSTEM_H) $(DIAGNOSTIC_CORE_H) +pass-instances.def: $(srcdir)/passes.def $(srcdir)/gen-pass-instances.awk + $(AWK) -f $(srcdir)/gen-pass-instances.awk \ + $(srcdir)/passes.def > pass-instances.def + passes.o : passes.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(FUNCTION_H) $(FLAGS_H) $(INPUT_H) $(INSN_ATTR_H) output.h \ $(DIAGNOSTIC_CORE_H) debug.h insn-config.h intl.h $(RECOG_H) toplev.h \ @@ -2850,7 +2871,8 @@ passes.o : passes.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ hosthooks.h $(CGRAPH_H) $(COVERAGE_H) $(TREE_PASS_H) $(TREE_DUMP_H) \ $(GGC_H) $(OPTS_H) $(TREE_FLOW_H) $(TREE_INLINE_H) \ gt-passes.h $(DF_H) $(PREDICT_H) $(LTO_STREAMER_H) \ - $(PLUGIN_H) $(IPA_UTILS_H) passes.def + $(PLUGIN_H) $(IPA_UTILS_H) pass-instances.def \ + $(CONTEXT_H) $(PASS_MANAGER_H) plugin.o : plugin.c $(PLUGIN_H) 
$(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(HASH_TABLE_H) $(DIAGNOSTIC_CORE_H) $(TREE_H) $(TREE_PASS_H) \ @@ -2891,7 +2913,8 @@ function.o : function.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_ERROR_ $(TREE_PASS_H) $(DF_H) $(PARAMS_H) bb-reorder.h \ $(COMMON_TARGET_H) statistics.o : statistics.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TREE_PASS_H) $(TREE_DUMP_H) $(HASH_TABLE_H) statistics.h $(FUNCTION_H) + $(TREE_PASS_H) $(TREE_DUMP_H) $(HASH_TABLE_H) statistics.h \ + $(FUNCTION_H) $(CONTEXT_H) $(PASS_MANAGER_H) stmt.o : stmt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DUMPFILE_H) $(TM_H) \ $(RTL_H) \ $(TREE_H) $(FLAGS_H) $(FUNCTION_H) insn-config.h hard-reg-set.h $(EXPR_H) \ @@ -2925,7 +2948,7 @@ builtins.o : builtins.c builtins.h $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ hard-reg-set.h $(DIAGNOSTIC_CORE_H) hard-reg-set.h $(EXCEPT_H) \ $(TM_P_H) $(PREDICT_H) $(LIBFUNCS_H) langhooks.h $(BASIC_BLOCK_H) \ tree-mudflap.h realmpfr.h $(BUILTINS_DEF) $(MACHMODE_H) \ - $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) value-prof.h + $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) value-prof.h ubsan.h calls.o : calls.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(TREE_H) $(FLAGS_H) $(EXPR_H) $(OPTABS_H) langhooks.h $(TARGET_H) \ $(LIBFUNCS_H) $(REGS_H) $(DIAGNOSTIC_CORE_H) output.h \ @@ -3013,7 +3036,8 @@ cgraphunit.o : cgraphunit.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(FIBHEAP_H) output.h $(PARAMS_H) $(RTL_H) $(IPA_PROP_H) \ gt-cgraphunit.h tree-iterator.h $(COVERAGE_H) $(TREE_DUMP_H) \ $(GIMPLE_PRETTY_PRINT_H) $(IPA_INLINE_H) $(IPA_UTILS_H) $(CFGLOOP_H) \ - $(LTO_STREAMER_H) output.h $(REGSET_H) $(EXCEPT_H) $(GCC_PLUGIN_H) plugin.h + $(LTO_STREAMER_H) output.h $(REGSET_H) $(EXCEPT_H) $(GCC_PLUGIN_H) \ + plugin.h $(CONTEXT_H) $(PASS_MANAGER_H) cgraphclones.o : cgraphclones.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(TREE_INLINE_H) toplev.h $(DIAGNOSTIC_CORE_H) $(FLAGS_H) $(GGC_H) \ $(TARGET_H) $(CGRAPH_H) intl.h pointer-set.h $(FUNCTION_H) 
$(GIMPLE_H) \ @@ -3032,8 +3056,16 @@ varpool.o : varpool.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_FLOW_H) ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \ $(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \ - $(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \ - $(LTO_STREAMER_H) $(DATA_STREAMER_H) + $(IPA_UTILS_H) tree-inline.h profile.h $(PARAMS_H) +ipa-profile.o : ipa-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \ + $(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) \ + $(IPA_UTILS_H) $(HASH_TABLE_H) profile.h $(PARAMS_H) \ + value-prof.h alloc-pool.h tree-inline.h $(LTO_STREAMER_H) $(DATA_STREAMER_H) \ + ipa-inline.h +ipa-devirt.o : ipa-devirt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \ + $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \ + $(IPA_UTILS_H) $(HASH_TABLE_H) ipa-inline.h ipa-utils.h $(TREE_PRETTY_PRINT_H) \ + $(DIAGNOSTIC_H) ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \ @@ -3057,13 +3089,14 @@ ipa-inline.o : ipa-inline.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(DIAGNOSTIC_H) $(FIBHEAP_H) $(PARAMS_H) $(TREE_PASS_H) \ $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(RTL_H) $(IPA_PROP_H) \ $(EXCEPT_H) $(GIMPLE_PRETTY_PRINT_H) $(IPA_INLINE_H) $(TARGET_H) \ - $(IPA_UTILS_H) + $(IPA_UTILS_H) sreal.h ipa-inline-analysis.o : ipa-inline-analysis.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \ $(DIAGNOSTIC_H) $(PARAMS_H) $(TREE_PASS_H) $(CFGLOOP_H) \ $(HASHTAB_H) $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(IPA_PROP_H) \ $(GIMPLE_PRETTY_PRINT_H) $(IPA_INLINE_H) $(LTO_STREAMER_H) $(DATA_STREAMER_H) \ - $(TREE_STREAMER_H) + $(TREE_STREAMER_H) ipa-utils.h tree-scalar-evolution.h $(CFGLOOP_H) \ + alloc-pool.h ipa-inline-transform.o : 
ipa-inline-transform.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \ $(TREE_PASS_H) \ @@ -3176,7 +3209,7 @@ tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) coretypes.h alloc-pool.h \ $(HASH_TABLE_H) $(TM_H) $(TREE_H) $(GIMPLE_H) $(CGRAPH_H) $(TREE_FLOW_H) \ $(IPA_PROP_H) $(DIAGNOSTIC_H) statistics.h \ $(PARAMS_H) $(TARGET_H) $(FLAGS_H) \ - $(DBGCNT_H) $(TREE_INLINE_H) $(GIMPLE_PRETTY_PRINT_H) + $(DBGCNT_H) $(TREE_INLINE_H) $(GIMPLE_PRETTY_PRINT_H) ipa-utils.h tree-switch-conversion.o : tree-switch-conversion.c $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_INLINE_H) \ $(TM_H) coretypes.h $(GIMPLE_H) $(CFGLOOP_H) \ @@ -3595,7 +3628,7 @@ $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ $(out_file) $(OUTPUT_OPTION) context.o: context.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(GGC_H) \ - $(CONTEXT_H) + $(CONTEXT_H) $(PASS_MANAGER_H) $(common_out_object_file): $(common_out_file) $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(COMMON_TARGET_H) $(COMMON_TARGET_DEF_H) $(PARAMS_H) \ @@ -3866,14 +3899,15 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(host_xm_file_list) \ $(tm_file_list) $(HASHTAB_H) $(SPLAY_TREE_H) $(srcdir)/bitmap.h \ $(srcdir)/alias.h $(srcdir)/coverage.c $(srcdir)/rtl.h \ - $(srcdir)/optabs.h $(srcdir)/tree.h $(srcdir)/libfuncs.h $(SYMTAB_H) \ + $(srcdir)/optabs.h $(srcdir)/tree.h $(srcdir)/tree-core.h \ + $(srcdir)/libfuncs.h $(SYMTAB_H) \ $(srcdir)/real.h $(srcdir)/function.h $(srcdir)/insn-addr.h $(srcdir)/hwint.h \ $(srcdir)/fixed-value.h \ $(srcdir)/output.h $(srcdir)/cfgloop.h \ $(srcdir)/cselib.h $(srcdir)/basic-block.h $(srcdir)/ipa-ref.h $(srcdir)/cgraph.h \ $(srcdir)/reload.h $(srcdir)/caller-save.c $(srcdir)/symtab.c \ $(srcdir)/alias.c $(srcdir)/bitmap.c $(srcdir)/cselib.c $(srcdir)/cgraph.c \ - $(srcdir)/ipa-prop.c $(srcdir)/ipa-cp.c \ + 
$(srcdir)/ipa-prop.c $(srcdir)/ipa-cp.c $(srcdir)/ipa-utils.h \ $(srcdir)/dbxout.c \ $(srcdir)/dwarf2out.h \ $(srcdir)/dwarf2asm.c \ @@ -3907,7 +3941,6 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/cgraphclones.c \ $(srcdir)/tree-ssa-propagate.c \ $(srcdir)/tree-phinodes.c \ - $(srcdir)/lto-symtab.c \ $(srcdir)/tree-ssa-alias.h \ $(srcdir)/ipa-prop.h \ $(MELT_RUNTIME_C) \ @@ -3916,8 +3949,10 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/lto-streamer.h \ $(srcdir)/target-globals.h \ $(srcdir)/ipa-inline.h \ + $(srcdir)/vtable-verify.c \ $(srcdir)/asan.c \ - $(srcdir)/tsan.c \ + $(srcdir)/ubsan.c \ + $(srcdir)/tsan.c $(srcdir)/ipa-devirt.c \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, diff --git a/gcc/aclocal.m4 b/gcc/aclocal.m4 index 33b9992b6e8..3fe609788b7 100644 --- a/gcc/aclocal.m4 +++ b/gcc/aclocal.m4 @@ -97,11 +97,6 @@ m4_define([AC_PROG_CC], [m4_fatal([AC_PROG_CC cannot be called after AM_PROG_CC_C_O])]) ]) -m4_include([../libtool.m4]) -m4_include([../ltoptions.m4]) -m4_include([../ltsugar.m4]) -m4_include([../ltversion.m4]) -m4_include([../lt~obsolete.m4]) m4_include([../config/acx.m4]) m4_include([../config/codeset.m4]) m4_include([../config/dfp.m4]) @@ -117,4 +112,9 @@ m4_include([../config/picflag.m4]) m4_include([../config/progtest.m4]) m4_include([../config/stdint.m4]) m4_include([../config/warnings.m4]) +m4_include([../libtool.m4]) +m4_include([../ltoptions.m4]) +m4_include([../ltsugar.m4]) +m4_include([../ltversion.m4]) +m4_include([../lt~obsolete.m4]) m4_include([acinclude.m4]) diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 4e0b0a8e349..61fd991bef2 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,6 +1,80 @@ +2013-09-01 Eric Botcazou + Iain Sandoe + + PR ada/58239 + * gcc-interface/Makefile.in (GCC_LINK_FLAGS): Add -static-libstdc++. + (GCC_LINK): Use CXX instead of CC. 
+ * gcc-interface/Make-lang.in (CXX_LFLAGS): New. + (ADA_TOOLS_FLAGS_TO_PASS): Pass CXX, and CXX_LFLAGS for native. + +2013-08-13 Eric Botcazou + + * gcc-interface/decl.c (gnat_to_gnu_entity): Do not bother about alias + sets of derived types in ASIS mode. + +2013-08-13 Eric Botcazou + + * gcc-interface/decl.c (gnat_to_gnu_entity): Replace True with true. + (is_cplusplus_method): Likewise, and False with false. + (components_need_strict_alignment): Likewise. + * gcc-interface/misc.c (gnat_init_gcc_fp): Likewise. + * gcc-interface/trans.c (Loop_Statement_to_gnu): Likewise. + (Handled_Sequence_Of_Statements_to_gnu): Likewise. + (add_cleanup): Likewise. + (Sloc_to_locus1): Likewise. + (Sloc_to_locus): Likewise. + (set_expr_location_from_node): Likewise. + * gcc-interface/utils.c (potential_alignment_gap): Likewise. + +2013-08-13 Thomas Quinot + + * gcc-interface/trans.c (set_end_locus_from_node): Clear column info + for the end_locus of a block if it does not come from an End_Label. + +2013-08-13 Thomas Quinot + + * gcc-interface/trans.c (Handled_Sequence_Of_Statements_to_gnu): If + there is no End_Label, attach cleanup actions to the sloc of the HSS + node instead. + (Exception_Handler_to_gnu_zcx): Associate cleanup actions with the sloc + of the handler itself. + (add_cleanup): Clear column information in sloc of cleanup actions. + (Sloc_to_locus1): New static function. + (Sloc_to_locus): Call it. + (set_expr_location_from_node1): New static function. + (set_expr_location_from_node): Call it. + +2013-08-13 Eric Botcazou + + * gcc-interface/trans.c (Call_to_gnu): Deal with specific conditional + expressions for misaligned actual parameters. + +2013-08-13 Eric Botcazou + + * gcc-interface/trans.c (can_equal_min_or_max_val_p): Be prepared for + values outside of the range of the type. + +2013-08-13 Eric Botcazou + + * gcc-interface/utils2.c (build_atomic_load): Do a mere view-conversion + to the original type before converting to the result type. 
+ (build_atomic_store): First do a conversion to the original type before + view-converting to the effective type, but deal with a padded type + specially. + +2013-08-08 Eric Botcazou + + * gcc-interface/Makefile.in (TOOLS_LIBS): Pick C object files from the + compiler build and use standard library variables. + (../../vxaddr2line$(exeext): Do not depend on targext.o and adjust. + (gnatmake-re): Do not depend on targext.o. + (gnatlink-re): Do not depend on link.o and targext.o. + (../../gnatmake$(exeext): Likewise. + (../../gnatlink$(exeext): Likewise. + 2013-07-21 Ondřej Bílka - * gcc-interface/gigi.h: Likewise. + * gcc-interface/gigi.h: Fix typos. * gcc-interface/trans.c: Likewise. * gcc-interface/utils2.c: Likewise. * gnat_rm.texi: Likewise. diff --git a/gcc/ada/gcc-interface/Make-lang.in b/gcc/ada/gcc-interface/Make-lang.in index 4fed34fc524..93250da561c 100644 --- a/gcc/ada/gcc-interface/Make-lang.in +++ b/gcc/ada/gcc-interface/Make-lang.in @@ -111,6 +111,12 @@ ada: gnat1$(exeext) gnatbind$(exeext) # Tell GNU Make to ignore these, if they exist. .PHONY: ada +CXX_LFLAGS = \ + -B../../../$(target_noncanonical)/libstdc++-v3/src/.libs \ + -B../../../$(target_noncanonical)/libstdc++-v3/libsupc++/.libs \ + -L../../../$(target_noncanonical)/libstdc++-v3/src/.libs \ + -L../../../$(target_noncanonical)/libstdc++-v3/libsupc++/.libs + # There are too many Ada sources to check against here. Let's # always force the recursive make. ifeq ($(build), $(host)) @@ -119,6 +125,7 @@ ifeq ($(build), $(host)) # tree. ADA_TOOLS_FLAGS_TO_PASS=\ CC="../../xgcc -B../../" \ + CXX="../../xg++ -B../../ $(CXX_LFLAGS)" \ $(COMMON_FLAGS_TO_PASS) $(ADA_FLAGS_TO_PASS) \ ADA_INCLUDES="-I- -I../rts" \ GNATMAKE="../../gnatmake" \ @@ -136,6 +143,7 @@ ifeq ($(build), $(host)) ADA_TOOLS_FLAGS_TO_PASS=\ CC="$(CC)" \ + CXX="$(CXX)" \ $(COMMON_FLAGS_TO_PASS) $(ADA_FLAGS_TO_PASS) \ ADA_INCLUDES="-I$(RTS_DIR)../adainclude -I$(RTS_DIR)" \ GNATMAKE="gnatmake" \ @@ -158,6 +166,7 @@ else # built runtime. 
ADA_TOOLS_FLAGS_TO_PASS=\ CC="$(CC)" \ + CXX="$(CXX)" \ $(COMMON_FLAGS_TO_PASS) $(ADA_FLAGS_TO_PASS) \ ADA_INCLUDES="-I../rts" \ GNATMAKE="$(GNATMAKE_FOR_HOST)" \ @@ -172,6 +181,7 @@ else endif ADA_TOOLS_FLAGS_TO_PASS=\ CC="$(CC)" \ + CXX="$(CXX)" \ $(COMMON_FLAGS_TO_PASS) $(ADA_FLAGS_TO_PASS) \ ADA_INCLUDES="-I$(RTS_DIR)../adainclude -I$(RTS_DIR)" \ GNATMAKE="$(GNATMAKE_FOR_HOST)" \ diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in index 6aa93c4655a..0c4057c1019 100644 --- a/gcc/ada/gcc-interface/Makefile.in +++ b/gcc/ada/gcc-interface/Makefile.in @@ -198,7 +198,7 @@ RTSDIR = rts$(subst /,_,$(MULTISUBDIR)) # Link flags used to build gnat tools. By default we prefer to statically # link with libgcc to avoid a dependency on shared libgcc (which is tricky # to deal with as it may conflict with the libgcc provided by the system). -GCC_LINK_FLAGS=-static-libgcc +GCC_LINK_FLAGS=-static-libstdc++ -static-libgcc # End of variables for you to override. @@ -250,10 +250,9 @@ LIBS = $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) $(LIBIBERTY) $(SYSLIBS) LIBDEPS = $(LIBINTL_DEP) $(LIBICONV_DEP) $(LIBBACKTRACE) $(LIBIBERTY) # Default is no TGT_LIB; one might be passed down or something TGT_LIB = -TOOLS_LIBS = targext.o link.o ../../ggc-none.o ../../libcommon-target.a \ +TOOLS_LIBS = ../link.o ../targext.o ../../ggc-none.o ../../libcommon-target.a \ ../../libcommon.a ../../../libcpp/libcpp.a $(LIBGNAT) $(LIBINTL) $(LIBICONV) \ - ../../../libbacktrace/.libs/libbacktrace.a ../../../libiberty/libiberty.a \ - $(SYSLIBS) $(TGT_LIB) + ../$(LIBBACKTRACE) ../$(LIBIBERTY) $(SYSLIBS) $(TGT_LIB) # Convert the target variable into a space separated list of architecture, # manufacturer, and operating system and assign each of those to its own @@ -2276,7 +2275,7 @@ ifeq ($(strip $(filter-out darwin%,$(osys))),) GMEM_LIB = gmemlib LIBRARY_VERSION := $(LIB_VERSION) soext = .dylib - GCC_LINK_FLAGS= + GCC_LINK_FLAGS=-static-libstdc++ endif # ARM Nucleus @@ -2398,7 +2397,7 
@@ TOOLS_FLAGS_TO_PASS= \ "GNATLINK=$(GNATLINK)" \ "GNATBIND=$(GNATBIND)" -GCC_LINK=$(CC) $(GCC_LINK_FLAGS) $(ADA_INCLUDES) +GCC_LINK=$(CXX) $(GCC_LINK_FLAGS) $(ADA_INCLUDES) # Build directory for the tools. Let's copy the target-dependent # sources using the same mechanism as for gnatlib. The other sources are @@ -2491,12 +2490,12 @@ common-tools: ../stamp-tools $(GNATBIND) $(ADA_INCLUDES) $(GNATBIND_FLAGS) gnatdll $(GNATLINK) -v gnatdll -o $@ --GCC="$(GCC_LINK)" $(TOOLS_LIBS) -../../vxaddr2line$(exeext): ../stamp-tools targext.o +../../vxaddr2line$(exeext): ../stamp-tools $(GNATMAKE) -c $(ADA_INCLUDES) vxaddr2line --GCC="$(CC) $(ALL_ADAFLAGS)" $(GNATBIND) $(ADA_INCLUDES) $(GNATBIND_FLAGS) vxaddr2line - $(GNATLINK) -v vxaddr2line -o $@ --GCC="$(GCC_LINK)" targext.o $(CLIB) + $(GNATLINK) -v vxaddr2line -o $@ --GCC="$(GCC_LINK)" ../targext.o $(CLIB) -gnatmake-re: ../stamp-tools link.o targext.o +gnatmake-re: ../stamp-tools $(GNATMAKE) -j0 $(ADA_INCLUDES) -u sdefault --GCC="$(CC) $(MOST_ADA_FLAGS)" $(GNATMAKE) -j0 -c $(ADA_INCLUDES) gnatmake --GCC="$(CC) $(ALL_ADAFLAGS)" $(GNATBIND) $(ADA_INCLUDES) $(GNATBIND_FLAGS) gnatmake @@ -2507,7 +2506,7 @@ gnatmake-re: ../stamp-tools link.o targext.o # with the former version of gnatlink itself which cannot override itself. 
# gnatlink-re cannot be run at the same time as gnatmake-re, hence the # dependency -gnatlink-re: ../stamp-tools link.o targext.o gnatmake-re +gnatlink-re: ../stamp-tools gnatmake-re $(GNATMAKE) -j0 -c $(ADA_INCLUDES) gnatlink --GCC="$(CC) $(ALL_ADAFLAGS)" $(GNATBIND) $(ADA_INCLUDES) $(GNATBIND_FLAGS) gnatlink $(GNATLINK) -v gnatlink -o ../../gnatlinknew$(exeext) \ @@ -2519,11 +2518,11 @@ gnatlink-re: ../stamp-tools link.o targext.o gnatmake-re # stamp target in the parent directory whenever gnat1 is rebuilt # Likewise for the tools -../../gnatmake$(exeext): $(P) b_gnatm.o link.o targext.o $(GNATMAKE_OBJS) +../../gnatmake$(exeext): $(P) b_gnatm.o $(GNATMAKE_OBJS) +$(GCC_LINK) $(ALL_CFLAGS) $(LDFLAGS) -o $@ b_gnatm.o $(GNATMAKE_OBJS) \ $(TOOLS_LIBS) -../../gnatlink$(exeext): $(P) b_gnatl.o link.o targext.o $(GNATLINK_OBJS) +../../gnatlink$(exeext): $(P) b_gnatl.o $(GNATLINK_OBJS) +$(GCC_LINK) $(ALL_CFLAGS) $(LDFLAGS) -o $@ b_gnatl.o $(GNATLINK_OBJS) \ $(TOOLS_LIBS) diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index f632a3164e7..26342e2a012 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -4830,7 +4830,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, int definition) if (is_type && (!gnu_decl || this_made_decl)) { /* Process the attributes, if not already done. Note that the type is - already defined so we cannot pass True for IN_PLACE here. */ + already defined so we cannot pass true for IN_PLACE here. */ process_attributes (&gnu_type, &attr_list, false, gnat_entity); /* Tell the middle-end that objects of tagged types are guaranteed to @@ -5153,7 +5153,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, int definition) to conflict with Comp2 and an alias set copy is required. The language rules ensure the parent type is already frozen here. 
*/ - if (Is_Derived_Type (gnat_entity)) + if (Is_Derived_Type (gnat_entity) && !type_annotate_only) { tree gnu_parent_type = gnat_to_gnu_type (Etype (gnat_entity)); relate_alias_sets (gnu_type, gnu_parent_type, @@ -5449,26 +5449,26 @@ bool is_cplusplus_method (Entity_Id gnat_entity) { if (Convention (gnat_entity) != Convention_CPP) - return False; + return false; /* This is the main case: C++ method imported as a primitive operation. */ if (Is_Dispatching_Operation (gnat_entity)) - return True; + return true; /* A thunk needs to be handled like its associated primitive operation. */ if (Is_Subprogram (gnat_entity) && Is_Thunk (gnat_entity)) - return True; + return true; /* C++ classes with no virtual functions can be imported as limited record types, but we need to return true for the constructors. */ if (Is_Constructor (gnat_entity)) - return True; + return true; /* This is set on the E_Subprogram_Type built for a dispatching call. */ if (Is_Dispatch_Table_Entity (gnat_entity)) - return True; + return true; - return False; + return false; } /* Finalize the processing of From_With_Type incomplete types. */ @@ -6727,13 +6727,13 @@ components_need_strict_alignment (Node_Id component_list) Entity_Id gnat_field = Defining_Entity (component_decl); if (Is_Aliased (gnat_field)) - return True; + return true; if (Strict_Alignment (Etype (gnat_field))) - return True; + return true; } - return False; + return false; } /* Return true if TYPE is a type with variable size or a padding type with a diff --git a/gcc/ada/gcc-interface/misc.c b/gcc/ada/gcc-interface/misc.c index 7b168df4e03..3abe57b6bd9 100644 --- a/gcc/ada/gcc-interface/misc.c +++ b/gcc/ada/gcc-interface/misc.c @@ -385,13 +385,13 @@ void gnat_init_gcc_fp (void) { /* Disable FP optimizations that ignore the signedness of zero if - S'Signed_Zeros is True, but don't override the user if not. */ + S'Signed_Zeros is true, but don't override the user if not. 
*/ if (Signed_Zeros_On_Target) flag_signed_zeros = 1; else if (!global_options_set.x_flag_signed_zeros) flag_signed_zeros = 0; - /* Assume that FP operations can trap if S'Machine_Overflow is True, + /* Assume that FP operations can trap if S'Machine_Overflow is true, but don't override the user if not. ??? Alpha/VMS enables FP traps without declaring it. */ diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c index f91f4b83a6e..4048e0aefe9 100644 --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -257,6 +257,8 @@ static tree pos_to_constructor (Node_Id, tree, Entity_Id); static void validate_unchecked_conversion (Node_Id); static tree maybe_implicit_deref (tree); static void set_expr_location_from_node (tree, Node_Id); +static void set_expr_location_from_node1 (tree, Node_Id, bool); +static bool Sloc_to_locus1 (Source_Ptr, location_t *, bool); static bool set_end_locus_from_node (tree, Node_Id); static void set_gnu_expr_location_from_node (tree, Node_Id); static int lvalue_required_p (Node_Id, tree, bool, bool, bool); @@ -2391,7 +2393,10 @@ can_equal_min_or_max_val_p (tree val, tree type, bool max) if (TREE_CODE (val) != INTEGER_CST) return true; - return tree_int_cst_equal (val, min_or_max_val) == 1; + if (max) + return tree_int_cst_lt (val, min_or_max_val) == 0; + else + return tree_int_cst_lt (min_or_max_val, val) == 0; } /* Return true if VAL (of type TYPE) can equal the minimum value of TYPE. @@ -2716,7 +2721,7 @@ Loop_Statement_to_gnu (Node_Id gnat_node) /* First, if we have computed a small number of invariant conditions for range checks applied to the iteration variable, then initialize these - conditions in front of the loop. Otherwise, leave them set to True. + conditions in front of the loop. Otherwise, leave them set to true. ??? 
The heuristics need to be improved, by taking into account the following datapoints: @@ -4019,9 +4024,19 @@ Call_to_gnu (Node_Id gnat_node, tree *gnu_result_type_p, tree gnu_target, /* Set up to move the copy back to the original if needed. */ if (!in_param) { - gnu_stmt = build_binary_op (MODIFY_EXPR, NULL_TREE, gnu_orig, - gnu_temp); + /* If the original is a COND_EXPR whose first arm isn't meant to + be further used, just deal with the second arm. This is very + likely the conditional expression built for a check. */ + if (TREE_CODE (gnu_orig) == COND_EXPR + && TREE_CODE (TREE_OPERAND (gnu_orig, 1)) == COMPOUND_EXPR + && integer_zerop + (TREE_OPERAND (TREE_OPERAND (gnu_orig, 1), 1))) + gnu_orig = TREE_OPERAND (gnu_orig, 2); + + gnu_stmt + = build_binary_op (MODIFY_EXPR, NULL_TREE, gnu_orig, gnu_temp); set_expr_location_from_node (gnu_stmt, gnat_node); + append_to_statement_list (gnu_stmt, &gnu_after_list); } } @@ -4458,6 +4473,10 @@ Handled_Sequence_Of_Statements_to_gnu (Node_Id gnat_node) tree gnu_result; tree gnu_expr; Node_Id gnat_temp; + /* Node providing the sloc for the cleanup actions. */ + Node_Id gnat_cleanup_loc_node = (Present (End_Label (gnat_node)) ? + End_Label (gnat_node) : + gnat_node); /* The GCC exception handling mechanism can handle both ZCX and SJLJ schemes and we have our own SJLJ mechanism. To call the GCC mechanism, we call @@ -4507,7 +4526,7 @@ Handled_Sequence_Of_Statements_to_gnu (Node_Id gnat_node) /* When we exit this block, restore the saved value. */ add_cleanup (build_call_n_expr (set_jmpbuf_decl, 1, gnu_jmpsave_decl), - End_Label (gnat_node)); + gnat_cleanup_loc_node); } /* If we are to call a function when exiting this block, add a cleanup @@ -4515,7 +4534,7 @@ Handled_Sequence_Of_Statements_to_gnu (Node_Id gnat_node) so we must register this cleanup after the EH cleanup just above. 
*/ if (at_end) add_cleanup (build_call_n_expr (gnat_to_gnu (At_End_Proc (gnat_node)), 0), - End_Label (gnat_node)); + gnat_cleanup_loc_node); /* Now build the tree for the declarations and statements inside this block. If this is SJLJ, set our jmp_buf as the current buffer. */ @@ -4628,14 +4647,18 @@ Handled_Sequence_Of_Statements_to_gnu (Node_Id gnat_node) /* Now make the TRY_CATCH_EXPR for the block. */ gnu_result = build2 (TRY_CATCH_EXPR, void_type_node, gnu_inner_block, gnu_handlers); - /* Set a location. We need to find a uniq location for the dispatching + /* Set a location. We need to find a unique location for the dispatching code, otherwise we can get coverage or debugging issues. Try with the location of the end label. */ if (Present (End_Label (gnat_node)) && Sloc_to_locus (Sloc (End_Label (gnat_node)), &locus)) SET_EXPR_LOCATION (gnu_result, locus); else - set_expr_location_from_node (gnu_result, gnat_node); + /* Clear column information so that the exception handler of an + implicit transient block does not incorrectly inherit the slocs + of a decision, which would otherwise confuse control flow based + coverage analysis tools. */ + set_expr_location_from_node1 (gnu_result, gnat_node, true); } else gnu_result = gnu_inner_block; @@ -4830,9 +4853,10 @@ Exception_Handler_to_gnu_zcx (Node_Id gnat_node) add_stmt_with_node (build_call_n_expr (begin_handler_decl, 1, gnu_incoming_exc_ptr), gnat_node); - /* ??? We don't seem to have an End_Label at hand to set the location. */ + /* We don't have an End_Label at hand to set the location of the cleanup + actions, so we use that of the exception handler itself instead. */ add_cleanup (build_call_n_expr (end_handler_decl, 1, gnu_incoming_exc_ptr), - Empty); + gnat_node); add_stmt_list (Statements (gnat_node)); gnat_poplevel (); @@ -7384,13 +7408,15 @@ mark_visited (tree t) } /* Add GNU_CLEANUP, a cleanup action, to the current code group and - set its location to that of GNAT_NODE if present. 
*/ + set its location to that of GNAT_NODE if present, but with column info + cleared so that conditional branches generated as part of the cleanup + code do not interfere with coverage analysis tools. */ static void add_cleanup (tree gnu_cleanup, Node_Id gnat_node) { if (Present (gnat_node)) - set_expr_location_from_node (gnu_cleanup, gnat_node); + set_expr_location_from_node1 (gnu_cleanup, gnat_node, true); append_to_statement_list (gnu_cleanup, ¤t_stmt_group->cleanups); } @@ -9005,10 +9031,11 @@ maybe_implicit_deref (tree exp) /* Convert SLOC into LOCUS. Return true if SLOC corresponds to a source code location and false if it doesn't. In the former case, set the Gigi global - variable REF_FILENAME to the simple debug file name as given by sinput. */ + variable REF_FILENAME to the simple debug file name as given by sinput. + If clear_column is true, set column information to 0. */ -bool -Sloc_to_locus (Source_Ptr Sloc, location_t *locus) +static bool +Sloc_to_locus1 (Source_Ptr Sloc, location_t *locus, bool clear_column) { if (Sloc == No_Location) return false; @@ -9022,7 +9049,7 @@ Sloc_to_locus (Source_Ptr Sloc, location_t *locus) { Source_File_Index file = Get_Source_File_Index (Sloc); Logical_Line_Number line = Get_Logical_Line_Number (Sloc); - Column_Number column = Get_Column_Number (Sloc); + Column_Number column = (clear_column ? 0 : Get_Column_Number (Sloc)); struct line_map *map = LINEMAPS_ORDINARY_MAP_AT (line_table, file - 1); /* We can have zero if pragma Source_Reference is in effect. */ @@ -9041,20 +9068,36 @@ Sloc_to_locus (Source_Ptr Sloc, location_t *locus) return true; } +/* Similar to the above, not clearing the column information. */ + +bool +Sloc_to_locus (Source_Ptr Sloc, location_t *locus) +{ + return Sloc_to_locus1 (Sloc, locus, false); +} + /* Similar to set_expr_location, but start with the Sloc of GNAT_NODE and don't do anything if it doesn't correspond to a source location. 
*/ static void -set_expr_location_from_node (tree node, Node_Id gnat_node) +set_expr_location_from_node1 (tree node, Node_Id gnat_node, bool clear_column) { location_t locus; - if (!Sloc_to_locus (Sloc (gnat_node), &locus)) + if (!Sloc_to_locus1 (Sloc (gnat_node), &locus, clear_column)) return; SET_EXPR_LOCATION (node, locus); } +/* Similar to the above, not clearing the column information. */ + +static void +set_expr_location_from_node (tree node, Node_Id gnat_node) +{ + set_expr_location_from_node1 (node, gnat_node, false); +} + /* More elaborate version of set_expr_location_from_node to be used in more general contexts, for example the result of the translation of a generic GNAT node. */ @@ -9185,9 +9228,13 @@ set_end_locus_from_node (tree gnu_node, Node_Id gnat_node) gnat_node = Present (gnat_end_label) ? gnat_end_label : gnat_node; /* Some expanded subprograms have neither an End_Label nor a Sloc - attached. Notify that to callers. */ + attached. Notify that to callers. For a block statement with no + End_Label, clear column information, so that the tree for a + transient block does not receive the sloc of a source condition. 
*/ - if (!Sloc_to_locus (Sloc (gnat_node), &end_locus)) + if (!Sloc_to_locus1 (Sloc (gnat_node), &end_locus, + No (gnat_end_label) && + (Nkind (gnat_node) == N_Block_Statement))) return false; switch (TREE_CODE (gnu_node)) diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c index 409c0dee94f..2c3e096f120 100644 --- a/gcc/ada/gcc-interface/utils.c +++ b/gcc/ada/gcc-interface/utils.c @@ -2573,7 +2573,7 @@ potential_alignment_gap (tree prev_field, tree curr_field, tree offset) if (!prev_field) return false; - /* If the previous field is a union type, then return False: The only + /* If the previous field is a union type, then return false: The only time when such a field is not the last field of the record is when there are other components at fixed positions after it (meaning there was a rep clause for every field), in which case we don't want the diff --git a/gcc/ada/gcc-interface/utils2.c b/gcc/ada/gcc-interface/utils2.c index 7f7f6af034a..64f7564a75d 100644 --- a/gcc/ada/gcc-interface/utils2.c +++ b/gcc/ada/gcc-interface/utils2.c @@ -648,11 +648,11 @@ build_atomic_load (tree src) (build_qualified_type (void_type_node, TYPE_QUAL_VOLATILE)); tree mem_model = build_int_cst (integer_type_node, MEMMODEL_SEQ_CST); tree orig_src = src; - tree type = TREE_TYPE (src); - tree t, val; + tree t, addr, val; unsigned int size; int fncode; + /* Remove conversions to get the address of the underlying object. 
*/ src = remove_conversions (src, false); size = resolve_atomic_size (TREE_TYPE (src)); if (size == 0) @@ -661,10 +661,13 @@ build_atomic_load (tree src) fncode = (int) BUILT_IN_ATOMIC_LOAD_N + exact_log2 (size) + 1; t = builtin_decl_implicit ((enum built_in_function) fncode); - src = build_unary_op (ADDR_EXPR, ptr_type, src); - val = build_call_expr (t, 2, src, mem_model); + addr = build_unary_op (ADDR_EXPR, ptr_type, src); + val = build_call_expr (t, 2, addr, mem_model); - return unchecked_convert (type, val, true); + /* First reinterpret the loaded bits in the original type of the load, + then convert to the expected result type. */ + t = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (src), val); + return convert (TREE_TYPE (orig_src), t); } /* Build an atomic store from SRC to the underlying atomic object in DEST. */ @@ -677,10 +680,11 @@ build_atomic_store (tree dest, tree src) (build_qualified_type (void_type_node, TYPE_QUAL_VOLATILE)); tree mem_model = build_int_cst (integer_type_node, MEMMODEL_SEQ_CST); tree orig_dest = dest; - tree t, int_type; + tree t, int_type, addr; unsigned int size; int fncode; + /* Remove conversions to get the address of the underlying object. */ dest = remove_conversions (dest, false); size = resolve_atomic_size (TREE_TYPE (dest)); if (size == 0) @@ -690,10 +694,20 @@ build_atomic_store (tree dest, tree src) t = builtin_decl_implicit ((enum built_in_function) fncode); int_type = gnat_type_for_size (BITS_PER_UNIT * size, 1); - dest = build_unary_op (ADDR_EXPR, ptr_type, dest); - src = unchecked_convert (int_type, src, true); + /* First convert the bits to be stored to the original type of the store, + then reinterpret them in the effective type. But if the original type + is a padded type with the same size, convert to the inner type instead, + as we don't want to artificially introduce a CONSTRUCTOR here. 
*/ + if (TYPE_IS_PADDING_P (TREE_TYPE (dest)) + && TYPE_SIZE (TREE_TYPE (dest)) + == TYPE_SIZE (TREE_TYPE (TYPE_FIELDS (TREE_TYPE (dest))))) + src = convert (TREE_TYPE (TYPE_FIELDS (TREE_TYPE (dest))), src); + else + src = convert (TREE_TYPE (dest), src); + src = fold_build1 (VIEW_CONVERT_EXPR, int_type, src); + addr = build_unary_op (ADDR_EXPR, ptr_type, dest); - return build_call_expr (t, 3, dest, src, mem_model); + return build_call_expr (t, 3, addr, src, mem_model); } /* Make a binary operation of kind OP_CODE. RESULT_TYPE is the type diff --git a/gcc/ada/sigtramp-ppcvxw.c b/gcc/ada/sigtramp-ppcvxw.c index 69baa1420df..e7b318fa9bf 100644 --- a/gcc/ada/sigtramp-ppcvxw.c +++ b/gcc/ada/sigtramp-ppcvxw.c @@ -6,7 +6,7 @@ * * * Asm Implementation File * * * - * Copyright (C) 2011-2012, Free Software Foundation, Inc. * + * Copyright (C) 2011-2013, Free Software Foundation, Inc. * * * * GNAT is free software; you can redistribute it and/or modify it under * * terms of the GNU General Public License as published by the Free Soft- * diff --git a/gcc/ada/terminals.c b/gcc/ada/terminals.c index dfadca8d6a9..8672ca372b6 100644 --- a/gcc/ada/terminals.c +++ b/gcc/ada/terminals.c @@ -6,7 +6,7 @@ * * * C Implementation File * * * - * Copyright (C) 2008-2012, AdaCore * + * Copyright (C) 2008-2013, AdaCore * * * * GNAT is free software; you can redistribute it and/or modify it under * * terms of the GNU General Public License as published by the Free Soft- * diff --git a/gcc/asan.c b/gcc/asan.c index b12cf447cc5..af215f681c3 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -842,25 +842,12 @@ asan_init_shadow_ptr_types (void) initialize_sanitizer_builtins (); } -/* Asan pretty-printer, used for buidling of the description STRING_CSTs. */ -static pretty_printer asan_pp; -static bool asan_pp_initialized; - -/* Initialize asan_pp. 
*/ - -static void -asan_pp_initialize (void) -{ - pp_construct (&asan_pp, /* prefix */NULL, /* line-width */0); - asan_pp_initialized = true; -} - -/* Create ADDR_EXPR of STRING_CST with asan_pp text. */ +/* Create ADDR_EXPR of STRING_CST with the PP pretty printer text. */ static tree -asan_pp_string (void) +asan_pp_string (pretty_printer *pp) { - const char *buf = pp_base_formatted_text (&asan_pp); + const char *buf = pp_formatted_text (pp); size_t len = strlen (buf); tree ret = build_string (len + 1, buf); TREE_TYPE (ret) @@ -882,7 +869,7 @@ asan_shadow_cst (unsigned char shadow_bytes[4]) for (i = 0; i < 4; i++) val |= (unsigned HOST_WIDE_INT) shadow_bytes[BYTES_BIG_ENDIAN ? 3 - i : i] << (BITS_PER_UNIT * i); - return GEN_INT (trunc_int_for_mode (val, SImode)); + return gen_int_mode (val, SImode); } /* Clear shadow memory at SHADOW_MEM, LEN bytes. Can't call a library call here @@ -950,12 +937,10 @@ asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls, asan_init_shadow_ptr_types (); /* First of all, prepare the description string. */ - if (!asan_pp_initialized) - asan_pp_initialize (); + pretty_printer asan_pp; - pp_clear_output_area (&asan_pp); if (DECL_NAME (current_function_decl)) - pp_base_tree_identifier (&asan_pp, DECL_NAME (current_function_decl)); + pp_tree_identifier (&asan_pp, DECL_NAME (current_function_decl)); else pp_string (&asan_pp, ""); pp_space (&asan_pp); @@ -972,13 +957,13 @@ asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls, { pp_decimal_int (&asan_pp, IDENTIFIER_LENGTH (DECL_NAME (decl))); pp_space (&asan_pp); - pp_base_tree_identifier (&asan_pp, DECL_NAME (decl)); + pp_tree_identifier (&asan_pp, DECL_NAME (decl)); } else pp_string (&asan_pp, "9 "); pp_space (&asan_pp); } - str_cst = asan_pp_string (); + str_cst = asan_pp_string (&asan_pp); /* Emit the prologue sequence. 
*/ base = expand_binop (Pmode, add_optab, base, GEN_INT (base_offset), @@ -1648,7 +1633,7 @@ instrument_mem_region_access (tree base, tree len, access to the last byte of the argument; it uses the result of the call to deduce the offset of that last byte. - Upon completion, iff the call has actullay been instrumented, this + Upon completion, iff the call has actually been instrumented, this function returns TRUE and *ITER points to the statement logically following the built-in strlen function call *ITER was initially pointing to. Otherwise, the function returns FALSE and *ITER @@ -1679,10 +1664,10 @@ instrument_strlen_call (gimple_stmt_iterator *iter) /* Instrument the access to the first byte of str_arg. i.e: _1 = str_arg; instrument (_1); */ + tree cptr_type = build_pointer_type (char_type_node); gimple str_arg_ssa = gimple_build_assign_with_ops (NOP_EXPR, - make_ssa_name (build_pointer_type - (char_type_node), NULL), + make_ssa_name (cptr_type, NULL), str_arg, NULL); gimple_set_location (str_arg_ssa, loc); gimple_stmt_iterator gsi = *iter; @@ -1701,8 +1686,7 @@ instrument_strlen_call (gimple_stmt_iterator *iter) pointer_plus expr: (_1 + len). */ gimple stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, - make_ssa_name (TREE_TYPE (str_arg), - NULL), + make_ssa_name (cptr_type, NULL), gimple_assign_lhs (str_arg_ssa), len); gimple_set_location (stmt, loc); @@ -1954,7 +1938,7 @@ asan_global_struct (void) = build_decl (UNKNOWN_LOCATION, FIELD_DECL, get_identifier (field_names[i]), (i == 0 || i == 3) ? 
const_ptr_type_node - : build_nonstandard_integer_type (POINTER_SIZE, 1)); + : pointer_sized_int_node); DECL_CONTEXT (fields[i]) = ret; if (i) DECL_CHAIN (fields[i - 1]) = fields[i]; @@ -1976,19 +1960,17 @@ asan_add_global (tree decl, tree type, vec *v) tree str_cst, refdecl = decl; vec *vinner = NULL; - if (!asan_pp_initialized) - asan_pp_initialize (); + pretty_printer asan_pp; - pp_clear_output_area (&asan_pp); if (DECL_NAME (decl)) - pp_base_tree_identifier (&asan_pp, DECL_NAME (decl)); + pp_tree_identifier (&asan_pp, DECL_NAME (decl)); else pp_string (&asan_pp, ""); pp_space (&asan_pp); pp_left_paren (&asan_pp); pp_string (&asan_pp, main_input_filename); pp_right_paren (&asan_pp); - str_cst = asan_pp_string (); + str_cst = asan_pp_string (&asan_pp); if (asan_needs_local_alias (decl)) { @@ -2034,10 +2016,12 @@ initialize_sanitizer_builtins (void) tree BT_FN_VOID = build_function_type_list (void_type_node, NULL_TREE); tree BT_FN_VOID_PTR = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + tree BT_FN_VOID_PTR_PTR_PTR + = build_function_type_list (void_type_node, ptr_type_node, + ptr_type_node, ptr_type_node, NULL_TREE); tree BT_FN_VOID_PTR_PTRMODE = build_function_type_list (void_type_node, ptr_type_node, - build_nonstandard_integer_type (POINTER_SIZE, - 1), NULL_TREE); + pointer_sized_int_node, NULL_TREE); tree BT_FN_VOID_INT = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); tree BT_FN_BOOL_VPTR_PTR_IX_INT_INT[5]; @@ -2099,6 +2083,12 @@ initialize_sanitizer_builtins (void) #undef ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST #define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST \ ECF_TM_PURE | ATTR_NORETURN_NOTHROW_LEAF_LIST +#undef ATTR_COLD_NOTHROW_LEAF_LIST +#define ATTR_COLD_NOTHROW_LEAF_LIST \ + /* ECF_COLD missing */ ATTR_NOTHROW_LEAF_LIST +#undef ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST +#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ + /* ECF_COLD missing */ ATTR_NORETURN_NOTHROW_LEAF_LIST #undef DEF_SANITIZER_BUILTIN #define 
DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ decl = add_builtin_function ("__builtin_" NAME, TYPE, ENUM, \ @@ -2175,7 +2165,7 @@ asan_finish_file (void) /* Avoid instrumenting code in the asan ctors/dtors. We don't need to insert padding after the description strings, nor after .LASAN* array. */ - flag_asan = 0; + flag_sanitize &= ~SANITIZE_ADDRESS; tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT); append_to_statement_list (build_call_expr (fn, 0), &asan_ctor_statements); @@ -2188,7 +2178,6 @@ asan_finish_file (void) if (gcount) { tree type = asan_global_struct (), var, ctor; - tree uptr = build_nonstandard_integer_type (POINTER_SIZE, 1); tree dtor_statements = NULL_TREE; vec *v; char buf[20]; @@ -2217,22 +2206,23 @@ asan_finish_file (void) varpool_assemble_decl (varpool_node_for_decl (var)); fn = builtin_decl_implicit (BUILT_IN_ASAN_REGISTER_GLOBALS); + tree gcount_tree = build_int_cst (pointer_sized_int_node, gcount); append_to_statement_list (build_call_expr (fn, 2, build_fold_addr_expr (var), - build_int_cst (uptr, gcount)), + gcount_tree), &asan_ctor_statements); fn = builtin_decl_implicit (BUILT_IN_ASAN_UNREGISTER_GLOBALS); append_to_statement_list (build_call_expr (fn, 2, build_fold_addr_expr (var), - build_int_cst (uptr, gcount)), + gcount_tree), &dtor_statements); cgraph_build_static_cdtor ('D', dtor_statements, MAX_RESERVED_INIT_PRIORITY - 1); } cgraph_build_static_cdtor ('I', asan_ctor_statements, MAX_RESERVED_INIT_PRIORITY - 1); - flag_asan = 1; + flag_sanitize |= SANITIZE_ADDRESS; } /* Instrument the current function. 
*/ @@ -2249,57 +2239,94 @@ asan_instrument (void) static bool gate_asan (void) { - return flag_asan != 0 + return (flag_sanitize & SANITIZE_ADDRESS) != 0 && !lookup_attribute ("no_sanitize_address", DECL_ATTRIBUTES (current_function_decl)); } -struct gimple_opt_pass pass_asan = +namespace { + +const pass_data pass_data_asan = { - { - GIMPLE_PASS, - "asan", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_asan, /* gate */ - asan_instrument, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_flow | TODO_verify_stmts - | TODO_update_ssa /* todo_flags_finish */ - } + GIMPLE_PASS, /* type */ + "asan", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_verify_flow | TODO_verify_stmts + | TODO_update_ssa ), /* todo_flags_finish */ }; +class pass_asan : public gimple_opt_pass +{ +public: + pass_asan(gcc::context *ctxt) + : gimple_opt_pass(pass_data_asan, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_asan (ctxt_); } + bool gate () { return gate_asan (); } + unsigned int execute () { return asan_instrument (); } + +}; // class pass_asan + +} // anon namespace + +gimple_opt_pass * +make_pass_asan (gcc::context *ctxt) +{ + return new pass_asan (ctxt); +} + static bool gate_asan_O0 (void) { return !optimize && gate_asan (); } -struct gimple_opt_pass pass_asan_O0 = +namespace { + +const pass_data pass_data_asan_O0 = { - { - GIMPLE_PASS, - "asan0", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_asan_O0, /* gate */ - asan_instrument, /* execute */ - NULL, /* sub */ - NULL, /* 
next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_flow | TODO_verify_stmts - | TODO_update_ssa /* todo_flags_finish */ - } + GIMPLE_PASS, /* type */ + "asan0", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_verify_flow | TODO_verify_stmts + | TODO_update_ssa ), /* todo_flags_finish */ }; +class pass_asan_O0 : public gimple_opt_pass +{ +public: + pass_asan_O0(gcc::context *ctxt) + : gimple_opt_pass(pass_data_asan_O0, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_asan_O0 (); } + unsigned int execute () { return asan_instrument (); } + +}; // class pass_asan_O0 + +} // anon namespace + +gimple_opt_pass * +make_pass_asan_O0 (gcc::context *ctxt) +{ + return new pass_asan_O0 (ctxt); +} + #include "gt-asan.h" diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c index 6119bb6e757..0d92b1d79b5 100644 --- a/gcc/auto-inc-dec.c +++ b/gcc/auto-inc-dec.c @@ -1506,22 +1506,40 @@ gate_auto_inc_dec (void) } -struct rtl_opt_pass pass_inc_dec = +namespace { + +const pass_data pass_data_inc_dec = { - { - RTL_PASS, - "auto_inc_dec", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_auto_inc_dec, /* gate */ - rest_of_handle_auto_inc_dec, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_AUTO_INC_DEC, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "auto_inc_dec", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* 
has_execute */ + TV_AUTO_INC_DEC, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ }; + +class pass_inc_dec : public rtl_opt_pass +{ +public: + pass_inc_dec(gcc::context *ctxt) + : rtl_opt_pass(pass_data_inc_dec, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_auto_inc_dec (); } + unsigned int execute () { return rest_of_handle_auto_inc_dec (); } + +}; // class pass_inc_dec + +} // anon namespace + +rtl_opt_pass * +make_pass_inc_dec (gcc::context *ctxt) +{ + return new pass_inc_dec (ctxt); +} diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 154dc7a2c31..ad04d4de83e 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -465,6 +465,23 @@ struct edge_list edge *index_to_edge; }; +/* Class to compute and manage control dependences on an edge-list. */ +class control_dependences +{ +public: + control_dependences (edge_list *); + ~control_dependences (); + bitmap get_edges_dependent_on (int); + edge get_edge (int); + +private: + void set_control_dependence_map_bit (basic_block, int); + void clear_control_dependence_bitmap (basic_block); + void find_control_dependence (int); + vec control_dependence_map; + edge_list *el; +}; + /* The base value for branch probability notes and edge probabilities. 
*/ #define REG_BR_PROB_BASE 10000 @@ -726,6 +743,7 @@ extern void compute_available (sbitmap *, sbitmap *, sbitmap *, sbitmap *); extern bool maybe_hot_bb_p (struct function *, const_basic_block); extern bool maybe_hot_edge_p (edge); extern bool probably_never_executed_bb_p (struct function *, const_basic_block); +extern bool probably_never_executed_edge_p (struct function *, edge); extern bool optimize_bb_for_size_p (const_basic_block); extern bool optimize_bb_for_speed_p (const_basic_block); extern bool optimize_edge_for_size_p (edge); @@ -797,6 +815,7 @@ extern bool contains_no_active_insn_p (const_basic_block); extern bool forwarder_block_p (const_basic_block); extern bool can_fallthru (basic_block, basic_block); extern void emit_barrier_after_bb (basic_block bb); +extern void fixup_partitions (void); /* In cfgbuild.c. */ extern void find_many_sub_basic_blocks (sbitmap); @@ -958,7 +977,7 @@ combine_probabilities (int prob1, int prob2) constrained to be < REG_BR_PROB_BASE. */ static inline gcov_type -apply_scale (gcov_type freq, int scale) +apply_scale (gcov_type freq, gcov_type scale) { return RDIV (freq * scale, REG_BR_PROB_BASE); } diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c index 2cbeb6ae9f3..6b034aba5c9 100644 --- a/gcc/bb-reorder.c +++ b/gcc/bb-reorder.c @@ -1444,25 +1444,155 @@ fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb) ei_next (&ei); } + +/* Ensure that all hot bbs are included in a hot path through the + procedure. This is done by calling this function twice, once + with WALK_UP true (to look for paths from the entry to hot bbs) and + once with WALK_UP false (to look for paths from hot bbs to the exit). + Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs + to BBS_IN_HOT_PARTITION. */ + +static unsigned int +sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count, + vec *bbs_in_hot_partition) +{ + /* Callers check this. 
*/ + gcc_checking_assert (cold_bb_count); + + /* Keep examining hot bbs while we still have some left to check + and there are remaining cold bbs. */ + vec hot_bbs_to_check = bbs_in_hot_partition->copy (); + while (! hot_bbs_to_check.is_empty () + && cold_bb_count) + { + basic_block bb = hot_bbs_to_check.pop (); + vec *edges = walk_up ? bb->preds : bb->succs; + edge e; + edge_iterator ei; + int highest_probability = 0; + int highest_freq = 0; + gcov_type highest_count = 0; + bool found = false; + + /* Walk the preds/succs and check if there is at least one already + marked hot. Keep track of the most frequent pred/succ so that we + can mark it hot if we don't find one. */ + FOR_EACH_EDGE (e, ei, edges) + { + basic_block reach_bb = walk_up ? e->src : e->dest; + + if (e->flags & EDGE_DFS_BACK) + continue; + + if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION) + { + found = true; + break; + } + /* The following loop will look for the hottest edge via + the edge count, if it is non-zero, then fallback to the edge + frequency and finally the edge probability. */ + if (e->count > highest_count) + highest_count = e->count; + int edge_freq = EDGE_FREQUENCY (e); + if (edge_freq > highest_freq) + highest_freq = edge_freq; + if (e->probability > highest_probability) + highest_probability = e->probability; + } + + /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot + block (or unpartitioned, e.g. the entry block) then it is ok. If not, + then the most frequent pred (or succ) needs to be adjusted. In the + case where multiple preds/succs have the same frequency (e.g. a + 50-50 branch), then both will be adjusted. */ + if (found) + continue; + + FOR_EACH_EDGE (e, ei, edges) + { + if (e->flags & EDGE_DFS_BACK) + continue; + /* Select the hottest edge using the edge count, if it is non-zero, + then fallback to the edge frequency and finally the edge + probability. 
*/ + if (highest_count) + { + if (e->count < highest_count) + continue; + } + else if (highest_freq) + { + if (EDGE_FREQUENCY (e) < highest_freq) + continue; + } + else if (e->probability < highest_probability) + continue; + + basic_block reach_bb = walk_up ? e->src : e->dest; + + /* We have a hot bb with an immediate dominator that is cold. + The dominator needs to be re-marked hot. */ + BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION); + cold_bb_count--; + + /* Now we need to examine newly-hot reach_bb to see if it is also + dominated by a cold bb. */ + bbs_in_hot_partition->safe_push (reach_bb); + hot_bbs_to_check.safe_push (reach_bb); + } + } + + return cold_bb_count; +} + + /* Find the basic blocks that are rarely executed and need to be moved to a separate section of the .o file (to cut down on paging and improve cache locality). Return a vector of all edges that cross. */ -static vec +static vec find_rarely_executed_basic_blocks_and_crossing_edges (void) { vec crossing_edges = vNULL; basic_block bb; edge e; edge_iterator ei; + unsigned int cold_bb_count = 0; + vec bbs_in_hot_partition = vNULL; /* Mark which partition (hot/cold) each basic block belongs in. */ FOR_EACH_BB (bb) { if (probably_never_executed_bb_p (cfun, bb)) - BB_SET_PARTITION (bb, BB_COLD_PARTITION); + { + BB_SET_PARTITION (bb, BB_COLD_PARTITION); + cold_bb_count++; + } else - BB_SET_PARTITION (bb, BB_HOT_PARTITION); + { + BB_SET_PARTITION (bb, BB_HOT_PARTITION); + bbs_in_hot_partition.safe_push (bb); + } + } + + /* Ensure that hot bbs are included along a hot path from the entry to exit. + Several different possibilities may include cold bbs along all paths + to/from a hot bb. One is that there are edge weight insanities + due to optimization phases that do not properly update basic block profile + counts. 
The second is that the entry of the function may not be hot, because + it is entered fewer times than the number of profile training runs, but there + is a loop inside the function that causes blocks within the function to be + above the threshold for hotness. This is fixed by walking up from hot bbs + to the entry block, and then down from hot bbs to the exit, performing + partitioning fixups as necessary. */ + if (cold_bb_count) + { + mark_dfs_back_edges (); + cold_bb_count = sanitize_hot_paths (true, cold_bb_count, + &bbs_in_hot_partition); + if (cold_bb_count) + sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition); } /* The format of .gcc_except_table does not allow landing pads to @@ -2179,26 +2309,44 @@ rest_of_handle_reorder_blocks (void) return 0; } -struct rtl_opt_pass pass_reorder_blocks = +namespace { + +const pass_data pass_data_reorder_blocks = { - { - RTL_PASS, - "bbro", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_reorder_blocks, /* gate */ - rest_of_handle_reorder_blocks, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_REORDER_BLOCKS, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "bbro", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_REORDER_BLOCKS, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; +class pass_reorder_blocks : public rtl_opt_pass +{ +public: + pass_reorder_blocks(gcc::context *ctxt) + : rtl_opt_pass(pass_data_reorder_blocks, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_reorder_blocks (); } + unsigned int execute () { return rest_of_handle_reorder_blocks (); } + +}; // class 
pass_reorder_blocks + +} // anon namespace + +rtl_opt_pass * +make_pass_reorder_blocks (gcc::context *ctxt) +{ + return new pass_reorder_blocks (ctxt); +} + /* Duplicate the blocks containing computed gotos. This basically unfactors computed gotos that were factored early on in the compilation process to speed up edge based data flow. We used to not unfactoring them again, @@ -2327,26 +2475,44 @@ done: return 0; } -struct rtl_opt_pass pass_duplicate_computed_gotos = +namespace { + +const pass_data pass_data_duplicate_computed_gotos = { - { - RTL_PASS, - "compgotos", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_duplicate_computed_gotos, /* gate */ - duplicate_computed_gotos, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_REORDER_BLOCKS, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing,/* todo_flags_finish */ - } + RTL_PASS, /* type */ + "compgotos", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_REORDER_BLOCKS, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; +class pass_duplicate_computed_gotos : public rtl_opt_pass +{ +public: + pass_duplicate_computed_gotos(gcc::context *ctxt) + : rtl_opt_pass(pass_data_duplicate_computed_gotos, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_duplicate_computed_gotos (); } + unsigned int execute () { return duplicate_computed_gotos (); } + +}; // class pass_duplicate_computed_gotos + +} // anon namespace + +rtl_opt_pass * +make_pass_duplicate_computed_gotos (gcc::context *ctxt) +{ + return new pass_duplicate_computed_gotos (ctxt); +} + static bool gate_handle_partition_blocks (void) { @@ -2533,22 +2699,40 @@ partition_hot_cold_basic_blocks (void) return 
TODO_verify_flow | TODO_verify_rtl_sharing; } -struct rtl_opt_pass pass_partition_blocks = +namespace { + +const pass_data pass_data_partition_blocks = { - { - RTL_PASS, - "bbpart", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_partition_blocks, /* gate */ - partition_hot_cold_basic_blocks, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_REORDER_BLOCKS, /* tv_id */ - PROP_cfglayout, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "bbpart", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_REORDER_BLOCKS, /* tv_id */ + PROP_cfglayout, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_partition_blocks : public rtl_opt_pass +{ +public: + pass_partition_blocks(gcc::context *ctxt) + : rtl_opt_pass(pass_data_partition_blocks, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_partition_blocks (); } + unsigned int execute () { return partition_hot_cold_basic_blocks (); } + +}; // class pass_partition_blocks + +} // anon namespace + +rtl_opt_pass * +make_pass_partition_blocks (gcc::context *ctxt) +{ + return new pass_partition_blocks (ctxt); +} diff --git a/gcc/bt-load.c b/gcc/bt-load.c index 9ca1bd98dcd..b53435680ec 100644 --- a/gcc/bt-load.c +++ b/gcc/bt-load.c @@ -1504,26 +1504,46 @@ rest_of_handle_branch_target_load_optimize1 (void) return 0; } -struct rtl_opt_pass pass_branch_target_load_optimize1 = +namespace { + +const pass_data pass_data_branch_target_load_optimize1 = { - { - RTL_PASS, - "btl1", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_branch_target_load_optimize1, /* gate */ - rest_of_handle_branch_target_load_optimize1, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* 
static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "btl1", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; +class pass_branch_target_load_optimize1 : public rtl_opt_pass +{ +public: + pass_branch_target_load_optimize1(gcc::context *ctxt) + : rtl_opt_pass(pass_data_branch_target_load_optimize1, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_branch_target_load_optimize1 (); } + unsigned int execute () { + return rest_of_handle_branch_target_load_optimize1 (); + } + +}; // class pass_branch_target_load_optimize1 + +} // anon namespace + +rtl_opt_pass * +make_pass_branch_target_load_optimize1 (gcc::context *ctxt) +{ + return new pass_branch_target_load_optimize1 (ctxt); +} + static bool gate_handle_branch_target_load_optimize2 (void) { @@ -1553,22 +1573,42 @@ rest_of_handle_branch_target_load_optimize2 (void) return 0; } -struct rtl_opt_pass pass_branch_target_load_optimize2 = +namespace { + +const pass_data pass_data_branch_target_load_optimize2 = { - { - RTL_PASS, - "btl2", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_branch_target_load_optimize2, /* gate */ - rest_of_handle_branch_target_load_optimize2, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "btl2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + 
TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_branch_target_load_optimize2 : public rtl_opt_pass +{ +public: + pass_branch_target_load_optimize2(gcc::context *ctxt) + : rtl_opt_pass(pass_data_branch_target_load_optimize2, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_branch_target_load_optimize2 (); } + unsigned int execute () { + return rest_of_handle_branch_target_load_optimize2 (); + } + +}; // class pass_branch_target_load_optimize2 + +} // anon namespace + +rtl_opt_pass * +make_pass_branch_target_load_optimize2 (gcc::context *ctxt) +{ + return new pass_branch_target_load_optimize2 (ctxt); +} diff --git a/gcc/builtin-attrs.def b/gcc/builtin-attrs.def index dcaeee9e68d..7939727015a 100644 --- a/gcc/builtin-attrs.def +++ b/gcc/builtin-attrs.def @@ -83,6 +83,7 @@ DEF_LIST_INT_INT (5,6) #undef DEF_LIST_INT_INT /* Construct trees for identifiers. */ +DEF_ATTR_IDENT (ATTR_COLD, "cold") DEF_ATTR_IDENT (ATTR_CONST, "const") DEF_ATTR_IDENT (ATTR_FORMAT, "format") DEF_ATTR_IDENT (ATTR_FORMAT_ARG, "format_arg") @@ -130,6 +131,10 @@ DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LIST, ATTR_NORETURN, \ ATTR_NULL, ATTR_NOTHROW_LIST) DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\ ATTR_NULL, ATTR_NOTHROW_LEAF_LIST) +DEF_ATTR_TREE_LIST (ATTR_COLD_NOTHROW_LEAF_LIST, ATTR_COLD,\ + ATTR_NULL, ATTR_NOTHROW_LEAF_LIST) +DEF_ATTR_TREE_LIST (ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST, ATTR_COLD,\ + ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST) DEF_ATTR_TREE_LIST (ATTR_CONST_NORETURN_NOTHROW_LEAF_LIST, ATTR_CONST,\ ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST) DEF_ATTR_TREE_LIST (ATTR_MALLOC_NOTHROW_LIST, ATTR_MALLOC, \ diff --git a/gcc/builtins.c b/gcc/builtins.c index 78b0d842cc0..bb44a7f9b01 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. 
If not see #include "value-prof.h" #include "diagnostic-core.h" #include "builtins.h" +#include "ubsan.h" #ifndef PAD_VARARGS_DOWN @@ -61,7 +62,7 @@ struct target_builtins *this_target_builtins = &default_target_builtins; #endif /* Define the names of the builtin function types and codes. */ -const char *const built_in_class_names[4] +const char *const built_in_class_names[BUILT_IN_LAST] = {"NOT_BUILT_IN", "BUILT_IN_FRONTEND", "BUILT_IN_MD", "BUILT_IN_NORMAL"}; #define DEF_BUILTIN(X, N, C, T, LT, B, F, NA, AT, IM, COND) #X, @@ -249,6 +250,30 @@ is_builtin_fn (tree decl) return TREE_CODE (decl) == FUNCTION_DECL && DECL_BUILT_IN (decl); } +/* By default we assume that c99 functions are present at the runtime, + but sincos is not. */ +bool +default_libc_has_function (enum function_class fn_class) +{ + if (fn_class == function_c94 + || fn_class == function_c99_misc + || fn_class == function_c99_math_complex) + return true; + + return false; +} + +bool +gnu_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED) +{ + return true; +} + +bool +no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED) +{ + return false; +} /* Return true if NODE should be considered for inline expansion regardless of the optimization level. This means whenever a function is invoked with @@ -2548,7 +2573,7 @@ expand_builtin_cexpi (tree exp, rtx target) /* Compute into op1 and op2. */ expand_twoval_unop (sincos_optab, op0, op2, op1, 0); } - else if (TARGET_HAS_SINCOS) + else if (targetm.libc_has_function (function_sincos)) { tree call, fn = NULL_TREE; tree top1, top2; @@ -5826,6 +5851,13 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, set of builtins. 
*/ if (!optimize && !called_as_built_in (fndecl) + && fcode != BUILT_IN_FORK + && fcode != BUILT_IN_EXECL + && fcode != BUILT_IN_EXECV + && fcode != BUILT_IN_EXECLP + && fcode != BUILT_IN_EXECLE + && fcode != BUILT_IN_EXECVP + && fcode != BUILT_IN_EXECVE && fcode != BUILT_IN_ALLOCA && fcode != BUILT_IN_ALLOCA_WITH_ALIGN && fcode != BUILT_IN_FREE) @@ -5865,6 +5897,9 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, switch (fcode) { CASE_FLT_FN (BUILT_IN_FABS): + case BUILT_IN_FABSD32: + case BUILT_IN_FABSD64: + case BUILT_IN_FABSD128: target = expand_builtin_fabs (exp, target, subtarget); if (target) return target; @@ -7810,7 +7845,7 @@ fold_builtin_sincos (location_t loc, return res; /* Canonicalize sincos to cexpi. */ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_math_complex)) return NULL_TREE; fn = mathfn_built_in (type, BUILT_IN_CEXPI); if (!fn) @@ -7850,7 +7885,7 @@ fold_builtin_cexp (location_t loc, tree arg0, tree type) /* In case we can figure out the real part of arg0 and it is constant zero fold to cexpi. 
*/ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_math_complex)) return NULL_TREE; ifn = mathfn_built_in (rtype, BUILT_IN_CEXPI); if (!ifn) @@ -8106,14 +8141,13 @@ fold_builtin_bitop (tree fndecl, tree arg) { hi = TREE_INT_CST_HIGH (arg); if (width < HOST_BITS_PER_DOUBLE_INT) - hi &= ~((unsigned HOST_WIDE_INT) (-1) - << (width - HOST_BITS_PER_WIDE_INT)); + hi &= ~(HOST_WIDE_INT_M1U << (width - HOST_BITS_PER_WIDE_INT)); } else { hi = 0; if (width < HOST_BITS_PER_WIDE_INT) - lo &= ~((unsigned HOST_WIDE_INT) (-1) << width); + lo &= ~(HOST_WIDE_INT_M1U << width); } switch (DECL_FUNCTION_CODE (fndecl)) @@ -8152,13 +8186,13 @@ fold_builtin_bitop (tree fndecl, tree arg) && (hi & ((unsigned HOST_WIDE_INT) 1 << (width - HOST_BITS_PER_WIDE_INT - 1))) != 0) { - hi = ~hi & ~((unsigned HOST_WIDE_INT) (-1) + hi = ~hi & ~(HOST_WIDE_INT_M1U << (width - HOST_BITS_PER_WIDE_INT - 1)); lo = ~lo; } else if (width <= HOST_BITS_PER_WIDE_INT && (lo & ((unsigned HOST_WIDE_INT) 1 << (width - 1))) != 0) - lo = ~lo & ~((unsigned HOST_WIDE_INT) (-1) << (width - 1)); + lo = ~lo & ~(HOST_WIDE_INT_M1U << (width - 1)); if (hi != 0) result = width - floor_log2 (hi) - 2 - HOST_BITS_PER_WIDE_INT; else if (lo != 0) @@ -10277,6 +10311,11 @@ fold_builtin_0 (location_t loc, tree fndecl, bool ignore ATTRIBUTE_UNUSED) case BUILT_IN_CLASSIFY_TYPE: return fold_builtin_classify_type (NULL_TREE); + case BUILT_IN_UNREACHABLE: + if (flag_sanitize & SANITIZE_UNREACHABLE) + return ubsan_instrument_unreachable (loc); + break; + default: break; } @@ -10314,6 +10353,9 @@ fold_builtin_1 (location_t loc, tree fndecl, tree arg0, bool ignore) return fold_builtin_strlen (loc, type, arg0); CASE_FLT_FN (BUILT_IN_FABS): + case BUILT_IN_FABSD32: + case BUILT_IN_FABSD64: + case BUILT_IN_FABSD128: return fold_builtin_fabs (loc, arg0, type); case BUILT_IN_ABS: diff --git a/gcc/builtins.def b/gcc/builtins.def index 9b55b1f7a96..8ccf3ae3578 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ 
-102,14 +102,20 @@ along with GCC; see the file COPYING3. If not see #undef DEF_C94_BUILTIN #define DEF_C94_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc94, ATTRS, TARGET_C99_FUNCTIONS, true) + true, true, !flag_isoc94, ATTRS, targetm.libc_has_function (function_c94), true) /* Like DEF_LIB_BUILTIN, except that the function is only a part of the standard in C99 or above. */ #undef DEF_C99_BUILTIN #define DEF_C99_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc99, ATTRS, TARGET_C99_FUNCTIONS, true) + true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_misc), true) + +/* Like DEF_C99_BUILTIN, but for complex math functions. */ +#undef DEF_C99_COMPL_BUILTIN +#define DEF_C99_COMPL_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ + true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_math_complex), true) /* Builtin that is specified by C99 and C90 reserve the name for future use. We can still recognize the builtin in C90 mode but we can't produce it @@ -117,7 +123,7 @@ along with GCC; see the file COPYING3. If not see #undef DEF_C99_C90RES_BUILTIN #define DEF_C99_C90RES_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc99, ATTRS, TARGET_C99_FUNCTIONS, true) + true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_misc), true) /* Builtin that C99 reserve the name for future use. We can still recognize the builtin in C99 mode but we can't produce it implicitly. */ @@ -155,7 +161,8 @@ along with GCC; see the file COPYING3. 
If not see #define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ true, true, true, ATTRS, true, \ - (flag_asan || flag_tsan)) + (flag_sanitize & (SANITIZE_ADDRESS | SANITIZE_THREAD \ + | SANITIZE_UNDEFINED))) #undef DEF_CILKPLUS_BUILTIN #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ @@ -257,6 +264,9 @@ DEF_C99_BUILTIN (BUILT_IN_EXPM1L, "expm1l", BT_FN_LONGDOUBLE_LONGDOUBLE, DEF_LIB_BUILTIN (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_FABSD32, "fabsd32", BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_FABSD64, "fabsd64", BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_FABSD128, "fabsd128", BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_C99_BUILTIN (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO) DEF_C99_BUILTIN (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO) DEF_C99_BUILTIN (BUILT_IN_FDIML, "fdiml", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO) @@ -463,78 +473,78 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_YNF, "ynf", BT_FN_FLOAT_INT_FLOAT, ATTR_MATHFN_ DEF_EXT_LIB_BUILTIN (BUILT_IN_YNL, "ynl", BT_FN_LONGDOUBLE_INT_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO) /* Category: _Complex math builtins. 
*/ -DEF_C99_BUILTIN (BUILT_IN_CABS, "cabs", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CABSF, "cabsf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CABSL, "cabsl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOS, "cacos", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOSF, "cacosf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOSH, "cacosh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOSHF, "cacoshf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOSHL, "cacoshl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CACOSL, "cacosl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CARG, "carg", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CARGF, "cargf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CARGL, "cargl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASIN, "casin", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASINF, "casinf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASINH, "casinh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASINHF, "casinhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASINHL, "casinhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CASINL, "casinl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CATAN, "catan", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) 
-DEF_C99_BUILTIN (BUILT_IN_CATANF, "catanf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CATANH, "catanh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CATANHF, "catanhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CATANHL, "catanhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CATANL, "catanl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOS, "ccos", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOSF, "ccosf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOSH, "ccosh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOSHF, "ccoshf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOSHL, "ccoshl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CCOSL, "ccosl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CEXP, "cexp", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CEXPF, "cexpf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CEXPL, "cexpl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CABS, "cabs", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CABSF, "cabsf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CABSL, "cabsl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOS, "cacos", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOSF, "cacosf", 
BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOSH, "cacosh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOSHF, "cacoshf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOSHL, "cacoshl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CACOSL, "cacosl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CARG, "carg", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CARGF, "cargf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CARGL, "cargl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASIN, "casin", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASINF, "casinf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASINH, "casinh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASINHF, "casinhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASINHL, "casinhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CASINL, "casinl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CATAN, "catan", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CATANF, "catanf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CATANH, "catanh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CATANHF, "catanhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN 
(BUILT_IN_CATANHL, "catanhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CATANL, "catanl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOS, "ccos", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOSF, "ccosf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOSH, "ccosh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOSHF, "ccoshf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOSHL, "ccoshl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CCOSL, "ccosl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CEXP, "cexp", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CEXPF, "cexpf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CEXPL, "cexpl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) DEF_GCC_BUILTIN (BUILT_IN_CEXPI, "cexpi", BT_FN_COMPLEX_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING) DEF_GCC_BUILTIN (BUILT_IN_CEXPIF, "cexpif", BT_FN_COMPLEX_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING) DEF_GCC_BUILTIN (BUILT_IN_CEXPIL, "cexpil", BT_FN_COMPLEX_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CIMAG, "cimag", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CIMAGF, "cimagf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CIMAGL, "cimagl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CLOG, "clog", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CLOGF, "clogf", 
BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CLOGL, "clogl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CIMAG, "cimag", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CIMAGF, "cimagf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CIMAGL, "cimagl", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CLOG, "clog", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CLOGF, "clogf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CLOGL, "clogl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) DEF_EXT_C99RES_BUILTIN (BUILT_IN_CLOG10, "clog10", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) DEF_EXT_C99RES_BUILTIN (BUILT_IN_CLOG10F, "clog10f", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) DEF_EXT_C99RES_BUILTIN (BUILT_IN_CLOG10L, "clog10l", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CONJ, "conj", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CONJF, "conjf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CONJL, "conjl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CPOW, "cpow", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CPOWF, "cpowf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CPOWL, "cpowl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CPROJ, "cproj", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) 
-DEF_C99_BUILTIN (BUILT_IN_CPROJF, "cprojf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CPROJL, "cprojl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CREAL, "creal", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CREALF, "crealf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CREALL, "creall", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_C99_BUILTIN (BUILT_IN_CSIN, "csin", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSINF, "csinf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSINH, "csinh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSINHF, "csinhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSINHL, "csinhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSINL, "csinl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSQRT, "csqrt", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSQRTF, "csqrtf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CSQRTL, "csqrtl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTAN, "ctan", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTANF, "ctanf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTANH, "ctanh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTANHF, "ctanhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTANHL, "ctanhl", 
BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) -DEF_C99_BUILTIN (BUILT_IN_CTANL, "ctanl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CONJ, "conj", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CONJF, "conjf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CONJL, "conjl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPOW, "cpow", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPOWF, "cpowf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPOWL, "cpowl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPROJ, "cproj", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPROJF, "cprojf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CPROJL, "cprojl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CREAL, "creal", BT_FN_DOUBLE_COMPLEX_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CREALF, "crealf", BT_FN_FLOAT_COMPLEX_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CREALL, "creall", BT_FN_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSIN, "csin", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSINF, "csinf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSINH, "csinh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSINHF, "csinhf", 
BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSINHL, "csinhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSINL, "csinl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSQRT, "csqrt", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSQRTF, "csqrtf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CSQRTL, "csqrtl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTAN, "ctan", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTANF, "ctanf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTANH, "ctanh", BT_FN_COMPLEX_DOUBLE_COMPLEX_DOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTANHF, "ctanhf", BT_FN_COMPLEX_FLOAT_COMPLEX_FLOAT, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTANHL, "ctanhl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) +DEF_C99_COMPL_BUILTIN (BUILT_IN_CTANL, "ctanl", BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE, ATTR_MATHFN_FPROUNDING) /* Category: string/memory builtins. */ /* bcmp, bcopy and bzero have traditionally accepted NULL pointers @@ -685,7 +695,7 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSLL, "ffsll", BT_FN_INT_LONGLONG, ATTR_CONST_ DEF_EXT_LIB_BUILTIN (BUILT_IN_FORK, "fork", BT_FN_PID, ATTR_NOTHROW_LIST) DEF_GCC_BUILTIN (BUILT_IN_FRAME_ADDRESS, "frame_address", BT_FN_PTR_UINT, ATTR_NULL) /* [trans-mem]: Adjust BUILT_IN_TM_FREE if BUILT_IN_FREE is changed. 
*/ -DEF_LIB_BUILTIN (BUILT_IN_FREE, "free", BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) +DEF_LIB_BUILTIN (BUILT_IN_FREE, "free", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_GCC_BUILTIN (BUILT_IN_FROB_RETURN_ADDR, "frob_return_addr", BT_FN_PTR_PTR, ATTR_NULL) DEF_EXT_LIB_BUILTIN (BUILT_IN_GETTEXT, "gettext", BT_FN_STRING_CONST_STRING, ATTR_FORMAT_ARG_1) DEF_C99_BUILTIN (BUILT_IN_IMAXABS, "imaxabs", BT_FN_INTMAX_INTMAX, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 487f880e432..95babfa743f 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,266 @@ +2013-09-08 Joern Rennecke + + * c-common.c (same_scalar_type_ignoring_signedness): Delete. + (vector_types_compatible_elements_p): New function. + * c-common.h: (same_scalar_type_ignoring_signedness): Delete + declaration. + (vector_types_compatible_elements_p): Declare. + +2013-09-04 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::simple_type_specifier): Now + a virtual member function. + (pp_simple_type_specifier): Remove. + (pp_c_type_specifier): Likewise. + * c-pretty-print.c (c_pretty_printer::simple_type_specifier): + Rename from pp_c_type_specifier. Adjust. + (c_pretty_printer::c_pretty_printer): Do not assign to + simple_type_specifier. + +2013-09-03 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::type_id): Now a virtual + member function. + (c_pretty_printer::storage_class_specifier): Likewise. + (c_pretty_printer::initializer): Likewise. + (pp_declaration): Remove. + (pp_declaration_specifiers): Likewise. + (pp_abstract_declarator): Likewise. + (pp_declarator): Likewise. + (pp_type_id): Likewise. + (pp_statement): Likewise. + (pp_constant): Likewise. + (pp_id_expression): Likewise. + (pp_primary_expression): Likewise. + (pp_unary_expression): Likewise. + (pp_multiplicative_expression): Likewise. + (pp_conditional_expression): Likewise. + (pp_assignment_expression): Likewise. + (pp_expression): Likewise. 
+ (pp_c_type_id): Likewise. + (pp_c_storage_class_specifier): Likewise. + * c-pretty-print.c (pp_c_type_cast): Tidy. + (pp_c_pointer): Likewise. + (pp_c_type_specifier): Likewise. + (pp_c_parameter_type_list): Likewise. + (pp_c_function_definition): Likewise. + (pp_c_init_declarator): Likewise. + (pp_c_initializer_list): Likewise. + (pp_c_constructor_elts): Likewise. + (c_pretty_printer::direct_abstract_declarator): Likewise. + (c_pretty_printer::declaration_specifiers): Likewise. + (c_pretty_printer::primary_expression): Likewise. + (c_pretty_printer::postfix_expression): Likewise. + (c_pretty_printer::type_id): Rename from pp_c_type_id. + (c_pretty_printer::storage_class_specifier): Rename from + pp_c_storage_class_specifier. + (c_pretty_printer::initializer): Rename from pp_c_initializer. + (c_pretty_printer::c_pretty_printer): Do not assign to type_id, + storage_class_specifier, initializer, offset_list, flags. + +2013-08-30 Marek Polacek + + * c-ubsan.c: New file. + * c-ubsan.h: New file. + +2013-08-30 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::declaration): Now a virtual + member function. + (c_pretty_printer::declaration_specifiers): Likewise. + (c_pretty_printer::declarator): Likewise. + (c_pretty_printer::abstract_declarator): Likewise. + (c_pretty_printer::direct_abstract_declarator): Likewise. + (c_pretty_printer::direct_declarator): Likewise. + (c_pretty_printer::function_specifier): Likewise. + (pp_declaration): Adjust. + (pp_declaration_specifiers): Likewise. + (pp_abstract_declarator): Likewise. + (pp_direct_declarator): Likewise. + (pp_function_specifier): Likewise. + (pp_direct_abstract_declarator): Remove as unused. + (pp_c_declaration): Remove. + (pp_c_declaration_specifiers): Likewise. + (pp_c_declarator): Likewise. + (pp_c_direct_declarator): Likewise. + (pp_c_function_specifier): Likewise. + (pp_c_direct_abstract_declarator): Likewise. 
+ * c-pretty-print.c (c_pretty_printer::abstract_declarator): Rename + from pp_c_abstract_declarator. Adjust. + (c_pretty_printer::direct_abstract_declarator): Rename from + pp_c_direct_abstract_declarator. Adjust. + (c_pretty_printer::function_specifier): Rename from + pp_c_function_specifier. Adjust. + (c_pretty_printer::declaration_specifiers): Rename from + pp_c_declaration_specifiers. Adjust. + (c_pretty_printer::direct_declarator): Rename from + pp_c_direct_declarator. Adjust. + (c_pretty_printer::declarator): Rename from pp_c_declarator. Adjust. + (c_pretty_printer::declaration): Rename from pp_c_declaration. Adjust. + (c_pretty_printer::c_pretty_printer): Do not assign to + declaration, declaration_specifiers, declarator, + direct_declarator, direct_abstract_declarator, function_specifier. + +2013-08-26 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::unary_expression): Now a + virtual member function. + (c_pretty_printer::multiplicative_expression): Likewise. + (c_pretty_printer::conditional_expression): Likewise. + (c_pretty_printer::assignment_expression): Likewise. + (c_pretty_printer::expression): Likewise. + (pp_unary_expression): Adjust. + (pp_multiplicative_expression): Likewise. + (pp_assignment_expression): Likewise. + (pp_conditional_expression): Likewise. + (pp_expression): Likewise. + * c-pretty-print.c (c_pretty_printer::unary_expression): Rename + from pp_c_unary_expression. Adjust. + (c_pretty_printer::multiplicative_expression): Rename from + pp_c_multiplicative_expression. Adjust. + (c_pretty_printer::conditional_expression): Rename from + pp_c_conditional_expression. Adjust. + (c_pretty_printer::assignment_expression): Rename from + pp_c_assignment_expression. Adjust. + (c_pretty_printer::expression): Rename from pp_c_expression. Adjust. + (c_pretty_printer::c_pretty_printer): Do not assign to + unary_expression, multiplicative_expression, + conditional_expression, expression. 
+ +2013-08-25 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::postfix_expression): Now a + virtual member function. + (pp_postfix_expression): Adjust. + (pp_c_postfix_expression): Remove. + * c-pretty-print.c (c_pretty_printer::postfix_expression): Rename + from pp_c_postfix_expression. Adjust. + (c_pretty_printer::c_pretty_printer): Do not assign to + postfix_expression. + +2013-08-25 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::primary_expression): Now a + virtual member function. + (pp_primary_expression): Adjust. + (pp_c_primary_expression): Remove. + * c-pretty-print.c (c_pretty_printer::primary_expression): Rename + from pp_c_primary_expression. Adjust. + (pp_c_initializer_list): Use pp_primary_expression. + (c_pretty_printer::c_pretty_printer): Do not assign to + primary_expression. + +2013-08-25 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::translate_string): Declare. + * c-pretty-print.c (M_): Remove. + (c_pretty_printer::translate_string): Define. + (pp_c_type_specifier): Use it. + (pp_c_primary_expression): Likewise. + (pp_c_expression): Likewise. + +2013-08-24 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::id_expression): Now a + virtual function. + (pp_c_id_expression): Remove. + (pp_id_expression): Adjust. + * c-pretty-print.c (c_pretty_printer::id_expression): Rename from + pp_c_id_expression. Adjust. + (pp_c_postfix_expression): Use pp_id_expression. + (c_pretty_printer::c_pretty_printer): Do not assign to id_expression. + +2013-08-24 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer::constant): Now a virtual + member function. + (pp_constant): Adjust. + (pp_c_constant): Remove. + * c-pretty-print.c (c_pretty_printer::constant): Rename from + pp_c_constant. Adjust. + (pp_c_constant) + (pp_c_primary_expression): Call pp_constant in lieu of pp_c_constant. + (c_pretty_printer::c_pretty_printer): Remove assignment to constant. 
+ +2013-08-22 Gabriel Dos Reis + + * c-pretty-print.h (pp_c_pretty_printer_init): Remove. + (c_pretty_printer::c_pretty_printer): Declare. + * c-pretty-print.c (pretty_printer::c_pretty_printer): Rename from + c_pretty_printer_init. Adjust. + (print_c_tree): Do not call c_pretty_printer_init. + * c-ada-spec.c (dump_ads): Remove call to pp_construct. + +2013-08-09 Arnaud Charlet + + * c-ada-spec.c (print_ada_declaration): Prevent accessing null asm name + +2013-08-05 Paolo Carlini + + PR c++/58080 + * c-common.c (pointer_int_sum): Add bool parameter. + * c-common.h (pointer_int_sum): Adjust declaration. + +2013-08-05 Gabriel Dos Reis + + * c-pretty-print.c (print_c_tree): Simplify. Use non-static local + c_pretty_printer variable. + +2013-08-04 Gabriel Dos Reis + + * c-pretty-print.h (c_pretty_printer): Derive from pretty_printer. + (pp_base): Remove. + (pp_c_base): Likewise. Adjust users. + * c-pretty-print.c (pp_c_maybe_whitespace): Adjust. + (pp_c_whitespace): Do not call pp_base. + (pp_c_left_paren): Likewise. + (pp_c_right_paren): Likewise. + (pp_c_left_brace): Likewise. + (pp_c_right_brace): Likewise. + (pp_c_left_bracket): Likewise. + (pp_c_right_bracket): Likewise. + (pp_c_dot): Likewise. + (pp_c_ampersand): Likewise. + (pp_c_star): Likewise. + (pp_c_arrow): Likewise. + (pp_c_semicolon): Likewise. + (pp_c_complement): Likewise. + (pp_c_exclamation): Likewise. + (pp_c_direct_declarator): Likewise. + (pp_c_ws_string): Likewise. + (pp_c_identifier): Likewise. + (pp_c_statement): Likewise. + (print_c_tree): Likewise. + +2013-08-04 Ed Smith-Rowland <3dw4rd@verizon.net> + + PR c++/58072 + * c-common.c (c_parse_error): Catch user-defined literal tokens and + provide useful error strings. + +2013-08-03 Gabriel Dos Reis + + * c-ada-spec.c (pp_ada_tree_identifier): Use specialized pretty + printer functions instead of pp_string or operators and punctuators. + (dump_generic_ada_node): Likewise. + * c-pretty-print.c (pp_c_type_specifier): Likewise. 
+ (pp_c_relational_expression): Likewise. + (pp_c_logical_or_expression): Likewise. + +2013-08-03 Gabriel Dos Reis + + * c-ada-spec.c (print_ada_macros): Use specialized pretty printer + functions instead of pp_character. + (pp_ada_tree_identifier): Likewise. + (dump_ada_double_name): Likewise. + (dump_ada_function_declaration): Likewise. + (dump_ada_array_domains): Likewise. + (dump_template_types): Likewise. + (dump_generic_ada_node): Likewise. + (print_ada_declaration): Likewise. + (print_ada_struct_decl): Likewise. + * c-pretty-print.c (pp_c_integer_constant): Likewise. + 2013-07-23 Tom Tromey * c-common.h (enum rid) : New constant. diff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c index 21cbfe94fba..eac57838752 100644 --- a/gcc/c-family/c-ada-spec.c +++ b/gcc/c-family/c-ada-spec.c @@ -418,7 +418,7 @@ print_ada_macros (pretty_printer *pp, cpp_hashnode **macros, int max_ada_macros) pp_string (pp, "; -- "); pp_string (pp, sloc.file); - pp_character (pp, ':'); + pp_colon (pp); pp_scalar (pp, "%d", sloc.line); pp_newline (pp); } @@ -1253,7 +1253,7 @@ pp_ada_tree_identifier (pretty_printer *buffer, tree node, tree type, { append_withs (s1, limited_access); pp_string (buffer, s1); - pp_character (buffer, '.'); + pp_dot (buffer); } free (s1); } @@ -1266,7 +1266,7 @@ pp_ada_tree_identifier (pretty_printer *buffer, tree node, tree type, { pp_string (buffer, "Class_"); pp_string (buffer, s); - pp_string (buffer, "."); + pp_dot (buffer); } } @@ -1375,7 +1375,7 @@ dump_ada_double_name (pretty_printer *buffer, tree t1, tree t2, const char *s) pp_scalar (buffer, "%d", TYPE_UID (TREE_TYPE (t1))); } - pp_character (buffer, '_'); + pp_underscore (buffer); if (DECL_NAME (t1)) pp_ada_tree_identifier (buffer, DECL_NAME (t2), t2, false); @@ -1489,7 +1489,7 @@ dump_ada_function_declaration (pretty_printer *buffer, tree func, if (num_args > 0) { pp_space (buffer); - pp_character (buffer, '('); + pp_left_paren (buffer); } if (TREE_CODE (func) == FUNCTION_DECL) @@ 
-1550,7 +1550,7 @@ dump_ada_function_declaration (pretty_printer *buffer, tree func, if (num < num_args) { - pp_character (buffer, ';'); + pp_semicolon (buffer); if (num_args > 2) newline_and_indent (buffer, spc + INDENT_INCR); @@ -1566,7 +1566,7 @@ dump_ada_function_declaration (pretty_printer *buffer, tree func, } if (num_args > 0) - pp_character (buffer, ')'); + pp_right_paren (buffer); return num_args; } @@ -1577,7 +1577,7 @@ static void dump_ada_array_domains (pretty_printer *buffer, tree node, int spc) { int first = 1; - pp_character (buffer, '('); + pp_left_paren (buffer); for (; TREE_CODE (node) == ARRAY_TYPE; node = TREE_TYPE (node)) { @@ -1606,7 +1606,7 @@ dump_ada_array_domains (pretty_printer *buffer, tree node, int spc) else pp_string (buffer, "size_t"); } - pp_character (buffer, ')'); + pp_right_paren (buffer); } /* Dump in BUFFER file:line information related to NODE. */ @@ -1626,7 +1626,7 @@ dump_sloc (pretty_printer *buffer, tree node) if (xloc.file) { pp_string (buffer, xloc.file); - pp_string (buffer, ":"); + pp_colon (buffer); pp_decimal_int (buffer, xloc.line); } } @@ -1706,7 +1706,7 @@ dump_template_types (pretty_printer *buffer, tree types, for (i = 0; i < len; i++) { tree elem = TREE_VEC_ELT (types, i); - pp_character (buffer, '_'); + pp_underscore (buffer); if (!dump_generic_ada_node (buffer, elem, 0, cpp_check, spc, false, true)) { pp_string (buffer, "unknown"); @@ -1886,14 +1886,14 @@ dump_generic_ada_node (pretty_printer *buffer, tree node, tree type, bool first = true; spc += INDENT_INCR; newline_and_indent (buffer, spc - 1); - pp_string (buffer, "("); + pp_left_paren (buffer); for (; value; value = TREE_CHAIN (value)) { if (first) first = false; else { - pp_string (buffer, ","); + pp_comma (buffer); newline_and_indent (buffer, spc); } @@ -1907,7 +1907,7 @@ dump_generic_ada_node (pretty_printer *buffer, tree node, tree type, dump_generic_ada_node (buffer, DECL_NAME (type) ? 
type : TYPE_NAME (node), type, cpp_check, spc, 0, true); - pp_string (buffer, ")"); + pp_right_paren (buffer); } else { @@ -2032,7 +2032,7 @@ dump_generic_ada_node (pretty_printer *buffer, tree node, tree type, pp_string (buffer, "pragma Convention (C, "); dump_generic_ada_node (buffer, type, 0, cpp_check, spc, false, true); - pp_string (buffer, ")"); + pp_right_paren (buffer); } } else @@ -2215,7 +2215,7 @@ dump_generic_ada_node (pretty_printer *buffer, tree node, tree type, if (tree_int_cst_sgn (val) < 0) { - pp_character (buffer, '-'); + pp_minus (buffer); high = ~high + !low; low = -low; } @@ -2900,7 +2900,7 @@ print_ada_declaration (pretty_printer *buffer, tree t, tree type, pp_string (buffer, " -- "); dump_sloc (buffer, t); - if (is_abstract) + if (is_abstract || !DECL_ASSEMBLER_NAME (t)) return 1; newline_and_indent (buffer, spc); @@ -2986,7 +2986,7 @@ print_ada_declaration (pretty_printer *buffer, tree t, tree type, dump_generic_ada_node (buffer, TYPE_NAME (TREE_TYPE (t)), type, cpp_check, spc, false, true); - pp_character (buffer, ')'); + pp_right_paren (buffer); print_ada_methods (buffer, TREE_TYPE (t), cpp_check, spc); } @@ -3226,7 +3226,7 @@ print_ada_struct_decl (pretty_printer *buffer, tree node, tree type, dump_generic_ada_node (buffer, TREE_TYPE (type), type, cpp_check, spc, false, true); package_prefix = true; - pp_character (buffer, ')'); + pp_right_paren (buffer); if (is_union) { @@ -3236,7 +3236,7 @@ print_ada_struct_decl (pretty_printer *buffer, tree node, tree type, dump_generic_ada_node (buffer, TREE_TYPE (type), type, cpp_check, spc, false, true); - pp_character (buffer, ')'); + pp_right_paren (buffer); } if (bitfield_used) @@ -3246,7 +3246,7 @@ print_ada_struct_decl (pretty_printer *buffer, tree node, tree type, pp_string (buffer, "pragma Pack ("); dump_generic_ada_node (buffer, TREE_TYPE (type), type, cpp_check, spc, false, true); - pp_character (buffer, ')'); + pp_right_paren (buffer); bitfield_used = false; } @@ -3304,7 +3304,6 @@ 
dump_ads (const char *source_file, { pretty_printer pp; - pp_construct (&pp, NULL, 0); pp_needs_newline (&pp) = true; pp.buffer->stream = f; diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 7bba376f369..62aa9fcec2b 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -2199,6 +2199,14 @@ check_main_parameter_types (tree decl) "%q+D takes only zero or two arguments", decl); } +/* vector_targets_convertible_p is used for vector pointer types. The + callers perform various checks that the qualifiers are satisfactory, + while OTOH vector_targets_convertible_p ignores the number of elements + in the vectors. That's fine with vector pointers as we can consider, + say, a vector of 8 elements as two consecutive vectors of 4 elements, + and that does not require and conversion of the pointer values. + In contrast, vector_types_convertible_p and + vector_types_compatible_elements_p are used for vector value types. */ /* True if pointers to distinct types T1 and T2 can be converted to each other without an explicit cast. Only returns true for opaque vector types. */ @@ -2213,6 +2221,17 @@ vector_targets_convertible_p (const_tree t1, const_tree t2) return false; } +/* vector_types_convertible_p is used for vector value types. + It could in principle call vector_targets_convertible_p as a subroutine, + but then the check for vector type would be duplicated with its callers, + and also the purpose of vector_targets_convertible_p would become + muddled. + Where vector_types_convertible_p returns true, a conversion might still be + needed to make the types match. + In contrast, vector_targets_convertible_p is used for vector pointer + values, and vector_types_compatible_elements_p is used specifically + in the context for binary operators, as a check if use is possible without + conversion. */ /* True if vector types T1 and T2 can be converted to each other without an explicit cast. 
If EMIT_LAX_NOTE is true, and T1 and T2 can only be converted with -flax-vector-conversions yet that is not @@ -4284,7 +4303,7 @@ shorten_compare (tree *op0_ptr, tree *op1_ptr, tree *restype_ptr, tree pointer_int_sum (location_t loc, enum tree_code resultcode, - tree ptrop, tree intop) + tree ptrop, tree intop, bool complain) { tree size_exp, ret; @@ -4293,14 +4312,20 @@ pointer_int_sum (location_t loc, enum tree_code resultcode, if (TREE_CODE (TREE_TYPE (result_type)) == VOID_TYPE) { - pedwarn (loc, OPT_Wpointer_arith, - "pointer of type % used in arithmetic"); + if (complain && warn_pointer_arith) + pedwarn (loc, OPT_Wpointer_arith, + "pointer of type % used in arithmetic"); + else if (!complain) + return error_mark_node; size_exp = integer_one_node; } else if (TREE_CODE (TREE_TYPE (result_type)) == FUNCTION_TYPE) { - pedwarn (loc, OPT_Wpointer_arith, - "pointer to a function used in arithmetic"); + if (complain && warn_pointer_arith) + pedwarn (loc, OPT_Wpointer_arith, + "pointer to a function used in arithmetic"); + else if (!complain) + return error_mark_node; size_exp = integer_one_node; } else @@ -9352,6 +9377,18 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, free (message); message = NULL; } + else if (token_type == CPP_CHAR_USERDEF + || token_type == CPP_WCHAR_USERDEF + || token_type == CPP_CHAR16_USERDEF + || token_type == CPP_CHAR32_USERDEF) + message = catenate_messages (gmsgid, + " before user-defined character literal"); + else if (token_type == CPP_STRING_USERDEF + || token_type == CPP_WSTRING_USERDEF + || token_type == CPP_STRING16_USERDEF + || token_type == CPP_STRING32_USERDEF + || token_type == CPP_UTF8STRING_USERDEF) + message = catenate_messages (gmsgid, " before user-defined string literal"); else if (token_type == CPP_STRING || token_type == CPP_WSTRING || token_type == CPP_STRING16 @@ -10672,20 +10709,45 @@ resolve_overloaded_builtin (location_t loc, tree function, } } -/* Ignoring their sign, return true if two scalar 
types are the same. */ +/* vector_types_compatible_elements_p is used in type checks of vectors + values used as operands of binary operators. Where it returns true, and + the other checks of the caller succeed (being vector types in the first + place, and matching number of elements), we can just treat the types + as essentially the same. + Contrast with vector_targets_convertible_p, which is used for vector + pointer types, and vector_types_convertible_p, which will allow + language-specific matches under the control of flag_lax_vector_conversions, + and might still require a conversion. */ +/* True if vector types T1 and T2 can be inputs to the same binary + operator without conversion. + We don't check the overall vector size here because some of our callers + want to give different error messages when the vectors are compatible + except for the element count. */ + bool -same_scalar_type_ignoring_signedness (tree t1, tree t2) +vector_types_compatible_elements_p (tree t1, tree t2) { + bool opaque = TYPE_VECTOR_OPAQUE (t1) || TYPE_VECTOR_OPAQUE (t2); + t1 = TREE_TYPE (t1); + t2 = TREE_TYPE (t2); + enum tree_code c1 = TREE_CODE (t1), c2 = TREE_CODE (t2); gcc_assert ((c1 == INTEGER_TYPE || c1 == REAL_TYPE || c1 == FIXED_POINT_TYPE) && (c2 == INTEGER_TYPE || c2 == REAL_TYPE || c2 == FIXED_POINT_TYPE)); + t1 = c_common_signed_type (t1); + t2 = c_common_signed_type (t2); /* Equality works here because c_common_signed_type uses TYPE_MAIN_VARIANT. */ - return c_common_signed_type (t1) - == c_common_signed_type (t2); + if (t1 == t2) + return true; + if (opaque && c1 == c2 + && (c1 == INTEGER_TYPE || c1 == REAL_TYPE) + && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)) + return true; + return false; } /* Check for missing format attributes on function pointers. 
LTYPE is diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index dc430c3859c..722ba6e5c15 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -766,7 +766,7 @@ extern void warn_logical_operator (location_t, enum tree_code, tree, enum tree_code, tree, enum tree_code, tree); extern void check_main_parameter_types (tree decl); extern bool c_determine_visibility (tree); -extern bool same_scalar_type_ignoring_signedness (tree, tree); +extern bool vector_types_compatible_elements_p (tree, tree); extern void mark_valid_location_for_stdc_pragma (bool); extern bool valid_location_for_stdc_pragma_p (void); extern void set_float_const_decimal64 (void); @@ -790,7 +790,8 @@ extern tree shorten_binary_op (tree result_type, tree op0, tree op1, bool bitwis and, if so, perhaps change them both back to their original type. */ extern tree shorten_compare (tree *, tree *, tree *, enum tree_code *); -extern tree pointer_int_sum (location_t, enum tree_code, tree, tree); +extern tree pointer_int_sum (location_t, enum tree_code, tree, tree, + bool = true); /* Add qualifiers to a type, in the fashion for C. */ extern tree c_build_qualified_type (tree, int); diff --git a/gcc/c-family/c-pretty-print.c b/gcc/c-family/c-pretty-print.c index b8af90c053c..d0283e8af4d 100644 --- a/gcc/c-family/c-pretty-print.c +++ b/gcc/c-family/c-pretty-print.c @@ -29,10 +29,6 @@ along with GCC; see the file COPYING3. If not see #include "tree-iterator.h" #include "diagnostic.h" -/* Translate if being used for diagnostics, but not for dump files or - __PRETTY_FUNCTION. */ -#define M_(msgid) (pp_translate_identifiers (pp) ? _(msgid) : (msgid)) - /* The pretty-printer code is primarily designed to closely follow (GNU) C and C++ grammars. That is to be contrasted with spaghetti codes we used to have in the past. Following a structured @@ -43,7 +39,7 @@ along with GCC; see the file COPYING3. 
If not see #define pp_c_maybe_whitespace(PP) \ do { \ - if (pp_base (PP)->padding == pp_before) \ + if ((PP)->padding == pp_before) \ pp_c_whitespace (PP); \ } while (0) @@ -54,7 +50,6 @@ static void pp_c_char (c_pretty_printer *, int); static void pp_c_initializer_list (c_pretty_printer *, tree); static void pp_c_brace_enclosed_initializer_list (c_pretty_printer *, tree); -static void pp_c_multiplicative_expression (c_pretty_printer *, tree); static void pp_c_additive_expression (c_pretty_printer *, tree); static void pp_c_shift_expression (c_pretty_printer *, tree); static void pp_c_relational_expression (c_pretty_printer *, tree); @@ -63,8 +58,6 @@ static void pp_c_and_expression (c_pretty_printer *, tree); static void pp_c_exclusive_or_expression (c_pretty_printer *, tree); static void pp_c_inclusive_or_expression (c_pretty_printer *, tree); static void pp_c_logical_and_expression (c_pretty_printer *, tree); -static void pp_c_conditional_expression (c_pretty_printer *, tree); -static void pp_c_assignment_expression (c_pretty_printer *, tree); /* declarations. 
*/ @@ -75,98 +68,98 @@ void pp_c_whitespace (c_pretty_printer *pp) { pp_space (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_left_paren (c_pretty_printer *pp) { pp_left_paren (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_right_paren (c_pretty_printer *pp) { pp_right_paren (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_left_brace (c_pretty_printer *pp) { pp_left_brace (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_right_brace (c_pretty_printer *pp) { pp_right_brace (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_left_bracket (c_pretty_printer *pp) { pp_left_bracket (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_right_bracket (c_pretty_printer *pp) { pp_right_bracket (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_dot (c_pretty_printer *pp) { pp_dot (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_ampersand (c_pretty_printer *pp) { pp_ampersand (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_star (c_pretty_printer *pp) { pp_star (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_arrow (c_pretty_printer *pp) { pp_arrow (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_semicolon (c_pretty_printer *pp) { pp_semicolon (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_complement (c_pretty_printer *pp) { pp_complement (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void pp_c_exclamation (c_pretty_printer *pp) { pp_exclamation (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } /* Print out the external representation of QUALIFIERS. 
*/ @@ -215,7 +208,7 @@ static void pp_c_type_cast (c_pretty_printer *pp, tree t) { pp_c_left_paren (pp); - pp_type_id (pp, t); + pp->type_id (t); pp_c_right_paren (pp); } @@ -303,7 +296,7 @@ pp_c_pointer (c_pretty_printer *pp, tree t) /* ??? This node is now in GENERIC and so shouldn't be here. But we'll fix that later. */ case DECL_EXPR: - pp_declaration (pp, DECL_EXPR_DECL (t)); + pp->declaration (DECL_EXPR_DECL (t)); pp_needs_newline (pp) = true; break; @@ -312,7 +305,10 @@ pp_c_pointer (c_pretty_printer *pp, tree t) } } -/* type-specifier: +/* simple-type-specifier: + type-specifier + + type-specifier: void char short @@ -335,17 +331,17 @@ pp_c_pointer (c_pretty_printer *pp, tree t) __vector__ */ void -pp_c_type_specifier (c_pretty_printer *pp, tree t) +c_pretty_printer::simple_type_specifier (tree t) { const enum tree_code code = TREE_CODE (t); switch (code) { case ERROR_MARK: - pp_c_ws_string (pp, M_("")); + translate_string (""); break; case IDENTIFIER_NODE: - pp_c_identifier (pp, IDENTIFIER_POINTER (t)); + pp_c_identifier (this, IDENTIFIER_POINTER (t)); break; case VOID_TYPE: @@ -356,7 +352,7 @@ pp_c_type_specifier (c_pretty_printer *pp, tree t) if (TYPE_NAME (t)) { t = TYPE_NAME (t); - pp_c_type_specifier (pp, t); + simple_type_specifier (t); } else { @@ -367,11 +363,11 @@ pp_c_type_specifier (c_pretty_printer *pp, tree t) t = c_common_type_for_mode (TYPE_MODE (t), TYPE_UNSIGNED (t)); if (TYPE_NAME (t)) { - pp_c_type_specifier (pp, t); + simple_type_specifier (t); if (TYPE_PRECISION (t) != prec) { - pp_string (pp, ":"); - pp_decimal_int (pp, prec); + pp_colon (this); + pp_decimal_int (this, prec); } } else @@ -379,52 +375,52 @@ pp_c_type_specifier (c_pretty_printer *pp, tree t) switch (code) { case INTEGER_TYPE: - pp_string (pp, (TYPE_UNSIGNED (t) - ? 
M_(""); + pp_decimal_int (this, prec); + pp_greater (this); } } break; case TYPE_DECL: if (DECL_NAME (t)) - pp_id_expression (pp, t); + id_expression (t); else - pp_c_ws_string (pp, M_("")); + translate_string (""); break; case UNION_TYPE: case RECORD_TYPE: case ENUMERAL_TYPE: if (code == UNION_TYPE) - pp_c_ws_string (pp, "union"); + pp_c_ws_string (this, "union"); else if (code == RECORD_TYPE) - pp_c_ws_string (pp, "struct"); + pp_c_ws_string (this, "struct"); else if (code == ENUMERAL_TYPE) - pp_c_ws_string (pp, "enum"); + pp_c_ws_string (this, "enum"); else - pp_c_ws_string (pp, M_("")); + translate_string (""); if (TYPE_NAME (t)) - pp_id_expression (pp, TYPE_NAME (t)); + id_expression (TYPE_NAME (t)); else - pp_c_ws_string (pp, M_("")); + translate_string (""); break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -438,7 +434,7 @@ pp_c_type_specifier (c_pretty_printer *pp, tree t) function declarations, this routine prints not just the specifier-qualifier-list of such entities or types of such entities, but also the 'pointer' production part of their declarators. The - remaining part is done by pp_declarator or pp_c_abstract_declarator. */ + remaining part is done by declarator() or abstract_declarator(). */ void pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) @@ -490,7 +486,7 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) break; default: - pp_simple_type_specifier (pp, t); + pp->simple_type_specifier (t); break; } if ((pp->flags & pp_c_flag_gnu_v3) && code != POINTER_TYPE) @@ -525,12 +521,12 @@ pp_c_parameter_type_list (c_pretty_printer *pp, tree t) if (!first) pp_separate_with (pp, ','); first = false; - pp_declaration_specifiers - (pp, want_parm_decl ? parms : TREE_VALUE (parms)); + pp->declaration_specifiers + (want_parm_decl ? 
parms : TREE_VALUE (parms)); if (want_parm_decl) - pp_declarator (pp, parms); + pp->declarator (parms); else - pp_abstract_declarator (pp, TREE_VALUE (parms)); + pp->abstract_declarator (TREE_VALUE (parms)); } } pp_c_right_paren (pp); @@ -540,18 +536,18 @@ pp_c_parameter_type_list (c_pretty_printer *pp, tree t) pointer pointer(opt) direct-abstract-declarator */ -static void -pp_c_abstract_declarator (c_pretty_printer *pp, tree t) +void +c_pretty_printer::abstract_declarator (tree t) { if (TREE_CODE (t) == POINTER_TYPE) { if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE) - pp_c_right_paren (pp); + pp_c_right_paren (this); t = TREE_TYPE (t); } - pp_direct_abstract_declarator (pp, t); + direct_abstract_declarator (t); } /* direct-abstract-declarator: @@ -561,34 +557,34 @@ pp_c_abstract_declarator (c_pretty_printer *pp, tree t) direct-abstract-declarator(opt) ( parameter-type-list(opt) ) */ void -pp_c_direct_abstract_declarator (c_pretty_printer *pp, tree t) +c_pretty_printer::direct_abstract_declarator (tree t) { switch (TREE_CODE (t)) { case POINTER_TYPE: - pp_abstract_declarator (pp, t); + abstract_declarator (t); break; case FUNCTION_TYPE: - pp_c_parameter_type_list (pp, t); - pp_direct_abstract_declarator (pp, TREE_TYPE (t)); + pp_c_parameter_type_list (this, t); + direct_abstract_declarator (TREE_TYPE (t)); break; case ARRAY_TYPE: - pp_c_left_bracket (pp); + pp_c_left_bracket (this); if (TYPE_DOMAIN (t) && TYPE_MAX_VALUE (TYPE_DOMAIN (t))) { tree maxval = TYPE_MAX_VALUE (TYPE_DOMAIN (t)); tree type = TREE_TYPE (maxval); if (host_integerp (maxval, 0)) - pp_wide_integer (pp, tree_low_cst (maxval, 0) + 1); + pp_wide_integer (this, tree_low_cst (maxval, 0) + 1); else - pp_expression (pp, fold_build2 (PLUS_EXPR, type, maxval, - build_int_cst (type, 1))); + expression (fold_build2 (PLUS_EXPR, type, maxval, + build_int_cst (type, 1))); } - pp_c_right_bracket (pp); - pp_direct_abstract_declarator (pp, TREE_TYPE (t)); + 
pp_c_right_bracket (this); + direct_abstract_declarator (TREE_TYPE (t)); break; case IDENTIFIER_NODE: @@ -606,7 +602,7 @@ pp_c_direct_abstract_declarator (c_pretty_printer *pp, tree t) break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -615,10 +611,10 @@ pp_c_direct_abstract_declarator (c_pretty_printer *pp, tree t) specifier-qualifier-list abstract-declarator(opt) */ void -pp_c_type_id (c_pretty_printer *pp, tree t) +c_pretty_printer::type_id (tree t) { - pp_c_specifier_qualifier_list (pp, t); - pp_abstract_declarator (pp, t); + pp_c_specifier_qualifier_list (this, t); + abstract_declarator (t); } /* storage-class-specifier: @@ -629,16 +625,16 @@ pp_c_type_id (c_pretty_printer *pp, tree t) register */ void -pp_c_storage_class_specifier (c_pretty_printer *pp, tree t) +c_pretty_printer::storage_class_specifier (tree t) { if (TREE_CODE (t) == TYPE_DECL) - pp_c_ws_string (pp, "typedef"); + pp_c_ws_string (this, "typedef"); else if (DECL_P (t)) { if (DECL_REGISTER (t)) - pp_c_ws_string (pp, "register"); + pp_c_ws_string (this, "register"); else if (TREE_STATIC (t) && TREE_CODE (t) == VAR_DECL) - pp_c_ws_string (pp, "static"); + pp_c_ws_string (this, "static"); } } @@ -646,10 +642,10 @@ pp_c_storage_class_specifier (c_pretty_printer *pp, tree t) inline */ void -pp_c_function_specifier (c_pretty_printer *pp, tree t) +c_pretty_printer::function_specifier (tree t) { if (TREE_CODE (t) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (t)) - pp_c_ws_string (pp, "inline"); + pp_c_ws_string (this, "inline"); } /* declaration-specifiers: @@ -659,11 +655,11 @@ pp_c_function_specifier (c_pretty_printer *pp, tree t) function-specifier declaration-specifiers(opt) */ void -pp_c_declaration_specifiers (c_pretty_printer *pp, tree t) +c_pretty_printer::declaration_specifiers (tree t) { - pp_storage_class_specifier (pp, t); - pp_function_specifier (pp, t); - pp_c_specifier_qualifier_list (pp, DECL_P (t) ? 
TREE_TYPE (t) : t); + storage_class_specifier (t); + function_specifier (t); + pp_c_specifier_qualifier_list (this, DECL_P (t) ? TREE_TYPE (t) : t); } /* direct-declarator @@ -677,7 +673,7 @@ pp_c_declaration_specifiers (c_pretty_printer *pp, tree t) direct-declarator ( identifier-list(opt) ) */ void -pp_c_direct_declarator (c_pretty_printer *pp, tree t) +c_pretty_printer::direct_declarator (tree t) { switch (TREE_CODE (t)) { @@ -686,29 +682,29 @@ pp_c_direct_declarator (c_pretty_printer *pp, tree t) case TYPE_DECL: case FIELD_DECL: case LABEL_DECL: - pp_c_space_for_pointer_operator (pp, TREE_TYPE (t)); - pp_c_tree_decl_identifier (pp, t); + pp_c_space_for_pointer_operator (this, TREE_TYPE (t)); + pp_c_tree_decl_identifier (this, t); break; case ARRAY_TYPE: case POINTER_TYPE: - pp_abstract_declarator (pp, TREE_TYPE (t)); + abstract_declarator (TREE_TYPE (t)); break; case FUNCTION_TYPE: - pp_parameter_list (pp, t); - pp_abstract_declarator (pp, TREE_TYPE (t)); + pp_parameter_list (this, t); + abstract_declarator (TREE_TYPE (t)); break; case FUNCTION_DECL: - pp_c_space_for_pointer_operator (pp, TREE_TYPE (TREE_TYPE (t))); - pp_c_tree_decl_identifier (pp, t); - if (pp_c_base (pp)->flags & pp_c_flag_abstract) - pp_abstract_declarator (pp, TREE_TYPE (t)); + pp_c_space_for_pointer_operator (this, TREE_TYPE (TREE_TYPE (t))); + pp_c_tree_decl_identifier (this, t); + if (flags & pp_c_flag_abstract) + abstract_declarator (TREE_TYPE (t)); else { - pp_parameter_list (pp, t); - pp_abstract_declarator (pp, TREE_TYPE (TREE_TYPE (t))); + pp_parameter_list (this, t); + abstract_declarator (TREE_TYPE (TREE_TYPE (t))); } break; @@ -721,7 +717,7 @@ pp_c_direct_declarator (c_pretty_printer *pp, tree t) break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -731,7 +727,7 @@ pp_c_direct_declarator (c_pretty_printer *pp, tree t) pointer(opt) direct-declarator */ void -pp_c_declarator (c_pretty_printer *pp, tree t) +c_pretty_printer::declarator (tree 
t) { switch (TREE_CODE (t)) { @@ -750,12 +746,12 @@ pp_c_declarator (c_pretty_printer *pp, tree t) case FUNCTION_TYPE: case FUNCTION_DECL: case TYPE_DECL: - pp_direct_declarator (pp, t); + direct_declarator (t); break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -764,10 +760,10 @@ pp_c_declarator (c_pretty_printer *pp, tree t) declaration-specifiers init-declarator-list(opt) ; */ void -pp_c_declaration (c_pretty_printer *pp, tree t) +c_pretty_printer::declaration (tree t) { - pp_declaration_specifiers (pp, t); - pp_c_init_declarator (pp, t); + declaration_specifiers (t); + pp_c_init_declarator (this, t); } /* Pretty-print ATTRIBUTES using GNU C extension syntax. */ @@ -841,10 +837,10 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) void pp_c_function_definition (c_pretty_printer *pp, tree t) { - pp_declaration_specifiers (pp, t); - pp_declarator (pp, t); + pp->declaration_specifiers (t); + pp->declarator (t); pp_needs_newline (pp) = true; - pp_statement (pp, DECL_SAVED_TREE (t)); + pp->statement (DECL_SAVED_TREE (t)); pp_newline_and_flush (pp); } @@ -920,7 +916,7 @@ pp_c_integer_constant (c_pretty_printer *pp, tree i) HOST_WIDE_INT high = TREE_INT_CST_HIGH (i); if (tree_int_cst_sgn (i) < 0) { - pp_character (pp, '-'); + pp_minus (pp); high = ~high + !low; low = -low; } @@ -1004,7 +1000,7 @@ pp_c_enumeration_constant (c_pretty_printer *pp, tree e) ; if (value != NULL_TREE) - pp_id_expression (pp, TREE_PURPOSE (value)); + pp->id_expression (TREE_PURPOSE (value)); else { /* Value must have been cast. 
*/ @@ -1104,7 +1100,7 @@ pp_c_complex_expr (c_pretty_printer *pp, tree e) == TREE_OPERAND (TREE_OPERAND (imagexpr, 0), 0)) { pp_c_type_cast (pp, type); - pp_expression (pp, TREE_OPERAND (TREE_OPERAND (realexpr, 0), 0)); + pp->expression (TREE_OPERAND (TREE_OPERAND (realexpr, 0), 0)); return; } @@ -1115,7 +1111,7 @@ pp_c_complex_expr (c_pretty_printer *pp, tree e) pp_c_type_cast (pp, type); if (TREE_CODE (realexpr) == NOP_EXPR) realexpr = TREE_OPERAND (realexpr, 0); - pp_expression (pp, realexpr); + pp->expression (realexpr); return; } @@ -1130,7 +1126,7 @@ pp_c_complex_expr (c_pretty_printer *pp, tree e) character-constant */ void -pp_c_constant (c_pretty_printer *pp, tree e) +c_pretty_printer::constant (tree e) { const enum tree_code code = TREE_CODE (e); @@ -1140,38 +1136,38 @@ pp_c_constant (c_pretty_printer *pp, tree e) { tree type = TREE_TYPE (e); if (type == boolean_type_node) - pp_c_bool_constant (pp, e); + pp_c_bool_constant (this, e); else if (type == char_type_node) - pp_c_character_constant (pp, e); + pp_c_character_constant (this, e); else if (TREE_CODE (type) == ENUMERAL_TYPE - && pp_c_enumeration_constant (pp, e)) + && pp_c_enumeration_constant (this, e)) ; else - pp_c_integer_constant (pp, e); + pp_c_integer_constant (this, e); } break; case REAL_CST: - pp_c_floating_constant (pp, e); + pp_c_floating_constant (this, e); break; case FIXED_CST: - pp_c_fixed_constant (pp, e); + pp_c_fixed_constant (this, e); break; case STRING_CST: - pp_c_string_literal (pp, e); + pp_c_string_literal (this, e); break; case COMPLEX_CST: /* Sometimes, we are confused and we think a complex literal is a constant. Such thing is a compound literal which grammatically belongs to postfix-expr production. 
*/ - pp_c_compound_literal (pp, e); + pp_c_compound_literal (this, e); break; default: - pp_unsupported_tree (pp, e); + pp_unsupported_tree (this, e); break; } } @@ -1184,7 +1180,16 @@ pp_c_ws_string (c_pretty_printer *pp, const char *str) { pp_c_maybe_whitespace (pp); pp_string (pp, str); - pp_base (pp)->padding = pp_before; + pp->padding = pp_before; +} + +void +c_pretty_printer::translate_string (const char *gmsgid) +{ + if (pp_translate_identifiers (this)) + pp_c_ws_string (this, _(gmsgid)); + else + pp_c_ws_string (this, gmsgid); } /* Pretty-print an IDENTIFIER_NODE, which may contain UTF-8 sequences @@ -1196,7 +1201,7 @@ pp_c_identifier (c_pretty_printer *pp, const char *id) { pp_c_maybe_whitespace (pp); pp_identifier (pp, id); - pp_base (pp)->padding = pp_before; + pp->padding = pp_before; } /* Pretty-print a C primary-expression. @@ -1207,7 +1212,7 @@ pp_c_identifier (c_pretty_printer *pp, const char *id) ( expression ) */ void -pp_c_primary_expression (c_pretty_printer *pp, tree e) +c_pretty_printer::primary_expression (tree e) { switch (TREE_CODE (e)) { @@ -1217,49 +1222,49 @@ pp_c_primary_expression (c_pretty_printer *pp, tree e) case CONST_DECL: case FUNCTION_DECL: case LABEL_DECL: - pp_c_tree_decl_identifier (pp, e); + pp_c_tree_decl_identifier (this, e); break; case IDENTIFIER_NODE: - pp_c_tree_identifier (pp, e); + pp_c_tree_identifier (this, e); break; case ERROR_MARK: - pp_c_ws_string (pp, M_("")); + translate_string (""); break; case RESULT_DECL: - pp_c_ws_string (pp, M_("")); + translate_string (""); break; case INTEGER_CST: case REAL_CST: case FIXED_CST: case STRING_CST: - pp_c_constant (pp, e); + constant (e); break; case TARGET_EXPR: - pp_c_ws_string (pp, "__builtin_memcpy"); - pp_c_left_paren (pp); - pp_ampersand (pp); - pp_primary_expression (pp, TREE_OPERAND (e, 0)); - pp_separate_with (pp, ','); - pp_ampersand (pp); - pp_initializer (pp, TREE_OPERAND (e, 1)); + pp_c_ws_string (this, "__builtin_memcpy"); + pp_c_left_paren (this); + 
pp_ampersand (this); + primary_expression (TREE_OPERAND (e, 0)); + pp_separate_with (this, ','); + pp_ampersand (this); + initializer (TREE_OPERAND (e, 1)); if (TREE_OPERAND (e, 2)) { - pp_separate_with (pp, ','); - pp_c_expression (pp, TREE_OPERAND (e, 2)); + pp_separate_with (this, ','); + expression (TREE_OPERAND (e, 2)); } - pp_c_right_paren (pp); + pp_c_right_paren (this); break; default: /* FIXME: Make sure we won't get into an infinite loop. */ - pp_c_left_paren (pp); - pp_expression (pp, e); - pp_c_right_paren (pp); + pp_c_left_paren (this); + expression (e); + pp_c_right_paren (this); break; } } @@ -1270,13 +1275,13 @@ pp_c_primary_expression (c_pretty_printer *pp, tree e) { initializer-list } { initializer-list , } */ -static void -pp_c_initializer (c_pretty_printer *pp, tree e) +void +c_pretty_printer::initializer (tree e) { if (TREE_CODE (e) == CONSTRUCTOR) - pp_c_brace_enclosed_initializer_list (pp, e); + pp_c_brace_enclosed_initializer_list (this, e); else - pp_expression (pp, e); + expression (e); } /* init-declarator: @@ -1286,7 +1291,7 @@ pp_c_initializer (c_pretty_printer *pp, tree e) void pp_c_init_declarator (c_pretty_printer *pp, tree t) { - pp_declarator (pp, t); + pp->declarator (t); /* We don't want to output function definitions here. There are handled elsewhere (and the syntactic form is bogus anyway). 
*/ if (TREE_CODE (t) != FUNCTION_DECL && DECL_INITIAL (t)) @@ -1299,7 +1304,7 @@ pp_c_init_declarator (c_pretty_printer *pp, tree t) if (TREE_CODE (init) == TREE_LIST) { pp_c_left_paren (pp); - pp_expression (pp, TREE_VALUE (init)); + pp->expression (TREE_VALUE (init)); pp_right_paren (pp); } else @@ -1307,7 +1312,7 @@ pp_c_init_declarator (c_pretty_printer *pp, tree t) pp_space (pp); pp_equal (pp); pp_space (pp); - pp_c_initializer (pp, init); + pp->initializer (init); } } } @@ -1351,19 +1356,19 @@ pp_c_initializer_list (c_pretty_printer *pp, tree e) if (code == RECORD_TYPE || code == UNION_TYPE) { pp_c_dot (pp); - pp_c_primary_expression (pp, TREE_PURPOSE (init)); + pp->primary_expression (TREE_PURPOSE (init)); } else { pp_c_left_bracket (pp); if (TREE_PURPOSE (init)) - pp_c_constant (pp, TREE_PURPOSE (init)); + pp->constant (TREE_PURPOSE (init)); pp_c_right_bracket (pp); } pp_c_whitespace (pp); pp_equal (pp); pp_c_whitespace (pp); - pp_initializer (pp, TREE_VALUE (init)); + pp->initializer (TREE_VALUE (init)); if (TREE_CHAIN (init)) pp_separate_with (pp, ','); } @@ -1378,7 +1383,7 @@ pp_c_initializer_list (c_pretty_printer *pp, tree e) { if (i > 0) pp_separate_with (pp, ','); - pp_expression (pp, VECTOR_CST_ELT (e, i)); + pp->expression (VECTOR_CST_ELT (e, i)); } } else @@ -1389,9 +1394,9 @@ pp_c_initializer_list (c_pretty_printer *pp, tree e) if (TREE_CODE (e) == COMPLEX_CST || TREE_CODE (e) == COMPLEX_EXPR) { const bool cst = TREE_CODE (e) == COMPLEX_CST; - pp_expression (pp, cst ? TREE_REALPART (e) : TREE_OPERAND (e, 0)); + pp->expression (cst ? TREE_REALPART (e) : TREE_OPERAND (e, 0)); pp_separate_with (pp, ','); - pp_expression (pp, cst ? TREE_IMAGPART (e) : TREE_OPERAND (e, 1)); + pp->expression (cst ? 
TREE_IMAGPART (e) : TREE_OPERAND (e, 1)); } else break; @@ -1422,7 +1427,7 @@ pp_c_brace_enclosed_initializer_list (c_pretty_printer *pp, tree l) identifier */ void -pp_c_id_expression (c_pretty_printer *pp, tree t) +c_pretty_printer::id_expression (tree t) { switch (TREE_CODE (t)) { @@ -1433,15 +1438,15 @@ pp_c_id_expression (c_pretty_printer *pp, tree t) case FUNCTION_DECL: case FIELD_DECL: case LABEL_DECL: - pp_c_tree_decl_identifier (pp, t); + pp_c_tree_decl_identifier (this, t); break; case IDENTIFIER_NODE: - pp_c_tree_identifier (pp, t); + pp_c_tree_identifier (this, t); break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -1458,112 +1463,112 @@ pp_c_id_expression (c_pretty_printer *pp, tree t) ( type-name ) { initializer-list , } */ void -pp_c_postfix_expression (c_pretty_printer *pp, tree e) +c_pretty_printer::postfix_expression (tree e) { enum tree_code code = TREE_CODE (e); switch (code) { case POSTINCREMENT_EXPR: case POSTDECREMENT_EXPR: - pp_postfix_expression (pp, TREE_OPERAND (e, 0)); - pp_string (pp, code == POSTINCREMENT_EXPR ? "++" : "--"); + postfix_expression (TREE_OPERAND (e, 0)); + pp_string (this, code == POSTINCREMENT_EXPR ? 
"++" : "--"); break; case ARRAY_REF: - pp_postfix_expression (pp, TREE_OPERAND (e, 0)); - pp_c_left_bracket (pp); - pp_expression (pp, TREE_OPERAND (e, 1)); - pp_c_right_bracket (pp); + postfix_expression (TREE_OPERAND (e, 0)); + pp_c_left_bracket (this); + expression (TREE_OPERAND (e, 1)); + pp_c_right_bracket (this); break; case ARRAY_NOTATION_REF: - pp_postfix_expression (pp, ARRAY_NOTATION_ARRAY (e)); - pp_c_left_bracket (pp); - pp_expression (pp, ARRAY_NOTATION_START (e)); - pp_colon (pp); - pp_expression (pp, ARRAY_NOTATION_LENGTH (e)); - pp_colon (pp); - pp_expression (pp, ARRAY_NOTATION_STRIDE (e)); - pp_c_right_bracket (pp); + postfix_expression (ARRAY_NOTATION_ARRAY (e)); + pp_c_left_bracket (this); + expression (ARRAY_NOTATION_START (e)); + pp_colon (this); + expression (ARRAY_NOTATION_LENGTH (e)); + pp_colon (this); + expression (ARRAY_NOTATION_STRIDE (e)); + pp_c_right_bracket (this); break; case CALL_EXPR: { call_expr_arg_iterator iter; tree arg; - pp_postfix_expression (pp, CALL_EXPR_FN (e)); - pp_c_left_paren (pp); + postfix_expression (CALL_EXPR_FN (e)); + pp_c_left_paren (this); FOR_EACH_CALL_EXPR_ARG (arg, iter, e) { - pp_expression (pp, arg); + expression (arg); if (more_call_expr_args_p (&iter)) - pp_separate_with (pp, ','); + pp_separate_with (this, ','); } - pp_c_right_paren (pp); + pp_c_right_paren (this); break; } case UNORDERED_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "isunordered" : "__builtin_isunordered"); goto two_args_fun; case ORDERED_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "!isunordered" : "!__builtin_isunordered"); goto two_args_fun; case UNLT_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "!isgreaterequal" : "!__builtin_isgreaterequal"); goto two_args_fun; case UNLE_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? 
"!isgreater" : "!__builtin_isgreater"); goto two_args_fun; case UNGT_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "!islessequal" : "!__builtin_islessequal"); goto two_args_fun; case UNGE_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "!isless" : "!__builtin_isless"); goto two_args_fun; case UNEQ_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "!islessgreater" : "!__builtin_islessgreater"); goto two_args_fun; case LTGT_EXPR: - pp_c_ws_string (pp, flag_isoc99 + pp_c_ws_string (this, flag_isoc99 ? "islessgreater" : "__builtin_islessgreater"); goto two_args_fun; two_args_fun: - pp_c_left_paren (pp); - pp_expression (pp, TREE_OPERAND (e, 0)); - pp_separate_with (pp, ','); - pp_expression (pp, TREE_OPERAND (e, 1)); - pp_c_right_paren (pp); + pp_c_left_paren (this); + expression (TREE_OPERAND (e, 0)); + pp_separate_with (this, ','); + expression (TREE_OPERAND (e, 1)); + pp_c_right_paren (this); break; case ABS_EXPR: - pp_c_ws_string (pp, "__builtin_abs"); - pp_c_left_paren (pp); - pp_expression (pp, TREE_OPERAND (e, 0)); - pp_c_right_paren (pp); + pp_c_ws_string (this, "__builtin_abs"); + pp_c_left_paren (this); + expression (TREE_OPERAND (e, 0)); + pp_c_right_paren (this); break; case COMPONENT_REF: @@ -1571,15 +1576,15 @@ pp_c_postfix_expression (c_pretty_printer *pp, tree e) tree object = TREE_OPERAND (e, 0); if (TREE_CODE (object) == INDIRECT_REF) { - pp_postfix_expression (pp, TREE_OPERAND (object, 0)); - pp_c_arrow (pp); + postfix_expression (TREE_OPERAND (object, 0)); + pp_c_arrow (this); } else { - pp_postfix_expression (pp, object); - pp_c_dot (pp); + postfix_expression (object); + pp_c_dot (this); } - pp_expression (pp, TREE_OPERAND (e, 1)); + expression (TREE_OPERAND (e, 1)); } break; @@ -1595,63 +1600,63 @@ pp_c_postfix_expression (c_pretty_printer *pp, tree e) HOST_WIDE_INT size = tree_low_cst (TYPE_SIZE (type), 0); if ((bitpos % size) == 0) { - pp_c_left_paren (pp); 
- pp_c_left_paren (pp); - pp_type_id (pp, type); - pp_c_star (pp); - pp_c_right_paren (pp); - pp_c_ampersand (pp); - pp_expression (pp, TREE_OPERAND (e, 0)); - pp_c_right_paren (pp); - pp_c_left_bracket (pp); - pp_wide_integer (pp, bitpos / size); - pp_c_right_bracket (pp); + pp_c_left_paren (this); + pp_c_left_paren (this); + type_id (type); + pp_c_star (this); + pp_c_right_paren (this); + pp_c_ampersand (this); + expression (TREE_OPERAND (e, 0)); + pp_c_right_paren (this); + pp_c_left_bracket (this); + pp_wide_integer (this, bitpos / size); + pp_c_right_bracket (this); break; } } - pp_unsupported_tree (pp, e); + pp_unsupported_tree (this, e); } break; case MEM_REF: - pp_c_expression (pp, e); + expression (e); break; case COMPLEX_CST: case VECTOR_CST: - pp_c_compound_literal (pp, e); + pp_c_compound_literal (this, e); break; case COMPLEX_EXPR: - pp_c_complex_expr (pp, e); + pp_c_complex_expr (this, e); break; case COMPOUND_LITERAL_EXPR: e = DECL_INITIAL (COMPOUND_LITERAL_EXPR_DECL (e)); /* Fall through. */ case CONSTRUCTOR: - pp_initializer (pp, e); + initializer (e); break; case VA_ARG_EXPR: - pp_c_ws_string (pp, "__builtin_va_arg"); - pp_c_left_paren (pp); - pp_assignment_expression (pp, TREE_OPERAND (e, 0)); - pp_separate_with (pp, ','); - pp_type_id (pp, TREE_TYPE (e)); - pp_c_right_paren (pp); + pp_c_ws_string (this, "__builtin_va_arg"); + pp_c_left_paren (this); + assignment_expression (TREE_OPERAND (e, 0)); + pp_separate_with (this, ','); + type_id (TREE_TYPE (e)); + pp_c_right_paren (this); break; case ADDR_EXPR: if (TREE_CODE (TREE_OPERAND (e, 0)) == FUNCTION_DECL) { - pp_c_id_expression (pp, TREE_OPERAND (e, 0)); + id_expression (TREE_OPERAND (e, 0)); break; } /* else fall through. 
*/ default: - pp_primary_expression (pp, e); + primary_expression (e); break; } } @@ -1663,7 +1668,7 @@ pp_c_expression_list (c_pretty_printer *pp, tree e) { for (; e != NULL_TREE; e = TREE_CHAIN (e)) { - pp_expression (pp, TREE_VALUE (e)); + pp->expression (TREE_VALUE (e)); if (TREE_CHAIN (e)) pp_separate_with (pp, ','); } @@ -1679,7 +1684,7 @@ pp_c_constructor_elts (c_pretty_printer *pp, vec *v) FOR_EACH_CONSTRUCTOR_VALUE (v, ix, value) { - pp_expression (pp, value); + pp->expression (value); if (ix != vec_safe_length (v) - 1) pp_separate_with (pp, ','); } @@ -1716,15 +1721,15 @@ pp_c_call_argument_list (c_pretty_printer *pp, tree t) __imag__ unary-expression */ void -pp_c_unary_expression (c_pretty_printer *pp, tree e) +c_pretty_printer::unary_expression (tree e) { enum tree_code code = TREE_CODE (e); switch (code) { case PREINCREMENT_EXPR: case PREDECREMENT_EXPR: - pp_string (pp, code == PREINCREMENT_EXPR ? "++" : "--"); - pp_c_unary_expression (pp, TREE_OPERAND (e, 0)); + pp_string (this, code == PREINCREMENT_EXPR ? "++" : "--"); + unary_expression (TREE_OPERAND (e, 0)); break; case ADDR_EXPR: @@ -1735,53 +1740,53 @@ pp_c_unary_expression (c_pretty_printer *pp, tree e) case CONJ_EXPR: /* String literal are used by address. 
*/ if (code == ADDR_EXPR && TREE_CODE (TREE_OPERAND (e, 0)) != STRING_CST) - pp_ampersand (pp); + pp_ampersand (this); else if (code == INDIRECT_REF) - pp_c_star (pp); + pp_c_star (this); else if (code == NEGATE_EXPR) - pp_minus (pp); + pp_minus (this); else if (code == BIT_NOT_EXPR || code == CONJ_EXPR) - pp_complement (pp); + pp_complement (this); else if (code == TRUTH_NOT_EXPR) - pp_exclamation (pp); - pp_c_cast_expression (pp, TREE_OPERAND (e, 0)); + pp_exclamation (this); + pp_c_cast_expression (this, TREE_OPERAND (e, 0)); break; case MEM_REF: if (TREE_CODE (TREE_OPERAND (e, 0)) == ADDR_EXPR && integer_zerop (TREE_OPERAND (e, 1))) - pp_c_expression (pp, TREE_OPERAND (TREE_OPERAND (e, 0), 0)); + expression (TREE_OPERAND (TREE_OPERAND (e, 0), 0)); else { - pp_c_star (pp); + pp_c_star (this); if (!integer_zerop (TREE_OPERAND (e, 1))) { - pp_c_left_paren (pp); + pp_c_left_paren (this); if (!integer_onep (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (e, 0)))))) - pp_c_type_cast (pp, ptr_type_node); + pp_c_type_cast (this, ptr_type_node); } - pp_c_cast_expression (pp, TREE_OPERAND (e, 0)); + pp_c_cast_expression (this, TREE_OPERAND (e, 0)); if (!integer_zerop (TREE_OPERAND (e, 1))) { - pp_plus (pp); - pp_c_integer_constant (pp, + pp_plus (this); + pp_c_integer_constant (this, fold_convert (ssizetype, TREE_OPERAND (e, 1))); - pp_c_right_paren (pp); + pp_c_right_paren (this); } } break; case REALPART_EXPR: case IMAGPART_EXPR: - pp_c_ws_string (pp, code == REALPART_EXPR ? "__real__" : "__imag__"); - pp_c_whitespace (pp); - pp_unary_expression (pp, TREE_OPERAND (e, 0)); + pp_c_ws_string (this, code == REALPART_EXPR ? 
"__real__" : "__imag__"); + pp_c_whitespace (this); + unary_expression (TREE_OPERAND (e, 0)); break; default: - pp_postfix_expression (pp, e); + postfix_expression (e); break; } } @@ -1804,7 +1809,7 @@ pp_c_cast_expression (c_pretty_printer *pp, tree e) break; default: - pp_unary_expression (pp, e); + pp->unary_expression (e); } } @@ -1814,8 +1819,8 @@ pp_c_cast_expression (c_pretty_printer *pp, tree e) multiplicative-expression / cast-expression multiplicative-expression % cast-expression */ -static void -pp_c_multiplicative_expression (c_pretty_printer *pp, tree e) +void +c_pretty_printer::multiplicative_expression (tree e) { enum tree_code code = TREE_CODE (e); switch (code) @@ -1823,20 +1828,20 @@ pp_c_multiplicative_expression (c_pretty_printer *pp, tree e) case MULT_EXPR: case TRUNC_DIV_EXPR: case TRUNC_MOD_EXPR: - pp_multiplicative_expression (pp, TREE_OPERAND (e, 0)); - pp_c_whitespace (pp); + multiplicative_expression (TREE_OPERAND (e, 0)); + pp_c_whitespace (this); if (code == MULT_EXPR) - pp_c_star (pp); + pp_c_star (this); else if (code == TRUNC_DIV_EXPR) - pp_slash (pp); + pp_slash (this); else - pp_modulo (pp); - pp_c_whitespace (pp); - pp_c_cast_expression (pp, TREE_OPERAND (e, 1)); + pp_modulo (this); + pp_c_whitespace (this); + pp_c_cast_expression (this, TREE_OPERAND (e, 1)); break; default: - pp_c_cast_expression (pp, e); + pp_c_cast_expression (this, e); break; } } @@ -1862,11 +1867,11 @@ pp_c_additive_expression (c_pretty_printer *pp, tree e) else pp_minus (pp); pp_c_whitespace (pp); - pp_multiplicative_expression (pp, TREE_OPERAND (e, 1)); + pp->multiplicative_expression (TREE_OPERAND (e, 1)); break; default: - pp_multiplicative_expression (pp, e); + pp->multiplicative_expression (e); break; } } @@ -1920,9 +1925,9 @@ pp_c_relational_expression (c_pretty_printer *pp, tree e) else if (code == GT_EXPR) pp_greater (pp); else if (code == LE_EXPR) - pp_string (pp, "<="); + pp_less_equal (pp); else if (code == GE_EXPR) - pp_string (pp, ">="); + 
pp_greater_equal (pp); pp_c_whitespace (pp); pp_c_shift_expression (pp, TREE_OPERAND (e, 1)); break; @@ -2032,7 +2037,7 @@ pp_c_logical_and_expression (c_pretty_printer *pp, tree e) { pp_c_logical_and_expression (pp, TREE_OPERAND (e, 0)); pp_c_whitespace (pp); - pp_string (pp, "&&"); + pp_ampersand_ampersand (pp); pp_c_whitespace (pp); pp_c_inclusive_or_expression (pp, TREE_OPERAND (e, 1)); } @@ -2052,7 +2057,7 @@ pp_c_logical_or_expression (c_pretty_printer *pp, tree e) { pp_c_logical_or_expression (pp, TREE_OPERAND (e, 0)); pp_c_whitespace (pp); - pp_string (pp, "||"); + pp_bar_bar (pp); pp_c_whitespace (pp); pp_c_logical_and_expression (pp, TREE_OPERAND (e, 1)); } @@ -2064,23 +2069,23 @@ pp_c_logical_or_expression (c_pretty_printer *pp, tree e) logical-OR-expression logical-OR-expression ? expression : conditional-expression */ -static void -pp_c_conditional_expression (c_pretty_printer *pp, tree e) +void +c_pretty_printer::conditional_expression (tree e) { if (TREE_CODE (e) == COND_EXPR) { - pp_c_logical_or_expression (pp, TREE_OPERAND (e, 0)); - pp_c_whitespace (pp); - pp_question (pp); - pp_c_whitespace (pp); - pp_expression (pp, TREE_OPERAND (e, 1)); - pp_c_whitespace (pp); - pp_colon (pp); - pp_c_whitespace (pp); - pp_c_conditional_expression (pp, TREE_OPERAND (e, 2)); + pp_c_logical_or_expression (this, TREE_OPERAND (e, 0)); + pp_c_whitespace (this); + pp_question (this); + pp_c_whitespace (this); + expression (TREE_OPERAND (e, 1)); + pp_c_whitespace (this); + pp_colon (this); + pp_c_whitespace (this); + conditional_expression (TREE_OPERAND (e, 2)); } else - pp_c_logical_or_expression (pp, e); + pp_c_logical_or_expression (this, e); } @@ -2091,20 +2096,20 @@ pp_c_conditional_expression (c_pretty_printer *pp, tree e) assignment-expression: one of = *= /= %= += -= >>= <<= &= ^= |= */ -static void -pp_c_assignment_expression (c_pretty_printer *pp, tree e) +void +c_pretty_printer::assignment_expression (tree e) { if (TREE_CODE (e) == MODIFY_EXPR || TREE_CODE 
(e) == INIT_EXPR) { - pp_c_unary_expression (pp, TREE_OPERAND (e, 0)); - pp_c_whitespace (pp); - pp_equal (pp); - pp_space (pp); - pp_c_expression (pp, TREE_OPERAND (e, 1)); + unary_expression (TREE_OPERAND (e, 0)); + pp_c_whitespace (this); + pp_equal (this); + pp_space (this); + expression (TREE_OPERAND (e, 1)); } else - pp_c_conditional_expression (pp, e); + conditional_expression (e); } /* expression: @@ -2114,28 +2119,28 @@ pp_c_assignment_expression (c_pretty_printer *pp, tree e) Implementation note: instead of going through the usual recursion chain, I take the liberty of dispatching nodes to the appropriate functions. This makes some redundancy, but it worths it. That also - prevents a possible infinite recursion between pp_c_primary_expression () - and pp_c_expression (). */ + prevents a possible infinite recursion between primary_expression () + and expression (). */ void -pp_c_expression (c_pretty_printer *pp, tree e) +c_pretty_printer::expression (tree e) { switch (TREE_CODE (e)) { case INTEGER_CST: - pp_c_integer_constant (pp, e); + pp_c_integer_constant (this, e); break; case REAL_CST: - pp_c_floating_constant (pp, e); + pp_c_floating_constant (this, e); break; case FIXED_CST: - pp_c_fixed_constant (pp, e); + pp_c_fixed_constant (this, e); break; case STRING_CST: - pp_c_string_literal (pp, e); + pp_c_string_literal (this, e); break; case IDENTIFIER_NODE: @@ -2147,15 +2152,15 @@ pp_c_expression (c_pretty_printer *pp, tree e) case FIELD_DECL: case LABEL_DECL: case ERROR_MARK: - pp_primary_expression (pp, e); + primary_expression (e); break; case SSA_NAME: if (SSA_NAME_VAR (e) && !DECL_ARTIFICIAL (SSA_NAME_VAR (e))) - pp_c_expression (pp, SSA_NAME_VAR (e)); + expression (SSA_NAME_VAR (e)); else - pp_c_ws_string (pp, M_("")); + translate_string (""); break; case POSTINCREMENT_EXPR: @@ -2180,7 +2185,7 @@ pp_c_expression (c_pretty_printer *pp, tree e) case CONSTRUCTOR: case COMPOUND_LITERAL_EXPR: case VA_ARG_EXPR: - pp_postfix_expression (pp, e); + 
postfix_expression (e); break; case CONJ_EXPR: @@ -2194,107 +2199,107 @@ pp_c_expression (c_pretty_printer *pp, tree e) case PREDECREMENT_EXPR: case REALPART_EXPR: case IMAGPART_EXPR: - pp_c_unary_expression (pp, e); + unary_expression (e); break; case FLOAT_EXPR: case FIX_TRUNC_EXPR: CASE_CONVERT: case VIEW_CONVERT_EXPR: - pp_c_cast_expression (pp, e); + pp_c_cast_expression (this, e); break; case MULT_EXPR: case TRUNC_MOD_EXPR: case TRUNC_DIV_EXPR: - pp_multiplicative_expression (pp, e); + multiplicative_expression (e); break; case LSHIFT_EXPR: case RSHIFT_EXPR: - pp_c_shift_expression (pp, e); + pp_c_shift_expression (this, e); break; case LT_EXPR: case GT_EXPR: case LE_EXPR: case GE_EXPR: - pp_c_relational_expression (pp, e); + pp_c_relational_expression (this, e); break; case BIT_AND_EXPR: - pp_c_and_expression (pp, e); + pp_c_and_expression (this, e); break; case BIT_XOR_EXPR: case TRUTH_XOR_EXPR: - pp_c_exclusive_or_expression (pp, e); + pp_c_exclusive_or_expression (this, e); break; case BIT_IOR_EXPR: - pp_c_inclusive_or_expression (pp, e); + pp_c_inclusive_or_expression (this, e); break; case TRUTH_ANDIF_EXPR: case TRUTH_AND_EXPR: - pp_c_logical_and_expression (pp, e); + pp_c_logical_and_expression (this, e); break; case TRUTH_ORIF_EXPR: case TRUTH_OR_EXPR: - pp_c_logical_or_expression (pp, e); + pp_c_logical_or_expression (this, e); break; case EQ_EXPR: case NE_EXPR: - pp_c_equality_expression (pp, e); + pp_c_equality_expression (this, e); break; case COND_EXPR: - pp_conditional_expression (pp, e); + conditional_expression (e); break; case POINTER_PLUS_EXPR: case PLUS_EXPR: case MINUS_EXPR: - pp_c_additive_expression (pp, e); + pp_c_additive_expression (this, e); break; case MODIFY_EXPR: case INIT_EXPR: - pp_assignment_expression (pp, e); + assignment_expression (e); break; case COMPOUND_EXPR: - pp_c_left_paren (pp); - pp_expression (pp, TREE_OPERAND (e, 0)); - pp_separate_with (pp, ','); - pp_assignment_expression (pp, TREE_OPERAND (e, 1)); - 
pp_c_right_paren (pp); + pp_c_left_paren (this); + expression (TREE_OPERAND (e, 0)); + pp_separate_with (this, ','); + assignment_expression (TREE_OPERAND (e, 1)); + pp_c_right_paren (this); break; case NON_LVALUE_EXPR: case SAVE_EXPR: - pp_expression (pp, TREE_OPERAND (e, 0)); + expression (TREE_OPERAND (e, 0)); break; case TARGET_EXPR: - pp_postfix_expression (pp, TREE_OPERAND (e, 1)); + postfix_expression (TREE_OPERAND (e, 1)); break; case BIND_EXPR: case GOTO_EXPR: /* We don't yet have a way of dumping statements in a human-readable format. */ - pp_string (pp, "({...})"); + pp_string (this, "({...})"); break; case C_MAYBE_CONST_EXPR: - pp_c_expression (pp, C_MAYBE_CONST_EXPR_EXPR (e)); + expression (C_MAYBE_CONST_EXPR_EXPR (e)); break; default: - pp_unsupported_tree (pp, e); + pp_unsupported_tree (this, e); break; } } @@ -2304,53 +2309,28 @@ pp_c_expression (c_pretty_printer *pp, tree e) /* Statements. */ void -pp_c_statement (c_pretty_printer *pp, tree stmt) +c_pretty_printer::statement (tree stmt) { if (stmt == NULL) return; - if (pp_needs_newline (pp)) - pp_newline_and_indent (pp, 0); + if (pp_needs_newline (this)) + pp_newline_and_indent (this, 0); - dump_generic_node (pp_base (pp), stmt, pp_indentation (pp), 0, true); + dump_generic_node (this, stmt, pp_indentation (this), 0, true); } /* Initialize the PRETTY-PRINTER for handling C codes. 
*/ -void -pp_c_pretty_printer_init (c_pretty_printer *pp) +c_pretty_printer::c_pretty_printer () + : pretty_printer (), + offset_list (), + flags () { - pp->offset_list = 0; - - pp->flags = 0; - - pp->declaration = pp_c_declaration; - pp->declaration_specifiers = pp_c_declaration_specifiers; - pp->declarator = pp_c_declarator; - pp->direct_declarator = pp_c_direct_declarator; - pp->type_specifier_seq = pp_c_specifier_qualifier_list; - pp->abstract_declarator = pp_c_abstract_declarator; - pp->direct_abstract_declarator = pp_c_direct_abstract_declarator; - pp->ptr_operator = pp_c_pointer; - pp->parameter_list = pp_c_parameter_type_list; - pp->type_id = pp_c_type_id; - pp->simple_type_specifier = pp_c_type_specifier; - pp->function_specifier = pp_c_function_specifier; - pp->storage_class_specifier = pp_c_storage_class_specifier; - - pp->statement = pp_c_statement; - - pp->constant = pp_c_constant; - pp->id_expression = pp_c_id_expression; - pp->primary_expression = pp_c_primary_expression; - pp->postfix_expression = pp_c_postfix_expression; - pp->unary_expression = pp_c_unary_expression; - pp->initializer = pp_c_initializer; - pp->multiplicative_expression = pp_c_multiplicative_expression; - pp->conditional_expression = pp_c_conditional_expression; - pp->assignment_expression = pp_c_assignment_expression; - pp->expression = pp_c_expression; + type_specifier_seq = pp_c_specifier_qualifier_list; + ptr_operator = pp_c_pointer; + parameter_list = pp_c_parameter_type_list; } @@ -2359,22 +2339,12 @@ pp_c_pretty_printer_init (c_pretty_printer *pp) void print_c_tree (FILE *file, tree t) { - static c_pretty_printer pp_rec; - static bool initialized = 0; - c_pretty_printer *pp = &pp_rec; + c_pretty_printer pp; - if (!initialized) - { - initialized = 1; - pp_construct (pp_base (pp), NULL, 0); - pp_c_pretty_printer_init (pp); - pp_needs_newline (pp) = true; - } - pp_base (pp)->buffer->stream = file; - - pp_statement (pp, t); - - pp_newline_and_flush (pp); + pp_needs_newline (&pp) 
= true; + pp.buffer->stream = file; + pp.statement (t); + pp_newline_and_flush (&pp); } /* Print the tree T in full, on stderr. */ diff --git a/gcc/c-family/c-pretty-print.h b/gcc/c-family/c-pretty-print.h index 04b72c49d1a..aa046e51dad 100644 --- a/gcc/c-family/c-pretty-print.h +++ b/gcc/c-family/c-pretty-print.h @@ -26,35 +26,58 @@ along with GCC; see the file COPYING3. If not see #include "pretty-print.h" -typedef enum +enum pp_c_pretty_print_flags { pp_c_flag_abstract = 1 << 1, pp_c_flag_gnu_v3 = 1 << 2, pp_c_flag_last_bit = 3 - } pp_c_pretty_print_flags; + }; /* The data type used to bundle information necessary for pretty-printing a C or C++ entity. */ -typedef struct c_pretty_print_info c_pretty_printer; +struct c_pretty_printer; /* The type of a C pretty-printer 'member' function. */ typedef void (*c_pretty_print_fn) (c_pretty_printer *, tree); /* The datatype that contains information necessary for pretty-printing a tree that represents a C construct. Any pretty-printer for a - language using C/c++ syntax can derive from this datatype and reuse - facilities provided here. It can do so by having a subobject of type - c_pretty_printer and override the macro pp_c_base to return a pointer - to that subobject. Such a pretty-printer has the responsibility to - initialize the pp_base() part, then call pp_c_pretty_printer_init - to set up the components that are specific to the C pretty-printer. - A derived pretty-printer can override any function listed in the - vtable below. See cp/cxx-pretty-print.h and cp/cxx-pretty-print.c - for an example of derivation. */ -struct c_pretty_print_info + language using C syntax can derive from this datatype and reuse + facilities provided here. A derived pretty-printer can override + any function listed in the vtable below. See cp/cxx-pretty-print.h + and cp/cxx-pretty-print.c for an example of derivation. 
*/ +struct c_pretty_printer : pretty_printer { - pretty_printer base; + c_pretty_printer (); + + // Format string, possibly translated. + void translate_string (const char *); + + virtual void constant (tree); + virtual void id_expression (tree); + virtual void primary_expression (tree); + virtual void postfix_expression (tree); + virtual void unary_expression (tree); + virtual void multiplicative_expression (tree); + virtual void conditional_expression (tree); + virtual void assignment_expression (tree); + virtual void expression (tree); + + virtual void type_id (tree); + virtual void statement (tree); + + virtual void declaration (tree); + virtual void declaration_specifiers (tree); + virtual void simple_type_specifier (tree); + virtual void function_specifier (tree); + virtual void storage_class_specifier (tree); + virtual void declarator (tree); + virtual void direct_declarator (tree); + virtual void abstract_declarator (tree); + virtual void direct_abstract_declarator (tree); + + virtual void initializer (tree); /* Points to the first element of an array of offset-list. Not used yet. */ int *offset_list; @@ -63,100 +86,18 @@ struct c_pretty_print_info /* These must be overridden by each of the C and C++ front-end to reflect their understanding of syntactic productions when they differ. 
*/ - c_pretty_print_fn declaration; - c_pretty_print_fn declaration_specifiers; - c_pretty_print_fn declarator; - c_pretty_print_fn abstract_declarator; - c_pretty_print_fn direct_abstract_declarator; c_pretty_print_fn type_specifier_seq; - c_pretty_print_fn direct_declarator; c_pretty_print_fn ptr_operator; c_pretty_print_fn parameter_list; - c_pretty_print_fn type_id; - c_pretty_print_fn simple_type_specifier; - c_pretty_print_fn function_specifier; - c_pretty_print_fn storage_class_specifier; - c_pretty_print_fn initializer; - - c_pretty_print_fn statement; - - c_pretty_print_fn constant; - c_pretty_print_fn id_expression; - c_pretty_print_fn primary_expression; - c_pretty_print_fn postfix_expression; - c_pretty_print_fn unary_expression; - c_pretty_print_fn multiplicative_expression; - c_pretty_print_fn conditional_expression; - c_pretty_print_fn assignment_expression; - c_pretty_print_fn expression; }; -/* Override the pp_base macro. Derived pretty-printers should not - touch this macro. Instead they should override pp_c_base instead. 
*/ -#undef pp_base -#define pp_base(PP) (&pp_c_base (PP)->base) - - #define pp_c_tree_identifier(PPI, ID) \ pp_c_identifier (PPI, IDENTIFIER_POINTER (ID)) -#define pp_declaration(PPI, T) \ - pp_c_base (PPI)->declaration (pp_c_base (PPI), T) -#define pp_declaration_specifiers(PPI, D) \ - pp_c_base (PPI)->declaration_specifiers (pp_c_base (PPI), D) -#define pp_abstract_declarator(PP, D) \ - pp_c_base (PP)->abstract_declarator (pp_c_base (PP), D) -#define pp_type_specifier_seq(PPI, D) \ - pp_c_base (PPI)->type_specifier_seq (pp_c_base (PPI), D) -#define pp_declarator(PPI, D) \ - pp_c_base (PPI)->declarator (pp_c_base (PPI), D) -#define pp_direct_declarator(PPI, D) \ - pp_c_base (PPI)->direct_declarator (pp_c_base (PPI), D) -#define pp_direct_abstract_declarator(PP, D) \ - pp_c_base (PP)->direct_abstract_declarator (pp_c_base (PP), D) -#define pp_ptr_operator(PP, D) \ - pp_c_base (PP)->ptr_operator (pp_c_base (PP), D) -#define pp_parameter_list(PPI, T) \ - pp_c_base (PPI)->parameter_list (pp_c_base (PPI), T) -#define pp_type_id(PPI, D) \ - pp_c_base (PPI)->type_id (pp_c_base (PPI), D) -#define pp_simple_type_specifier(PP, T) \ - pp_c_base (PP)->simple_type_specifier (pp_c_base (PP), T) -#define pp_function_specifier(PP, D) \ - pp_c_base (PP)->function_specifier (pp_c_base (PP), D) -#define pp_storage_class_specifier(PP, D) \ - pp_c_base (PP)->storage_class_specifier (pp_c_base (PP), D); - -#define pp_statement(PPI, S) \ - pp_c_base (PPI)->statement (pp_c_base (PPI), S) - -#define pp_constant(PP, E) \ - pp_c_base (PP)->constant (pp_c_base (PP), E) -#define pp_id_expression(PP, E) \ - pp_c_base (PP)->id_expression (pp_c_base (PP), E) -#define pp_primary_expression(PPI, E) \ - pp_c_base (PPI)->primary_expression (pp_c_base (PPI), E) -#define pp_postfix_expression(PPI, E) \ - pp_c_base (PPI)->postfix_expression (pp_c_base (PPI), E) -#define pp_unary_expression(PPI, E) \ - pp_c_base (PPI)->unary_expression (pp_c_base (PPI), E) -#define pp_initializer(PPI, E) \ - pp_c_base 
(PPI)->initializer (pp_c_base (PPI), E) -#define pp_multiplicative_expression(PPI, E) \ - pp_c_base (PPI)->multiplicative_expression (pp_c_base (PPI), E) -#define pp_conditional_expression(PPI, E) \ - pp_c_base (PPI)->conditional_expression (pp_c_base (PPI), E) -#define pp_assignment_expression(PPI, E) \ - pp_c_base (PPI)->assignment_expression (pp_c_base (PPI), E) -#define pp_expression(PP, E) \ - pp_c_base (PP)->expression (pp_c_base (PP), E) - - -/* Returns the c_pretty_printer base object of PRETTY-PRINTER. This - macro must be overridden by any subclass of c_pretty_print_info. */ -#define pp_c_base(PP) (PP) - -extern void pp_c_pretty_printer_init (c_pretty_printer *); +#define pp_type_specifier_seq(PP, D) (PP)->type_specifier_seq (PP, D) +#define pp_ptr_operator(PP, D) (PP)->ptr_operator (PP, D) +#define pp_parameter_list(PP, T) (PP)->parameter_list (PP, T) + void pp_c_whitespace (c_pretty_printer *); void pp_c_left_paren (c_pretty_printer *); void pp_c_right_paren (c_pretty_printer *); @@ -181,31 +122,14 @@ void pp_c_attributes_display (c_pretty_printer *, tree); void pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type); void pp_c_type_qualifier_list (c_pretty_printer *, tree); void pp_c_parameter_type_list (c_pretty_printer *, tree); -void pp_c_declaration (c_pretty_printer *, tree); -void pp_c_declaration_specifiers (c_pretty_printer *, tree); -void pp_c_declarator (c_pretty_printer *, tree); -void pp_c_direct_declarator (c_pretty_printer *, tree); void pp_c_specifier_qualifier_list (c_pretty_printer *, tree); -void pp_c_function_specifier (c_pretty_printer *, tree); -void pp_c_type_id (c_pretty_printer *, tree); -void pp_c_direct_abstract_declarator (c_pretty_printer *, tree); -void pp_c_type_specifier (c_pretty_printer *, tree); -void pp_c_storage_class_specifier (c_pretty_printer *, tree); -/* Statements. */ -void pp_c_statement (c_pretty_printer *, tree); /* Expressions. 
*/ -void pp_c_expression (c_pretty_printer *, tree); void pp_c_logical_or_expression (c_pretty_printer *, tree); void pp_c_expression_list (c_pretty_printer *, tree); void pp_c_constructor_elts (c_pretty_printer *, vec *); void pp_c_call_argument_list (c_pretty_printer *, tree); -void pp_c_unary_expression (c_pretty_printer *, tree); void pp_c_cast_expression (c_pretty_printer *, tree); -void pp_c_postfix_expression (c_pretty_printer *, tree); -void pp_c_primary_expression (c_pretty_printer *, tree); void pp_c_init_declarator (c_pretty_printer *, tree); -void pp_c_constant (c_pretty_printer *, tree); -void pp_c_id_expression (c_pretty_printer *, tree); void pp_c_ws_string (c_pretty_printer *, const char *); void pp_c_identifier (c_pretty_printer *, const char *); void pp_c_string_literal (c_pretty_printer *, tree); diff --git a/gcc/c-family/c-ubsan.c b/gcc/c-family/c-ubsan.c new file mode 100644 index 00000000000..9f43f6d55b8 --- /dev/null +++ b/gcc/c-family/c-ubsan.c @@ -0,0 +1,158 @@ +/* UndefinedBehaviorSanitizer, undefined behavior detector. + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Marek Polacek + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "alloc-pool.h" +#include "cgraph.h" +#include "gimple.h" +#include "hash-table.h" +#include "output.h" +#include "toplev.h" +#include "ubsan.h" +#include "c-family/c-common.h" +#include "c-family/c-ubsan.h" + +/* Instrument division by zero and INT_MIN / -1. If not instrumenting, + return NULL_TREE. */ + +tree +ubsan_instrument_division (location_t loc, tree op0, tree op1) +{ + tree t, tt; + tree type = TREE_TYPE (op0); + + /* At this point both operands should have the same type, + because they are already converted to RESULT_TYPE. + Use TYPE_MAIN_VARIANT since typedefs can confuse us. */ + gcc_assert (TYPE_MAIN_VARIANT (TREE_TYPE (op0)) + == TYPE_MAIN_VARIANT (TREE_TYPE (op1))); + + /* TODO: REAL_TYPE is not supported yet. */ + if (TREE_CODE (type) != INTEGER_TYPE) + return NULL_TREE; + + /* If we *know* that the divisor is not -1 or 0, we don't have to + instrument this expression. + ??? We could use decl_constant_value to cover up more cases. */ + if (TREE_CODE (op1) == INTEGER_CST + && integer_nonzerop (op1) + && !integer_minus_onep (op1)) + return NULL_TREE; + + t = fold_build2 (EQ_EXPR, boolean_type_node, + op1, build_int_cst (type, 0)); + + /* We check INT_MIN / -1 only for signed types. */ + if (!TYPE_UNSIGNED (type)) + { + tree x; + tt = fold_build2 (EQ_EXPR, boolean_type_node, op1, + build_int_cst (type, -1)); + x = fold_build2 (EQ_EXPR, boolean_type_node, op0, + TYPE_MIN_VALUE (type)); + x = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, x, tt); + t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, t, x); + } + + /* In case we have a SAVE_EXPR in a conditional context, we need to + make sure it gets evaluated before the condition. 
*/ + t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (t), op0, t); + tree data = ubsan_create_data ("__ubsan_overflow_data", + loc, ubsan_type_descriptor (type), + NULL_TREE); + data = build_fold_addr_expr_loc (loc, data); + tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW); + tt = build_call_expr_loc (loc, tt, 3, data, ubsan_encode_value (op0), + ubsan_encode_value (op1)); + t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node); + + return t; +} + +/* Instrument left and right shifts. If not instrumenting, return + NULL_TREE. */ + +tree +ubsan_instrument_shift (location_t loc, enum tree_code code, + tree op0, tree op1) +{ + tree t, tt = NULL_TREE; + tree type0 = TREE_TYPE (op0); + tree type1 = TREE_TYPE (op1); + tree op1_utype = unsigned_type_for (type1); + HOST_WIDE_INT op0_prec = TYPE_PRECISION (type0); + tree uprecm1 = build_int_cst (op1_utype, op0_prec - 1); + tree precm1 = build_int_cst (type1, op0_prec - 1); + + t = fold_convert_loc (loc, op1_utype, op1); + t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1); + + /* For signed x << y, in C99/C11, the following: + (unsigned) x >> (precm1 - y) + if non-zero, is undefined. */ + if (code == LSHIFT_EXPR + && !TYPE_UNSIGNED (type0) + && flag_isoc99) + { + tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tt = fold_convert_loc (loc, unsigned_type_for (type0), op0); + tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); + tt = fold_build2 (NE_EXPR, boolean_type_node, tt, + build_int_cst (TREE_TYPE (tt), 0)); + } + + /* For signed x << y, in C++11/C++14, the following: + x < 0 || ((unsigned) x >> (precm1 - y)) + if > 1, is undefined. 
*/ + if (code == LSHIFT_EXPR + && !TYPE_UNSIGNED (TREE_TYPE (op0)) + && (cxx_dialect == cxx11 || cxx_dialect == cxx1y)) + { + tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tt = fold_convert_loc (loc, unsigned_type_for (type0), op0); + tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); + tt = fold_build2 (GT_EXPR, boolean_type_node, tt, + build_int_cst (TREE_TYPE (tt), 1)); + x = fold_build2 (LT_EXPR, boolean_type_node, op0, + build_int_cst (type0, 0)); + tt = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, x, tt); + } + + /* In case we have a SAVE_EXPR in a conditional context, we need to + make sure it gets evaluated before the condition. */ + t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (t), op0, t); + tree data = ubsan_create_data ("__ubsan_shift_data", + loc, ubsan_type_descriptor (type0), + ubsan_type_descriptor (type1), NULL_TREE); + + data = build_fold_addr_expr_loc (loc, data); + + t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, t, + tt ? tt : integer_zero_node); + tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS); + tt = build_call_expr_loc (loc, tt, 3, data, ubsan_encode_value (op0), + ubsan_encode_value (op1)); + t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node); + + return t; +} diff --git a/gcc/c-family/c-ubsan.h b/gcc/c-family/c-ubsan.h new file mode 100644 index 00000000000..b032b707cc3 --- /dev/null +++ b/gcc/c-family/c-ubsan.h @@ -0,0 +1,27 @@ +/* UndefinedBehaviorSanitizer, undefined behavior detector. + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Marek Polacek + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_C_UBSAN_H +#define GCC_C_UBSAN_H + +extern tree ubsan_instrument_division (location_t, tree, tree); +extern tree ubsan_instrument_shift (location_t, enum tree_code, tree, tree); + +#endif /* GCC_C_UBSAN_H */ diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index e2df48f93ea..1b4b2977294 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,47 @@ +2013-09-08 Joern Rennecke + + * c-typeck.c (build_binary_op): Use vector_types_compatible_elements_p. + +2013-09-03 Gabriel Dos Reis + + * c-objc-common.c (c_tree_printer): Tidy. + +2013-08-30 Marek Polacek + + * c-typeck.c (build_binary_op): Add division by zero and shift + instrumentation. + +2013-08-26 Joern Rennecke + Joseph Myers + + PR c/35649 + * c-typeck.c (c_common_type): Prefer double_type_node over + other REAL_TYPE types with the same precision. + (convert_arguments): Likewise. + +2013-08-23 Gabriel Dos Reis + + * c-objc-common.c (c_tree_printer): Document the nature of the cast. + (c_initialize_diagnostics): Call a destructor for the early printer. + +2013-08-22 Gabriel Dos Reis + + * c-objc-common.c (c_initialize_diagnostics): Simplify C pretty + printer initialization. + +2013-08-19 Balaji V. Iyer + + PR c/57490 + * c-array-notation.c (fix_conditional_array_notations_1): Added a + check for truth values. + (expand_array_notation_exprs): Added truth values case. Removed an + unwanted else. Added for-loop to walk through subtrees in default + case. + +2013-08-04 Gabriel Dos Reis + + * c-objc-common.c (c_initialize_diagnostics): Don't call pp_base. + 2013-07-23 Joseph Myers * c-parser.c (struct c_generic_association): Fix typo. 
diff --git a/gcc/c/c-array-notation.c b/gcc/c/c-array-notation.c index 7788f7bf145..5747bcb5ca8 100644 --- a/gcc/c/c-array-notation.c +++ b/gcc/c/c-array-notation.c @@ -906,6 +906,8 @@ fix_conditional_array_notations_1 (tree stmt) cond = COND_EXPR_COND (stmt); else if (TREE_CODE (stmt) == SWITCH_EXPR) cond = SWITCH_COND (stmt); + else if (truth_value_p (TREE_CODE (stmt))) + cond = TREE_OPERAND (stmt, 0); else /* Otherwise dont even touch the statement. */ return stmt; @@ -1232,6 +1234,12 @@ expand_array_notation_exprs (tree t) case BIND_EXPR: t = expand_array_notation_exprs (BIND_EXPR_BODY (t)); return t; + case TRUTH_ORIF_EXPR: + case TRUTH_ANDIF_EXPR: + case TRUTH_OR_EXPR: + case TRUTH_AND_EXPR: + case TRUTH_XOR_EXPR: + case TRUTH_NOT_EXPR: case COND_EXPR: t = fix_conditional_array_notations (t); @@ -1246,8 +1254,6 @@ expand_array_notation_exprs (tree t) COND_EXPR_ELSE (t) = expand_array_notation_exprs (COND_EXPR_ELSE (t)); } - else - t = expand_array_notation_exprs (t); return t; case STATEMENT_LIST: { @@ -1284,6 +1290,10 @@ expand_array_notation_exprs (tree t) Replace those with just void zero node. */ t = void_zero_node; default: + for (int ii = 0; ii < TREE_CODE_LENGTH (TREE_CODE (t)); ii++) + if (contains_array_notation_expr (TREE_OPERAND (t, ii))) + TREE_OPERAND (t, ii) = + expand_array_notation_exprs (TREE_OPERAND (t, ii)); return t; } return t; diff --git a/gcc/c/c-objc-common.c b/gcc/c/c-objc-common.c index 8e73856ac18..e6be6ac89fd 100644 --- a/gcc/c/c-objc-common.c +++ b/gcc/c/c-objc-common.c @@ -30,6 +30,8 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "c-objc-common.h" +#include // For placement new. + static bool c_tree_printer (pretty_printer *, text_info *, const char *, int, bool, bool, bool); @@ -90,6 +92,7 @@ c_tree_printer (pretty_printer *pp, text_info *text, const char *spec, { tree t = NULL_TREE; tree name; + // FIXME: the next cast should be a dynamic_cast, when it is permitted. 
c_pretty_printer *cpp = (c_pretty_printer *) pp; pp->padding = pp_none; @@ -117,7 +120,7 @@ c_tree_printer (pretty_printer *pp, text_info *text, const char *spec, t = DECL_DEBUG_EXPR (t); if (!DECL_P (t)) { - pp_c_expression (cpp, t); + cpp->expression (t); return true; } } @@ -140,12 +143,12 @@ c_tree_printer (pretty_printer *pp, text_info *text, const char *spec, if (DECL_NAME (name)) pp_identifier (cpp, lang_hooks.decl_printable_name (name, 2)); else - pp_type_id (cpp, t); + cpp->type_id (t); return true; } else { - pp_type_id (cpp, t); + cpp->type_id (t); return true; } break; @@ -154,7 +157,7 @@ c_tree_printer (pretty_printer *pp, text_info *text, const char *spec, if (TREE_CODE (t) == IDENTIFIER_NODE) pp_identifier (cpp, IDENTIFIER_POINTER (t)); else - pp_expression (cpp, t); + cpp->expression (t); return true; case 'V': @@ -183,18 +186,14 @@ has_c_linkage (const_tree decl ATTRIBUTE_UNUSED) void c_initialize_diagnostics (diagnostic_context *context) { - pretty_printer *base; - c_pretty_printer *pp; - c_common_initialize_diagnostics (context); - base = context->printer; - pp = XNEW (c_pretty_printer); - memcpy (pp_base (pp), base, sizeof (pretty_printer)); - pp_c_pretty_printer_init (pp); - context->printer = (pretty_printer *) pp; + pretty_printer *base = context->printer; + c_pretty_printer *pp = XNEW (c_pretty_printer); + context->printer = new (pp) c_pretty_printer (); /* It is safe to free this object because it was previously XNEW()'d. */ + base->~pretty_printer (); XDELETE (base); } diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 30871db3623..e52533ecd6d 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "c-family/c-objc.h" #include "c-family/c-common.h" +#include "c-family/c-ubsan.h" /* Possible cases of implicit bad conversions. Used to select diagnostic messages in convert_for_assignment. 
*/ @@ -919,6 +920,13 @@ c_common_type (tree t1, tree t2) || TYPE_MAIN_VARIANT (t2) == long_double_type_node) return long_double_type_node; + /* Likewise, prefer double to float even if same size. + We got a couple of embedded targets with 32 bit doubles, and the + pdp11 might have 64 bit floats. */ + if (TYPE_MAIN_VARIANT (t1) == double_type_node + || TYPE_MAIN_VARIANT (t2) == double_type_node) + return double_type_node; + /* Otherwise prefer the unsigned one. */ if (TYPE_UNSIGNED (t1)) @@ -3156,7 +3164,9 @@ convert_arguments (tree typelist, vec *values, } else if (TREE_CODE (valtype) == REAL_TYPE && (TYPE_PRECISION (valtype) - < TYPE_PRECISION (double_type_node)) + <= TYPE_PRECISION (double_type_node)) + && TYPE_MAIN_VARIANT (valtype) != double_type_node + && TYPE_MAIN_VARIANT (valtype) != long_double_type_node && !DECIMAL_FLOAT_MODE_P (TYPE_MODE (valtype))) { if (type_generic) @@ -9532,6 +9542,15 @@ build_binary_op (location_t location, enum tree_code code, operands to truth-values. */ bool boolean_op = false; + /* Remember whether we're doing / or %. */ + bool doing_div_or_mod = false; + + /* Remember whether we're doing << or >>. */ + bool doing_shift = false; + + /* Tree holding instrumentation expression. 
*/ + tree instrument_expr = NULL; + if (location == UNKNOWN_LOCATION) location = input_location; @@ -9733,6 +9752,7 @@ build_binary_op (location_t location, enum tree_code code, case FLOOR_DIV_EXPR: case ROUND_DIV_EXPR: case EXACT_DIV_EXPR: + doing_div_or_mod = true; warn_for_div_by_zero (location, op1); if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE @@ -9780,6 +9800,7 @@ build_binary_op (location_t location, enum tree_code code, case TRUNC_MOD_EXPR: case FLOOR_MOD_EXPR: + doing_div_or_mod = true; warn_for_div_by_zero (location, op1); if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE @@ -9878,6 +9899,7 @@ build_binary_op (location_t location, enum tree_code code, else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE) && code1 == INTEGER_TYPE) { + doing_shift = true; if (TREE_CODE (op1) == INTEGER_CST) { if (tree_int_cst_sgn (op1) < 0) @@ -9930,6 +9952,7 @@ build_binary_op (location_t location, enum tree_code code, else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE) && code1 == INTEGER_TYPE) { + doing_shift = true; if (TREE_CODE (op1) == INTEGER_CST) { if (tree_int_cst_sgn (op1) < 0) @@ -9964,7 +9987,7 @@ build_binary_op (location_t location, enum tree_code code, if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) { tree intt; - if (TREE_TYPE (type0) != TREE_TYPE (type1)) + if (!vector_types_compatible_elements_p (type0, type1)) { error_at (location, "comparing vectors with different " "element types"); @@ -10101,7 +10124,7 @@ build_binary_op (location_t location, enum tree_code code, if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) { tree intt; - if (TREE_TYPE (type0) != TREE_TYPE (type1)) + if (!vector_types_compatible_elements_p (type0, type1)) { error_at (location, "comparing vectors with different " "element types"); @@ -10207,8 +10230,7 @@ build_binary_op (location_t location, enum tree_code code, if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE && (!tree_int_cst_equal (TYPE_SIZE (type0), TYPE_SIZE (type1)) - || 
!same_scalar_type_ignoring_signedness (TREE_TYPE (type0), - TREE_TYPE (type1)))) + || !vector_types_compatible_elements_p (type0, type1))) { binary_op_error (location, code, type0, type1); return error_mark_node; @@ -10474,6 +10496,21 @@ build_binary_op (location_t location, enum tree_code code, return error_mark_node; } + if (flag_sanitize & SANITIZE_UNDEFINED + && current_function_decl != 0 + && (doing_div_or_mod || doing_shift)) + { + /* OP0 and/or OP1 might have side-effects. */ + op0 = c_save_expr (op0); + op1 = c_save_expr (op1); + op0 = c_fully_fold (op0, false, NULL); + op1 = c_fully_fold (op1, false, NULL); + if (doing_div_or_mod) + instrument_expr = ubsan_instrument_division (location, op0, op1); + else if (doing_shift) + instrument_expr = ubsan_instrument_shift (location, code, op0, op1); + } + /* Treat expressions in initializers specially as they can't trap. */ if (int_const_or_overflow) ret = (require_constant_value @@ -10497,6 +10534,11 @@ build_binary_op (location_t location, enum tree_code code, if (semantic_result_type) ret = build1 (EXCESS_PRECISION_EXPR, semantic_result_type, ret); protected_set_expr_location (ret, location); + + if ((flag_sanitize & SANITIZE_UNDEFINED) && instrument_expr != NULL) + ret = fold_build2 (COMPOUND_EXPR, TREE_TYPE (ret), + instrument_expr, ret); + return ret; } diff --git a/gcc/cfg.c b/gcc/cfg.c index 9c6c939139c..cfada7395db 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -446,6 +446,21 @@ check_bb_profile (basic_block bb, FILE * file, int indent, int flags) (flags & TDF_COMMENT) ? ";; " : "", s_indent, (int) lsum, (int) bb->count); } + if (BB_PARTITION (bb) == BB_COLD_PARTITION) + { + /* Warn about inconsistencies in the partitioning that are + currently caused by profile insanities created via optimization. */ + if (!probably_never_executed_bb_p (fun, bb)) + fprintf (file, "%s%sBlock in cold partition with hot count\n", + (flags & TDF_COMMENT) ? 
";; " : "", s_indent); + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (!probably_never_executed_edge_p (fun, e)) + fprintf (file, + "%s%sBlock in cold partition with incoming hot edge\n", + (flags & TDF_COMMENT) ? ";; " : "", s_indent); + } + } } void diff --git a/gcc/cfganal.c b/gcc/cfganal.c index 63d17cede2b..c4ea7dd0a1f 100644 --- a/gcc/cfganal.c +++ b/gcc/cfganal.c @@ -340,6 +340,120 @@ verify_edge_list (FILE *f, struct edge_list *elist) } } + +/* Functions to compute control dependences. */ + +/* Indicate block BB is control dependent on an edge with index EDGE_INDEX. */ +void +control_dependences::set_control_dependence_map_bit (basic_block bb, + int edge_index) +{ + if (bb == ENTRY_BLOCK_PTR) + return; + gcc_assert (bb != EXIT_BLOCK_PTR); + bitmap_set_bit (control_dependence_map[bb->index], edge_index); +} + +/* Clear all control dependences for block BB. */ +void +control_dependences::clear_control_dependence_bitmap (basic_block bb) +{ + bitmap_clear (control_dependence_map[bb->index]); +} + +/* Find the immediate postdominator PDOM of the specified basic block BLOCK. + This function is necessary because some blocks have negative numbers. */ + +static inline basic_block +find_pdom (basic_block block) +{ + gcc_assert (block != ENTRY_BLOCK_PTR); + + if (block == EXIT_BLOCK_PTR) + return EXIT_BLOCK_PTR; + else + { + basic_block bb = get_immediate_dominator (CDI_POST_DOMINATORS, block); + if (! bb) + return EXIT_BLOCK_PTR; + return bb; + } +} + +/* Determine all blocks' control dependences on the given edge with edge_list + EL index EDGE_INDEX, ala Morgan, Section 3.6. 
*/ + +void +control_dependences::find_control_dependence (int edge_index) +{ + basic_block current_block; + basic_block ending_block; + + gcc_assert (INDEX_EDGE_PRED_BB (el, edge_index) != EXIT_BLOCK_PTR); + + if (INDEX_EDGE_PRED_BB (el, edge_index) == ENTRY_BLOCK_PTR) + ending_block = single_succ (ENTRY_BLOCK_PTR); + else + ending_block = find_pdom (INDEX_EDGE_PRED_BB (el, edge_index)); + + for (current_block = INDEX_EDGE_SUCC_BB (el, edge_index); + current_block != ending_block && current_block != EXIT_BLOCK_PTR; + current_block = find_pdom (current_block)) + { + edge e = INDEX_EDGE (el, edge_index); + + /* For abnormal edges, we don't make current_block control + dependent because instructions that throw are always necessary + anyway. */ + if (e->flags & EDGE_ABNORMAL) + continue; + + set_control_dependence_map_bit (current_block, edge_index); + } +} + +/* Record all blocks' control dependences on all edges in the edge + list EL, ala Morgan, Section 3.6. */ + +control_dependences::control_dependences (struct edge_list *edges) + : el (edges) +{ + timevar_push (TV_CONTROL_DEPENDENCES); + control_dependence_map.create (last_basic_block); + for (int i = 0; i < last_basic_block; ++i) + control_dependence_map.quick_push (BITMAP_ALLOC (NULL)); + for (int i = 0; i < NUM_EDGES (el); ++i) + find_control_dependence (i); + timevar_pop (TV_CONTROL_DEPENDENCES); +} + +/* Free control dependences and the associated edge list. */ + +control_dependences::~control_dependences () +{ + for (unsigned i = 0; i < control_dependence_map.length (); ++i) + BITMAP_FREE (control_dependence_map[i]); + control_dependence_map.release (); + free_edge_list (el); +} + +/* Returns the bitmap of edges the basic-block I is dependent on. */ + +bitmap +control_dependences::get_edges_dependent_on (int i) +{ + return control_dependence_map[i]; +} + +/* Returns the edge with index I from the edge list. 
*/ + +edge +control_dependences::get_edge (int i) +{ + return INDEX_EDGE (el, i); +} + + /* Given PRED and SUCC blocks, return the edge which connects the blocks. If no such edge exists, return NULL. */ diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index 99e0baa756e..6836a9e6e84 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -1137,7 +1137,7 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx i1, rtx i2) /* For address sanitizer, never crossjump __asan_report_* builtins, otherwise errors might be reported on incorrect lines. */ - if (flag_asan) + if (flag_sanitize & SANITIZE_ADDRESS) { rtx call = get_call_rtx_from (i1); if (call && GET_CODE (XEXP (XEXP (call, 0), 0)) == SYMBOL_REF) @@ -2807,10 +2807,21 @@ try_optimize_cfg (int mode) df_analyze (); } -#ifdef ENABLE_CHECKING if (changed) - verify_flow_info (); + { + /* Edge forwarding in particular can cause hot blocks previously + reached by both hot and cold blocks to become dominated only + by cold blocks. This will cause the verification below to fail, + and lead to now cold code in the hot section. This is not easy + to detect and fix during edge forwarding, and in some cases + is only visible after newly unreachable blocks are deleted, + which will be done in fixup_partitions. 
*/ + fixup_partitions (); + +#ifdef ENABLE_CHECKING + verify_flow_info (); #endif + } changed_overall |= changed; first_pass = false; @@ -3040,25 +3051,42 @@ execute_jump (void) return 0; } -struct rtl_opt_pass pass_jump = +namespace { + +const pass_data pass_data_jump = { - { - RTL_PASS, - "jump", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - execute_jump, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_JUMP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "jump", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_JUMP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; + +class pass_jump : public rtl_opt_pass +{ +public: + pass_jump(gcc::context *ctxt) + : rtl_opt_pass(pass_data_jump, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return execute_jump (); } + +}; // class pass_jump + +} // anon namespace + +rtl_opt_pass * +make_pass_jump (gcc::context *ctxt) +{ + return new pass_jump (ctxt); +} static unsigned int execute_jump2 (void) @@ -3067,22 +3095,39 @@ execute_jump2 (void) return 0; } -struct rtl_opt_pass pass_jump2 = +namespace { + +const pass_data pass_data_jump2 = { - { - RTL_PASS, - "jump2", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - execute_jump2, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_JUMP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "jump2", /* name */ + OPTGROUP_NONE, /* optinfo_flags 
*/ + false, /* has_gate */ + true, /* has_execute */ + TV_JUMP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; + +class pass_jump2 : public rtl_opt_pass +{ +public: + pass_jump2(gcc::context *ctxt) + : rtl_opt_pass(pass_data_jump2, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return execute_jump2 (); } + +}; // class pass_jump2 + +} // anon namespace + +rtl_opt_pass * +make_pass_jump2 (gcc::context *ctxt) +{ + return new pass_jump2 (ctxt); +} diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index c1872731240..4da5e7ea0cf 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -764,7 +764,7 @@ partition_stack_vars (void) sizes, as the shorter vars wouldn't be adequately protected. Don't do that for "large" (unsupported) alignment objects, those aren't protected anyway. */ - if (flag_asan && isize != jsize + if ((flag_sanitize & SANITIZE_ADDRESS) && isize != jsize && ialign * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT) break; @@ -940,7 +940,7 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data) alignb = stack_vars[i].alignb; if (alignb * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT) { - if (flag_asan && pred) + if ((flag_sanitize & SANITIZE_ADDRESS) && pred) { HOST_WIDE_INT prev_offset = frame_offset; tree repr_decl = NULL_TREE; @@ -1110,7 +1110,7 @@ defer_stack_allocation (tree var, bool toplevel) /* If stack protection is enabled, *all* stack variables must be deferred, so that we can re-order the strings to the top of the frame. Similarly for Address Sanitizer. */ - if (flag_stack_protect || flag_asan) + if (flag_stack_protect || (flag_sanitize & SANITIZE_ADDRESS)) return true; /* We handle "large" alignment via dynamic allocation. 
We want to handle @@ -1753,7 +1753,7 @@ expand_used_vars (void) expand_stack_vars (stack_protect_decl_phase_2, &data); } - if (flag_asan) + if (flag_sanitize & SANITIZE_ADDRESS) /* Phase 3, any partitions that need asan protection in addition to phase 1 and 2. */ expand_stack_vars (asan_decl_phase_3, &data); @@ -4906,25 +4906,42 @@ gimple_expand_cfg (void) return 0; } -struct rtl_opt_pass pass_expand = +namespace { + +const pass_data pass_data_expand = { - { - RTL_PASS, - "expand", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - gimple_expand_cfg, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_EXPAND, /* tv_id */ - PROP_ssa | PROP_gimple_leh | PROP_cfg + RTL_PASS, /* type */ + "expand", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_EXPAND, /* tv_id */ + ( PROP_ssa | PROP_gimple_leh | PROP_cfg | PROP_gimple_lcx - | PROP_gimple_lvec, /* properties_required */ - PROP_rtl, /* properties_provided */ - PROP_ssa | PROP_trees, /* properties_destroyed */ - TODO_verify_ssa | TODO_verify_flow - | TODO_verify_stmts, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } + | PROP_gimple_lvec ), /* properties_required */ + PROP_rtl, /* properties_provided */ + ( PROP_ssa | PROP_trees ), /* properties_destroyed */ + ( TODO_verify_ssa | TODO_verify_flow + | TODO_verify_stmts ), /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_expand : public rtl_opt_pass +{ +public: + pass_expand(gcc::context *ctxt) + : rtl_opt_pass(pass_data_expand, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return gimple_expand_cfg (); } + +}; // class pass_expand + +} // anon namespace + +rtl_opt_pass * +make_pass_expand (gcc::context *ctxt) +{ + return new pass_expand (ctxt); +} diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0f247996630..cd2f527bb47 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -168,6 +168,20 @@ struct GTY ((chain_next 
("%h.next"))) loop { describes what is the state of the estimation. */ enum loop_estimation estimate_state; + /* If > 0, an integer, where the user asserted that for any + I in [ 0, nb_iterations ) and for any J in + [ I, min ( I + safelen, nb_iterations ) ), the Ith and Jth iterations + of the loop can be safely evaluated concurrently. */ + int safelen; + + /* True if we should try harder to vectorize this loop. */ + bool force_vect; + + /* For SIMD loops, this is a unique identifier of the loop, referenced + by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE + builtins. */ + tree simduid; + /* Upper bound on number of iterations of a loop. */ struct nb_iter_bound *bounds; diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c index 36438b8409b..eb6b312d5c8 100644 --- a/gcc/cfgrtl.c +++ b/gcc/cfgrtl.c @@ -459,26 +459,43 @@ rest_of_pass_free_cfg (void) return 0; } -struct rtl_opt_pass pass_free_cfg = +namespace { + +const pass_data pass_data_free_cfg = { - { - RTL_PASS, - "*free_cfg", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - rest_of_pass_free_cfg, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - PROP_cfg, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "*free_cfg", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + PROP_cfg, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; +class pass_free_cfg : public rtl_opt_pass +{ +public: + pass_free_cfg(gcc::context *ctxt) + : rtl_opt_pass(pass_data_free_cfg, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return rest_of_pass_free_cfg (); } + +}; // class pass_free_cfg + +} // anon namespace + +rtl_opt_pass * 
+make_pass_free_cfg (gcc::context *ctxt) +{ + return new pass_free_cfg (ctxt); +} + /* Return RTX to emit after when we want to emit code on the entry of function. */ rtx entry_of_function (void) @@ -1341,6 +1358,43 @@ fixup_partition_crossing (edge e) } } +/* Called when block BB has been reassigned to the cold partition, + because it is now dominated by another cold block, + to ensure that the region crossing attributes are updated. */ + +static void +fixup_new_cold_bb (basic_block bb) +{ + edge e; + edge_iterator ei; + + /* This is called when a hot bb is found to now be dominated + by a cold bb and therefore needs to become cold. Therefore, + its preds will no longer be region crossing. Any non-dominating + preds that were previously hot would also have become cold + in the caller for the same region. Any preds that were previously + region-crossing will be adjusted in fixup_partition_crossing. */ + FOR_EACH_EDGE (e, ei, bb->preds) + { + fixup_partition_crossing (e); + } + + /* Possibly need to make bb's successor edges region crossing, + or remove stale region crossing. */ + FOR_EACH_EDGE (e, ei, bb->succs) + { + /* We can't have fall-through edges across partition boundaries. + Note that force_nonfallthru will do any necessary partition + boundary fixup by calling fixup_partition_crossing itself. */ + if ((e->flags & EDGE_FALLTHRU) + && BB_PARTITION (bb) != BB_PARTITION (e->dest) + && e->dest != EXIT_BLOCK_PTR) + force_nonfallthru (e); + else + fixup_partition_crossing (e); + } +} + /* Attempt to change code to redirect edge E to TARGET. Don't do that on expense of adding new instructions or reordering basic blocks. @@ -1979,6 +2033,14 @@ commit_edge_insertions (void) { basic_block bb; + /* Optimization passes that invoke this routine can cause hot blocks + previously reached by both hot and cold blocks to become dominated only + by cold blocks. This will cause the verification below to fail, + and lead to now cold code in the hot section. 
In some cases this + may only be visible after newly unreachable blocks are deleted, + which will be done by fixup_partitions. */ + fixup_partitions (); + #ifdef ENABLE_CHECKING verify_flow_info (); #endif @@ -2173,6 +2235,101 @@ get_last_bb_insn (basic_block bb) return end; } +/* Sanity check partition hotness to ensure that basic blocks in +   the cold partition don't dominate basic blocks in the hot partition. + If FLAG_ONLY is true, report violations as errors. Otherwise + re-mark the dominated blocks as cold, since this is run after + cfg optimizations that may make hot blocks previously reached + by both hot and cold blocks now only reachable along cold paths. */ + +static vec +find_partition_fixes (bool flag_only) +{ + basic_block bb; + vec bbs_in_cold_partition = vNULL; + vec bbs_to_fix = vNULL; + + /* Callers check this. */ + gcc_checking_assert (crtl->has_bb_partition); + + FOR_EACH_BB (bb) + if ((BB_PARTITION (bb) == BB_COLD_PARTITION)) + bbs_in_cold_partition.safe_push (bb); + + if (bbs_in_cold_partition.is_empty ()) + return vNULL; + + bool dom_calculated_here = !dom_info_available_p (CDI_DOMINATORS); + + if (dom_calculated_here) + calculate_dominance_info (CDI_DOMINATORS); + + while (! bbs_in_cold_partition.is_empty ()) + { + bb = bbs_in_cold_partition.pop (); + /* Any blocks dominated by a block in the cold section + must also be cold. */ + basic_block son; + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + /* If son is not yet cold, then mark it cold here and + enqueue it for further processing. 
*/ + if ((BB_PARTITION (son) != BB_COLD_PARTITION)) + { + if (flag_only) + error ("non-cold basic block %d dominated " + "by a block in the cold partition (%d)", son->index, bb->index); + else + BB_SET_PARTITION (son, BB_COLD_PARTITION); + bbs_to_fix.safe_push (son); + bbs_in_cold_partition.safe_push (son); + } + } + } + + if (dom_calculated_here) + free_dominance_info (CDI_DOMINATORS); + + return bbs_to_fix; +} + +/* Perform cleanup on the hot/cold bb partitioning after optimization + passes that modify the cfg. */ + +void +fixup_partitions (void) +{ + basic_block bb; + + if (!crtl->has_bb_partition) + return; + + /* Delete any blocks that became unreachable and weren't + already cleaned up, for example during edge forwarding + and convert_jumps_to_returns. This will expose more + opportunities for fixing the partition boundaries here. + Also, the calculation of the dominance graph during verification + will assert if there are unreachable nodes. */ + delete_unreachable_blocks (); + + /* If there are partitions, do a sanity check on them: A basic block in +   a cold partition cannot dominate a basic block in a hot partition. + Fixup any that now violate this requirement, as a result of edge + forwarding and unreachable block deletion.  */ + vec bbs_to_fix = find_partition_fixes (false); + + /* Do the partition fixup after all necessary blocks have been converted to + cold, so that we only update the region crossings the minimum number of + places, which can require forcing edges to be non fallthru. */ + while (! bbs_to_fix.is_empty ()) + { + bb = bbs_to_fix.pop (); + fixup_new_cold_bb (bb); + } +} + /* Verify, in the basic block chain, that there is at most one switch between hot/cold partitions. This condition will not be true until after reorder_basic_blocks is called. 
*/ @@ -2219,7 +2376,8 @@ verify_hot_cold_block_grouping (void) /* Perform several checks on the edges out of each block, such as the consistency of the branch probabilities, the correctness of hot/cold partition crossing edges, and the number of expected - successor edges. */ + successor edges. Also verify that the dominance relationship + between hot/cold blocks is sane. */ static int rtl_verify_edges (void) @@ -2382,6 +2540,14 @@ rtl_verify_edges (void) } } + /* If there are partitions, do a sanity check on them: A basic block in +   a cold partition cannot dominate a basic block in a hot partition.  */ + if (crtl->has_bb_partition && !err) + { + vec bbs_to_fix = find_partition_fixes (true); + err = !bbs_to_fix.is_empty (); + } + /* Clean up. */ return err; } @@ -2515,7 +2681,7 @@ rtl_verify_bb_pointers (void) and NOTE_INSN_BASIC_BLOCK - verify that no fall_thru edge crosses hot/cold partition boundaries - verify that there are no pending RTL branch predictions - - verify that there is a single hot/cold partition boundary after bbro + - verify that hot blocks are not dominated by cold blocks In future it can be extended check a lot of other stuff as well (reachability of basic blocks, life information, etc. etc.). */ @@ -2761,7 +2927,8 @@ rtl_verify_bb_layout (void) - check that all insns are in the basic blocks (except the switch handling code, barriers and notes) - check that all returns are followed by barriers - - check that all fallthru edge points to the adjacent blocks. 
*/ + - check that all fallthru edge points to the adjacent blocks + - verify that there is a single hot/cold partition boundary after bbro */ static int rtl_verify_flow_info (void) @@ -3297,45 +3464,79 @@ outof_cfg_layout_mode (void) return 0; } -struct rtl_opt_pass pass_into_cfg_layout_mode = +namespace { + +const pass_data pass_data_into_cfg_layout_mode = { - { - RTL_PASS, - "into_cfglayout", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - into_cfg_layout_mode, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_CFG, /* tv_id */ - 0, /* properties_required */ - PROP_cfglayout, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "into_cfglayout", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_CFG, /* tv_id */ + 0, /* properties_required */ + PROP_cfglayout, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; -struct rtl_opt_pass pass_outof_cfg_layout_mode = +class pass_into_cfg_layout_mode : public rtl_opt_pass +{ +public: + pass_into_cfg_layout_mode(gcc::context *ctxt) + : rtl_opt_pass(pass_data_into_cfg_layout_mode, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return into_cfg_layout_mode (); } + +}; // class pass_into_cfg_layout_mode + +} // anon namespace + +rtl_opt_pass * +make_pass_into_cfg_layout_mode (gcc::context *ctxt) { - { - RTL_PASS, - "outof_cfglayout", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - outof_cfg_layout_mode, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_CFG, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - PROP_cfglayout, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } + return new pass_into_cfg_layout_mode (ctxt); +} + 
+namespace { + +const pass_data pass_data_outof_cfg_layout_mode = +{ + RTL_PASS, /* type */ + "outof_cfglayout", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_CFG, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + PROP_cfglayout, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_outof_cfg_layout_mode : public rtl_opt_pass +{ +public: + pass_outof_cfg_layout_mode(gcc::context *ctxt) + : rtl_opt_pass(pass_data_outof_cfg_layout_mode, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return outof_cfg_layout_mode (); } + +}; // class pass_outof_cfg_layout_mode + +} // anon namespace + +rtl_opt_pass * +make_pass_outof_cfg_layout_mode (gcc::context *ctxt) +{ + return new pass_outof_cfg_layout_mode (ctxt); +} /* Link the basic blocks in the correct order, compacting the basic diff --git a/gcc/cgraph.c b/gcc/cgraph.c index be3411d6a07..f12bf1ba4be 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -673,15 +673,39 @@ edge_eq (const void *x, const void *y) /* Add call graph edge E to call site hash of its caller. */ +static inline void +cgraph_update_edge_in_call_site_hash (struct cgraph_edge *e) +{ + void **slot; + slot = htab_find_slot_with_hash (e->caller->call_site_hash, + e->call_stmt, + htab_hash_pointer (e->call_stmt), + INSERT); + *slot = e; +} + +/* Add call graph edge E to call site hash of its caller. */ + static inline void cgraph_add_edge_to_call_site_hash (struct cgraph_edge *e) { void **slot; + /* There are two speculative edges for every statement (one direct, + one indirect); always hash the direct one. 
*/ + if (e->speculative && e->indirect_unknown_callee) + return; slot = htab_find_slot_with_hash (e->caller->call_site_hash, e->call_stmt, htab_hash_pointer (e->call_stmt), INSERT); - gcc_assert (!*slot); + if (*slot) + { + gcc_assert (((struct cgraph_edge *)*slot)->speculative); + if (e->callee) + *slot = e; + return; + } + gcc_assert (!*slot || e->speculative); *slot = e; } @@ -732,14 +756,33 @@ cgraph_edge (struct cgraph_node *node, gimple call_stmt) } -/* Change field call_stmt of edge E to NEW_STMT. */ +/* Change field call_stmt of edge E to NEW_STMT. + If UPDATE_SPECULATIVE and E is any component of speculative + edge, then update all components. */ void -cgraph_set_call_stmt (struct cgraph_edge *e, gimple new_stmt) +cgraph_set_call_stmt (struct cgraph_edge *e, gimple new_stmt, + bool update_speculative) { tree decl; - if (e->caller->call_site_hash) + /* Speculative edges has three component, update all of them + when asked to. */ + if (update_speculative && e->speculative) + { + struct cgraph_edge *direct, *indirect; + struct ipa_ref *ref; + + cgraph_speculative_call_info (e, direct, indirect, ref); + cgraph_set_call_stmt (direct, new_stmt, false); + cgraph_set_call_stmt (indirect, new_stmt, false); + ref->stmt = new_stmt; + return; + } + + /* Only direct speculative edges go to call_site_hash. 
*/ + if (e->caller->call_site_hash + && (!e->speculative || !e->indirect_unknown_callee)) { htab_remove_elt_with_hash (e->caller->call_site_hash, e->call_stmt, @@ -755,7 +798,7 @@ cgraph_set_call_stmt (struct cgraph_edge *e, gimple new_stmt) struct cgraph_node *new_callee = cgraph_get_node (decl); gcc_checking_assert (new_callee); - cgraph_make_edge_direct (e, new_callee); + e = cgraph_make_edge_direct (e, new_callee); } push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl)); @@ -781,7 +824,10 @@ cgraph_create_edge_1 (struct cgraph_node *caller, struct cgraph_node *callee, { /* This is a rather expensive check possibly triggering construction of call stmt hashtable. */ - gcc_checking_assert (!cgraph_edge (caller, call_stmt)); +#ifdef ENABLE_CHECKING + struct cgraph_edge *e; + gcc_checking_assert (!(e=cgraph_edge (caller, call_stmt)) || e->speculative); +#endif gcc_assert (is_gimple_call (call_stmt)); } @@ -804,6 +850,7 @@ cgraph_create_edge_1 (struct cgraph_node *caller, struct cgraph_node *callee, edge->next_caller = NULL; edge->prev_callee = NULL; edge->next_callee = NULL; + edge->lto_stmt_uid = 0; edge->count = count; gcc_assert (count >= 0); @@ -828,6 +875,7 @@ cgraph_create_edge_1 (struct cgraph_node *caller, struct cgraph_node *callee, edge->indirect_info = NULL; edge->indirect_inlining_edge = 0; + edge->speculative = false; return edge; } @@ -879,6 +927,7 @@ cgraph_create_indirect_edge (struct cgraph_node *caller, gimple call_stmt, { struct cgraph_edge *edge = cgraph_create_edge_1 (caller, NULL, call_stmt, count, freq); + tree target; edge->indirect_unknown_callee = 1; initialize_inline_failed (edge); @@ -886,6 +935,23 @@ cgraph_create_indirect_edge (struct cgraph_node *caller, gimple call_stmt, edge->indirect_info = cgraph_allocate_init_indirect_info (); edge->indirect_info->ecf_flags = ecf_flags; + /* Record polymorphic call info. 
*/ + if (call_stmt + && (target = gimple_call_fn (call_stmt)) + && virtual_method_call_p (target)) + { + tree type = obj_type_ref_class (target); + + + /* Only record types can have virtual calls. */ + gcc_assert (TREE_CODE (type) == RECORD_TYPE); + edge->indirect_info->param_index = -1; + edge->indirect_info->otr_token + = tree_low_cst (OBJ_TYPE_REF_TOKEN (target), 1); + edge->indirect_info->otr_type = type; + edge->indirect_info->polymorphic = 1; + } + edge->next_callee = caller->indirect_calls; if (caller->indirect_calls) caller->indirect_calls->prev_callee = edge; @@ -937,6 +1003,9 @@ cgraph_free_edge (struct cgraph_edge *e) { int uid = e->uid; + if (e->indirect_info) + ggc_free (e->indirect_info); + /* Clear out the edge so we do not dangle pointers. */ memset (e, 0, sizeof (*e)); e->uid = uid; @@ -977,6 +1046,123 @@ cgraph_set_edge_callee (struct cgraph_edge *e, struct cgraph_node *n) e->callee = n; } +/* Turn edge E into speculative call calling N2. Update + the profile so the direct call is taken COUNT times + with FREQUENCY. + + At clone materialization time, the indirect call E will + be expanded as: + + if (call_dest == N2) + n2 (); + else + call call_dest + + At this time the function just creates the direct call, + the referencd representing the if conditional and attaches + them all to the orginal indirect call statement. + + Return direct edge created. 
*/ + +struct cgraph_edge * +cgraph_turn_edge_to_speculative (struct cgraph_edge *e, + struct cgraph_node *n2, + gcov_type direct_count, + int direct_frequency) +{ + struct cgraph_node *n = e->caller; + struct ipa_ref *ref; + struct cgraph_edge *e2; + + if (dump_file) + { + fprintf (dump_file, "Indirect call -> speculative call" + " %s/%i => %s/%i\n", + xstrdup (cgraph_node_name (n)), n->symbol.order, + xstrdup (cgraph_node_name (n2)), n2->symbol.order); + } + e->speculative = true; + e2 = cgraph_create_edge (n, n2, e->call_stmt, direct_count, direct_frequency); + initialize_inline_failed (e2); + e2->speculative = true; + if (TREE_NOTHROW (n2->symbol.decl)) + e2->can_throw_external = false; + else + e2->can_throw_external = e->can_throw_external; + e2->lto_stmt_uid = e->lto_stmt_uid; + e->count -= e2->count; + e->frequency -= e2->frequency; + cgraph_call_edge_duplication_hooks (e, e2); + ref = ipa_record_reference ((symtab_node)n, (symtab_node)n2, + IPA_REF_ADDR, e->call_stmt); + ref->lto_stmt_uid = e->lto_stmt_uid; + ref->speculative = e->speculative; + cgraph_mark_address_taken_node (n2); + return e2; +} + +/* Speculative call consist of three components: + 1) an indirect edge representing the original call + 2) an direct edge representing the new call + 3) ADDR_EXPR reference representing the speculative check. + All three components are attached to single statement (the indirect + call) and if one of them exists, all of them must exist. + + Given speculative call edge E, return all three components. + */ + +void +cgraph_speculative_call_info (struct cgraph_edge *e, + struct cgraph_edge *&direct, + struct cgraph_edge *&indirect, + struct ipa_ref *&reference) +{ + struct ipa_ref *ref; + int i; + struct cgraph_edge *e2; + + if (!e->indirect_unknown_callee) + for (e2 = e->caller->indirect_calls; + e2->call_stmt != e->call_stmt || e2->lto_stmt_uid != e->lto_stmt_uid; + e2 = e2->next_callee) + ; + else + { + e2 = e; + /* We can take advantage of the call stmt hash. 
*/ + if (e2->call_stmt) + { + e = cgraph_edge (e->caller, e2->call_stmt); + gcc_assert (e->speculative && !e->indirect_unknown_callee); + } + else + for (e = e->caller->callees; + e2->call_stmt != e->call_stmt + || e2->lto_stmt_uid != e->lto_stmt_uid; + e = e->next_callee) + ; + } + gcc_assert (e->speculative && e2->speculative); + direct = e; + indirect = e2; + + reference = NULL; + for (i = 0; ipa_ref_list_reference_iterate (&e->caller->symbol.ref_list, + i, ref); i++) + if (ref->speculative + && ((ref->stmt && ref->stmt == e->call_stmt) + || (!ref->stmt && ref->lto_stmt_uid == e->lto_stmt_uid))) + { + reference = ref; + break; + } + + /* Speculative edge always consist of all three components - direct edge, + indirect and reference. */ + + gcc_assert (e && e2 && ref); +} + /* Redirect callee of E to N. The function does not update underlying call expression. */ @@ -990,14 +1176,87 @@ cgraph_redirect_edge_callee (struct cgraph_edge *e, struct cgraph_node *n) cgraph_set_edge_callee (e, n); } +/* Speculative call EDGE turned out to be direct call to CALLE_DECL. + Remove the speculative call sequence and return edge representing the call. + It is up to caller to redirect the call as appropriate. 
*/ + +struct cgraph_edge * +cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl) +{ + struct cgraph_edge *e2; + struct ipa_ref *ref; + + gcc_assert (edge->speculative); + cgraph_speculative_call_info (edge, e2, edge, ref); + if (ref->referred->symbol.decl != callee_decl) + { + if (dump_file) + { + if (callee_decl) + { + fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has " + "turned out to have contradicting known target ", + xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order, + xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order); + print_generic_expr (dump_file, callee_decl, 0); + fprintf (dump_file, "\n"); + } + else + { + fprintf (dump_file, "Removing speculative call %s/%i => %s/%i\n", + xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order, + xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order); + } + } + } + else + { + struct cgraph_edge *tmp = edge; + if (dump_file) + fprintf (dump_file, "Speculative call turned into direct call.\n"); + edge = e2; + e2 = tmp; + /* FIXME: If EDGE is inlined, we should scale up the frequencies and counts + in the functions inlined through it. */ + } + edge->count += e2->count; + edge->frequency += e2->frequency; + if (edge->frequency > CGRAPH_FREQ_MAX) + edge->frequency = CGRAPH_FREQ_MAX; + edge->speculative = false; + e2->speculative = false; + ipa_remove_reference (ref); + if (e2->indirect_unknown_callee || e2->inline_failed) + cgraph_remove_edge (e2); + else + cgraph_remove_node_and_inline_clones (e2->callee, NULL); + if (edge->caller->call_site_hash) + cgraph_update_edge_in_call_site_hash (edge); + return edge; +} + /* Make an indirect EDGE with an unknown callee an ordinary edge leading to CALLEE. DELTA is an integer constant that is to be added to the this pointer (first parameter) to compensate for skipping a thunk adjustment. 
*/ -void +struct cgraph_edge * cgraph_make_edge_direct (struct cgraph_edge *edge, struct cgraph_node *callee) { + gcc_assert (edge->indirect_unknown_callee); + + /* If we are redirecting speculative call, make it non-speculative. */ + if (edge->indirect_unknown_callee && edge->speculative) + { + edge = cgraph_resolve_speculation (edge, callee->symbol.decl); + + /* On successful speculation just return the pre existing direct edge. */ + if (!edge->indirect_unknown_callee) + return edge; + } + edge->indirect_unknown_callee = 0; + ggc_free (edge->indirect_info); + edge->indirect_info = NULL; /* Get the edge out of the indirect edge list. */ if (edge->prev_callee) @@ -1024,6 +1283,7 @@ cgraph_make_edge_direct (struct cgraph_edge *edge, struct cgraph_node *callee) /* We need to re-determine the inlining status of the edge. */ initialize_inline_failed (edge); + return edge; } /* If necessary, change the function declaration in the call statement @@ -1039,6 +1299,82 @@ cgraph_redirect_edge_call_stmt_to_callee (struct cgraph_edge *e) struct cgraph_node *node; #endif + if (e->speculative) + { + struct cgraph_edge *e2; + gimple new_stmt; + struct ipa_ref *ref; + + cgraph_speculative_call_info (e, e, e2, ref); + /* If there already is an direct call (i.e. as a result of inliner's + substitution), forget about speculating. */ + if (decl) + e = cgraph_resolve_speculation (e, decl); + /* If types do not match, speculation was likely wrong. + The direct edge was posisbly redirected to the clone with a different + signature. We did not update the call statement yet, so compare it + with the reference that still points to the proper type. 
*/ + else if (!gimple_check_call_matching_types (e->call_stmt, + ref->referred->symbol.decl, + true)) + { + if (dump_file) + fprintf (dump_file, "Not expanding speculative call of %s/%i -> %s/%i\n" + "Type mismatch.\n", + xstrdup (cgraph_node_name (e->caller)), + e->caller->symbol.order, + xstrdup (cgraph_node_name (e->callee)), + e->callee->symbol.order); + e = cgraph_resolve_speculation (e, NULL); + /* We are producing the final function body and will throw away the + callgraph edges really soon. Reset the counts/frequencies to + keep verifier happy in the case of roundoff errors. */ + e->count = gimple_bb (e->call_stmt)->count; + e->frequency = compute_call_stmt_bb_frequency + (e->caller->symbol.decl, gimple_bb (e->call_stmt)); + } + /* Expand speculation into GIMPLE code. */ + else + { + if (dump_file) + fprintf (dump_file, + "Expanding speculative call of %s/%i -> %s/%i count:" + HOST_WIDEST_INT_PRINT_DEC"\n", + xstrdup (cgraph_node_name (e->caller)), + e->caller->symbol.order, + xstrdup (cgraph_node_name (e->callee)), + e->callee->symbol.order, + (HOST_WIDEST_INT)e->count); + gcc_assert (e2->speculative); + push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl)); + new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred), + e->count || e2->count + ? RDIV (e->count * REG_BR_PROB_BASE, + e->count + e2->count) + : e->frequency || e2->frequency + ? RDIV (e->frequency * REG_BR_PROB_BASE, + e->frequency + e2->frequency) + : REG_BR_PROB_BASE / 2, + e->count, e->count + e2->count); + e->speculative = false; + cgraph_set_call_stmt_including_clones (e->caller, e->call_stmt, + new_stmt, false); + e->frequency = compute_call_stmt_bb_frequency + (e->caller->symbol.decl, gimple_bb (e->call_stmt)); + e2->frequency = compute_call_stmt_bb_frequency + (e2->caller->symbol.decl, gimple_bb (e2->call_stmt)); + e2->speculative = false; + ref->speculative = false; + ref->stmt = NULL; + /* Indirect edges are not both in the call site hash. + get it updated. 
*/ + if (e->caller->call_site_hash) + cgraph_update_edge_in_call_site_hash (e2); + pop_cfun (); + /* Continue redirecting E to proper target. */ + } + } + if (e->indirect_unknown_callee || decl == e->callee->symbol.decl) return e->call_stmt; @@ -1099,7 +1435,7 @@ cgraph_redirect_edge_call_stmt_to_callee (struct cgraph_edge *e) update_stmt (new_stmt); } - cgraph_set_call_stmt_including_clones (e->caller, e->call_stmt, new_stmt); + cgraph_set_call_stmt_including_clones (e->caller, e->call_stmt, new_stmt, false); if (cgraph_dump_file) { @@ -1316,7 +1652,7 @@ void cgraph_release_function_body (struct cgraph_node *node) { node->ipa_transforms_to_apply.release (); - if (!node->abstract_and_needed && cgraph_state != CGRAPH_STATE_PARSING) + if (!node->used_as_abstract_origin && cgraph_state != CGRAPH_STATE_PARSING) { DECL_RESULT (node->symbol.decl) = NULL; DECL_ARGUMENTS (node->symbol.decl) = NULL; @@ -1324,9 +1660,11 @@ cgraph_release_function_body (struct cgraph_node *node) /* If the node is abstract and needed, then do not clear DECL_INITIAL of its associated function function declaration because it's needed to emit debug info later. */ - if (!node->abstract_and_needed && DECL_INITIAL (node->symbol.decl)) + if (!node->used_as_abstract_origin && DECL_INITIAL (node->symbol.decl)) DECL_INITIAL (node->symbol.decl) = error_mark_node; release_function_body (node->symbol.decl); + if (node->symbol.lto_file_data) + lto_free_function_in_decl_state_for_node ((symtab_node) node); } /* Remove the node from cgraph. 
*/ @@ -1544,6 +1882,9 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node) fprintf (f, " Availability: %s\n", cgraph_availability_names [cgraph_function_body_availability (node)]); + if (node->profile_id) + fprintf (f, " Profile id: %i\n", + node->profile_id); fprintf (f, " Function flags:"); if (node->count) fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x", @@ -1603,6 +1944,8 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node) if (edge->frequency) fprintf (f, "(%.2f per call) ", edge->frequency / (double)CGRAPH_FREQ_BASE); + if (edge->speculative) + fprintf(f, "(speculative) "); if (!edge->inline_failed) fprintf(f, "(inlined) "); if (edge->indirect_inlining_edge) @@ -1616,6 +1959,8 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node) { fprintf (f, "%s/%i ", cgraph_node_asm_name (edge->callee), edge->callee->symbol.order); + if (edge->speculative) + fprintf(f, "(speculative) "); if (!edge->inline_failed) fprintf(f, "(inlined) "); if (edge->indirect_inlining_edge) @@ -1697,11 +2042,12 @@ enum availability cgraph_function_body_availability (struct cgraph_node *node) { enum availability avail; - gcc_assert (cgraph_function_flags_ready); if (!node->symbol.analyzed) avail = AVAIL_NOT_AVAILABLE; else if (node->local.local) avail = AVAIL_LOCAL; + else if (node->symbol.alias && node->symbol.weakref) + cgraph_function_or_thunk_node (node, &avail); else if (!node->symbol.externally_visible) avail = AVAIL_AVAILABLE; /* Inline functions are safe to be analyzed even if their symbol can @@ -1933,128 +2279,6 @@ cgraph_set_pure_flag (struct cgraph_node *node, bool pure, bool looping) false); } -/* Data used by cgraph_propagate_frequency. */ - -struct cgraph_propagate_frequency_data -{ - bool maybe_unlikely_executed; - bool maybe_executed_once; - bool only_called_at_startup; - bool only_called_at_exit; -}; - -/* Worker for cgraph_propagate_frequency_1. 
*/ - -static bool -cgraph_propagate_frequency_1 (struct cgraph_node *node, void *data) -{ - struct cgraph_propagate_frequency_data *d; - struct cgraph_edge *edge; - - d = (struct cgraph_propagate_frequency_data *)data; - for (edge = node->callers; - edge && (d->maybe_unlikely_executed || d->maybe_executed_once - || d->only_called_at_startup || d->only_called_at_exit); - edge = edge->next_caller) - { - if (edge->caller != node) - { - d->only_called_at_startup &= edge->caller->only_called_at_startup; - /* It makes sense to put main() together with the static constructors. - It will be executed for sure, but rest of functions called from - main are definitely not at startup only. */ - if (MAIN_NAME_P (DECL_NAME (edge->caller->symbol.decl))) - d->only_called_at_startup = 0; - d->only_called_at_exit &= edge->caller->only_called_at_exit; - } - if (!edge->frequency) - continue; - switch (edge->caller->frequency) - { - case NODE_FREQUENCY_UNLIKELY_EXECUTED: - break; - case NODE_FREQUENCY_EXECUTED_ONCE: - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Called by %s that is executed once\n", - cgraph_node_name (edge->caller)); - d->maybe_unlikely_executed = false; - if (inline_edge_summary (edge)->loop_depth) - { - d->maybe_executed_once = false; - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Called in loop\n"); - } - break; - case NODE_FREQUENCY_HOT: - case NODE_FREQUENCY_NORMAL: - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Called by %s that is normal or hot\n", - cgraph_node_name (edge->caller)); - d->maybe_unlikely_executed = false; - d->maybe_executed_once = false; - break; - } - } - return edge != NULL; -} - -/* See if the frequency of NODE can be updated based on frequencies of its - callers. 
*/ -bool -cgraph_propagate_frequency (struct cgraph_node *node) -{ - struct cgraph_propagate_frequency_data d = {true, true, true, true}; - bool changed = false; - - if (!node->local.local) - return false; - gcc_assert (node->symbol.analyzed); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Processing frequency %s\n", cgraph_node_name (node)); - - cgraph_for_node_and_aliases (node, cgraph_propagate_frequency_1, &d, true); - - if ((d.only_called_at_startup && !d.only_called_at_exit) - && !node->only_called_at_startup) - { - node->only_called_at_startup = true; - if (dump_file) - fprintf (dump_file, "Node %s promoted to only called at startup.\n", - cgraph_node_name (node)); - changed = true; - } - if ((d.only_called_at_exit && !d.only_called_at_startup) - && !node->only_called_at_exit) - { - node->only_called_at_exit = true; - if (dump_file) - fprintf (dump_file, "Node %s promoted to only called at exit.\n", - cgraph_node_name (node)); - changed = true; - } - /* These come either from profile or user hints; never update them. */ - if (node->frequency == NODE_FREQUENCY_HOT - || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) - return changed; - if (d.maybe_unlikely_executed) - { - node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; - if (dump_file) - fprintf (dump_file, "Node %s promoted to unlikely executed.\n", - cgraph_node_name (node)); - changed = true; - } - else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) - { - node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; - if (dump_file) - fprintf (dump_file, "Node %s promoted to executed once.\n", - cgraph_node_name (node)); - changed = true; - } - return changed; -} - /* Return true when NODE can not return or throw and thus it is safe to ignore its side effects for IPA analysis. 
*/ @@ -2263,6 +2487,7 @@ verify_edge_count_and_frequency (struct cgraph_edge *e) } if (gimple_has_body_p (e->caller->symbol.decl) && !e->caller->global.inlined_to + && !e->speculative /* FIXME: Inline-analysis sets frequency to 0 when edge is optimized out. Remove this once edges are actually removed from the function at that time. */ && (e->frequency @@ -2317,7 +2542,7 @@ verify_edge_corresponds_to_fndecl (struct cgraph_edge *e, tree decl) /* We do not know if a node from a different partition is an alias or what it aliases and therefore cannot do the former_clone_of check reliably. */ - if (!node || node->symbol.in_other_partition) + if (!node || node->symbol.in_other_partition || e->callee->symbol.in_other_partition) return false; node = cgraph_function_or_thunk_node (node, NULL); @@ -2364,7 +2589,7 @@ verify_cgraph_node (struct cgraph_node *node) error ("inline clone in same comdat group list"); error_found = true; } - if (!node->symbol.definition && node->local.local) + if (!node->symbol.definition && !node->symbol.in_other_partition && node->local.local) { error ("local symbols must be defined"); error_found = true; @@ -2538,55 +2763,75 @@ verify_cgraph_node (struct cgraph_node *node) { if (this_cfun->cfg) { + pointer_set_t *stmts = pointer_set_create (); + int i; + struct ipa_ref *ref; + /* Reach the trees by walking over the CFG, and note the enclosing basic-blocks in the call edges. 
*/ FOR_EACH_BB_FN (this_block, this_cfun) - for (gsi = gsi_start_bb (this_block); - !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - if (is_gimple_call (stmt)) - { - struct cgraph_edge *e = cgraph_edge (node, stmt); - tree decl = gimple_call_fndecl (stmt); - if (e) - { - if (e->aux) - { - error ("shared call_stmt:"); - cgraph_debug_gimple_stmt (this_cfun, stmt); - error_found = true; - } - if (!e->indirect_unknown_callee) - { - if (verify_edge_corresponds_to_fndecl (e, decl)) - { - error ("edge points to wrong declaration:"); - debug_tree (e->callee->symbol.decl); - fprintf (stderr," Instead of:"); - debug_tree (decl); - error_found = true; - } - } - else if (decl) - { - error ("an indirect edge with unknown callee " - "corresponding to a call_stmt with " - "a known declaration:"); - error_found = true; - cgraph_debug_gimple_stmt (this_cfun, e->call_stmt); - } - e->aux = (void *)1; - } - else if (decl) - { - error ("missing callgraph edge for call stmt:"); - cgraph_debug_gimple_stmt (this_cfun, stmt); - error_found = true; - } - } + { + for (gsi = gsi_start_phis (this_block); + !gsi_end_p (gsi); gsi_next (&gsi)) + pointer_set_insert (stmts, gsi_stmt (gsi)); + for (gsi = gsi_start_bb (this_block); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + pointer_set_insert (stmts, stmt); + if (is_gimple_call (stmt)) + { + struct cgraph_edge *e = cgraph_edge (node, stmt); + tree decl = gimple_call_fndecl (stmt); + if (e) + { + if (e->aux) + { + error ("shared call_stmt:"); + cgraph_debug_gimple_stmt (this_cfun, stmt); + error_found = true; + } + if (!e->indirect_unknown_callee) + { + if (verify_edge_corresponds_to_fndecl (e, decl)) + { + error ("edge points to wrong declaration:"); + debug_tree (e->callee->symbol.decl); + fprintf (stderr," Instead of:"); + debug_tree (decl); + error_found = true; + } + } + else if (decl) + { + error ("an indirect edge with unknown callee " + "corresponding to a call_stmt with " + "a known 
declaration:"); + error_found = true; + cgraph_debug_gimple_stmt (this_cfun, e->call_stmt); + } + e->aux = (void *)1; + } + else if (decl) + { + error ("missing callgraph edge for call stmt:"); + cgraph_debug_gimple_stmt (this_cfun, stmt); + error_found = true; + } + } + } } + for (i = 0; + ipa_ref_list_reference_iterate (&node->symbol.ref_list, i, ref); + i++) + if (ref->stmt && !pointer_set_contains (stmts, ref->stmt)) + { + error ("reference to dead statement"); + cgraph_debug_gimple_stmt (this_cfun, ref->stmt); + error_found = true; + } + pointer_set_destroy (stmts); } else /* No CFG available?! */ @@ -2606,7 +2851,7 @@ verify_cgraph_node (struct cgraph_node *node) } for (e = node->indirect_calls; e; e = e->next_callee) { - if (!e->aux) + if (!e->aux && !e->speculative) { error ("an indirect edge from %s has no corresponding call_stmt", identifier_to_locale (cgraph_node_name (e->caller))); @@ -2708,4 +2953,45 @@ cgraph_function_node (struct cgraph_node *node, enum availability *availability) return node; } +/* When doing LTO, read NODE's body from disk if it is not already present. */ + +bool +cgraph_get_body (struct cgraph_node *node) +{ + struct lto_file_decl_data *file_data; + const char *data, *name; + size_t len; + tree decl = node->symbol.decl; + + if (DECL_RESULT (decl)) + return false; + + gcc_assert (in_lto_p); + + file_data = node->symbol.lto_file_data; + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + + /* We may have renamed the declaration, e.g., a static function. 
*/ + name = lto_get_decl_name_mapping (file_data, name); + + data = lto_get_section_data (file_data, LTO_section_function_body, + name, &len); + if (!data) + { + dump_cgraph_node (stderr, node); + fatal_error ("%s: section %s is missing", + file_data->file_name, + name); + } + + gcc_assert (DECL_STRUCT_FUNCTION (decl) == NULL); + + lto_input_function_body (file_data, node, data); + lto_stats.num_function_bodies++; + lto_free_section_data (file_data, LTO_section_function_body, name, + data, len); + lto_free_function_in_decl_state_for_node ((symtab_node) node); + return true; +} + #include "gt-cgraph.h" diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 3d6f3876f9c..a6a0a2438f7 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -300,10 +300,12 @@ struct GTY(()) cgraph_node { int count_materialization_scale; /* Unique id of the node. */ int uid; + /* ID assigned by the profiling. */ + unsigned int profile_id; /* Set when decl is an abstract function pointed to by the ABSTRACT_DECL_ORIGIN of a reachable function. */ - unsigned abstract_and_needed : 1; + unsigned used_as_abstract_origin : 1; /* Set once the function is lowered (i.e. its CFG is built). */ unsigned lowered : 1; /* Set once the function has been instantiated and its callee @@ -433,6 +435,10 @@ struct GTY(()) cgraph_indirect_call_info int param_index; /* ECF flags determined from the caller. */ int ecf_flags; + /* Profile_id of common target obtrained from profile. */ + int common_target_id; + /* Probability that call will land in function with COMMON_TARGET_ID. */ + int common_target_probability; /* Set when the call is a virtual call with the parameter being the associated object pointer rather than a simple direct call. */ @@ -483,6 +489,24 @@ struct GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_caller"))) cgrap unsigned int call_stmt_cannot_inline_p : 1; /* Can this call throw externally? 
*/ unsigned int can_throw_external : 1; + /* Edges with SPECULATIVE flag represents indirect calls that was + speculatively turned into direct (i.e. by profile feedback). + The final code sequence will have form: + + if (call_target == expected_fn) + expected_fn (); + else + call_target (); + + Every speculative call is represented by three components attached + to a same call statement: + 1) a direct call (to expected_fn) + 2) an indirect call (to call_target) + 3) a IPA_REF_ADDR refrence to expected_fn. + + Optimizers may later redirect direct call to clone, so 1) and 3) + do not need to necesarily agree with destination. */ + unsigned int speculative : 1; }; #define CGRAPH_FREQ_BASE 1000 @@ -597,6 +621,13 @@ symtab_node symtab_alias_ultimate_target (symtab_node, enum availability *avail = NULL); bool symtab_resolve_alias (symtab_node node, symtab_node target); void fixup_same_cpp_alias_visibility (symtab_node node, symtab_node target); +bool symtab_for_node_and_aliases (symtab_node, + bool (*) (symtab_node, void *), + void *, + bool); +symtab_node symtab_nonoverwritable_alias (symtab_node); +enum availability symtab_node_availability (symtab_node); +bool symtab_semantically_equivalent_p (symtab_node, symtab_node); /* In cgraph.c */ void dump_cgraph (FILE *); @@ -606,6 +637,7 @@ void debug_cgraph_node (struct cgraph_node *); void cgraph_remove_edge (struct cgraph_edge *); void cgraph_remove_node (struct cgraph_node *); void cgraph_release_function_body (struct cgraph_node *); +void release_function_body (tree); void cgraph_node_remove_callees (struct cgraph_node *node); struct cgraph_edge *cgraph_create_edge (struct cgraph_node *, struct cgraph_node *, @@ -622,7 +654,7 @@ struct cgraph_node * cgraph_add_thunk (struct cgraph_node *, tree, tree, bool, H HOST_WIDE_INT, tree, tree); struct cgraph_node *cgraph_node_for_asm (tree); struct cgraph_edge *cgraph_edge (struct cgraph_node *, gimple); -void cgraph_set_call_stmt (struct cgraph_edge *, gimple); +void 
cgraph_set_call_stmt (struct cgraph_edge *, gimple, bool update_speculative = true); void cgraph_update_edges_for_call_stmt (gimple, tree, gimple); struct cgraph_local_info *cgraph_local_info (tree); struct cgraph_global_info *cgraph_global_info (tree); @@ -634,7 +666,7 @@ void cgraph_call_edge_duplication_hooks (struct cgraph_edge *, struct cgraph_edge *); void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *); -void cgraph_make_edge_direct (struct cgraph_edge *, struct cgraph_node *); +struct cgraph_edge *cgraph_make_edge_direct (struct cgraph_edge *, struct cgraph_node *); bool cgraph_only_called_directly_p (struct cgraph_node *); bool cgraph_function_possibly_inlined_p (tree); @@ -669,12 +701,14 @@ void cgraph_mark_address_taken_node (struct cgraph_node *); typedef void (*cgraph_edge_hook)(struct cgraph_edge *, void *); typedef void (*cgraph_node_hook)(struct cgraph_node *, void *); +typedef void (*varpool_node_hook)(struct varpool_node *, void *); typedef void (*cgraph_2edge_hook)(struct cgraph_edge *, struct cgraph_edge *, void *); typedef void (*cgraph_2node_hook)(struct cgraph_node *, struct cgraph_node *, void *); struct cgraph_edge_hook_list; struct cgraph_node_hook_list; +struct varpool_node_hook_list; struct cgraph_2edge_hook_list; struct cgraph_2node_hook_list; struct cgraph_edge_hook_list *cgraph_add_edge_removal_hook (cgraph_edge_hook, void *); @@ -682,18 +716,32 @@ void cgraph_remove_edge_removal_hook (struct cgraph_edge_hook_list *); struct cgraph_node_hook_list *cgraph_add_node_removal_hook (cgraph_node_hook, void *); void cgraph_remove_node_removal_hook (struct cgraph_node_hook_list *); +struct varpool_node_hook_list *varpool_add_node_removal_hook (varpool_node_hook, + void *); +void varpool_remove_node_removal_hook (struct varpool_node_hook_list *); struct cgraph_node_hook_list *cgraph_add_function_insertion_hook (cgraph_node_hook, void *); void cgraph_remove_function_insertion_hook (struct cgraph_node_hook_list *); 
+struct varpool_node_hook_list *varpool_add_variable_insertion_hook (varpool_node_hook, + void *); +void varpool_remove_variable_insertion_hook (struct varpool_node_hook_list *); void cgraph_call_function_insertion_hooks (struct cgraph_node *node); struct cgraph_2edge_hook_list *cgraph_add_edge_duplication_hook (cgraph_2edge_hook, void *); void cgraph_remove_edge_duplication_hook (struct cgraph_2edge_hook_list *); struct cgraph_2node_hook_list *cgraph_add_node_duplication_hook (cgraph_2node_hook, void *); void cgraph_remove_node_duplication_hook (struct cgraph_2node_hook_list *); gimple cgraph_redirect_edge_call_stmt_to_callee (struct cgraph_edge *); -bool cgraph_propagate_frequency (struct cgraph_node *node); struct cgraph_node * cgraph_function_node (struct cgraph_node *, enum availability *avail = NULL); +bool cgraph_get_body (struct cgraph_node *node); +struct cgraph_edge * +cgraph_turn_edge_to_speculative (struct cgraph_edge *, + struct cgraph_node *, + gcov_type, int); +void cgraph_speculative_call_info (struct cgraph_edge *, + struct cgraph_edge *&, + struct cgraph_edge *&, + struct ipa_ref *&); /* In cgraphunit.c */ struct asm_node *add_asm_node (tree); @@ -709,6 +757,7 @@ void fixup_same_cpp_alias_visibility (symtab_node, symtab_node target, tree); IN_SSA is true if the gimple is in SSA. 
*/ basic_block init_lowered_empty_function (tree, bool); void cgraph_reset_node (struct cgraph_node *); +void expand_thunk (struct cgraph_node *); /* In cgraphclones.c */ @@ -726,7 +775,8 @@ struct cgraph_node * cgraph_create_virtual_clone (struct cgraph_node *old_node, const char *clone_name); struct cgraph_node *cgraph_find_replacement_node (struct cgraph_node *); bool cgraph_remove_node_and_inline_clones (struct cgraph_node *, struct cgraph_node *); -void cgraph_set_call_stmt_including_clones (struct cgraph_node *, gimple, gimple); +void cgraph_set_call_stmt_including_clones (struct cgraph_node *, gimple, gimple, + bool update_speculative = true); void cgraph_create_edge_including_clones (struct cgraph_node *, struct cgraph_node *, gimple, gimple, gcov_type, int, @@ -741,6 +791,7 @@ struct cgraph_node *cgraph_function_versioning (struct cgraph_node *, basic_block, const char *); void tree_function_versioning (tree, tree, vec *, bool, bitmap, bool, bitmap, basic_block); +struct cgraph_edge *cgraph_resolve_speculation (struct cgraph_edge *, tree); /* In cgraphbuild.c */ unsigned int rebuild_cgraph_edges (void); @@ -1347,13 +1398,25 @@ symtab_real_symbol_p (symtab_node node) { struct cgraph_node *cnode; + if (DECL_ABSTRACT (node->symbol.decl)) + return false; if (!is_a (node)) return true; cnode = cgraph (node); if (cnode->global.inlined_to) return false; - if (cnode->abstract_and_needed) - return false; return true; } + +/* Return true if NODE can be discarded by linker from the binary. 
*/ + +static inline bool +symtab_can_be_discarded (symtab_node node) +{ + return (DECL_EXTERNAL (node->symbol.decl) + || (DECL_ONE_ONLY (node->symbol.decl) + && node->symbol.resolution != LDPR_PREVAILING_DEF + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY_EXP)); +} #endif /* GCC_CGRAPH_H */ diff --git a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c index fb1515d6037..b9c112fef4d 100644 --- a/gcc/cgraphbuild.c +++ b/gcc/cgraphbuild.c @@ -318,6 +318,9 @@ build_cgraph_edges (void) gimple stmt = gsi_stmt (gsi); tree decl; + if (is_gimple_debug (stmt)) + continue; + if (is_gimple_call (stmt)) { int freq = compute_call_stmt_bb_frequency (current_function_decl, @@ -370,26 +373,43 @@ build_cgraph_edges (void) return 0; } -struct gimple_opt_pass pass_build_cgraph_edges = +namespace { + +const pass_data pass_data_build_cgraph_edges = { - { - GIMPLE_PASS, - "*build_cgraph_edges", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - build_cgraph_edges, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - PROP_cfg, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } + GIMPLE_PASS, /* type */ + "*build_cgraph_edges", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; +class pass_build_cgraph_edges : public gimple_opt_pass +{ +public: + pass_build_cgraph_edges(gcc::context *ctxt) + : gimple_opt_pass(pass_data_build_cgraph_edges, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return build_cgraph_edges (); } + +}; // class pass_build_cgraph_edges + +} // anon namespace + +gimple_opt_pass * 
+make_pass_build_cgraph_edges (gcc::context *ctxt) +{ + return new pass_build_cgraph_edges (ctxt); +} + /* Record references to functions and other variables present in the initial value of DECL, a variable. When ONLY_VARS is true, we mark needed only variables, not functions. */ @@ -463,8 +483,15 @@ cgraph_rebuild_references (void) basic_block bb; struct cgraph_node *node = cgraph_get_node (current_function_decl); gimple_stmt_iterator gsi; + struct ipa_ref *ref; + int i; - ipa_remove_all_references (&node->symbol.ref_list); + /* Keep speculative references for further cgraph edge expansion. */ + for (i = 0; ipa_ref_list_reference_iterate (&node->symbol.ref_list, i, ref);) + if (!ref->speculative) + ipa_remove_reference (ref); + else + i++; node->count = ENTRY_BLOCK_PTR->count; @@ -478,50 +505,90 @@ cgraph_rebuild_references (void) record_eh_tables (node, cfun); } -struct gimple_opt_pass pass_rebuild_cgraph_edges = +namespace { + +const pass_data pass_data_rebuild_cgraph_edges = { - { - GIMPLE_PASS, - "*rebuild_cgraph_edges", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - rebuild_cgraph_edges, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_CGRAPH, /* tv_id */ - PROP_cfg, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + GIMPLE_PASS, /* type */ + "*rebuild_cgraph_edges", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_CGRAPH, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; +class pass_rebuild_cgraph_edges : public gimple_opt_pass +{ +public: + pass_rebuild_cgraph_edges(gcc::context *ctxt) + : gimple_opt_pass(pass_data_rebuild_cgraph_edges, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new 
pass_rebuild_cgraph_edges (ctxt_); } + unsigned int execute () { return rebuild_cgraph_edges (); } + +}; // class pass_rebuild_cgraph_edges + +} // anon namespace + +gimple_opt_pass * +make_pass_rebuild_cgraph_edges (gcc::context *ctxt) +{ + return new pass_rebuild_cgraph_edges (ctxt); +} + static unsigned int remove_cgraph_callee_edges (void) { - cgraph_node_remove_callees (cgraph_get_node (current_function_decl)); + struct cgraph_node *node = cgraph_get_node (current_function_decl); + cgraph_node_remove_callees (node); + ipa_remove_all_references (&node->symbol.ref_list); return 0; } -struct gimple_opt_pass pass_remove_cgraph_callee_edges = +namespace { + +const pass_data pass_data_remove_cgraph_callee_edges = { - { - GIMPLE_PASS, - "*remove_cgraph_callee_edges", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - remove_cgraph_callee_edges, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + GIMPLE_PASS, /* type */ + "*remove_cgraph_callee_edges", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_remove_cgraph_callee_edges : public gimple_opt_pass +{ +public: + pass_remove_cgraph_callee_edges(gcc::context *ctxt) + : gimple_opt_pass(pass_data_remove_cgraph_callee_edges, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { + return new pass_remove_cgraph_callee_edges (ctxt_); + } + unsigned int execute () { return remove_cgraph_callee_edges (); } + +}; // class pass_remove_cgraph_callee_edges + +} // anon namespace + +gimple_opt_pass * +make_pass_remove_cgraph_callee_edges (gcc::context *ctxt) +{ + 
return new pass_remove_cgraph_callee_edges (ctxt); +} diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 04cb990cc86..54b97b91c78 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -147,6 +147,7 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n, /* Clone flags that depend on call_stmt availability manually. */ new_edge->can_throw_external = e->can_throw_external; new_edge->call_stmt_cannot_inline_p = e->call_stmt_cannot_inline_p; + new_edge->speculative = e->speculative; if (update_original) { e->count -= new_edge->count; @@ -251,7 +252,7 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, return new_node; } -/* Create a new name for clone of DECL, add SUFFIX. Returns an identifier. */ +/* Return a new assembler name for a clone of DECL with SUFFIX. */ static GTY(()) unsigned int clone_fn_id_num; @@ -292,10 +293,11 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, tree old_decl = old_node->symbol.decl; struct cgraph_node *new_node = NULL; tree new_decl; - size_t i; + size_t len, i; struct ipa_replace_map *map; + char *name; - if (!flag_wpa) + if (!in_lto_p) gcc_checking_assert (tree_versionable_function_p (old_decl)); gcc_assert (old_node->local.can_change_signature || !args_to_skip); @@ -317,8 +319,13 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, sometimes storing only clone decl instead of original. */ /* Generate a new name for the new version. 
*/ - DECL_NAME (new_decl) = clone_function_name (old_decl, suffix); - SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl)); + len = IDENTIFIER_LENGTH (DECL_NAME (old_decl)); + name = XALLOCAVEC (char, len + strlen (suffix) + 2); + memcpy (name, IDENTIFIER_POINTER (DECL_NAME (old_decl)), len); + strcpy (name + len + 1, suffix); + name[len] = '.'; + DECL_NAME (new_decl) = get_identifier (name); + SET_DECL_ASSEMBLER_NAME (new_decl, clone_function_name (old_decl, suffix)); SET_DECL_RTL (new_decl, NULL); new_node = cgraph_clone_node (old_node, new_decl, old_node->count, @@ -474,17 +481,21 @@ cgraph_find_replacement_node (struct cgraph_node *node) } /* Like cgraph_set_call_stmt but walk the clone tree and update all - clones sharing the same function body. */ + clones sharing the same function body. + When WHOLE_SPECULATIVE_EDGES is true, all three components of + speculative edge gets updated. Otherwise we update only direct + call. */ void cgraph_set_call_stmt_including_clones (struct cgraph_node *orig, - gimple old_stmt, gimple new_stmt) + gimple old_stmt, gimple new_stmt, + bool update_speculative) { struct cgraph_node *node; struct cgraph_edge *edge = cgraph_edge (orig, old_stmt); if (edge) - cgraph_set_call_stmt (edge, new_stmt); + cgraph_set_call_stmt (edge, new_stmt, update_speculative); node = orig->clones; if (node) @@ -492,7 +503,23 @@ cgraph_set_call_stmt_including_clones (struct cgraph_node *orig, { struct cgraph_edge *edge = cgraph_edge (node, old_stmt); if (edge) - cgraph_set_call_stmt (edge, new_stmt); + { + cgraph_set_call_stmt (edge, new_stmt, update_speculative); + /* If UPDATE_SPECULATIVE is false, it means that we are turning + speculative call into a real code sequence. Update the + callgraph edges. 
*/ + if (edge->speculative && !update_speculative) + { + struct cgraph_edge *direct, *indirect; + struct ipa_ref *ref; + + gcc_assert (!edge->indirect_unknown_callee); + cgraph_speculative_call_info (edge, direct, indirect, ref); + direct->speculative = false; + indirect->speculative = false; + ref->speculative = false; + } + } if (node->clones) node = node->clones; else if (node->next_sibling_clone) @@ -811,6 +838,7 @@ cgraph_materialize_all_clones (void) { struct cgraph_node *node; bool stabilized = false; + if (cgraph_dump_file) fprintf (cgraph_dump_file, "Materializing clones\n"); @@ -829,6 +857,8 @@ cgraph_materialize_all_clones (void) if (node->clone_of && node->symbol.decl != node->clone_of->symbol.decl && !gimple_has_body_p (node->symbol.decl)) { + if (!node->clone_of->clone_of) + cgraph_get_body (node->clone_of); if (gimple_has_body_p (node->clone_of->symbol.decl)) { if (cgraph_dump_file) @@ -874,7 +904,12 @@ cgraph_materialize_all_clones (void) } FOR_EACH_FUNCTION (node) if (!node->symbol.analyzed && node->callees) - cgraph_node_remove_callees (node); + { + cgraph_node_remove_callees (node); + ipa_remove_all_references (&node->symbol.ref_list); + } + else + ipa_clear_stmts_in_references ((symtab_node)node); if (cgraph_dump_file) fprintf (cgraph_dump_file, "Materialization Call site updates done.\n"); #ifdef ENABLE_CHECKING diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index b82c2e01b57..9681df518cd 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -194,6 +194,8 @@ along with GCC; see the file COPYING3. If not see #include "except.h" #include "cfgloop.h" #include "regset.h" /* FIXME: For reg_obstack. */ +#include "context.h" +#include "pass_manager.h" /* Queue of cgraph nodes scheduled to be added into cgraph. This is a secondary queue used during optimization to accommodate passes that @@ -233,10 +235,6 @@ decide_is_symbol_needed (symtab_node node) if (!node->symbol.definition) return false; - /* Devirtualization may access these. 
*/ - if (DECL_VIRTUAL_P (decl) && optimize) - return true; - if (DECL_EXTERNAL (decl)) return false; @@ -321,13 +319,10 @@ cgraph_process_new_functions (void) if (!node->symbol.analyzed) analyze_function (node); push_cfun (DECL_STRUCT_FUNCTION (fndecl)); - if ((cgraph_state == CGRAPH_STATE_IPA_SSA + if (cgraph_state == CGRAPH_STATE_IPA_SSA && !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl))) - /* When not optimizing, be sure we run early local passes anyway - to expand OMP. */ - || !optimize) - execute_pass_list (pass_early_local_passes.pass.sub); - else + g->get_passes ()->execute_early_local_passes (); + else if (inline_summary_vec != NULL) compute_inline_parameters (node, true); free_dominance_info (CDI_POST_DOMINATORS); free_dominance_info (CDI_DOMINATORS); @@ -405,17 +400,20 @@ referred_to_p (symtab_node node) } /* DECL has been parsed. Take it, queue it, compile it at the whim of the - logic in effect. If NESTED is true, then our caller cannot stand to have + logic in effect. If NO_COLLECT is true, then our caller cannot stand to have the garbage collector run at the moment. We would need to either create a new GC context, or just not compile right now. */ void -cgraph_finalize_function (tree decl, bool nested) +cgraph_finalize_function (tree decl, bool no_collect) { struct cgraph_node *node = cgraph_get_create_node (decl); if (node->symbol.definition) { + /* Nested functions should only be defined once. 
*/ + gcc_assert (!DECL_CONTEXT (decl) + || TREE_CODE (DECL_CONTEXT (decl)) != FUNCTION_DECL); cgraph_reset_node (node); node->local.redefined_extern_inline = true; } @@ -454,7 +452,7 @@ cgraph_finalize_function (tree decl, bool nested) if (warn_unused_parameter) do_warn_unused_parameter (decl); - if (!nested) + if (!no_collect) ggc_collect (); if (cgraph_state == CGRAPH_STATE_CONSTRUCTION @@ -478,6 +476,7 @@ cgraph_finalize_function (tree decl, bool nested) void cgraph_add_new_function (tree fndecl, bool lowered) { + gcc::pass_manager *passes = g->get_passes (); struct cgraph_node *node; switch (cgraph_state) { @@ -508,8 +507,8 @@ cgraph_add_new_function (tree fndecl, bool lowered) push_cfun (DECL_STRUCT_FUNCTION (fndecl)); gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); - execute_pass_list (all_lowering_passes); - execute_pass_list (pass_early_local_passes.pass.sub); + execute_pass_list (passes->all_lowering_passes); + passes->execute_early_local_passes (); bitmap_obstack_release (NULL); pop_cfun (); @@ -534,7 +533,7 @@ cgraph_add_new_function (tree fndecl, bool lowered) gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl))) - execute_pass_list (pass_early_local_passes.pass.sub); + g->get_passes ()->execute_early_local_passes (); bitmap_obstack_release (NULL); pop_cfun (); expand_function (node); @@ -640,7 +639,7 @@ analyze_function (struct cgraph_node *node) gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); - execute_pass_list (all_lowering_passes); + execute_pass_list (g->get_passes ()->all_lowering_passes); free_dominance_info (CDI_POST_DOMINATORS); free_dominance_info (CDI_DOMINATORS); compact_blocks (); @@ -822,6 +821,82 @@ varpool_finalize_decl (tree decl) varpool_assemble_decl (node); } +/* EDGE is an polymorphic call. Mark all possible targets as reachable + and if there is only one target, perform trivial devirtualization. 
+ REACHABLE_CALL_TARGETS collects target lists we already walked to + avoid udplicate work. */ + +static void +walk_polymorphic_call_targets (pointer_set_t *reachable_call_targets, + struct cgraph_edge *edge) +{ + unsigned int i; + void *cache_token; + bool final; + vec targets + = possible_polymorphic_call_targets + (edge, &final, &cache_token); + + if (!pointer_set_insert (reachable_call_targets, + cache_token)) + { + if (cgraph_dump_file) + dump_possible_polymorphic_call_targets + (cgraph_dump_file, edge); + + for (i = 0; i < targets.length(); i++) + { + /* Do not bother to mark virtual methods in anonymous namespace; + either we will find use of virtual table defining it, or it is + unused. */ + if (targets[i]->symbol.definition + && TREE_CODE + (TREE_TYPE (targets[i]->symbol.decl)) + == METHOD_TYPE + && !type_in_anonymous_namespace_p + (method_class_type + (TREE_TYPE (targets[i]->symbol.decl)))) + enqueue_node ((symtab_node) targets[i]); + } + } + + /* Very trivial devirtualization; when the type is + final or anonymous (so we know all its derivation) + and there is only one possible virtual call target, + make the edge direct. 
*/ + if (final) + { + if (targets.length() <= 1) + { + cgraph_node *target; + if (targets.length () == 1) + target = targets[0]; + else + target = cgraph_get_create_node + (builtin_decl_implicit (BUILT_IN_UNREACHABLE)); + + if (cgraph_dump_file) + { + fprintf (cgraph_dump_file, + "Devirtualizing call: "); + print_gimple_stmt (cgraph_dump_file, + edge->call_stmt, 0, + TDF_SLIM); + } + cgraph_make_edge_direct (edge, target); + cgraph_redirect_edge_call_stmt_to_callee (edge); + if (cgraph_dump_file) + { + fprintf (cgraph_dump_file, + "Devirtualized as: "); + print_gimple_stmt (cgraph_dump_file, + edge->call_stmt, 0, + TDF_SLIM); + } + } + } +} + /* Discover all functions and variables that are trivially needed, analyze them as well as all functions and variables referred by them */ @@ -835,6 +910,7 @@ analyze_functions (void) struct cgraph_node *first_handled = first_analyzed; static struct varpool_node *first_analyzed_var; struct varpool_node *first_handled_var = first_analyzed_var; + struct pointer_set_t *reachable_call_targets = pointer_set_create (); symtab_node node, next; int i; @@ -850,6 +926,8 @@ analyze_functions (void) FOR_EACH_SYMBOL (node) if (node->symbol.cpp_implicit_alias) fixup_same_cpp_alias_visibility (node, symtab_alias_target (node)); + if (optimize && flag_devirtualize) + build_type_inheritance_graph (); /* Analysis adds static variables that in turn adds references to new functions. So we need to iterate the process until it stabilize. 
*/ @@ -872,6 +950,8 @@ analyze_functions (void) changed = true; if (cgraph_dump_file) fprintf (cgraph_dump_file, " %s", symtab_node_asm_name (node)); + if (!changed && cgraph_dump_file) + fprintf (cgraph_dump_file, "\n"); } if (node == (symtab_node)first_analyzed || node == (symtab_node)first_analyzed_var) @@ -916,6 +996,18 @@ analyze_functions (void) for (edge = cnode->callees; edge; edge = edge->next_callee) if (edge->callee->symbol.definition) enqueue_node ((symtab_node)edge->callee); + if (optimize && flag_devirtualize) + { + struct cgraph_edge *next; + + for (edge = cnode->indirect_calls; edge; edge = next) + { + next = edge->next_callee; + if (edge->indirect_info->polymorphic) + walk_polymorphic_call_targets (reachable_call_targets, + edge); + } + } /* If decl is a clone of an abstract function, mark that abstract function so that we don't release its body. @@ -925,7 +1017,7 @@ analyze_functions (void) { struct cgraph_node *origin_node = cgraph_get_node (DECL_ABSTRACT_ORIGIN (decl)); - origin_node->abstract_and_needed = true; + origin_node->used_as_abstract_origin = true; } } else @@ -949,6 +1041,8 @@ analyze_functions (void) cgraph_process_new_functions (); } } + if (optimize && flag_devirtualize) + update_type_inheritance_graph (); /* Collect entry points to the unit. */ if (cgraph_dump_file) @@ -988,6 +1082,8 @@ analyze_functions (void) } node->symbol.aux = NULL; } + for (;node; node = node->symbol.next) + node->symbol.aux = NULL; first_analyzed = cgraph_first_function (); first_analyzed_var = varpool_first_variable (); if (cgraph_dump_file) @@ -996,12 +1092,18 @@ analyze_functions (void) dump_symtab (cgraph_dump_file); } bitmap_obstack_release (NULL); + pointer_set_destroy (reachable_call_targets); ggc_collect (); + /* Initialize assembler name hash, in particular we want to trigger C++ + mangling and same body alias creation before we free DECL_ARGUMENTS + used by it. 
*/ + if (!seen_error ()) + symtab_initialize_asm_name_hash (); } /* Translate the ugly representation of aliases as alias pairs into nice representation in callgraph. We don't handle all cases yet, - unforutnately. */ + unfortunately. */ static void handle_alias_pairs (void) @@ -1013,10 +1115,11 @@ handle_alias_pairs (void) { symtab_node target_node = symtab_node_for_asm (p->target); - /* Weakrefs with target not defined in current unit are easy to handle; they - behave just as external variables except we need to note the alias flag - to later output the weakref pseudo op into asm file. */ - if (!target_node && lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)) != NULL) + /* Weakrefs with target not defined in current unit are easy to handle: + they behave just as external variables except we need to note the + alias flag to later output the weakref pseudo op into asm file. */ + if (!target_node + && lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)) != NULL) { symtab_node node = symtab_get_node (p->decl); if (node) @@ -1031,6 +1134,9 @@ handle_alias_pairs (void) else if (!target_node) { error ("%q+D aliased to undefined symbol %qE", p->decl, p->target); + symtab_node node = symtab_get_node (p->decl); + if (node) + node->symbol.alias = false; alias_pairs->unordered_remove (i); continue; } @@ -1324,8 +1430,8 @@ thunk_adjust (gimple_stmt_iterator * bsi, /* Produce assembler for thunk NODE. 
*/ -static void -assemble_thunk (struct cgraph_node *node) +void +expand_thunk (struct cgraph_node *node) { bool this_adjusting = node->thunk.this_adjusting; HOST_WIDE_INT fixed_offset = node->thunk.fixed_offset; @@ -1333,7 +1439,11 @@ assemble_thunk (struct cgraph_node *node) tree virtual_offset = NULL; tree alias = node->callees->callee->symbol.decl; tree thunk_fndecl = node->symbol.decl; - tree a = DECL_ARGUMENTS (thunk_fndecl); + tree a; + + if (in_lto_p) + cgraph_get_body (node); + a = DECL_ARGUMENTS (thunk_fndecl); current_function_decl = thunk_fndecl; @@ -1417,7 +1527,9 @@ assemble_thunk (struct cgraph_node *node) /* Build call to the function being thunked. */ if (!VOID_TYPE_P (restype)) { - if (!is_gimple_reg_type (restype)) + if (DECL_BY_REFERENCE (resdecl)) + restmp = gimple_fold_indirect_ref (resdecl); + else if (!is_gimple_reg_type (restype)) { restmp = resdecl; add_local_decl (cfun, restmp); @@ -1433,74 +1545,91 @@ assemble_thunk (struct cgraph_node *node) if (this_adjusting) vargs.quick_push (thunk_adjust (&bsi, a, 1, fixed_offset, virtual_offset)); - else + else if (nargs) vargs.quick_push (a); - for (i = 1, arg = DECL_CHAIN (a); i < nargs; i++, arg = DECL_CHAIN (arg)) - vargs.quick_push (arg); + + if (nargs) + for (i = 1, arg = DECL_CHAIN (a); i < nargs; i++, arg = DECL_CHAIN (arg)) + vargs.quick_push (arg); call = gimple_build_call_vec (build_fold_addr_expr_loc (0, alias), vargs); vargs.release (); gimple_call_set_from_thunk (call, true); if (restmp) - gimple_call_set_lhs (call, restmp); + { + gimple_call_set_lhs (call, restmp); + gcc_assert (useless_type_conversion_p (TREE_TYPE (restmp), + TREE_TYPE (TREE_TYPE (alias)))); + } gsi_insert_after (&bsi, call, GSI_NEW_STMT); + if (!(gimple_call_flags (call) & ECF_NORETURN)) + { + if (restmp && !this_adjusting + && (fixed_offset || virtual_offset)) + { + tree true_label = NULL_TREE; - if (restmp && !this_adjusting) - { - tree true_label = NULL_TREE; + if (TREE_CODE (TREE_TYPE (restmp)) == POINTER_TYPE) 
+ { + gimple stmt; + /* If the return type is a pointer, we need to + protect against NULL. We know there will be an + adjustment, because that's why we're emitting a + thunk. */ + then_bb = create_basic_block (NULL, (void *) 0, bb); + return_bb = create_basic_block (NULL, (void *) 0, then_bb); + else_bb = create_basic_block (NULL, (void *) 0, else_bb); + add_bb_to_loop (then_bb, bb->loop_father); + add_bb_to_loop (return_bb, bb->loop_father); + add_bb_to_loop (else_bb, bb->loop_father); + remove_edge (single_succ_edge (bb)); + true_label = gimple_block_label (then_bb); + stmt = gimple_build_cond (NE_EXPR, restmp, + build_zero_cst (TREE_TYPE (restmp)), + NULL_TREE, NULL_TREE); + gsi_insert_after (&bsi, stmt, GSI_NEW_STMT); + make_edge (bb, then_bb, EDGE_TRUE_VALUE); + make_edge (bb, else_bb, EDGE_FALSE_VALUE); + make_edge (return_bb, EXIT_BLOCK_PTR, 0); + make_edge (then_bb, return_bb, EDGE_FALLTHRU); + make_edge (else_bb, return_bb, EDGE_FALLTHRU); + bsi = gsi_last_bb (then_bb); + } - if (TREE_CODE (TREE_TYPE (restmp)) == POINTER_TYPE) - { - gimple stmt; - /* If the return type is a pointer, we need to - protect against NULL. We know there will be an - adjustment, because that's why we're emitting a - thunk. 
*/ - then_bb = create_basic_block (NULL, (void *) 0, bb); - return_bb = create_basic_block (NULL, (void *) 0, then_bb); - else_bb = create_basic_block (NULL, (void *) 0, else_bb); - add_bb_to_loop (then_bb, bb->loop_father); - add_bb_to_loop (return_bb, bb->loop_father); - add_bb_to_loop (else_bb, bb->loop_father); - remove_edge (single_succ_edge (bb)); - true_label = gimple_block_label (then_bb); - stmt = gimple_build_cond (NE_EXPR, restmp, - build_zero_cst (TREE_TYPE (restmp)), - NULL_TREE, NULL_TREE); - gsi_insert_after (&bsi, stmt, GSI_NEW_STMT); - make_edge (bb, then_bb, EDGE_TRUE_VALUE); - make_edge (bb, else_bb, EDGE_FALSE_VALUE); - make_edge (return_bb, EXIT_BLOCK_PTR, 0); - make_edge (then_bb, return_bb, EDGE_FALLTHRU); - make_edge (else_bb, return_bb, EDGE_FALLTHRU); - bsi = gsi_last_bb (then_bb); + restmp = thunk_adjust (&bsi, restmp, /*this_adjusting=*/0, + fixed_offset, virtual_offset); + if (true_label) + { + gimple stmt; + bsi = gsi_last_bb (else_bb); + stmt = gimple_build_assign (restmp, + build_zero_cst (TREE_TYPE (restmp))); + gsi_insert_after (&bsi, stmt, GSI_NEW_STMT); + bsi = gsi_last_bb (return_bb); + } } + else + gimple_call_set_tail (call, true); - restmp = thunk_adjust (&bsi, restmp, /*this_adjusting=*/0, - fixed_offset, virtual_offset); - if (true_label) - { - gimple stmt; - bsi = gsi_last_bb (else_bb); - stmt = gimple_build_assign (restmp, - build_zero_cst (TREE_TYPE (restmp))); - gsi_insert_after (&bsi, stmt, GSI_NEW_STMT); - bsi = gsi_last_bb (return_bb); - } + /* Build return value. */ + ret = gimple_build_return (restmp); + gsi_insert_after (&bsi, ret, GSI_NEW_STMT); } else - gimple_call_set_tail (call, true); - - /* Build return value. 
*/ - ret = gimple_build_return (restmp); - gsi_insert_after (&bsi, ret, GSI_NEW_STMT); + { + gimple_call_set_tail (call, true); + remove_edge (single_succ_edge (bb)); + } delete_unreachable_blocks (); update_ssa (TODO_update_ssa); +#ifdef ENABLE_CHECKING + verify_flow_info (); +#endif /* Since we want to emit the thunk, we explicitly mark its name as referenced. */ node->thunk.thunk_p = false; - cgraph_node_remove_callees (node); + rebuild_cgraph_edges (); cgraph_add_new_function (thunk_fndecl, true); bitmap_obstack_release (NULL); } @@ -1508,8 +1637,6 @@ assemble_thunk (struct cgraph_node *node) set_cfun (NULL); } - - /* Assemble thunks and aliases associated to NODE. */ static void @@ -1526,7 +1653,7 @@ assemble_thunks_and_aliases (struct cgraph_node *node) e = e->next_caller; assemble_thunks_and_aliases (thunk); - assemble_thunk (thunk); + expand_thunk (thunk); } else e = e->next_caller; @@ -1561,6 +1688,7 @@ expand_function (struct cgraph_node *node) announce_function (decl); node->process = 0; gcc_assert (node->lowered); + cgraph_get_body (node); /* Generate RTL for the body of DECL. */ @@ -1588,7 +1716,7 @@ expand_function (struct cgraph_node *node) /* Signal the start of passes. */ invoke_plugin_callbacks (PLUGIN_ALL_PASSES_START, NULL); - execute_pass_list (all_passes); + execute_pass_list (g->get_passes ()->all_passes); /* Signal the end of passes. */ invoke_plugin_callbacks (PLUGIN_ALL_PASSES_END, NULL); @@ -1656,6 +1784,7 @@ expand_function (struct cgraph_node *node) /* Eliminate all call edges. This is important so the GIMPLE_CALL no longer points to the dead function body. 
*/ cgraph_node_remove_callees (node); + ipa_remove_all_references (&node->symbol.ref_list); } @@ -1807,6 +1936,8 @@ output_in_order (void) static void ipa_passes (void) { + gcc::pass_manager *passes = g->get_passes (); + set_cfun (NULL); current_function_decl = NULL; gimple_register_cfg_hooks (); @@ -1816,7 +1947,7 @@ ipa_passes (void) if (!in_lto_p) { - execute_ipa_pass_list (all_small_ipa_passes); + execute_ipa_pass_list (passes->all_small_ipa_passes); if (seen_error ()) return; } @@ -1843,14 +1974,15 @@ ipa_passes (void) cgraph_process_new_functions (); execute_ipa_summary_passes - ((struct ipa_opt_pass_d *) all_regular_ipa_passes); + ((struct ipa_opt_pass_d *) passes->all_regular_ipa_passes); } /* Some targets need to handle LTO assembler output specially. */ if (flag_generate_lto) targetm.asm_out.lto_start (); - execute_ipa_summary_passes ((struct ipa_opt_pass_d *) all_lto_gen_passes); + execute_ipa_summary_passes ((struct ipa_opt_pass_d *) + passes->all_lto_gen_passes); if (!in_lto_p) ipa_write_summaries (); @@ -1859,7 +1991,7 @@ ipa_passes (void) targetm.asm_out.lto_end (); if (!flag_ltrans && (in_lto_p || !flag_lto || flag_fat_lto_objects)) - execute_ipa_pass_list (all_regular_ipa_passes); + execute_ipa_pass_list (passes->all_regular_ipa_passes); invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL); bitmap_obstack_release (NULL); @@ -1985,7 +2117,7 @@ compile (void) cgraph_materialize_all_clones (); bitmap_obstack_initialize (NULL); - execute_ipa_pass_list (all_late_ipa_passes); + execute_ipa_pass_list (g->get_passes ()->all_late_ipa_passes); symtab_remove_unreachable_nodes (true, dump_file); #ifdef ENABLE_CHECKING verify_symtab (); diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c index 7392b92b3f5..be47665ebc0 100644 --- a/gcc/combine-stack-adj.c +++ b/gcc/combine-stack-adj.c @@ -643,22 +643,40 @@ rest_of_handle_stack_adjustments (void) return 0; } -struct rtl_opt_pass pass_stack_adjustments = +namespace { + +const pass_data 
pass_data_stack_adjustments = { - { - RTL_PASS, - "csa", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_stack_adjustments, /* gate */ - rest_of_handle_stack_adjustments, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_COMBINE_STACK_ADJUST, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "csa", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_COMBINE_STACK_ADJUST, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */ }; + +class pass_stack_adjustments : public rtl_opt_pass +{ +public: + pass_stack_adjustments(gcc::context *ctxt) + : rtl_opt_pass(pass_data_stack_adjustments, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_stack_adjustments (); } + unsigned int execute () { return rest_of_handle_stack_adjustments (); } + +}; // class pass_stack_adjustments + +} // anon namespace + +rtl_opt_pass * +make_pass_stack_adjustments (gcc::context *ctxt) +{ + return new pass_stack_adjustments (ctxt); +} diff --git a/gcc/combine.c b/gcc/combine.c index 754cd341f45..fc566c55c16 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -5803,8 +5803,15 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest, return x; } - /* If the code changed, return a whole new comparison. */ - if (new_code != code) + /* If the code changed, return a whole new comparison. + We also need to avoid using SUBST in cases where + simplify_comparison has widened a comparison with a CONST_INT, + since in that case the wider CONST_INT may fail the sanity + checks in do_SUBST. 
*/ + if (new_code != code + || (CONST_INT_P (op1) + && GET_MODE (op0) != GET_MODE (XEXP (x, 0)) + && GET_MODE (op0) != GET_MODE (XEXP (x, 1)))) return gen_rtx_fmt_ee (new_code, mode, op0, op1); /* Otherwise, keep this operation, but maybe change its operands. @@ -8122,8 +8129,8 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask, /* If MODE is narrower than HOST_WIDE_INT and CVAL is a negative number, sign extend it. */ if (width > 0 && width < HOST_BITS_PER_WIDE_INT - && (cval & ((unsigned HOST_WIDE_INT) 1 << (width - 1))) != 0) - cval |= (unsigned HOST_WIDE_INT) -1 << width; + && (cval & (HOST_WIDE_INT_1U << (width - 1))) != 0) + cval |= HOST_WIDE_INT_M1U << width; y = simplify_gen_binary (AND, GET_MODE (x), XEXP (x, 0), GEN_INT (cval)); @@ -8151,8 +8158,8 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask, number, sign extend it. */ if (width < HOST_BITS_PER_WIDE_INT - && (smask & ((unsigned HOST_WIDE_INT) 1 << (width - 1))) != 0) - smask |= (unsigned HOST_WIDE_INT) (-1) << width; + && (smask & (HOST_WIDE_INT_1U << (width - 1))) != 0) + smask |= HOST_WIDE_INT_M1U << width; if (CONST_INT_P (XEXP (x, 1)) && exact_log2 (- smask) >= 0 @@ -13840,22 +13847,40 @@ rest_of_handle_combine (void) return 0; } -struct rtl_opt_pass pass_combine = +namespace { + +const pass_data pass_data_combine = { - { - RTL_PASS, - "combine", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_handle_combine, /* gate */ - rest_of_handle_combine, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_COMBINE, /* tv_id */ - PROP_cfglayout, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "combine", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_COMBINE, /* tv_id */ + PROP_cfglayout, /* 
properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */ }; + +class pass_combine : public rtl_opt_pass +{ +public: + pass_combine(gcc::context *ctxt) + : rtl_opt_pass(pass_data_combine, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_handle_combine (); } + unsigned int execute () { return rest_of_handle_combine (); } + +}; // class pass_combine + +} // anon namespace + +rtl_opt_pass * +make_pass_combine (gcc::context *ctxt) +{ + return new pass_combine (ctxt); +} diff --git a/gcc/common.opt b/gcc/common.opt index 7f8cfe8f399..27a22a0d5de 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -207,6 +207,10 @@ unsigned int help_columns Variable bool flag_opts_finished +; What the sanitizer should instrument +Variable +unsigned int flag_sanitize + ### Driver @@ -850,13 +854,9 @@ fargument-noalias-anything Common Ignore Does nothing. Preserved for backward compatibility. -fsanitize=address -Common Report Var(flag_asan) -Enable AddressSanitizer, a memory error detector - -fsanitize=thread -Common Report Var(flag_tsan) -Enable ThreadSanitizer, a data race detector +fsanitize= +Common Driver Report Joined +Select what to sanitize fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization @@ -1155,6 +1155,10 @@ fdelete-null-pointer-checks Common Report Var(flag_delete_null_pointer_checks) Init(1) Optimization Delete useless null pointer checks +fdevirtualize-speculatively +Common Report Var(flag_devirtualize_speculatively) Optimization +Perform speculative devirtualization + fdevirtualize Common Report Var(flag_devirtualize) Optimization Try to convert virtual calls to direct ones. 
@@ -1518,7 +1522,7 @@ Common RejectNegative Joined fipa-cp Common Report Var(flag_ipa_cp) Optimization -Perform Interprocedural constant propagation +Perform interprocedural constant propagation fipa-cp-clone Common Report Var(flag_ipa_cp_clone) Optimization @@ -2463,6 +2467,30 @@ Enum(symbol_visibility) String(hidden) Value(VISIBILITY_HIDDEN) EnumValue Enum(symbol_visibility) String(protected) Value(VISIBILITY_PROTECTED) +fvtable-verify= +Common Joined RejectNegative Enum(vtv_priority) Var(flag_vtable_verify) Init(VTV_NO_PRIORITY) +Validate vtable pointers before using them. + +Enum +Name(vtv_priority) Type(enum vtv_priority) UnknownError(unknown vtable verify initialization priority %qs) + +EnumValue +Enum(vtv_priority) String(none) Value(VTV_NO_PRIORITY) + +EnumValue +Enum(vtv_priority) String(std) Value(VTV_STANDARD_PRIORITY) + +EnumValue +Enum(vtv_priority) String(preinit) Value(VTV_PREINIT_PRIORITY) + +fvtv-counts +Common Var(flag_vtv_counts) +Output vtable verification counters. + +fvtv-debug +Common Var(flag_vtv_debug) +Output vtable verification pointer sets information. + fvpt Common Report Var(flag_value_profile_transformations) Optimization Use expression value profiles in optimizations @@ -2732,6 +2760,9 @@ Driver static-libtsan Driver +static-libubsan +Driver + symbolic Driver diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index b73e369bb32..8ca74b9be49 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -57,6 +57,14 @@ along with GCC; see the file COPYING3. 
If not see #define OPTION_MASK_ISA_XSAVE_SET OPTION_MASK_ISA_XSAVE #define OPTION_MASK_ISA_XSAVEOPT_SET \ (OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_XSAVE) +#define OPTION_MASK_ISA_AVX512F_SET \ + (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX2_SET) +#define OPTION_MASK_ISA_AVX512CD_SET \ + (OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA_AVX512PF_SET \ + (OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA_AVX512ER_SET \ + (OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED @@ -128,11 +136,18 @@ along with GCC; see the file COPYING3. If not see | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \ | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_XSAVE_UNSET) #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA -#define OPTION_MASK_ISA_AVX2_UNSET OPTION_MASK_ISA_AVX2 #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR #define OPTION_MASK_ISA_XSAVE_UNSET \ (OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET) #define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT +#define OPTION_MASK_ISA_AVX2_UNSET \ + (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) +#define OPTION_MASK_ISA_AVX512F_UNSET \ + (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ + | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET) +#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD +#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF +#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER #define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED @@ -313,6 +328,58 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavx512f: + if (value) + { + 
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512F_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_UNSET; + } + return true; + + case OPT_mavx512cd: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512CD_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_UNSET; + } + return true; + + case OPT_mavx512pf: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512PF_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_UNSET; + } + return true; + + case OPT_mavx512er: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512ER_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/compare-elim.c b/gcc/compare-elim.c index 367cd8ea8dc..e907376c577 100644 --- a/gcc/compare-elim.c +++ b/gcc/compare-elim.c @@ -651,24 +651,41 @@ gate_compare_elim_after_reload (void) return flag_compare_elim_after_reload; } -struct rtl_opt_pass pass_compare_elim_after_reload = +namespace { + +const pass_data pass_data_compare_elim_after_reload = { - { - RTL_PASS, - "cmpelim", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_compare_elim_after_reload, /* gate */ - execute_compare_elim_after_reload, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 
0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish - | TODO_df_verify - | TODO_verify_rtl_sharing /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "cmpelim", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_df_verify + | TODO_verify_rtl_sharing ), /* todo_flags_finish */ }; + +class pass_compare_elim_after_reload : public rtl_opt_pass +{ +public: + pass_compare_elim_after_reload(gcc::context *ctxt) + : rtl_opt_pass(pass_data_compare_elim_after_reload, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_compare_elim_after_reload (); } + unsigned int execute () { return execute_compare_elim_after_reload (); } + +}; // class pass_compare_elim_after_reload + +} // anon namespace + +rtl_opt_pass * +make_pass_compare_elim_after_reload (gcc::context *ctxt) +{ + return new pass_compare_elim_after_reload (ctxt); +} diff --git a/gcc/config.gcc b/gcc/config.gcc index e09ba64d675..36d5ae82dd1 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -313,7 +313,7 @@ aarch64*-*-*) cpu_type=aarch64 need_64bit_hwint=yes extra_headers="arm_neon.h" - extra_objs="aarch64-builtins.o" + extra_objs="aarch64-builtins.o aarch-common.o" target_has_targetm_common=yes ;; alpha*-*-*) @@ -327,9 +327,11 @@ am33_2.0-*-linux*) arm*-*-*) cpu_type=arm extra_headers="mmintrin.h arm_neon.h" + extra_objs="aarch-common.o" target_type_format_char='%' c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" + need_64bit_hwint=yes extra_options="${extra_options} arm/arm-tables.opt" ;; avr-*-*) @@ -499,6 +501,7 @@ fi case ${target} in aarch64*-*-*) + tm_p_file="${tm_p_file} arm/aarch-common-protos.h" case ${with_abi} in "") if test "x$with_multilib_list" = xilp32; then @@ -558,7 +561,11 @@ x86_64-*-*) fi tm_file="vxworks-dummy.h ${tm_file}" ;; -arm*-*-* | 
mips*-*-* | sh*-*-* | sparc*-*-*) +arm*-*-*) + tm_p_file="${tm_p_file} arm/aarch-common-protos.h" + tm_file="vxworks-dummy.h ${tm_file}" + ;; +mips*-*-* | sh*-*-* | sparc*-*-*) tm_file="vxworks-dummy.h ${tm_file}" ;; esac @@ -759,6 +766,7 @@ case ${target} in yes) thread_file='rtems' ;; esac extra_options="${extra_options} rtems.opt" + default_use_cxa_atexit=yes use_gcc_stdint=wrap ;; *-*-uclinux*) @@ -823,7 +831,7 @@ case ${target} in tmake_file=t-vxworks xm_defines=POSIX extra_options="${extra_options} vxworks.opt" - extra_objs=vxworks.o + extra_objs="$extra_objs vxworks.o" case ${enable_threads} in no) ;; "" | yes | vxworks) thread_file='vxworks' ;; @@ -943,10 +951,6 @@ arm*-*-linux-*) # ARM GNU/Linux with ELF tmake_file="$tmake_file arm/t-linux-androideabi" ;; esac - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. - need_64bit_hwint=yes # The EABI requires the use of __cxa_atexit. default_use_cxa_atexit=yes with_tls=${with_tls:-gnu} @@ -955,10 +959,6 @@ arm*-*-uclinux*eabi*) # ARM ucLinux tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/linux-gas.h arm/uclinux-elf.h glibc-stdint.h" tmake_file="arm/t-arm arm/t-arm-elf arm/t-bpabi" tm_file="$tm_file arm/bpabi.h arm/uclinux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. - need_64bit_hwint=yes # The EABI requires the use of __cxa_atexit. default_use_cxa_atexit=yes ;; @@ -967,10 +967,6 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*) arm*eb-*-eabi*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" esac - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. 
- need_64bit_hwint=yes default_use_cxa_atexit=yes tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/bpabi.h" tmake_file="arm/t-arm arm/t-arm-elf" @@ -1005,7 +1001,7 @@ avr-*-*) tm_file="${tm_file} ${cpu_type}/avrlibc.h" tm_defines="${tm_defines} WITH_AVRLIBC" fi - tmake_file="avr/t-avr avr/t-multilib" + tmake_file="${tmake_file} avr/t-avr avr/t-multilib" use_gcc_stdint=wrap extra_gcc_objs="driver-avr.o avr-devices.o" extra_objs="avr-devices.o avr-log.o" @@ -1022,7 +1018,7 @@ bfin*-uclinux*) ;; bfin*-linux-uclibc*) tm_file="${tm_file} dbxelf.h elfos.h bfin/elf.h gnu-user.h linux.h glibc-stdint.h bfin/linux.h ./linux-sysroot-suffix.h" - tmake_file="bfin/t-bfin-linux t-slibgcc" + tmake_file="bfin/t-bfin-linux t-slibgcc t-linux-android" use_collect2=no ;; bfin*-rtems*) @@ -1057,7 +1053,7 @@ cris-*-elf | cris-*-none) crisv32-*-linux* | cris-*-linux*) tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h cris/linux.h" # We need to avoid using t-linux, so override default tmake_file - tmake_file="cris/t-cris cris/t-linux t-slibgcc" + tmake_file="cris/t-cris cris/t-linux t-slibgcc t-linux-android" extra_options="${extra_options} cris/linux.opt" case $target in cris-*-*) @@ -1709,8 +1705,7 @@ m32r-*-rtems*) ;; m32r-*-linux*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} m32r/linux.h" - # We override the tmake_file for linux -- why? - tmake_file="m32r/t-linux t-slibgcc" + tmake_file="${tmake_file} m32r/t-linux t-slibgcc" gnu_ld=yes if test x$enable_threads = xyes; then thread_file='posix' @@ -1718,8 +1713,7 @@ m32r-*-linux*) ;; m32rle-*-linux*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h m32r/little.h ${tm_file} m32r/linux.h" - # We override the tmake_file for linux -- why? 
- tmake_file="m32r/t-linux t-slibgcc" + tmake_file="${tmake_file} m32r/t-linux t-slibgcc" gnu_ld=yes if test x$enable_threads = xyes; then thread_file='posix' @@ -2145,7 +2139,7 @@ powerpc*-*-linux*) tmake_file="rs6000/t-fprules rs6000/t-ppcos ${tmake_file} rs6000/t-ppccomm" case ${target} in powerpc*le-*-*) - tm_file="${tm_file} rs6000/sysv4le.h" ;; + tm_file="${tm_file} rs6000/sysv4le.h" ;; esac maybe_biarch=yes case ${target} in @@ -2168,6 +2162,19 @@ powerpc*-*-linux*) fi tm_file="rs6000/biarch64.h ${tm_file} rs6000/linux64.h glibc-stdint.h" tmake_file="$tmake_file rs6000/t-linux64" + case ${target} in + powerpc*le-*-*) + tmake_file="$tmake_file rs6000/t-linux64le" + case ${enable_targets} in + all | *powerpc64-* | *powerpc-*) + tmake_file="$tmake_file rs6000/t-linux64lebe" ;; + esac ;; + *) + case ${enable_targets} in + all | *powerpc64le-* | *powerpcle-*) + tmake_file="$tmake_file rs6000/t-linux64bele" ;; + esac ;; + esac extra_options="${extra_options} rs6000/linux64.opt" ;; *) @@ -3040,11 +3047,18 @@ if test x$with_cpu = x ; then with_cpu=8540 fi ;; - sparc-leon*-*) - with_cpu=v8; - ;; sparc*-*-*) - with_cpu="`echo ${target} | sed 's/-.*$//'`" + case ${target} in + *-leon-*) + with_cpu=leon + ;; + *-leon[3-9]*) + with_cpu=leon3 + ;; + *) + with_cpu="`echo ${target} | sed 's/-.*$//'`" + ;; + esac ;; esac @@ -3532,7 +3546,7 @@ case "${target}" in ;; mips*-*-*) - supported_defaults="abi arch arch_32 arch_64 float fpu tune tune_32 tune_64 divide llsc mips-plt synci" + supported_defaults="abi arch arch_32 arch_64 float fpu nan tune tune_32 tune_64 divide llsc mips-plt synci" case ${with_float} in "" | soft | hard) @@ -3554,6 +3568,16 @@ case "${target}" in ;; esac + case ${with_nan} in + "" | 2008 | legacy) + # OK + ;; + *) + echo "Unknown NaN encoding used in --with-nan=$with_nan" 1>&2 + exit 1 + ;; + esac + case ${with_abi} in "" | 32 | o64 | n32 | 64 | eabi) # OK @@ -3913,7 +3937,7 @@ case ${target} in esac t= -all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 
arch_64 tune tune_32 tune_64 schedule float mode fpu divide llsc mips-plt synci tls" +all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan divide llsc mips-plt synci tls" for option in $all_defaults do eval "val=\$with_"`echo $option | sed s/-/_/g` diff --git a/gcc/config.in b/gcc/config.in index 288c11f9377..44f9a320c27 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -387,6 +387,12 @@ #endif +/* Define if your assembler supports LEON instructions. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_LEON +#endif + + /* Define if the assembler won't complain about a line such as # 0 "" 2. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_LINE_ZERO @@ -417,6 +423,12 @@ #endif +/* Define if the assembler understands -mnan=. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_NAN +#endif + + /* Define if your assembler supports the -no-mul-bug-abort option. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_NO_MUL_BUG_ABORT_OPTION diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 58e815471a6..371e74c7f94 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -35,3 +35,4 @@ AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 55dead6e404..4046d7a7001 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -40,10 +40,6 @@ 10 - CODE_FOR_. 
*/ BUILTIN_VD_RE (CREATE, create, 0) - BUILTIN_VQ_S (GETLANE, get_lane_signed, 0) - BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0) - BUILTIN_VDQF (GETLANE, get_lane, 0) - VAR1 (GETLANE, get_lane, 0, di) BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VB (BINOP, pmul, 0) BUILTIN_VDQF (UNOP, sqrt, 2) @@ -51,6 +47,9 @@ VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, clz, 2, v4si) + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) @@ -64,7 +63,6 @@ BUILTIN_VQ (REINTERP, reinterpretv2df, 0) BUILTIN_VDQ_I (BINOP, dup_lane, 0) - BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0) /* Implemented by aarch64_qshl. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0) BUILTIN_VSDQ_I (BINOP, uqshl, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 178efdc964e..9805197a22b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -336,46 +336,47 @@ }) (define_insn "aarch64_simd_dup" - [(set (match_operand:VDQ 0 "register_operand" "=w") - (vec_duplicate:VDQ (match_operand: 1 "register_operand" "r")))] + [(set (match_operand:VDQ 0 "register_operand" "=w, w") + (vec_duplicate:VDQ (match_operand: 1 "register_operand" "r, w")))] "TARGET_SIMD" - "dup\\t%0., %1" - [(set_attr "simd_type" "simd_dupgp") + "@ + dup\\t%0., %1 + dup\\t%0., %1.[0]" + [(set_attr "simd_type" "simd_dupgp, simd_dup") (set_attr "simd_mode" "")] ) -(define_insn "aarch64_dup_lane" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (vec_duplicate:VDQ_I - (vec_select: - (match_operand: 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]) - )))] +(define_insn "aarch64_simd_dup" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))] "TARGET_SIMD" - "dup\\t%0, %1.[%2]" + "dup\\t%0., %1.[0]" [(set_attr "simd_type" "simd_dup") (set_attr 
"simd_mode" "")] ) -(define_insn "aarch64_dup_lane_scalar" - [(set (match_operand: 0 "register_operand" "=w, r") - (vec_select: - (match_operand:VDQ 1 "register_operand" "w, w") - (parallel [(match_operand:SI 2 "immediate_operand" "i, i")]) - ))] +(define_insn "aarch64_dup_lane" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand:VALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] "TARGET_SIMD" - "@ - dup\\t%0, %1.[%2] - umov\\t%0, %1.[%2]" - [(set_attr "simd_type" "simd_dup, simd_movgp") + "dup\\t%0., %1.[%2]" + [(set_attr "simd_type" "simd_dup") (set_attr "simd_mode" "")] ) -(define_insn "aarch64_simd_dup" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))] +(define_insn "aarch64_dup_lane_" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand: 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] "TARGET_SIMD" - "dup\\t%0., %1.[0]" + "dup\\t%0., %1.[%2]" [(set_attr "simd_type" "simd_dup") (set_attr "simd_mode" "")] ) @@ -1051,6 +1052,7 @@ fmov\\t%d0, %1 dup\\t%d0, %1" [(set_attr "v8type" "*,fmov,*") + (set_attr "type" "*,fmov,*") (set_attr "simd_type" "simd_dup,*,simd_dup") (set_attr "simd_mode" "") (set_attr "simd" "yes,*,yes") @@ -2146,45 +2148,50 @@ DONE; }) -(define_insn "aarch64_get_lane_signed" - [(set (match_operand: 0 "register_operand" "=r") - (sign_extend: +;; Lane extraction with sign extension to general purpose register. 
+(define_insn "*aarch64_get_lane_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI (vec_select: - (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "smov\\t%0, %1.[%2]" + "smov\\t%0, %1.[%2]" [(set_attr "simd_type" "simd_movgp") - (set_attr "simd_mode" "")] + (set_attr "simd_mode" "")] ) -(define_insn "aarch64_get_lane_unsigned" - [(set (match_operand: 0 "register_operand" "=r") - (zero_extend: +(define_insn "*aarch64_get_lane_zero_extendsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select: - (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "umov\\t%0, %1.[%2]" + "umov\\t%w0, %1.[%2]" [(set_attr "simd_type" "simd_movgp") (set_attr "simd_mode" "")] ) +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. 
(define_insn "aarch64_get_lane" - [(set (match_operand: 0 "register_operand" "=w") + [(set (match_operand: 0 "register_operand" "=r, w") (vec_select: - (match_operand:VDQF 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VALL 1 "register_operand" "w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))] "TARGET_SIMD" - "mov\\t%0.[0], %1.[%2]" - [(set_attr "simd_type" "simd_ins") + "@ + umov\\t%0, %1.[%2] + dup\\t%0, %1.[%2]" + [(set_attr "simd_type" "simd_movgp, simd_dup") (set_attr "simd_mode" "")] ) (define_expand "aarch64_get_lanedi" - [(match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[2], 0, 1); @@ -2790,7 +2797,7 @@ (match_operand:VD_HSI 2 "register_operand" "w")) (sign_extend: (vec_duplicate:VD_HSI - (match_operand: 3 "register_operand" "w")))) + (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" "sqdmll\\t%0, %2, %3.[0]" @@ -2948,7 +2955,7 @@ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) (sign_extend: (vec_duplicate: - (match_operand: 3 "register_operand" "w")))) + (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" "sqdmll2\\t%0, %2, %3.[0]" @@ -3076,7 +3083,7 @@ (match_operand:VD_HSI 1 "register_operand" "w")) (sign_extend: (vec_duplicate:VD_HSI - (match_operand: 2 "register_operand" "w"))) + (match_operand: 2 "register_operand" ""))) ) (const_int 1)))] "TARGET_SIMD" @@ -3186,7 +3193,7 @@ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) (sign_extend: (vec_duplicate: - (match_operand: 2 "register_operand" "w"))) + (match_operand: 2 "register_operand" ""))) ) (const_int 1)))] "TARGET_SIMD" @@ -4172,13 +4179,23 @@ (set_attr "simd_mode" "")] ) +(define_insn 
"aarch64_frecp" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_SIMD" + "frecp\\t%0, %1" + [(set_attr "simd_type" "simd_frecp") + (set_attr "mode" "")] +) + (define_insn "aarch64_frecps" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") - (match_operand:VDQF 2 "register_operand" "w")] + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] UNSPEC_FRECPS))] "TARGET_SIMD" - "frecps\\t%0., %1., %2." + "frecps\\t%0, %1, %2" [(set_attr "simd_type" "simd_frecps") (set_attr "simd_mode" "")] ) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index aed035a434e..7635e1e2679 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7931,6 +7931,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) return true; } +static bool +aarch64_evpc_dup (struct expand_vec_perm_d *d) +{ + rtx (*gen) (rtx, rtx, rtx); + rtx out = d->target; + rtx in0; + enum machine_mode vmode = d->vmode; + unsigned int i, elt, nelt = d->nelt; + rtx lane; + + /* TODO: This may not be big-endian safe. */ + if (BYTES_BIG_ENDIAN) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; i++) + { + if (elt != d->perm[i]) + return false; + } + + /* The generic preparation in aarch64_expand_vec_perm_const_1 + swaps the operand order and the permute indices if it finds + d->perm[0] to be in the second operand. Thus, we can always + use d->op0 and need not do any extra arithmetic to get the + correct lane number. 
*/ + in0 = d->op0; + lane = GEN_INT (elt); + + switch (vmode) + { + case V16QImode: gen = gen_aarch64_dup_lanev16qi; break; + case V8QImode: gen = gen_aarch64_dup_lanev8qi; break; + case V8HImode: gen = gen_aarch64_dup_lanev8hi; break; + case V4HImode: gen = gen_aarch64_dup_lanev4hi; break; + case V4SImode: gen = gen_aarch64_dup_lanev4si; break; + case V2SImode: gen = gen_aarch64_dup_lanev2si; break; + case V2DImode: gen = gen_aarch64_dup_lanev2di; break; + case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break; + case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break; + case V2DFmode: gen = gen_aarch64_dup_lanev2df; break; + default: + return false; + } + + emit_insn (gen (out, in0, lane)); + return true; +} + static bool aarch64_evpc_tbl (struct expand_vec_perm_d *d) { @@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; else if (aarch64_evpc_trn (d)) return true; + else if (aarch64_evpc_dup (d)) + return true; return aarch64_evpc_tbl (d); } return false; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 092426973c6..d8012f88049 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -158,6 +158,7 @@ #define AARCH64_FL_FP (1 << 1) /* Has FP. */ #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ #define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -170,6 +171,7 @@ /* Macros to test ISA flags. 
*/ extern unsigned long aarch64_isa_flags; +#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 5d64228351b..f37f98f9994 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -88,11 +88,16 @@ UNSPEC_NOP UNSPEC_PRLG_STK UNSPEC_RBIT + UNSPEC_SISD_NEG + UNSPEC_SISD_SSHL + UNSPEC_SISD_USHL + UNSPEC_SSHL_2S UNSPEC_ST2 UNSPEC_ST3 UNSPEC_ST4 UNSPEC_TLS UNSPEC_TLSDESC + UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY ]) @@ -235,9 +240,6 @@ fmovf2i,\ fmovi2f,\ fmul,\ - frecpe,\ - frecps,\ - frecpx,\ frint,\ fsqrt,\ load_acq,\ @@ -272,48 +274,9 @@ udiv" (const_string "alu")) - -; The "type" attribute is used by the AArch32 backend. Below is a mapping -; from "v8type" to "type". - -(define_attr "type" - "alu,alu_shift,block,branch,call,f_2_r,f_cvt,f_flag,f_loads, - f_loadd,f_stored,f_stores,faddd,fadds,fcmpd,fcmps,fconstd,fconsts, - fcpys,fdivd,fdivs,ffarithd,ffariths,fmacd,fmacs,fmuld,fmuls,load_byte, - load1,load2,mult,r_2_f,store1,store2" - (cond [ - (eq_attr "v8type" "alu_shift,alus_shift,logic_shift,logics_shift") (const_string "alu_shift") - (eq_attr "v8type" "branch") (const_string "branch") - (eq_attr "v8type" "call") (const_string "call") - (eq_attr "v8type" "fmovf2i") (const_string "f_2_r") - (eq_attr "v8type" "fcvt,fcvtf2i,fcvti2f") (const_string "f_cvt") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "SF")) (const_string "f_loads") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "DF")) (const_string "f_loadd") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "SF")) (const_string "f_stores") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "DF")) (const_string "f_stored") - (and (eq_attr "v8type" "fadd,fminmax") (eq_attr "mode" "DF")) 
(const_string "faddd") - (and (eq_attr "v8type" "fadd,fminmax") (eq_attr "mode" "SF")) (const_string "fadds") - (and (eq_attr "v8type" "fcmp,fccmp") (eq_attr "mode" "DF")) (const_string "fcmpd") - (and (eq_attr "v8type" "fcmp,fccmp") (eq_attr "mode" "SF")) (const_string "fcmps") - (and (eq_attr "v8type" "fconst") (eq_attr "mode" "DF")) (const_string "fconstd") - (and (eq_attr "v8type" "fconst") (eq_attr "mode" "SF")) (const_string "fconsts") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "DF")) (const_string "fdivd") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "SF")) (const_string "fdivs") - (and (eq_attr "v8type" "ffarith") (eq_attr "mode" "DF")) (const_string "ffarithd") - (and (eq_attr "v8type" "ffarith") (eq_attr "mode" "SF")) (const_string "ffariths") - (and (eq_attr "v8type" "fmadd") (eq_attr "mode" "DF")) (const_string "fmacd") - (and (eq_attr "v8type" "fmadd") (eq_attr "mode" "SF")) (const_string "fmacs") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "DF")) (const_string "fmuld") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "SF")) (const_string "fmuls") - (and (eq_attr "v8type" "load1") (eq_attr "mode" "QI,HI")) (const_string "load_byte") - (and (eq_attr "v8type" "load1") (eq_attr "mode" "SI,DI,TI")) (const_string "load1") - (eq_attr "v8type" "load2") (const_string "load2") - (and (eq_attr "v8type" "mulh,mult,mull,madd,sdiv,udiv") (eq_attr "mode" "SI")) (const_string "mult") - (eq_attr "v8type" "fmovi2f") (const_string "r_2_f") - (eq_attr "v8type" "store1") (const_string "store1") - (eq_attr "v8type" "store2") (const_string "store2") - ] - (const_string "alu"))) +; The "type" attribute is is included here from AArch32 backend to be able +; to share pipeline descriptions. +(include "../arm/types.md") ;; Attribute that specifies whether or not the instruction touches fp ;; registers. 
@@ -349,6 +312,7 @@ (include "aarch64-generic.md") (include "large.md") (include "small.md") +(include "../arm/cortex-a53.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns @@ -358,14 +322,16 @@ [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" "br\\t%0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "jump" [(set (pc) (label_ref (match_operand 0 "" "")))] "" "b\\t%l0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_expand "cbranch4" @@ -403,7 +369,8 @@ (pc)))] "" "b%m0\\t%l2" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_expand "casesi" @@ -467,7 +434,8 @@ return aarch64_output_casesi (operands); " [(set_attr "length" "16") - (set_attr "v8type" "branch")] + (set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "nop" @@ -508,7 +476,8 @@ [(return)] "" "ret" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "eh_return" @@ -516,7 +485,9 @@ UNSPECV_EH_RETURN)] "" "#" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_split @@ -536,7 +507,9 @@ (pc)))] "" "\\t%0, %l1" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_insn "*tb1" @@ -555,6 +528,7 @@ return \"\\t%0, %1, %l2\"; " [(set_attr "v8type" "branch") + (set_attr "type" "branch") (set_attr "mode" "") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -576,6 +550,7 @@ return \"\\t%0, , %l1\"; " [(set_attr "v8type" "branch") + (set_attr "type" "branch") (set_attr "mode" "") (set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) @@ -620,7 +595,8 @@ (clobber (reg:DI LR_REGNUM))] "" "blr\\t%0" - [(set_attr 
"v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_insn "*call_symbol" @@ -631,7 +607,8 @@ "GET_CODE (operands[0]) == SYMBOL_REF && !aarch64_is_long_call_p (operands[0])" "bl\\t%a0" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_expand "call_value" @@ -668,7 +645,9 @@ (clobber (reg:DI LR_REGNUM))] "" "blr\\t%1" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] + ) (define_insn "*call_value_symbol" @@ -680,7 +659,8 @@ "GET_CODE (operands[1]) == SYMBOL_REF && !aarch64_is_long_call_p (operands[1])" "bl\\t%a1" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_expand "sibcall" @@ -715,7 +695,9 @@ (use (match_operand 2 "" ""))] "GET_CODE (operands[0]) == SYMBOL_REF" "b\\t%a0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_insn "*sibcall_value_insn" @@ -726,7 +708,8 @@ (use (match_operand 3 "" ""))] "GET_CODE (operands[1]) == SYMBOL_REF" "b\\t%a1" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) ;; Call subroutine returning any type. 
@@ -804,6 +787,7 @@ } } [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") + (set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,*,*,*") (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") (set_attr "mode" "") @@ -846,6 +830,8 @@ fmov\\t%w0, %s1 fmov\\t%s0, %s1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov") (set_attr "mode" "SI") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] ) @@ -871,6 +857,8 @@ fmov\\t%d0, %d1 movi\\t%d0, %1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] @@ -885,6 +873,7 @@ && UINTVAL (operands[1]) % 16 == 0" "movk\\t%0, %X2, lsl %1" [(set_attr "v8type" "movk") + (set_attr "type" "mov_imm") (set_attr "mode" "")] ) @@ -917,6 +906,8 @@ str\\t%q1, %0" [(set_attr "v8type" "move2,fmovi2f,fmovf2i,*, \ load2,store2,store2,fpsimd_load,fpsimd_store") + (set_attr "type" "multiple,f_mcr,f_mrc,*, \ + load2,store2,store2,f_loadd,f_stored") (set_attr "simd_type" "*,*,*,simd_move,*,*,*,*,*") (set_attr "mode" "DI,DI,DI,TI,DI,DI,DI,TI,TI") (set_attr "length" "8,8,8,4,4,4,4,4,4") @@ -970,6 +961,8 @@ [(set_attr "v8type" "fmovi2f,fmovf2i,\ fmov,fconst,fpsimd_load,\ fpsimd_store,fpsimd_load,fpsimd_store,fmov") + (set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ + f_loads,f_stores,f_loads,f_stores,fmov") (set_attr "mode" "SF")] ) @@ -991,6 +984,8 @@ [(set_attr "v8type" "fmovi2f,fmovf2i,\ fmov,fconst,fpsimd_load,\ fpsimd_store,fpsimd_load,fpsimd_store,move") + (set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ + 
f_loadd,f_stored,f_loadd,f_stored,mov_reg") (set_attr "mode" "DF")] ) @@ -1029,6 +1024,8 @@ ldp\\t%0, %H0, %1 stp\\t%1, %H1, %0" [(set_attr "v8type" "logic,move2,fmovi2f,fmovf2i,fconst,fconst,fpsimd_load,fpsimd_store,fpsimd_load2,fpsimd_store2") + (set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ + f_loadd,f_stored,neon_ldm_2,neon_stm_2") (set_attr "mode" "DF,DF,DF,DF,DF,DF,TF,TF,DF,DF") (set_attr "length" "4,8,8,8,4,4,4,4,4,4") (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") @@ -1059,6 +1056,7 @@ GET_MODE_SIZE (mode)))" "ldp\\t%0, %2, %1" [(set_attr "v8type" "load2") + (set_attr "type" "load2") (set_attr "mode" "")] ) @@ -1075,6 +1073,7 @@ GET_MODE_SIZE (mode)))" "stp\\t%1, %3, %0" [(set_attr "v8type" "store2") + (set_attr "type" "store2") (set_attr "mode" "")] ) @@ -1091,6 +1090,7 @@ GET_MODE_SIZE (mode)))" "ldp\\t%0, %2, %1" [(set_attr "v8type" "fpsimd_load2") + (set_attr "type" "neon_ldm_2") (set_attr "mode" "")] ) @@ -1106,7 +1106,8 @@ XEXP (operands[0], 0), GET_MODE_SIZE (mode)))" "stp\\t%1, %3, %0" - [(set_attr "v8type" "fpsimd_load2") + [(set_attr "v8type" "fpsimd_store2") + (set_attr "type" "neon_stm_2") (set_attr "mode" "")] ) @@ -1126,6 +1127,7 @@ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" "ldp\\t%2, %3, [%1], %4" [(set_attr "v8type" "load2") + (set_attr "type" "load2") (set_attr "mode" "")] ) @@ -1145,6 +1147,7 @@ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" "stp\\t%2, %3, [%0, %4]!" 
[(set_attr "v8type" "store2") + (set_attr "type" "store2") (set_attr "mode" "")] ) @@ -1166,6 +1169,7 @@ sxtw\t%0, %w1 ldrsw\t%0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "DI")] ) @@ -1177,6 +1181,7 @@ uxtw\t%0, %w1 ldr\t%w0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "DI")] ) @@ -1194,6 +1199,7 @@ sxt\t%0, %w1 ldrs\t%0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "")] ) @@ -1206,6 +1212,7 @@ ldr\t%w0, %1 ldr\t%0, %1" [(set_attr "v8type" "extend,load1,load1") + (set_attr "type" "extend,load1,load1") (set_attr "mode" "")] ) @@ -1223,6 +1230,7 @@ xtb\t%w0, %w1 b\t%w0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "HI")] ) @@ -1267,6 +1275,7 @@ add\\t%w0, %w1, %w2 sub\\t%w0, %w1, #%n2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm") (set_attr "mode" "SI")] ) @@ -1283,6 +1292,7 @@ add\\t%w0, %w1, %w2 sub\\t%w0, %w1, #%n2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm") (set_attr "mode" "SI")] ) @@ -1299,6 +1309,7 @@ sub\\t%x0, %x1, #%n2 add\\t%d0, %d1, %d2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") (set_attr "mode" "DI") (set_attr "simd" "*,*,*,yes")] ) @@ -1306,16 +1317,18 @@ (define_insn "*add3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r") - (match_operand:GPI 2 "aarch64_plus_operand" "rI,J")) + (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r,r") + (set (match_operand:GPI 0 "register_operand" "=r,r,r") (plus:GPI (match_dup 1) (match_dup 2)))] "" "@ adds\\t%0, %1, %2 + adds\\t%0, %1, %2 subs\\t%0, %1, #%n2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr 
"mode" "")] ) @@ -1323,16 +1336,18 @@ (define_insn "*addsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:SI (match_operand:SI 1 "register_operand" "%r,r") - (match_operand:SI 2 "aarch64_plus_operand" "rI,J")) + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r,r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "" "@ adds\\t%w0, %w1, %w2 + adds\\t%w0, %w1, %w2 subs\\t%w0, %w1, #%n2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "SI")] ) @@ -1350,6 +1365,7 @@ "" "adds\\t%0, %3, %1, lsl %p2" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "")] ) @@ -1367,6 +1383,7 @@ "" "subs\\t%0, %1, %2, lsl %p3" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "")] ) @@ -1382,6 +1399,7 @@ "" "adds\\t%0, %2, %1, xt" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) @@ -1397,6 +1415,7 @@ "" "subs\\t%0, %1, %2, xt" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) @@ -1418,6 +1437,7 @@ "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" "adds\\t%0, %4, %1, xt%e3 %p2" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) @@ -1439,20 +1459,23 @@ "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" "subs\\t%0, %4, %1, xt%e3 %p2" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) (define_insn "*add3nr_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r") - (match_operand:GPI 1 "aarch64_plus_operand" "rI,J")) + (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) 
(const_int 0)))] "" "@ cmn\\t%0, %1 + cmn\\t%0, %1 cmp\\t%0, #%n1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "")] ) @@ -1464,6 +1487,7 @@ "" "cmn\\t%0, %1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "")] ) @@ -1475,6 +1499,7 @@ "" "add\\t%0, %3, %1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -1488,6 +1513,7 @@ "" "add\\t%w0, %w3, %w1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1499,6 +1525,7 @@ "" "add\\t%0, %3, %1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -1509,6 +1536,7 @@ "" "add\\t%0, %2, %1, xt" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1521,6 +1549,7 @@ "" "add\\t%w0, %w2, %w1, xt" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1533,6 +1562,7 @@ "" "add\\t%0, %3, %1, xt %2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1547,6 +1577,7 @@ "" "add\\t%w0, %w3, %w1, xt %2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1559,6 +1590,7 @@ "" "add\\t%0, %3, %1, xt %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1572,6 +1604,7 @@ "" "add\\t%w0, %w3, %w1, xt %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1586,6 +1619,7 @@ "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" "add\\t%0, %4, %1, xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1602,6 +1636,7 @@ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" "add\\t%w0, %w4, %w1, xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1615,6 +1650,7 @@ "" "adc\\t%0, 
%1, %2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -1630,6 +1666,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1643,6 +1680,7 @@ "" "adc\\t%0, %1, %2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -1658,6 +1696,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1671,6 +1710,7 @@ "" "adc\\t%0, %1, %2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -1686,6 +1726,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1699,6 +1740,7 @@ "" "adc\\t%0, %1, %2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -1714,6 +1756,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1730,6 +1773,7 @@ INTVAL (operands[3]))); return \"add\t%0, %4, %1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1748,6 +1792,7 @@ INTVAL (operands[3]))); return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1758,6 +1803,7 @@ "" "sub\\t%w0, %w1, %w2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -1770,6 +1816,7 @@ "" "sub\\t%w0, %w1, %w2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -1782,6 +1829,7 @@ sub\\t%x0, %x1, %x2 sub\\t%d0, %d1, %d2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "DI") (set_attr "simd" "*,yes")] ) @@ -1797,6 +1845,7 @@ "" "subs\\t%0, %1, %2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "")] ) @@ -1811,6 +1860,7 @@ "" "subs\\t%w0, %w1, %w2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") 
(set_attr "mode" "SI")] ) @@ -1823,6 +1873,7 @@ "" "sub\\t%0, %3, %1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -1837,6 +1888,7 @@ "" "sub\\t%w0, %w3, %w1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1849,6 +1901,7 @@ "" "sub\\t%0, %3, %1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -1863,6 +1916,7 @@ "" "sub\\t%w0, %w3, %w1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1874,6 +1928,7 @@ "" "sub\\t%0, %1, %2, xt" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1887,6 +1942,7 @@ "" "sub\\t%w0, %w1, %w2, xt" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1899,6 +1955,7 @@ "" "sub\\t%0, %1, %2, xt %3" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1913,6 +1970,7 @@ "" "sub\\t%w0, %w1, %w2, xt %3" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1927,6 +1985,7 @@ "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" "sub\\t%0, %4, %1, xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) @@ -1943,6 +2002,7 @@ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" "sub\\t%w0, %w4, %w1, xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1956,6 +2016,7 @@ "" "sbc\\t%0, %1, %2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -1971,6 +2032,7 @@ "" "sbc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1987,6 +2049,7 @@ INTVAL (operands[3]))); return \"sub\t%0, %4, %1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "")] ) 
@@ -2005,6 +2068,7 @@ INTVAL (operands[3]))); return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -2037,6 +2101,7 @@ DONE; } [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "DI")] ) @@ -2048,6 +2113,7 @@ neg\\t%0, %1 neg\\t%0, %1" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "simd_type" "*,simd_negabs") (set_attr "simd" "*,yes") (set_attr "mode" "") @@ -2061,6 +2127,7 @@ "" "neg\\t%w0, %w1" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -2071,6 +2138,7 @@ "" "ngc\\t%0, %1" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "")] ) @@ -2082,6 +2150,7 @@ "" "ngc\\t%w0, %w1" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -2094,6 +2163,7 @@ "" "negs\\t%0, %1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "")] ) @@ -2107,6 +2177,7 @@ "" "negs\\t%w0, %w1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "SI")] ) @@ -2122,6 +2193,7 @@ "" "negs\\t%0, %1, %2" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "")] ) @@ -2133,6 +2205,7 @@ "" "neg\\t%0, %1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -2146,6 +2219,7 @@ "" "neg\\t%w0, %w1, %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -2157,6 +2231,7 @@ "" "neg\\t%0, %1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "")] ) @@ -2170,6 +2245,7 @@ "" "neg\\t%w0, %w1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -2180,6 +2256,7 @@ "" "mul\\t%0, %1, %2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "")] ) @@ -2192,6 +2269,7 @@ "" "mul\\t%w0, %w1, %w2" [(set_attr "v8type" 
"mult") + (set_attr "type" "mul") (set_attr "mode" "SI")] ) @@ -2203,6 +2281,7 @@ "" "madd\\t%0, %1, %2, %3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "")] ) @@ -2216,6 +2295,7 @@ "" "madd\\t%w0, %w1, %w2, %w3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "SI")] ) @@ -2228,6 +2308,7 @@ "" "msub\\t%0, %1, %2, %3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "")] ) @@ -2242,6 +2323,7 @@ "" "msub\\t%w0, %w1, %w2, %w3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "SI")] ) @@ -2253,6 +2335,7 @@ "" "mneg\\t%0, %1, %2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "")] ) @@ -2266,6 +2349,7 @@ "" "mneg\\t%w0, %w1, %w2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "SI")] ) @@ -2276,6 +2360,7 @@ "" "mull\\t%0, %w1, %w2" [(set_attr "v8type" "mull") + (set_attr "type" "mull") (set_attr "mode" "DI")] ) @@ -2288,6 +2373,7 @@ "" "maddl\\t%0, %w1, %w2, %3" [(set_attr "v8type" "maddl") + (set_attr "type" "mlal") (set_attr "mode" "DI")] ) @@ -2301,6 +2387,7 @@ "" "msubl\\t%0, %w1, %w2, %3" [(set_attr "v8type" "maddl") + (set_attr "type" "mlal") (set_attr "mode" "DI")] ) @@ -2312,6 +2399,7 @@ "" "mnegl\\t%0, %w1, %w2" [(set_attr "v8type" "mull") + (set_attr "type" "mull") (set_attr "mode" "DI")] ) @@ -2326,6 +2414,7 @@ "" "mulh\\t%0, %1, %2" [(set_attr "v8type" "mulh") + (set_attr "type" "mull") (set_attr "mode" "DI")] ) @@ -2336,6 +2425,7 @@ "" "div\\t%0, %1, %2" [(set_attr "v8type" "div") + (set_attr "type" "div") (set_attr "mode" "")] ) @@ -2348,6 +2438,7 @@ "" "div\\t%w0, %w1, %w2" [(set_attr "v8type" "div") + (set_attr "type" "div") (set_attr "mode" "SI")] ) @@ -2357,13 +2448,15 @@ (define_insn "*cmp" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:GPI 0 "register_operand" "r,r") - (match_operand:GPI 1 "aarch64_plus_operand" "rI,J")))] + (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 
1 "aarch64_plus_operand" "r,I,J")))] "" "@ + cmp\\t%0, %1 cmp\\t%0, %1 cmn\\t%0, #%n1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "")] ) @@ -2376,6 +2469,7 @@ fcmp\\t%0, #0.0 fcmp\\t%0, %1" [(set_attr "v8type" "fcmp") + (set_attr "type" "fcmp") (set_attr "mode" "")] ) @@ -2388,6 +2482,7 @@ fcmpe\\t%0, #0.0 fcmpe\\t%0, %1" [(set_attr "v8type" "fcmp") + (set_attr "type" "fcmp") (set_attr "mode" "")] ) @@ -2400,6 +2495,7 @@ "" "cmp\\t%2, %0, %1" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "")] ) @@ -2411,6 +2507,7 @@ "" "cmp\\t%1, %0, xt" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) @@ -2424,6 +2521,7 @@ "" "cmp\\t%2, %0, xt %1" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "")] ) @@ -2464,6 +2562,7 @@ "" "cset\\t%0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")] ) @@ -2476,6 +2575,7 @@ "" "cset\\t%w0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2486,6 +2586,7 @@ "" "csetm\\t%0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")] ) @@ -2498,6 +2599,7 @@ "" "csetm\\t%w0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2552,6 +2654,7 @@ mov\\t%0, -1 mov\\t%0, 1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")] ) @@ -2576,6 +2679,7 @@ mov\\t%w0, -1 mov\\t%w0, 1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2589,6 +2693,7 @@ "TARGET_FLOAT" "fcsel\\t%0, %3, %4, %m1" [(set_attr "v8type" "fcsel") + (set_attr "type" "fcsel") (set_attr "mode" "")] ) @@ -2638,6 +2743,7 @@ "" "csinc\\t%0, %1, %1, %M2" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")]) (define_insn "csinc3_insn" @@ -2651,6 +2757,7 @@ "" "csinc\\t%0, %4, %3, %M1" [(set_attr "v8type" "csel") + (set_attr 
"type" "csel") (set_attr "mode" "")] ) @@ -2664,6 +2771,7 @@ "" "csinv\\t%0, %4, %3, %M1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")]) (define_insn "*csneg3_insn" @@ -2676,6 +2784,7 @@ "" "csneg\\t%0, %4, %3, %M1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "")]) ;; ------------------------------------------------------------------- @@ -2689,6 +2798,7 @@ "" "\\t%0, %1, %2" [(set_attr "v8type" "logic,logic_imm") + (set_attr "type" "logic_reg,logic_imm") (set_attr "mode" "")]) ;; zero_extend version of above @@ -2700,6 +2810,7 @@ "" "\\t%w0, %w1, %w2" [(set_attr "v8type" "logic,logic_imm") + (set_attr "type" "logic_reg,logic_imm") (set_attr "mode" "SI")]) (define_insn "*and3_compare0" @@ -2713,6 +2824,7 @@ "" "ands\\t%0, %1, %2" [(set_attr "v8type" "logics,logics_imm") + (set_attr "type" "logics_reg,logics_imm") (set_attr "mode" "")] ) @@ -2728,6 +2840,7 @@ "" "ands\\t%w0, %w1, %w2" [(set_attr "v8type" "logics,logics_imm") + (set_attr "type" "logics_reg,logics_imm") (set_attr "mode" "SI")] ) @@ -2744,6 +2857,7 @@ "" "ands\\t%0, %3, %1, %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "")] ) @@ -2762,6 +2876,7 @@ "" "ands\\t%w0, %w3, %w1, %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "SI")] ) @@ -2774,6 +2889,7 @@ "" "\\t%0, %3, %1, %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" "")]) ;; zero_extend version of above @@ -2787,6 +2903,7 @@ "" "\\t%w0, %w3, %w1, %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" "SI")]) (define_insn "one_cmpl2" @@ -2795,6 +2912,7 @@ "" "mvn\\t%0, %1" [(set_attr "v8type" "logic") + (set_attr "type" "logic_reg") (set_attr "mode" "")]) (define_insn "*one_cmpl_2" @@ -2804,6 +2922,7 @@ "" "mvn\\t%0, %1, %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" 
"")]) (define_insn "*_one_cmpl3" @@ -2814,6 +2933,7 @@ "" "\\t%0, %2, %1" [(set_attr "v8type" "logic") + (set_attr "type" "logic_reg") (set_attr "mode" "")]) (define_insn "*and_one_cmpl3_compare0" @@ -2828,6 +2948,7 @@ "" "bics\\t%0, %2, %1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "")]) ;; zero_extend version of above @@ -2843,6 +2964,7 @@ "" "bics\\t%w0, %w2, %w1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "SI")]) (define_insn "*_one_cmpl_3" @@ -2855,6 +2977,7 @@ "" "\\t%0, %3, %1, %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "")]) (define_insn "*and_one_cmpl_3_compare0" @@ -2873,6 +2996,7 @@ "" "bics\\t%0, %3, %1, %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "")]) ;; zero_extend version of above @@ -2892,6 +3016,7 @@ "" "bics\\t%w0, %w3, %w1, %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "SI")]) (define_insn "clz2" @@ -2900,6 +3025,7 @@ "" "clz\\t%0, %1" [(set_attr "v8type" "clz") + (set_attr "type" "clz") (set_attr "mode" "")]) (define_expand "ffs2" @@ -2923,6 +3049,7 @@ "" "cls\\t%0, %1" [(set_attr "v8type" "clz") + (set_attr "type" "clz") (set_attr "mode" "")]) (define_insn "rbit2" @@ -2931,6 +3058,7 @@ "" "rbit\\t%0, %1" [(set_attr "v8type" "rbit") + (set_attr "type" "rbit") (set_attr "mode" "")]) (define_expand "ctz2" @@ -2953,6 +3081,7 @@ "" "tst\\t%0, %1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "")]) (define_insn "*and_3nr_compare0" @@ -2966,6 +3095,7 @@ "" "tst\\t%2, %0, %1" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "")]) ;; ------------------------------------------------------------------- @@ -3055,14 +3185,184 @@ } ) -(define_insn "*3_insn" +;; Logical left shift using SISD or Integer instruction +(define_insn "*aarch64_ashl_sisd_or_int_3" 
+ [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + shl\t%0, %1, %2 + ushl\t%0, %1, %2 + lsl\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" ",,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,")] +) + +;; Logical right shift using SISD or Integer instruction +(define_insn "*aarch64_lshr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (lshiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + ushr\t%0, %1, %2 + # + lsr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" ",,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (lshiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (lshiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] + "" +) + +;; Arithmetic right shift using SISD or Integer instruction +(define_insn "*aarch64_ashr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashiftrt:GPI + (match_operand:GPI 
1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + "" + "@ + sshr\t%0, %1, %2 + # + asr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" ",,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (ashiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (ashiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] + "" +) + +(define_insn "*aarch64_sisd_ushl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_USHL))] + "TARGET_SIMD" + "ushl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_ushl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_USHL_2S))] + "TARGET_SIMD" + "ushl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_sshl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + 
(match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_SSHL))] + "TARGET_SIMD" + "sshl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sshl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SSHL_2S))] + "TARGET_SIMD" + "sshl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_neg_qi" + [(set (match_operand:QI 0 "register_operand" "=w") + (unspec:QI [(match_operand:QI 1 "register_operand" "w")] + UNSPEC_SISD_NEG))] + "TARGET_SIMD" + "neg\t%d0, %d1" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_negabs") + (set_attr "simd_mode" "QI")] +) + +;; Rotate right +(define_insn "*ror3_insn" [(set (match_operand:GPI 0 "register_operand" "=r") - (SHIFT:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs")))] + (rotatert:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs")))] "" - "\\t%0, %1, %2" + "ror\\t%0, %1, %2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "")] ) @@ -3075,6 +3375,7 @@ "" "\\t%w0, %w1, %w2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "SI")] ) @@ -3085,6 +3386,7 @@ "" "lsl\\t%0, %1, %2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "")] ) @@ -3098,6 +3400,7 @@ return "\t%w0, %w1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3111,6 +3414,7 @@ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (mode))" "extr\\t%0, %1, %2, %4" [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "")] ) @@ -3126,6 +3430,7 @@ (UINTVAL (operands[3]) + UINTVAL 
(operands[4]) == 32)" "extr\\t%w0, %w1, %w2, %4" [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "SI")] ) @@ -3139,6 +3444,7 @@ return "ror\\t%0, %1, %3"; } [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "")] ) @@ -3154,6 +3460,7 @@ return "ror\\t%w0, %w1, %3"; } [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "SI")] ) @@ -3168,6 +3475,7 @@ return "bfiz\t%0, %1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3182,6 +3490,7 @@ return "ubfx\t%0, %1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3196,6 +3505,7 @@ return "sbfx\\t%0, %1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3220,6 +3530,7 @@ "" "bfx\\t%0, %1, %3, %2" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3264,6 +3575,7 @@ > GET_MODE_BITSIZE (mode)))" "bfi\\t%0, %3, %2, %1" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3279,6 +3591,7 @@ > GET_MODE_BITSIZE (mode)))" "bfxil\\t%0, %2, %3, %1" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3295,6 +3608,7 @@ return "bfiz\t%0, %1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3309,6 +3623,7 @@ && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0" "ubfiz\\t%0, %1, %2, %P3" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "")] ) @@ -3318,6 +3633,7 @@ "" "rev\\t%0, %1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "")] ) @@ -3327,6 +3643,7 @@ "" "rev16\\t%w0, %w1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "HI")] ) @@ -3337,6 +3654,7 @@ "" "rev\\t%w0, %w1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "SI")] ) @@ -3354,6 +3672,7 @@ "TARGET_FLOAT" "frint\\t%0, %1" [(set_attr "v8type" 
"frint") + (set_attr "type" "f_rint") (set_attr "mode" "")] ) @@ -3366,6 +3685,7 @@ "TARGET_FLOAT" "fcvt\\t%0, %1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "") (set_attr "mode2" "")] ) @@ -3380,6 +3700,7 @@ "TARGET_FLOAT" "fmadd\\t%0, %1, %2, %3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac") (set_attr "mode" "")] ) @@ -3391,6 +3712,7 @@ "TARGET_FLOAT" "fmsub\\t%0, %1, %2, %3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac") (set_attr "mode" "")] ) @@ -3402,6 +3724,7 @@ "TARGET_FLOAT" "fnmsub\\t%0, %1, %2, %3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac") (set_attr "mode" "")] ) @@ -3413,6 +3736,7 @@ "TARGET_FLOAT" "fnmadd\\t%0, %1, %2, %3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac") (set_attr "mode" "")] ) @@ -3425,6 +3749,7 @@ "!HONOR_SIGNED_ZEROS (mode) && TARGET_FLOAT" "fnmadd\\t%0, %1, %2, %3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac") (set_attr "mode" "")] ) @@ -3438,6 +3763,7 @@ "TARGET_FLOAT" "fcvt\\t%d0, %s1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "DF") (set_attr "mode2" "SF")] ) @@ -3448,6 +3774,7 @@ "TARGET_FLOAT" "fcvt\\t%s0, %d1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "SF") (set_attr "mode2" "DF")] ) @@ -3458,6 +3785,7 @@ "TARGET_FLOAT" "fcvtzs\\t%0, %1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "") (set_attr "mode2" "")] ) @@ -3468,6 +3796,7 @@ "TARGET_FLOAT" "fcvtzu\\t%0, %1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "") (set_attr "mode2" "")] ) @@ -3478,6 +3807,7 @@ "TARGET_FLOAT" "scvtf\\t%0, %1" [(set_attr "v8type" "fcvti2f") + (set_attr "type" "f_cvti2f") (set_attr "mode" "") (set_attr "mode2" "")] ) @@ -3488,6 +3818,7 @@ "TARGET_FLOAT" "ucvtf\\t%0, %1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "") (set_attr "mode2" "")] ) @@ -3504,6 +3835,7 @@ "TARGET_FLOAT" "fadd\\t%0, %1, %2" [(set_attr 
"v8type" "fadd") + (set_attr "type" "fadd") (set_attr "mode" "")] ) @@ -3515,6 +3847,7 @@ "TARGET_FLOAT" "fsub\\t%0, %1, %2" [(set_attr "v8type" "fadd") + (set_attr "type" "fadd") (set_attr "mode" "")] ) @@ -3526,6 +3859,7 @@ "TARGET_FLOAT" "fmul\\t%0, %1, %2" [(set_attr "v8type" "fmul") + (set_attr "type" "fmul") (set_attr "mode" "")] ) @@ -3537,6 +3871,7 @@ "TARGET_FLOAT" "fnmul\\t%0, %1, %2" [(set_attr "v8type" "fmul") + (set_attr "type" "fmul") (set_attr "mode" "")] ) @@ -3548,6 +3883,7 @@ "TARGET_FLOAT" "fdiv\\t%0, %1, %2" [(set_attr "v8type" "fdiv") + (set_attr "type" "fdiv") (set_attr "mode" "")] ) @@ -3557,6 +3893,7 @@ "TARGET_FLOAT" "fneg\\t%0, %1" [(set_attr "v8type" "ffarith") + (set_attr "type" "ffarith") (set_attr "mode" "")] ) @@ -3566,6 +3903,7 @@ "TARGET_FLOAT" "fsqrt\\t%0, %1" [(set_attr "v8type" "fsqrt") + (set_attr "type" "fsqrt") (set_attr "mode" "")] ) @@ -3575,6 +3913,7 @@ "TARGET_FLOAT" "fabs\\t%0, %1" [(set_attr "v8type" "ffarith") + (set_attr "type" "ffarith") (set_attr "mode" "")] ) @@ -3589,6 +3928,7 @@ "TARGET_FLOAT" "fmaxnm\\t%0, %1, %2" [(set_attr "v8type" "fminmax") + (set_attr "type" "f_minmax") (set_attr "mode" "")] ) @@ -3599,27 +3939,7 @@ "TARGET_FLOAT" "fminnm\\t%0, %1, %2" [(set_attr "v8type" "fminmax") - (set_attr "mode" "")] -) - -(define_insn "aarch64_frecp" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] - FRECP))] - "TARGET_FLOAT" - "frecp\\t%0, %1" - [(set_attr "v8type" "frecp") - (set_attr "mode" "")] -) - -(define_insn "aarch64_frecps" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")] - UNSPEC_FRECPS))] - "TARGET_FLOAT" - "frecps\\t%0, %1, %2" - [(set_attr "v8type" "frecps") + (set_attr "type" "f_minmax") (set_attr "mode" "")] ) @@ -3685,6 +4005,7 @@ "reload_completed || reload_in_progress" "fmov\\t%x0, %d1" [(set_attr "v8type" "fmovf2i") + 
(set_attr "type" "f_mrc") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3697,6 +4018,7 @@ "reload_completed || reload_in_progress" "fmov\\t%x0, %1.d[1]" [(set_attr "v8type" "fmovf2i") + (set_attr "type" "f_mrc") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3708,6 +4030,7 @@ "reload_completed || reload_in_progress" "fmov\\t%0.d[1], %x1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3718,6 +4041,7 @@ "reload_completed || reload_in_progress" "fmov\\t%d0, %x1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3729,6 +4053,7 @@ "reload_completed || reload_in_progress" "fmov\\t%d0, %d1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3761,6 +4086,7 @@ "" "add\\t%0, %1, :lo12:%a2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "")] ) @@ -3773,6 +4099,7 @@ "" "ldr\\t%0, [%1, #:got_lo12:%a2]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "")] ) @@ -3786,6 +4113,7 @@ "TARGET_ILP32" "ldr\\t%w0, [%1, #:got_lo12:%a2]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI")] ) @@ -3796,6 +4124,7 @@ "" "ldr\\t%0, %L1" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI")] ) @@ -3805,6 +4134,7 @@ "" "mrs\\t%0, tpidr_el0" [(set_attr "v8type" "mrs") + (set_attr "type" "mrs") (set_attr "mode" "DI")] ) @@ -3830,6 +4160,7 @@ "" "adrp\\tx0, %A1\;add\\tx0, x0, %L1\;bl\\t%2\;nop" [(set_attr "v8type" "call") + (set_attr "type" "call") (set_attr "length" "16")]) (define_insn "tlsie_small" @@ -3839,6 +4170,7 @@ "" "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI") (set_attr "length" "8")] ) @@ -3851,6 +4183,7 @@ "" "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") 
(set_attr "mode" "DI") (set_attr "length" "8")] ) @@ -3864,6 +4197,7 @@ "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "v8type" "call") + (set_attr "type" "call") (set_attr "length" "16")]) (define_insn "stack_tie" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 99cf123e29e..521b7e817e6 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -29,6 +29,9 @@ #include +#define __AARCH64_UINT64_C(__C) ((uint64_t) __C) +#define __AARCH64_INT64_C(__C) ((int64_t) __C) + typedef __builtin_aarch64_simd_qi int8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_aarch64_simd_hi int16x4_t @@ -446,7 +449,167 @@ typedef struct poly16x8x4_t poly16x8_t val[4]; } poly16x8x4_t; - +/* vget_lane internal macros. */ + +#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ + (__cast_ret \ + __builtin_aarch64_get_lane##__size (__cast_a __a, __b)) + +#define __aarch64_vget_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v2sf, , , __a, __b) +#define __aarch64_vget_lane_f64(__a, __b) (__a) + +#define __aarch64_vget_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) + +#define __aarch64_vget_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, , ,__a, __b) +#define __aarch64_vget_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, , ,__a, __b) +#define __aarch64_vget_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v2si, , ,__a, __b) +#define __aarch64_vget_lane_s64(__a, __b) (__a) + +#define __aarch64_vget_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) +#define __aarch64_vget_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v2si, (uint32_t), 
(int32x2_t), __a, __b) +#define __aarch64_vget_lane_u64(__a, __b) (__a) + +#define __aarch64_vgetq_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v4sf, , , __a, __b) +#define __aarch64_vgetq_lane_f64(__a, __b) \ + __aarch64_vget_lane_any (v2df, , , __a, __b) + +#define __aarch64_vgetq_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) + +#define __aarch64_vgetq_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, , ,__a, __b) +#define __aarch64_vgetq_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, , ,__a, __b) +#define __aarch64_vgetq_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v4si, , ,__a, __b) +#define __aarch64_vgetq_lane_s64(__a, __b) \ + __aarch64_vget_lane_any (v2di, , ,__a, __b) + +#define __aarch64_vgetq_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) +#define __aarch64_vgetq_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) +#define __aarch64_vgetq_lane_u64(__a, __b) \ + __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) + +/* __aarch64_vdup_lane internal macros. 
*/ +#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \ + vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b)) + +#define __aarch64_vdup_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , , __a, __b) +#define __aarch64_vdup_lane_f64(__a, __b) (__a) +#define __aarch64_vdup_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , , __a, __b) +#define __aarch64_vdup_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , , __a, __b) +#define __aarch64_vdup_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , , __a, __b) +#define __aarch64_vdup_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , , __a, __b) +#define __aarch64_vdup_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , , __a, __b) +#define __aarch64_vdup_lane_s64(__a, __b) (__a) +#define __aarch64_vdup_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , , __a, __b) +#define __aarch64_vdup_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , , __a, __b) +#define __aarch64_vdup_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , , __a, __b) +#define __aarch64_vdup_lane_u64(__a, __b) (__a) + +/* __aarch64_vdup_laneq internal macros. 
*/ +#define __aarch64_vdup_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , q, __a, __b) +#define __aarch64_vdup_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, , q, __a, __b) +#define __aarch64_vdup_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , q, __a, __b) +#define __aarch64_vdup_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , q, __a, __b) +#define __aarch64_vdup_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , q, __a, __b) +#define __aarch64_vdup_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , q, __a, __b) +#define __aarch64_vdup_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , q, __a, __b) +#define __aarch64_vdup_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, , q, __a, __b) +#define __aarch64_vdup_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , q, __a, __b) +#define __aarch64_vdup_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , q, __a, __b) +#define __aarch64_vdup_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , q, __a, __b) +#define __aarch64_vdup_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, , q, __a, __b) + +/* __aarch64_vdupq_lane internal macros. 
*/ +#define __aarch64_vdupq_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, , __a, __b) +#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a)) +#define __aarch64_vdupq_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, , __a, __b) +#define __aarch64_vdupq_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, , __a, __b) +#define __aarch64_vdupq_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, , __a, __b) +#define __aarch64_vdupq_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, , __a, __b) +#define __aarch64_vdupq_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, , __a, __b) +#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a)) +#define __aarch64_vdupq_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, , __a, __b) +#define __aarch64_vdupq_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, , __a, __b) +#define __aarch64_vdupq_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, , __a, __b) +#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a)) + +/* __aarch64_vdupq_laneq internal macros. 
*/ +#define __aarch64_vdupq_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, q, q, __a, __b) + +/* vadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { @@ -2307,155 +2470,156 @@ vcreate_p16 (uint64_t __a) return (poly16x4_t) __a; } +/* vget_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vget_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, 
const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vget_lane_s8 (int8x8_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b); + return __aarch64_vget_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vget_lane_s16 (int16x4_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b); + return __aarch64_vget_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vget_lane_s32 (int32x2_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b); + return __aarch64_vget_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vget_lane_f32 (float32x2_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) { - return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b); + return __aarch64_vget_lane_s64 (__a, __b); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vget_lane_u8 (uint8x8_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vget_lane_u16 (uint16x4_t __a, const int __b) { - return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vget_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vget_lane_u32 (uint32x2_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a, - __b); + return __aarch64_vget_lane_u32 (__a, __b); } -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vget_lane_p8 (poly8x8_t __a, 
const int __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) { - return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u64 (__a, __b); } -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vget_lane_p16 (poly16x4_t __a, const int __b) +/* vgetq_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) { - return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vgetq_lane_f32 (__a, __b); } -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vget_lane_s64 (int64x1_t __a, const int __b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vgetq_lane_f64 (float64x2_t __a, const int __b) { - return (int64_t) __builtin_aarch64_get_lanedi (__a, __b); + return __aarch64_vgetq_lane_f64 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vget_lane_u64 (uint64x1_t __a, const int __b) +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b); + return __aarch64_vgetq_lane_p16 (__a, __b); } __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vgetq_lane_s8 (int8x16_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b); + return __aarch64_vgetq_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vgetq_lane_s16 (int16x8_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, 
__b); + return __aarch64_vgetq_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vgetq_lane_s32 (int32x4_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b); + return __aarch64_vgetq_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vgetq_lane_f32 (float32x4_t __a, const int __b) -{ - return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b); -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vgetq_lane_f64 (float64x2_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) { - return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b); + return __aarch64_vgetq_lane_s64 (__a, __b); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vgetq_lane_u8 (uint8x16_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); + return __aarch64_vgetq_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vgetq_lane_u16 (uint16x8_t __a, const int __b) { - return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a, - __b); + return __aarch64_vgetq_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vgetq_lane_u32 (uint32x4_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a, - __b); -} - -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vgetq_lane_p8 (poly8x16_t __a, const int __b) -{ - return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); -} - -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vgetq_lane_p16 (poly16x8_t __a, const int __b) -{ - return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) 
__a, - __b); -} - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vgetq_lane_s64 (int64x2_t __a, const int __b) -{ - return __builtin_aarch64_get_lane_unsignedv2di (__a, __b); + return __aarch64_vgetq_lane_u32 (__a, __b); } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) vgetq_lane_u64 (uint64x2_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a, - __b); + return __aarch64_vgetq_lane_u64 (__a, __b); } +/* vreinterpret */ + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s8 (int8x8_t __a) { @@ -3805,6 +3969,85 @@ vreinterpretq_u32_p16 (poly16x8_t __a) return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); } +#define __GET_LOW(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64_t lo = vgetq_lane_u64 (tmp, 0); \ + return vreinterpret_##__TYPE##_u64 (lo); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + __GET_LOW (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_low_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + __GET_LOW (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + __GET_LOW (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + __GET_LOW (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + __GET_LOW (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + __GET_LOW (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 
+vget_low_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + __GET_LOW (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + __GET_LOW (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + __GET_LOW (u32); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 (__a, 0); +} + +#undef __GET_LOW + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5534,2169 +5777,2370 @@ vcvtxd_f32_f64 (float64_t a) return result; } -#define vdup_lane_f32(a, b) \ +#define vext_f32(a, b, c) \ __extension__ \ ({ \ + float32x2_t b_ = (b); \ float32x2_t a_ = (a); \ float32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_p8(a, b) \ +#define vext_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64x1_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_p8(a, b, c) \ __extension__ \ ({ \ + poly8x8_t b_ = (b); \ poly8x8_t a_ = (a); \ poly8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_p16(a, b) \ +#define vext_p16(a, b, c) \ __extension__ \ ({ \ + poly16x4_t b_ = (b); \ poly16x4_t a_ = (a); \ poly16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, 
#%3*2" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_s8(a, b) \ +#define vext_s8(a, b, c) \ __extension__ \ ({ \ + int8x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_s16(a, b) \ +#define vext_s16(a, b, c) \ __extension__ \ ({ \ + int16x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_s32(a, b) \ +#define vext_s32(a, b, c) \ __extension__ \ ({ \ + int32x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_s64(a, b) \ +#define vext_s64(a, b, c) \ __extension__ \ ({ \ + int64x1_t b_ = (b); \ int64x1_t a_ = (a); \ int64x1_t result; \ - __asm__ ("ins %0.d[0],%1.d[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_u8(a, b) \ +#define vext_u8(a, b, c) \ __extension__ \ ({ \ + uint8x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_u16(a, b) \ +#define vext_u16(a, b, c) \ __extension__ \ ({ \ + uint16x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ 
: "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_u32(a, b) \ +#define vext_u32(a, b, c) \ __extension__ \ ({ \ + uint32x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdup_lane_u64(a, b) \ +#define vext_u64(a, b, c) \ __extension__ \ ({ \ + uint64x1_t b_ = (b); \ uint64x1_t a_ = (a); \ uint64x1_t result; \ - __asm__ ("ins %0.d[0],%1.d[%2]" \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vdup_n_f32 (float32_t a) -{ - float32x2_t result; - __asm__ ("dup %0.2s, %w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} +#define vextq_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vdup_n_p8 (uint32_t a) -{ - poly8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} +#define vextq_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vdup_n_p16 (uint32_t a) -{ - poly16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return 
result; -} +#define vextq_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vdup_n_s8 (int32_t a) -{ - int8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vdup_n_s16 (int32_t a) -{ - int16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vdup_n_s32 (int32_t a) -{ - int32x2_t result; - __asm__ ("dup %0.2s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vdup_n_s64 (int64_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0],%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vdup_n_u8 (uint32_t a) -{ - uint8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vdup_n_u16 (uint32_t a) -{ - uint16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vdup_n_u32 (uint32_t a) -{ - uint32x2_t result; - __asm__ ("dup %0.2s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vdup_n_u64 (uint64_t a) -{ - uint64x1_t result; - __asm__ 
("ins %0.d[0],%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} +#define vextq_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -#define vdupd_lane_f64(a, b) \ +#define vextq_s8(a, b, c) \ __extension__ \ ({ \ - float64x2_t a_ = (a); \ - float64_t result; \ - __asm__ ("dup %d0, %1.d[%2]" \ + int8x16_t b_ = (b); \ + int8x16_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_f32(a, b) \ +#define vextq_s16(a, b, c) \ __extension__ \ ({ \ - float32x2_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_f64(a, b) \ +#define vextq_s32(a, b, c) \ __extension__ \ ({ \ - float64x1_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_p8(a, b) \ +#define vextq_s64(a, b, c) \ __extension__ \ ({ \ - poly8x8_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ + int64x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_p16(a, b) \ +#define vextq_u8(a, b, c) \ __extension__ 
\ ({ \ - poly16x4_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ + uint8x16_t b_ = (b); \ + uint8x16_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_s8(a, b) \ +#define vextq_u16(a, b, c) \ __extension__ \ ({ \ - int8x8_t a_ = (a); \ - int8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_s16(a, b) \ +#define vextq_u32(a, b, c) \ __extension__ \ ({ \ - int16x4_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_s32(a, b) \ +#define vextq_u64(a, b, c) \ __extension__ \ ({ \ - int32x2_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ + uint64x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_s64(a, b) \ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + float32x2_t result; + __asm__ ("fmla %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vfma_lane_f32(a, b, c, d) \ __extension__ \ ({ \ - int64x1_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ + float32x2_t c_ = (c); \ + float32x2_t b_ = (b); \ 
+ float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_u8(a, b) \ +#define vfmad_lane_f64(a, b, c) \ __extension__ \ ({ \ - uint8x8_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64_t result; \ + __asm__ ("fmla %d0,%d1,%2.d[%3]" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_u16(a, b) \ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + float32x4_t result; + __asm__ ("fmla %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + float64x2_t result; + __asm__ ("fmla %0.2d,%2.2d,%3.2d" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vfmaq_lane_f32(a, b, c, d) \ __extension__ \ ({ \ - uint16x4_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ + float32x4_t c_ = (c); \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vdupq_lane_u32(a, b) \ +#define vfmaq_lane_f64(a, b, c, d) \ __extension__ \ ({ \ - uint32x2_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ + float64x2_t c_ = (c); \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* 
No clobbers */); \ result; \ }) -#define vdupq_lane_u64(a, b) \ +#define vfmas_lane_f32(a, b, c) \ __extension__ \ ({ \ - uint64x1_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32_t result; \ + __asm__ ("fmla %s0,%s1,%2.s[%3]" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vdupq_n_f32 (float32_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c) { - float32x4_t result; - __asm__ ("dup %0.4s, %w1" + float32x2_t result; + __asm__ ("fmla %0.2s, %2.2s, %3.s[0]" : "=w"(result) - : "r"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vdupq_n_f64 (float64_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) { - float64x2_t result; - __asm__ ("dup %0.2d, %x1" - : "=w"(result) - : "r"(a) + float32x4_t result; + __asm__ ("fmla %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vdupq_n_p8 (uint32_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) { - poly8x16_t result; - __asm__ ("dup %0.16b,%w1" + float64x2_t result; + __asm__ ("fmla %0.2d, %2.2d, %3.d[0]" : "=w"(result) - : "r"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vdupq_n_p16 (uint32_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_f32 (float32x2_t a, float32x2_t b, 
float32x2_t c) { - poly16x8_t result; - __asm__ ("dup %0.8h,%w1" + float32x2_t result; + __asm__ ("fmls %0.2s,%2.2s,%3.2s" : "=w"(result) - : "r"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vdupq_n_s8 (int32_t a) +#define vfmsd_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64_t result; \ + __asm__ ("fmls %d0,%d1,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) { - int8x16_t result; - __asm__ ("dup %0.16b,%w1" + float32x4_t result; + __asm__ ("fmls %0.4s,%2.4s,%3.4s" : "=w"(result) - : "r"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vdupq_n_s16 (int32_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) { - int16x8_t result; - __asm__ ("dup %0.8h,%w1" + float64x2_t result; + __asm__ ("fmls %0.2d,%2.2d,%3.2d" : "=w"(result) - : "r"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vdupq_n_s32 (int32_t a) +#define vfmss_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32_t result; \ + __asm__ ("fmls %s0,%s1,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t a) { - int32x4_t result; - __asm__ ("dup %0.4s,%w1" + float32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return 
result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vdupq_n_s64 (int64_t a) +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t a) { - int64x2_t result; - __asm__ ("dup %0.2d,%x1" + float64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vdupq_n_u8 (uint32_t a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t a) { - uint8x16_t result; - __asm__ ("dup %0.16b,%w1" + poly8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vdupq_n_u16 (uint32_t a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t a) { - uint16x8_t result; - __asm__ ("dup %0.8h,%w1" + poly16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vdupq_n_u32 (uint32_t a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t a) { - uint32x4_t result; - __asm__ ("dup %0.4s,%w1" + int8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vdupq_n_u64 (uint64_t a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t a) { - uint64x2_t result; - __asm__ ("dup %0.2d,%x1" + int16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -#define vdups_lane_f32(a, b) \ - 
__extension__ \ - ({ \ - float32x4_t a_ = (a); \ - float32_t result; \ - __asm__ ("dup %s0, %1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t a) +{ + int32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t a) +{ + int64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x1_t b_ = (b); \ - float64x1_t a_ = (a); \ - float64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t a) +{ + uint8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x8_t b_ = (b); \ - poly8x8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t a) +{ + uint16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x4_t b_ = 
(b); \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t a) +{ + uint32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t a) +{ + uint64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vext_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("shsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vext_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x1_t b_ = (b); \ - int64x1_t a_ = (a); \ - int64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline 
int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("shsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vext_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("shsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vext_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uhsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vext_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uhsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vext_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x1_t b_ = (b); \ - uint64x1_t a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), 
"w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uhsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("shsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("shsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8x16_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("shsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - 
poly16x8_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uhsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x16_t b_ = (b); \ - int8x16_t a_ = (a); \ - int8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uhsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uhsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} -#define vextq_s32(a, b, c) \ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * a) +{ + float32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_dup_f64 (const float64_t * a) +{ + float64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No 
clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * a) +{ + poly8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * a) +{ + poly16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * a) +{ + int8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * a) +{ + int16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * a) +{ + int32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * a) +{ + int64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * a) +{ + uint8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * a) +{ + uint16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline 
uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * a) +{ + uint32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * a) +{ + uint64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +#define vld1_lane_f32(a, b, c) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + float32x2_t b_ = (b); \ + const float32_t * a_ = (a); \ + float32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vextq_s64(a, b, c) \ +#define vld1_lane_f64(a, b, c) \ __extension__ \ ({ \ - int64x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + float64x1_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vextq_u8(a, b, c) \ +#define vld1_lane_p8(a, b, c) \ __extension__ \ ({ \ - uint8x16_t b_ = (b); \ - uint8x16_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + poly8x8_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vextq_u16(a, b, c) \ +#define vld1_lane_p16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + poly16x4_t b_ = (b); \ + const poly16_t * a_ = 
(a); \ + poly16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vextq_u32(a, b, c) \ +#define vld1_lane_s8(a, b, c) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + int8x8_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vextq_u64(a, b, c) \ +#define vld1_lane_s16(a, b, c) \ __extension__ \ ({ \ - uint64x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + int16x4_t b_ = (b); \ + const int16_t * a_ = (a); \ + int16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) -{ - float32x2_t result; - __asm__ ("fmla %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -#define vfma_lane_f32(a, b, c, d) \ +#define vld1_lane_s32(a, b, c) \ __extension__ \ ({ \ - float32x2_t c_ = (c); \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \ + int32x2_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vfmad_lane_f64(a, b, c) \ +#define vld1_lane_s64(a, b, c) \ __extension__ \ ({ \ - float64x2_t b_ = (b); \ - float64_t a_ = (a); \ - float64_t result; \ - __asm__ ("fmla 
%d0,%d1,%2.d[%3]" \ + int64x1_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) -{ - float32x4_t result; - __asm__ ("fmla %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) -{ - float64x2_t result; - __asm__ ("fmla %0.2d,%2.2d,%3.2d" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -#define vfmaq_lane_f32(a, b, c, d) \ +#define vld1_lane_u8(a, b, c) \ __extension__ \ ({ \ - float32x4_t c_ = (c); \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \ + uint8x8_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vfmaq_lane_f64(a, b, c, d) \ +#define vld1_lane_u16(a, b, c) \ __extension__ \ ({ \ - float64x2_t c_ = (c); \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \ + uint16x4_t b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -#define vfmas_lane_f32(a, b, c) \ +#define vld1_lane_u32(a, b, c) \ __extension__ \ ({ \ - float32x4_t b_ = (b); \ - float32_t a_ = (a); \ - float32_t result; \ - __asm__ ("fmla %s0,%s1,%2.s[%3]" \ + uint32x2_t b_ = (b); \ 
+ const uint32_t * a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -{ - float32x2_t result; - __asm__ ("fmla %0.2s, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} +#define vld1_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +vld1q_dup_f32 (const float32_t * a) { float32x4_t result; - __asm__ ("fmla %0.4s, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +vld1q_dup_f64 (const float64_t * a) { float64x2_t result; - __asm__ ("fmla %0.2d, %2.2d, %3.d[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * a) { - float32x2_t result; - __asm__ ("fmls %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); + poly8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" 
+ : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -#define vfmsd_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64_t a_ = (a); \ - float64_t result; \ - __asm__ ("fmls %d0,%d1,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * a) { - float32x4_t result; - __asm__ ("fmls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); + poly16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * a) { - float64x2_t result; - __asm__ ("fmls %0.2d,%2.2d,%3.2d" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); + int8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -#define vfmss_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32_t a_ = (a); \ - float32_t result; \ - __asm__ ("fmls %s0,%s1,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_high_f32 (float32x4_t a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * a) { - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + int16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + 
: "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_high_f64 (float64x2_t a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * a) { - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + int32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_high_p8 (poly8x16_t a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * a) { - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + int64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_high_p16 (poly16x8_t a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * a) { - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + uint8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_high_s8 (int8x16_t a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t * a) { - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + uint16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_high_s16 (int16x8_t a) +__extension__ static __inline 
uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * a) { - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + uint32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_high_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_high_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_high_u8 (uint8x16_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * a) { - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); + uint64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_high_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + const float32_t * a_ = (a); \ + float32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_high_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define 
vld1q_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_high_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -#define vget_lane_f64(a, b) \ +#define vld1q_lane_p16(a, b, c) \ __extension__ \ ({ \ - float64x1_t a_ = (a); \ - float64_t result; \ - __asm__ ("umov %x0, %1.d[%2]" \ - : "=r"(result) \ - : "w"(a_), "i"(b) \ + poly16x8_t b_ = (b); \ + const poly16_t * a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_low_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_low_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ 
= (b); \ + const int16_t * a_ = (a); \ + int16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_low_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_low_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_low_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_low_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t 
b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_low_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vld1q_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + const uint32_t * a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_low_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; +#define vld1q_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_lane_f32(a, b, c, d) \ + __extension__ \ + ({ \ + float32x2_t c_ = (c); \ + float32x2_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + float32x2_t t1; \ + __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \ + : "=w"(result), "=w"(t1) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + int32x2_t a_ = 
(a); \ + int32x2_t result; \ + __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmla_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ 
+ }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + float32x2_t t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_low_u8 (uint8x16_t a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) { - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" + int16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : "=w"(result) - : "w"(a) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_low_u16 (uint16x8_t a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) { - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" + int32x2_t result; + __asm__ ("mla %0.2s,%2.2s,%3.s[0]" : "=w"(result) - : "w"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_low_u32 (uint32x4_t a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) { - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" + uint16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : "=w"(result) - : "w"(a) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_low_u64 (uint64x2_t a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) { - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" + uint32x2_t result; + __asm__ 
("mla %0.2s,%2.2s,%3.s[0]" : "=w"(result) - : "w"(a) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vhsub_s8 (int8x8_t a, int8x8_t b) +vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) { int8x8_t result; - __asm__ ("shsub %0.8b, %1.8b, %2.8b" + __asm__ ("mla %0.8b, %2.8b, %3.8b" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vhsub_s16 (int16x4_t a, int16x4_t b) +vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) { int16x4_t result; - __asm__ ("shsub %0.4h, %1.4h, %2.4h" + __asm__ ("mla %0.4h, %2.4h, %3.4h" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vhsub_s32 (int32x2_t a, int32x2_t b) +vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) { int32x2_t result; - __asm__ ("shsub %0.2s, %1.2s, %2.2s" + __asm__ ("mla %0.2s, %2.2s, %3.2s" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vhsub_u8 (uint8x8_t a, uint8x8_t b) +vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t result; - __asm__ ("uhsub %0.8b, %1.8b, %2.8b" + __asm__ ("mla %0.8b, %2.8b, %3.8b" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vhsub_u16 (uint16x4_t a, uint16x4_t b) +vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) { uint16x4_t result; - __asm__ ("uhsub %0.4h, %1.4h, %2.4h" + __asm__ ("mla %0.4h, %2.4h, %3.4h" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
-vhsub_u32 (uint32x2_t a, uint32x2_t b) +vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) { uint32x2_t result; - __asm__ ("uhsub %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vhsubq_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("shsub %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vhsubq_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("shsub %0.8h, %1.8h, %2.8h" + __asm__ ("mla %0.2s, %2.2s, %3.2s" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vhsubq_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("shsub %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} +#define vmlal_high_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vhsubq_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uhsub %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vhsubq_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uhsub %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vhsubq_u32 
(uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uhsub %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vld1_dup_f32 (const float32_t * a) -{ - float32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vld1_dup_f64 (const float64_t * a) -{ - float64x1_t result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vld1_dup_p8 (const poly8_t * a) -{ - poly8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vld1_dup_p16 (const poly16_t * a) -{ - poly16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vld1_dup_s8 (const int8_t * a) -{ - int8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vld1_dup_s16 (const int16_t * a) -{ - int16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vld1_dup_s32 (const int32_t * a) -{ - int32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vld1_dup_s64 (const int64_t * a) -{ - int64x1_t 
result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vld1_dup_u8 (const uint8_t * a) -{ - uint8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vld1_dup_u16 (const uint16_t * a) -{ - uint16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vld1_dup_u32 (const uint32_t * a) -{ - uint32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vld1_dup_u64 (const uint64_t * a) -{ - uint64x1_t result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -#define vld1_lane_f32(a, b, c) \ +#define vmlal_high_lane_s32(a, b, c, d) \ __extension__ \ ({ \ - float32x2_t b_ = (b); \ - const float32_t * a_ = (a); \ - float32x2_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_f64(a, b, c) \ +#define vmlal_high_lane_u16(a, b, c, d) \ __extension__ \ ({ \ - float64x1_t b_ = (b); \ - const float64_t * a_ = (a); \ - float64x1_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), 
"x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_p8(a, b, c) \ +#define vmlal_high_lane_u32(a, b, c, d) \ __extension__ \ ({ \ - poly8x8_t b_ = (b); \ - const poly8_t * a_ = (a); \ - poly8x8_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_p16(a, b, c) \ +#define vmlal_high_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ - poly16x4_t b_ = (b); \ - const poly16_t * a_ = (a); \ - poly16x4_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_s8(a, b, c) \ +#define vmlal_high_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ - int8x8_t b_ = (b); \ - const int8_t * a_ = (a); \ - int8x8_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_s16(a, b, c) \ +#define vmlal_high_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ - int16x4_t b_ = (b); \ - const int16_t * a_ = (a); \ - int16x4_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_s32(a, b, c) 
\ +#define vmlal_high_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ - int32x2_t b_ = (b); \ - const int32_t * a_ = (a); \ - int32x2_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1_lane_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x1_t b_ = (b); \ - const int64_t * a_ = (a); \ - int64x1_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ - : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ - : /* No clobbers */); \ - result; \ - }) - -#define vld1_lane_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x8_t b_ = (b); \ - const uint8_t * a_ = (a); \ - uint8x8_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ - : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ - : /* No clobbers */); \ - result; \ - }) - -#define vld1_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - const uint16_t * a_ = (a); \ - uint16x4_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ - : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ - : /* No clobbers */); \ - result; \ - }) - -#define vld1_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - const uint32_t * a_ = (a); \ - uint32x2_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ - : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ - : /* No clobbers */); \ - result; \ - }) - -#define vld1_lane_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x1_t b_ = (b); \ - const uint64_t * a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ - : "=w"(result) \ - : "i" (c), "Utv"(*a_), "0"(b_) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_f32 (const float32_t * a) -{ - float32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) 
- : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_f64 (const float64_t * a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) { - float64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + int32x4_t result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); return result; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_p8 (const poly8_t * a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) { - poly8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + int64x2_t result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_p16 (const poly16_t * a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) { - poly16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + uint32x4_t result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_s8 (const int8_t * a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) { - int8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + uint64x2_t result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), 
"w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_s16 (const int16_t * a) +vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) { int16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_s32 (const int32_t * a) +vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) { int32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_s64 (const int64_t * a) +vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) { int64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_u8 (const uint8_t * a) -{ - uint8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_u16 (const uint16_t * a) +vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) { uint16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_u32 (const uint32_t * 
a) +vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) { uint32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_u64 (const uint64_t * a) +vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) { uint64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); + __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); return result; } -#define vld1q_lane_f32(a, b, c) \ +#define vmlal_lane_s16(a, b, c, d) \ __extension__ \ ({ \ - float32x4_t b_ = (b); \ - const float32_t * a_ = (a); \ - float32x4_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_f64(a, b, c) \ +#define vmlal_lane_s32(a, b, c, d) \ __extension__ \ ({ \ - float64x2_t b_ = (b); \ - const float64_t * a_ = (a); \ - float64x2_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_p8(a, b, c) \ +#define vmlal_lane_u16(a, b, c, d) \ __extension__ \ ({ \ - poly8x16_t b_ = (b); \ - const poly8_t * a_ = (a); \ - poly8x16_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) 
\ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_p16(a, b, c) \ +#define vmlal_lane_u32(a, b, c, d) \ __extension__ \ ({ \ - poly16x8_t b_ = (b); \ - const poly16_t * a_ = (a); \ - poly16x8_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_s8(a, b, c) \ +#define vmlal_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ - int8x16_t b_ = (b); \ - const int8_t * a_ = (a); \ - int8x16_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ - : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_s16(a, b, c) \ +#define vmlal_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ - const int16_t * a_ = (a); \ - int16x8_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_s32(a, b, c) \ +#define vmlal_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ - const int32_t * a_ = (a); \ - int32x4_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No 
clobbers */); \ result; \ }) -#define vld1q_lane_s64(a, b, c) \ +#define vmlal_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ - int64x2_t b_ = (b); \ - const int64_t * a_ = (a); \ - int64x2_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_u8(a, b, c) \ +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("smlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("umlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlaq_lane_f32(a, b, c, d) \ __extension__ \ ({ \ - uint8x16_t b_ = (b); \ - const uint8_t * a_ = (a); \ - uint8x16_t result; \ - __asm__ ("ld1 {%0.b}[%1], %2" \ - : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + float32x4_t c_ = (c); \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + float32x4_t t1; \ + __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \ + : "=w"(result), "=w"(t1) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_u16(a, b, c) \ +#define vmlaq_lane_s16(a, b, c, d) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ - const 
uint16_t * a_ = (a); \ - uint16x8_t result; \ - __asm__ ("ld1 {%0.h}[%1], %2" \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_u32(a, b, c) \ +#define vmlaq_lane_s32(a, b, c, d) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ - const uint32_t * a_ = (a); \ - uint32x4_t result; \ - __asm__ ("ld1 {%0.s}[%1], %2" \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vld1q_lane_u64(a, b, c) \ +#define vmlaq_lane_u16(a, b, c, d) \ __extension__ \ ({ \ - uint64x2_t b_ = (b); \ - const uint64_t * a_ = (a); \ - uint64x2_t result; \ - __asm__ ("ld1 {%0.d}[%1], %2" \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "i"(c), "Utv"(*a_), "0"(b_) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_lane_f32(a, b, c, d) \ +#define vmlaq_lane_u32(a, b, c, d) \ __extension__ \ ({ \ - float32x2_t c_ = (c); \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - float32x2_t t1; \ - __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \ - : "=w"(result), "=w"(t1) \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ + : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_lane_s16(a, b, c, d) \ +#define vmlaq_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int16x4_t a_ 
= (a); \ - int16x4_t result; \ - __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_lane_s32(a, b, c, d) \ +#define vmlaq_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_lane_u16(a, b, c, d) \ +#define vmlaq_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ }) -#define vmla_lane_u32(a, b, c, d) \ +#define vmlaq_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ - uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_laneq_s16(a, b, c, d) \ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t a, 
float32x4_t b, float32_t c) +{ + float32x4_t result; + float32x4_t t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + float64x2_t t1; + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ 
static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmls_lane_f32(a, b, c, d) \ __extension__ \ ({ \ - int16x8_t c_ = (c); \ + float32x2_t c_ = (c); \ + float32x2_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + float32x2_t t1; \ + __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \ + : "=w"(result), "=w"(t1) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmls_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ int16x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x4_t result; \ - __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + __asm__ ("mls 
%0.4h,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_laneq_s32(a, b, c, d) \ +#define vmls_lane_s32(a, b, c, d) \ __extension__ \ ({ \ - int32x4_t c_ = (c); \ + int32x2_t c_ = (c); \ int32x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x2_t result; \ - __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_laneq_u16(a, b, c, d) \ +#define vmls_lane_u16(a, b, c, d) \ __extension__ \ ({ \ - uint16x8_t c_ = (c); \ + uint16x4_t c_ = (c); \ uint16x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x4_t result; \ - __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ + __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmla_laneq_u32(a, b, c, d) \ +#define vmls_lane_u32(a, b, c, d) \ __extension__ \ ({ \ - uint32x4_t c_ = (c); \ + uint32x2_t c_ = (c); \ uint32x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x2_t result; \ - __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \ + __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ @@ -7704,11 +8148,11 @@ vld1q_dup_u64 (const uint64_t * a) }) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) { float32x2_t result; float32x2_t t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" : "=w"(result), "=w"(t1) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7716,21 +8160,21 @@ vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
-vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) { int16x4_t result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) { int32x2_t result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7738,21 +8182,21 @@ vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) { uint16x4_t result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) { uint32x2_t result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7760,10 +8204,10 @@ vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) { int8x8_t result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" + __asm__ ("mls %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7771,10 +8215,10 @@ vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmla_s16 
(int16x4_t a, int16x4_t b, int16x4_t c) +vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) { int16x4_t result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" + __asm__ ("mls %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7782,10 +8226,10 @@ vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) { int32x2_t result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" + __asm__ ("mls %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7793,10 +8237,10 @@ vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" + __asm__ ("mls %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7804,10 +8248,10 @@ vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) { uint16x4_t result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" + __asm__ ("mls %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7815,122 +8259,122 @@ vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) { uint32x2_t result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" + __asm__ ("mls %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -#define vmlal_high_lane_s16(a, b, c, d) \ +#define vmlsl_high_lane_s16(a, b, c, d) \ __extension__ 
\ ({ \ int16x8_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ - __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_lane_s32(a, b, c, d) \ +#define vmlsl_high_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ - __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_lane_u16(a, b, c, d) \ +#define vmlsl_high_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ - __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_lane_u32(a, b, c, d) \ +#define vmlsl_high_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ - __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_laneq_s16(a, b, c, d) \ +#define vmlsl_high_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ - __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_laneq_s32(a, b, c, d) \ +#define vmlsl_high_laneq_s32(a, b, c, d) \ 
__extension__ \ ({ \ int32x4_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ - __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_laneq_u16(a, b, c, d) \ +#define vmlsl_high_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ - __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_high_laneq_u32(a, b, c, d) \ +#define vmlsl_high_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ - __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ @@ -7938,21 +8382,21 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) }) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) { int32x4_t result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) { int64x2_t result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7960,21 +8404,21 @@ vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) } 
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) { uint32x4_t result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) { uint64x2_t result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7982,10 +8426,10 @@ vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) { int16x8_t result; - __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" + __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -7993,10 +8437,10 @@ vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) { int32x4_t result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" + __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8004,10 +8448,10 @@ vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) { int64x2_t result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" + __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" : 
"=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8015,10 +8459,10 @@ vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) { uint16x8_t result; - __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" + __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8026,10 +8470,10 @@ vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) { uint32x4_t result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" + __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8037,122 +8481,122 @@ vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) { uint64x2_t result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" + __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -#define vmlal_lane_s16(a, b, c, d) \ +#define vmlsl_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x4_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ - __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_lane_s32(a, b, c, d) \ +#define vmlsl_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x2_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ - __asm__ ("smlal 
%0.2d,%2.2s,%3.s[%4]" \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_lane_u16(a, b, c, d) \ +#define vmlsl_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x4_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ - __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_lane_u32(a, b, c, d) \ +#define vmlsl_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x2_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ - __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_laneq_s16(a, b, c, d) \ +#define vmlsl_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ - __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_laneq_s32(a, b, c, d) \ +#define vmlsl_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ - __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_laneq_u16(a, b, c, d) \ +#define vmlsl_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ - __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ 
- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlal_laneq_u32(a, b, c, d) \ +#define vmlsl_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ - __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ @@ -8160,21 +8604,21 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) }) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) { int32x4_t result; - __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" + __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) { int64x2_t result; - __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" + __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8182,21 +8626,21 @@ vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) { uint32x4_t result; - __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" + __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) { uint64x2_t result; - __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" 
+ __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8204,10 +8648,10 @@ vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) { int16x8_t result; - __asm__ ("smlal %0.8h,%2.8b,%3.8b" + __asm__ ("smlsl %0.8h, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8215,10 +8659,10 @@ vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) { int32x4_t result; - __asm__ ("smlal %0.4s,%2.4h,%3.4h" + __asm__ ("smlsl %0.4s, %2.4h, %3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8226,10 +8670,10 @@ vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) { int64x2_t result; - __asm__ ("smlal %0.2d,%2.2s,%3.2s" + __asm__ ("smlsl %0.2d, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8237,10 +8681,10 @@ vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) { uint16x8_t result; - __asm__ ("umlal %0.8h,%2.8b,%3.8b" + __asm__ ("umlsl %0.8h, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8248,10 +8692,10 @@ vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +vmlsl_u16 (uint32x4_t a, uint16x4_t b, 
uint16x4_t c) { uint32x4_t result; - __asm__ ("umlal %0.4s,%2.4h,%3.4h" + __asm__ ("umlsl %0.4s, %2.4h, %3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8259,17 +8703,17 @@ vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) { uint64x2_t result; - __asm__ ("umlal %0.2d,%2.2s,%3.2s" + __asm__ ("umlsl %0.2d, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -#define vmlaq_lane_f32(a, b, c, d) \ +#define vmlsq_lane_f32(a, b, c, d) \ __extension__ \ ({ \ float32x4_t c_ = (c); \ @@ -8277,131 +8721,146 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) float32x4_t a_ = (a); \ float32x4_t result; \ float32x4_t t1; \ - __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \ + __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \ : "=w"(result), "=w"(t1) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlaq_lane_s16(a, b, c, d) \ +#define vmlsq_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x8_t b_ = (b); \ int16x8_t a_ = (a); \ int16x8_t result; \ - __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ + __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlaq_lane_s32(a, b, c, d) \ +#define vmlsq_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ - __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ + __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlaq_lane_u16(a, b, c, d) \ +#define vmlsq_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ 
uint16x8_t b_ = (b); \ uint16x8_t a_ = (a); \ uint16x8_t result; \ - __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ + __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlaq_lane_u32(a, b, c, d) \ +#define vmlsq_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ - __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ + __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) -#define vmlaq_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ +#define vmlsq_laneq_f32(__a, __b, __c, __d) \ + __extension__ \ + ({ \ + float32x4_t __c_ = (__c); \ + float32x4_t __b_ = (__b); \ + float32x4_t __a_ = (__a); \ + float32x4_t __result; \ + float32x4_t __t1; \ + __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \ + : "=w"(__result), "=w"(__t1) \ + : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ + : /* No clobbers */); \ + __result; \ }) -#define vmlaq_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ +#define vmlsq_laneq_s16(__a, __b, __c, __d) \ + __extension__ \ + ({ \ + int16x8_t __c_ = (__c); \ + int16x8_t __b_ = (__b); \ + int16x8_t __a_ = (__a); \ + int16x8_t __result; \ + __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ + : "=w"(__result) \ + : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \ + : /* No clobbers */); \ + __result; \ }) -#define 
vmlaq_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ +#define vmlsq_laneq_s32(__a, __b, __c, __d) \ + __extension__ \ + ({ \ + int32x4_t __c_ = (__c); \ + int32x4_t __b_ = (__b); \ + int32x4_t __a_ = (__a); \ + int32x4_t __result; \ + __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \ + : "=w"(__result) \ + : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ + : /* No clobbers */); \ + __result; \ }) -#define vmlaq_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ +#define vmlsq_laneq_u16(__a, __b, __c, __d) \ + __extension__ \ + ({ \ + uint16x8_t __c_ = (__c); \ + uint16x8_t __b_ = (__b); \ + uint16x8_t __a_ = (__a); \ + uint16x8_t __result; \ + __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ + : "=w"(__result) \ + : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \ + : /* No clobbers */); \ + __result; \ + }) + +#define vmlsq_laneq_u32(__a, __b, __c, __d) \ + __extension__ \ + ({ \ + uint32x4_t __c_ = (__c); \ + uint32x4_t __b_ = (__b); \ + uint32x4_t __a_ = (__a); \ + uint32x4_t __result; \ + __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \ + : "=w"(__result) \ + : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ + : /* No clobbers */); \ + __result; \ }) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) { float32x4_t result; float32x4_t t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" : "=w"(result), "=w"(t1) : 
"0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8409,33 +8868,33 @@ vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) { float64x2_t result; float64x2_t t1; - __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) { int16x8_t result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) { int32x4_t result; - __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8443,21 +8902,21 @@ vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) { uint16x8_t result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) { uint32x4_t result; - 
__asm__ ("mla %0.4s,%2.4s,%3.s[0]" + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8465,10 +8924,10 @@ vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) { int8x16_t result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" + __asm__ ("mls %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8476,10 +8935,10 @@ vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) { int16x8_t result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" + __asm__ ("mls %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8487,10 +8946,10 @@ vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) { int32x4_t result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" + __asm__ ("mls %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8498,10 +8957,10 @@ vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) { uint8x16_t result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" + __asm__ ("mls %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8509,10 +8968,10 @@ vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) 
+vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) { uint16x8_t result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" + __asm__ ("mls %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); @@ -8520,2566 +8979,2209 @@ vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) { uint32x4_t result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" + __asm__ ("mls %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } -#define vmls_lane_f32(a, b, c, d) \ - __extension__ \ - ({ \ - float32x2_t c_ = (c); \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - float32x2_t t1; \ - __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \ - : "=w"(result), "=w"(t1) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmls_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmls_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmls_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmls_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = 
(c); \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +vmov_n_f32 (float32_t a) { float32x2_t result; - float32x2_t t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -{ - int16x4_t result; - __asm__ ("mls %0.4h, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -{ - int32x2_t result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + __asm__ ("dup %0.2s, %w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (uint32_t a) { - uint16x4_t result; - __asm__ ("mls %0.4h, %2.4h, %3.h[0]" + poly8x8_t result; + __asm__ ("dup %0.8b,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (uint32_t a) { - uint32x2_t result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + poly16x4_t result; + __asm__ ("dup %0.4h,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + 
: "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +vmov_n_s8 (int32_t a) { int8x8_t result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" + __asm__ ("dup %0.8b,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +vmov_n_s16 (int32_t a) { int16x4_t result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" + __asm__ ("dup %0.4h,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +vmov_n_s32 (int32_t a) { int32x2_t result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" + __asm__ ("dup %0.2s,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t a) +{ + int64x1_t result; + __asm__ ("ins %0.d[0],%x1" + : "=w"(result) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +vmov_n_u8 (uint32_t a) { uint8x8_t result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" + __asm__ ("dup %0.8b,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +vmov_n_u16 (uint32_t a) { uint16x4_t result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" + __asm__ ("dup %0.4h,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmls_u32 (uint32x2_t a, 
uint32x2_t b, uint32x2_t c) +vmov_n_u32 (uint32_t a) { uint32x2_t result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" + __asm__ ("dup %0.2s,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } -#define vmlsl_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t a) +{ + uint64x1_t result; + __asm__ ("ins %0.d[0],%x1" + : "=w"(result) + : "r"(a) + : /* No clobbers */); + return result; +} -#define vmlsl_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ 
- result; \ - }) - -#define vmlsl_high_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_high_s8 (int8x16_t a) +{ + int16x8_t result; + __asm__ ("sshll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +vmovl_high_s16 (int16x8_t a) { int32x4_t result; - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" + __asm__ ("sshll2 %0.4s,%1.8h,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +vmovl_high_s32 (int32x4_t a) { int64x2_t result; - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" + __asm__ ("sshll2 %0.2d,%1.4s,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_high_u8 
(uint8x16_t a) +{ + uint16x8_t result; + __asm__ ("ushll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +vmovl_high_u16 (uint16x8_t a) { uint32x4_t result; - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" + __asm__ ("ushll2 %0.4s,%1.8h,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +vmovl_high_u32 (uint32x4_t a) { uint64x2_t result; - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" + __asm__ ("ushll2 %0.2d,%1.4s,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +vmovl_s8 (int8x8_t a) { int16x8_t result; - __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" + __asm__ ("sshll %0.8h,%1.8b,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +vmovl_s16 (int16x4_t a) { int32x4_t result; - __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" + __asm__ ("sshll %0.4s,%1.4h,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +vmovl_s32 (int32x2_t a) { int64x2_t result; - __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" + __asm__ ("sshll %0.2d,%1.2s,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, 
uint8x16_t c) +vmovl_u8 (uint8x8_t a) { uint16x8_t result; - __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" + __asm__ ("ushll %0.8h,%1.8b,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +vmovl_u16 (uint16x4_t a) { uint32x4_t result; - __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" + __asm__ ("ushll %0.4s,%1.4h,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +vmovl_u32 (uint32x2_t a) { uint64x2_t result; - __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" + __asm__ ("ushll %0.2d,%1.2s,#0" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } -#define vmlsl_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovn_high_s16 (int8x8_t a, int16x8_t b) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovn_high_s32 (int16x4_t a, int32x4_t b) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 
(__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovn_high_s64 (int32x2_t a, int64x2_t b) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovn_high_u16 (uint8x8_t a, uint16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovn_high_u32 (uint16x4_t a, uint32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_laneq_s32(a, b, 
c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovn_high_u64 (uint32x2_t a, uint64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} -#define vmlsl_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t a) +{ + int8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -#define vmlsl_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t a) +{ + int16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t a) { - int32x4_t result; - __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" + int32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" : 
"=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t a) { - int64x2_t result; - __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" + uint8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t a) { - uint32x4_t result; - __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" + uint16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t a) { - uint64x2_t result; - __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" + uint32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t a) +{ + float32x4_t result; + __asm__ ("dup %0.4s, %w1" + : "=w"(result) + : "r"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmovq_n_f64 (float64_t a) +{ + return (float64x2_t) {a, a}; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (uint32_t a) +{ + poly8x16_t result; + __asm__ ("dup %0.16b,%w1" + : "=w"(result) + : "r"(a) + : /* No clobbers 
*/); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 (uint32_t a) +{ + poly16x8_t result; + __asm__ ("dup %0.8h,%w1" + : "=w"(result) + : "r"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int32_t a) +{ + int8x16_t result; + __asm__ ("dup %0.16b,%w1" + : "=w"(result) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +vmovq_n_s16 (int32_t a) { int16x8_t result; - __asm__ ("smlsl %0.8h, %2.8b, %3.8b" + __asm__ ("dup %0.8h,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +vmovq_n_s32 (int32_t a) { int32x4_t result; - __asm__ ("smlsl %0.4s, %2.4h, %3.4h" + __asm__ ("dup %0.4s,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +vmovq_n_s64 (int64_t a) { int64x2_t result; - __asm__ ("smlsl %0.2d, %2.2s, %3.2s" + __asm__ ("dup %0.2d,%x1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint32_t a) +{ + uint8x16_t result; + __asm__ ("dup %0.16b,%w1" + : "=w"(result) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +vmovq_n_u16 (uint32_t a) { uint16x8_t result; - __asm__ ("umlsl %0.8h, %2.8b, %3.8b" + __asm__ ("dup %0.8h,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No 
clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +vmovq_n_u32 (uint32_t a) { uint32x4_t result; - __asm__ ("umlsl %0.4s, %2.4h, %3.4h" + __asm__ ("dup %0.4s,%w1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +vmovq_n_u64 (uint64_t a) { uint64x2_t result; - __asm__ ("umlsl %0.2d, %2.2s, %3.2s" + __asm__ ("dup %0.2d,%x1" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "r"(a) : /* No clobbers */); return result; } -#define vmlsq_lane_f32(a, b, c, d) \ +#define vmul_lane_f32(a, b, c) \ __extension__ \ ({ \ - float32x4_t c_ = (c); \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - float32x4_t t1; \ - __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \ - : "=w"(result), "=w"(t1) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + float32x2_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vmlsq_lane_s16(a, b, c, d) \ +#define vmul_lane_s16(a, b, c) \ __extension__ \ ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vmlsq_lane_s32(a, b, c, d) \ +#define vmul_lane_s32(a, b, c) \ __extension__ \ ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + 
int32x2_t result; \ + __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vmlsq_lane_u16(a, b, c, d) \ +#define vmul_lane_u16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vmlsq_lane_u32(a, b, c, d) \ +#define vmul_lane_u32(a, b, c) \ __extension__ \ ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vmlsq_laneq_f32(__a, __b, __c, __d) \ - __extension__ \ - ({ \ - float32x4_t __c_ = (__c); \ - float32x4_t __b_ = (__b); \ - float32x4_t __a_ = (__a); \ - float32x4_t __result; \ - float32x4_t __t1; \ - __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \ - : "=w"(__result), "=w"(__t1) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ - : /* No clobbers */); \ - __result; \ +#define vmul_laneq_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ }) -#define vmlsq_laneq_s16(__a, __b, __c, __d) \ - __extension__ \ - ({ \ - int16x8_t __c_ = (__c); \ - int16x8_t __b_ = (__b); \ - int16x8_t __a_ = (__a); \ - int16x8_t __result; \ - __asm__ ("mls %0.8h, 
%2.8h, %3.h[%4]" \ - : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ - : /* No clobbers */); \ - __result; \ +#define vmul_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ }) -#define vmlsq_laneq_s32(__a, __b, __c, __d) \ - __extension__ \ - ({ \ - int32x4_t __c_ = (__c); \ - int32x4_t __b_ = (__b); \ - int32x4_t __a_ = (__a); \ - int32x4_t __result; \ - __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \ - : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ - : /* No clobbers */); \ - __result; \ +#define vmul_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ }) -#define vmlsq_laneq_u16(__a, __b, __c, __d) \ - __extension__ \ - ({ \ - uint16x8_t __c_ = (__c); \ - uint16x8_t __b_ = (__b); \ - uint16x8_t __a_ = (__a); \ - uint16x8_t __result; \ - __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ - : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ - : /* No clobbers */); \ - __result; \ +#define vmul_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ }) -#define vmlsq_laneq_u32(__a, __b, __c, __d) \ - __extension__ \ - ({ \ - uint32x4_t __c_ = (__c); \ - uint32x4_t __b_ = (__b); \ - uint32x4_t __a_ = (__a); \ - uint32x4_t __result; \ - __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \ - : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ - : /* No clobbers */); \ - __result; \ +#define vmul_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = 
(b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ }) -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -{ - float32x4_t result; - float32x4_t t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) -{ - float64x2_t result; - float64x2_t t1; - __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) -{ - int16x8_t result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t a, float32_t b) { - int32x4_t result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + float32x2_t result; + __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t a, int16_t b) { - uint16x8_t result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" + int16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" : 
"=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t a, int32_t b) { - uint32x4_t result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + int32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t a, uint16_t b) { - int8x16_t result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" + uint16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t a, uint32_t b) { - int16x8_t result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" + uint32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -{ - int32x4_t result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} +#define vmuld_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64_t result; \ + __asm__ ("fmul %d0,%d1,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers 
*/); \ + result; \ + }) -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -{ - uint8x16_t result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} +#define vmull_high_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -{ - uint16x8_t result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} +#define vmull_high_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -{ - uint32x4_t result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} +#define vmull_high_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmov_n_f32 (float32_t a) +#define vmull_high_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : 
"w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_s16 (int16x8_t a, int16_t b) { - float32x2_t result; - __asm__ ("dup %0.2s, %w1" + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) - : "r"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vmov_n_p8 (uint32_t a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_s32 (int32x4_t a, int32_t b) { - poly8x8_t result; - __asm__ ("dup %0.8b,%w1" + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 
-vmov_n_p16 (uint32_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_u16 (uint16x8_t a, uint16_t b) { - poly16x4_t result; - __asm__ ("dup %0.4h,%w1" + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) - : "r"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmov_n_s8 (int32_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_u32 (uint32x4_t a, uint32_t b) { - int8x8_t result; - __asm__ ("dup %0.8b,%w1" + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmov_n_s16 (int32_t a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_high_p8 (poly8x16_t a, poly8x16_t b) { - int16x4_t result; - __asm__ ("dup %0.4h,%w1" + poly16x8_t result; + __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmov_n_s32 (int32_t a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_high_s8 (int8x16_t a, int8x16_t b) { - int32x2_t result; - __asm__ ("dup %0.2s,%w1" + int16x8_t result; + __asm__ ("smull2 %0.8h,%1.16b,%2.16b" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vmov_n_s64 (int64_t a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_s16 (int16x8_t a, int16x8_t b) { - int64x1_t result; - __asm__ ("ins %0.d[0],%x1" + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.8h" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; 
} -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmov_n_u8 (uint32_t a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_s32 (int32x4_t a, int32x4_t b) { - uint8x8_t result; - __asm__ ("dup %0.8b,%w1" + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.4s" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmov_n_u16 (uint32_t a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_high_u8 (uint8x16_t a, uint8x16_t b) { - uint16x4_t result; - __asm__ ("dup %0.4h,%w1" + uint16x8_t result; + __asm__ ("umull2 %0.8h,%1.16b,%2.16b" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmov_n_u32 (uint32_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_u16 (uint16x8_t a, uint16x8_t b) { - uint32x2_t result; - __asm__ ("dup %0.2s,%w1" + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.8h" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vmov_n_u64 (uint64_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_u32 (uint32x4_t a, uint32x4_t b) { - uint64x1_t result; - __asm__ ("ins %0.d[0],%x1" + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.4s" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmovl_high_s8 (int8x16_t a) -{ - int16x8_t result; - __asm__ ("sshll2 %0.8h,%1.16b,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vmull_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + 
int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ 
+ : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmovl_high_s16 (int16x8_t a) +vmull_n_s16 (int16x4_t a, int16_t b) { int32x4_t result; - __asm__ ("sshll2 %0.4s,%1.8h,#0" + __asm__ ("smull %0.4s,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmovl_high_s32 (int32x4_t a) +vmull_n_s32 (int32x2_t a, int32_t b) { int64x2_t result; - __asm__ ("sshll2 %0.2d,%1.4s,#0" + __asm__ ("smull %0.2d,%1.2s,%2.s[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmovl_high_u8 (uint8x16_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t a, uint16_t b) { - uint16x8_t result; - __asm__ ("ushll2 %0.8h,%1.16b,#0" + uint32x4_t result; + __asm__ ("umull %0.4s,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmovl_high_u16 (uint16x8_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t a, uint32_t b) { - uint32x4_t result; - __asm__ ("ushll2 %0.4s,%1.8h,#0" + uint64x2_t result; + __asm__ ("umull %0.2d,%1.2s,%2.s[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmovl_high_u32 (uint32x4_t a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t a, poly8x8_t b) { - uint64x2_t result; - __asm__ ("ushll2 %0.2d,%1.4s,#0" + poly16x8_t result; + __asm__ ("pmull %0.8h, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; 
} __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmovl_s8 (int8x8_t a) +vmull_s8 (int8x8_t a, int8x8_t b) { int16x8_t result; - __asm__ ("sshll %0.8h,%1.8b,#0" + __asm__ ("smull %0.8h, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmovl_s16 (int16x4_t a) +vmull_s16 (int16x4_t a, int16x4_t b) { int32x4_t result; - __asm__ ("sshll %0.4s,%1.4h,#0" + __asm__ ("smull %0.4s, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmovl_s32 (int32x2_t a) +vmull_s32 (int32x2_t a, int32x2_t b) { int64x2_t result; - __asm__ ("sshll %0.2d,%1.2s,#0" + __asm__ ("smull %0.2d, %1.2s, %2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmovl_u8 (uint8x8_t a) +vmull_u8 (uint8x8_t a, uint8x8_t b) { uint16x8_t result; - __asm__ ("ushll %0.8h,%1.8b,#0" + __asm__ ("umull %0.8h, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmovl_u16 (uint16x4_t a) +vmull_u16 (uint16x4_t a, uint16x4_t b) { uint32x4_t result; - __asm__ ("ushll %0.4s,%1.4h,#0" + __asm__ ("umull %0.4s, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmovl_u32 (uint32x2_t a) +vmull_u32 (uint32x2_t a, uint32x2_t b) { uint64x2_t result; - __asm__ ("ushll %0.2d,%1.2s,#0" + __asm__ ("umull %0.2d, %1.2s, %2.2s" : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmovn_high_s16 (int8x8_t a, 
int16x8_t b) -{ - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" - : "+w"(result) - : "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmovn_high_s32 (int16x4_t a, int32x4_t b) -{ - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} +#define vmulq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmovn_high_s64 (int32x2_t a, int64x2_t b) -{ - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} +#define vmulq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmovn_high_u16 (uint8x8_t a, uint16x8_t b) -{ - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} +#define vmulq_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmovn_high_u32 (uint16x4_t a, 
uint32x4_t b) -{ - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} +#define vmulq_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmovn_high_u64 (uint32x2_t a, uint64x2_t b) -{ - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} +#define vmulq_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmovn_s16 (int16x8_t a) +#define vmulq_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + 
int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulq_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t a, float32_t b) { - int8x8_t result; - __asm__ ("xtn %0.8b,%1.8h" + float32x4_t result; + __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmovn_s32 (int32x4_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_n_f64 (float64x2_t a, float64_t b) { - int16x4_t result; - __asm__ ("xtn %0.4h,%1.4s" + float64x2_t result; + __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmovn_s64 (int64x2_t a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t a, int16_t b) { - int32x2_t result; - __asm__ 
("xtn %0.2s,%1.2d" + int16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmovn_u16 (uint16x8_t a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t a, int32_t b) { - uint8x8_t result; - __asm__ ("xtn %0.8b,%1.8h" + int32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmovn_u32 (uint32x4_t a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t a, uint16_t b) { - uint16x4_t result; - __asm__ ("xtn %0.4h,%1.4s" + uint16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmovn_u64 (uint64x2_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t a, uint32_t b) { - uint32x2_t result; - __asm__ ("xtn %0.2s,%1.2d" + uint32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmovq_n_f32 (float32_t a) +#define vmuls_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32_t result; \ + __asm__ ("fmul %s0,%s1,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_f32 (float32x2_t a, float32x2_t b) { - float32x4_t result; - __asm__ ("dup %0.4s, %w1" + float32x2_t result; + __asm__ ("fmulx 
%0.2s,%1.2s,%2.2s" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmovq_n_f64 (float64_t a) -{ - return (float64x2_t) {a, a}; -} +#define vmulx_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vmovq_n_p8 (uint32_t a) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmulxd_f64 (float64_t a, float64_t b) { - poly8x16_t result; - __asm__ ("dup %0.16b,%w1" + float64_t result; + __asm__ ("fmulx %d0, %d1, %d2" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vmovq_n_p16 (uint32_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_f32 (float32x4_t a, float32x4_t b) { - poly16x8_t result; - __asm__ ("dup %0.8h,%w1" + float32x4_t result; + __asm__ ("fmulx %0.4s,%1.4s,%2.4s" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmovq_n_s8 (int32_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_f64 (float64x2_t a, float64x2_t b) { - int8x16_t result; - __asm__ ("dup %0.16b,%w1" + float64x2_t result; + __asm__ ("fmulx %0.2d,%1.2d,%2.2d" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmovq_n_s16 (int32_t a) -{ - int16x8_t result; - __asm__ ("dup %0.8h,%w1" +#define vmulxq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + 
float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulxq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmulxs_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("fmulx %s0, %s1, %s2" : "=w"(result) - : "r"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmovq_n_s32 (int32_t a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t a) { - int32x4_t result; - __asm__ ("dup %0.4s,%w1" + poly8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmovq_n_s64 (int64_t a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t a) { - int64x2_t result; - __asm__ ("dup %0.2d,%x1" + int8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmovq_n_u8 (uint32_t a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t a) { - uint8x16_t result; - __asm__ ("dup %0.16b,%w1" + int16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmovq_n_u16 (uint32_t a) +__extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t a) { - uint16x8_t result; - __asm__ ("dup %0.8h,%w1" + int32x2_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmovq_n_u32 (uint32_t a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t a) { - uint32x4_t result; - __asm__ ("dup %0.4s,%w1" + uint8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmovq_n_u64 (uint64_t a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t a) { - uint64x2_t result; - __asm__ ("dup %0.2d,%x1" + uint16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" : "=w"(result) - : "r"(a) + : "w"(a) : /* No clobbers */); return result; } -#define vmul_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), 
"w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_laneq_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmul_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmul_n_f32 (float32x2_t a, float32_t b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t a) { - float32x2_t result; - __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" + uint32x2_t result; + __asm__ 
("mvn %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmul_n_s16 (int16x4_t a, int16_t b) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t a) { - int16x4_t result; - __asm__ ("mul %0.4h,%1.4h,%2.h[0]" + poly8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmul_n_s32 (int32x2_t a, int32_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t a) { - int32x2_t result; - __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + int8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmul_n_u16 (uint16x4_t a, uint16_t b) -{ - uint16x4_t result; - __asm__ ("mul %0.4h,%1.4h,%2.h[0]" +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmul_n_u32 (uint32x2_t a, uint32_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t a) { - uint32x2_t result; - __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + int32x4_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -#define vmuld_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64_t a_ = (a); \ - float64_t result; \ - __asm__ ("fmul %d0,%d1,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); 
\ - result; \ - }) - -#define vmull_high_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u32(a, b, c) \ 
- __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_high_n_s16 (int16x8_t a, int16_t b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t a) { - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" + uint8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_high_n_s32 (int32x4_t a, int32_t b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t a) { - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" + uint16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_high_n_u16 (uint16x8_t a, uint16_t b) +vmvnq_u32 (uint32x4_t a) { uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" + __asm__ ("mvn %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_high_n_u32 (uint32x4_t a, uint32_t b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t a) { - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" + float32x2_t result; + __asm__ ("fneg %0.2s,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vmull_high_p8 (poly8x16_t a, poly8x16_t b) +__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t a) { - poly16x8_t result; - __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" + int8x8_t result; + __asm__ ("neg %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmull_high_s8 (int8x16_t a, int8x16_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t a) { - int16x8_t result; - __asm__ ("smull2 %0.8h,%1.16b,%2.16b" + int16x4_t result; + __asm__ ("neg %0.4h,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_high_s16 (int16x8_t a, int16x8_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t a) { - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.8h" + int32x2_t result; + __asm__ ("neg %0.2s,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_high_s32 (int32x4_t a, int32x4_t b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t a) { - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.4s" + float32x4_t result; + __asm__ ("fneg %0.4s,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmull_high_u8 (uint8x16_t a, uint8x16_t b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vnegq_f64 (float64x2_t a) { - uint16x8_t result; - __asm__ ("umull2 %0.8h,%1.16b,%2.16b" + float64x2_t result; + __asm__ ("fneg %0.2d,%1.2d" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) -vmull_high_u16 (uint16x8_t a, uint16x8_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t a) { - uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.8h" + int8x16_t result; + __asm__ ("neg %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_high_u32 (uint32x4_t a, uint32x4_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t a) { - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.4s" + int16x8_t result; + __asm__ ("neg %0.8h,%1.8h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -#define vmull_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ 
("smull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_n_s16 (int16x4_t a, int16_t b) +vnegq_s32 (int32x4_t a) { int32x4_t result; - __asm__ ("smull %0.4s,%1.4h,%2.h[0]" + __asm__ ("neg %0.4s,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_n_s32 (int32x2_t a, int32_t b) +vnegq_s64 (int64x2_t a) { int64x2_t result; - __asm__ ("smull %0.2d,%1.2s,%2.s[0]" + __asm__ ("neg %0.2d,%1.2d" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_n_u16 (uint16x4_t a, uint16_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t a, int8x8_t b) { - uint32x4_t result; - __asm__ ("umull %0.4s,%1.4h,%2.h[0]" + int16x4_t result; + __asm__ ("sadalp %0.4h,%2.8b" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return 
result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_n_u32 (uint32x2_t a, uint32_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t a, int16x4_t b) { - uint64x2_t result; - __asm__ ("umull %0.2d,%1.2s,%2.s[0]" + int32x2_t result; + __asm__ ("sadalp %0.2s,%2.4h" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vmull_p8 (poly8x8_t a, poly8x8_t b) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t a, int32x2_t b) { - poly16x8_t result; - __asm__ ("pmull %0.8h, %1.8b, %2.8b" + int64x1_t result; + __asm__ ("sadalp %0.1d,%2.2s" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t a, uint8x8_t b) +{ + uint16x4_t result; + __asm__ ("uadalp %0.4h,%2.8b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t a, uint16x4_t b) +{ + uint32x2_t result; + __asm__ ("uadalp %0.2s,%2.4h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t a, uint32x2_t b) +{ + uint64x1_t result; + __asm__ ("uadalp %0.1d,%2.2s" + : "=w"(result) + : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmull_s8 (int8x8_t a, int8x8_t b) +vpadalq_s8 (int16x8_t a, int8x16_t b) { int16x8_t result; - __asm__ ("smull %0.8h, %1.8b, %2.8b" + __asm__ ("sadalp %0.8h,%2.16b" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } 
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_s16 (int16x4_t a, int16x4_t b) +vpadalq_s16 (int32x4_t a, int16x8_t b) { int32x4_t result; - __asm__ ("smull %0.4s, %1.4h, %2.4h" + __asm__ ("sadalp %0.4s,%2.8h" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_s32 (int32x2_t a, int32x2_t b) +vpadalq_s32 (int64x2_t a, int32x4_t b) { int64x2_t result; - __asm__ ("smull %0.2d, %1.2s, %2.2s" + __asm__ ("sadalp %0.2d,%2.4s" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmull_u8 (uint8x8_t a, uint8x8_t b) +vpadalq_u8 (uint16x8_t a, uint8x16_t b) { uint16x8_t result; - __asm__ ("umull %0.8h, %1.8b, %2.8b" + __asm__ ("uadalp %0.8h,%2.16b" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_u16 (uint16x4_t a, uint16x4_t b) +vpadalq_u16 (uint32x4_t a, uint16x8_t b) { uint32x4_t result; - __asm__ ("umull %0.4s, %1.4h, %2.4h" + __asm__ ("uadalp %0.4s,%2.8h" : "=w"(result) - : "w"(a), "w"(b) + : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_u32 (uint32x2_t a, uint32x2_t b) +vpadalq_u32 (uint64x2_t a, uint32x4_t b) { uint64x2_t result; - __asm__ ("umull %0.2d, %1.2s, %2.2s" + __asm__ ("uadalp %0.2d,%2.4s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("faddp %0.2s,%1.2s,%2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -#define vmulq_lane_f32(a, b, c) \ - __extension__ \ - 
({ \ - float32x2_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_addpv8qi (__a, __b); +} -#define vmulq_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x1_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_addpv4hi (__a, __b); +} -#define vmulq_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_laneq_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t 
a_ = (a); \ - float32x4_t result; \ - __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_laneq_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulq_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_addpv2si (__a, __b); +} -#define vmulq_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} -#define vmulq_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} -#define vmulq_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("mul 
%0.4s, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, + (int32x2_t) __b); +} -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmulq_n_f32 (float32x4_t a, float32_t b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpaddd_f64 (float64x2_t a) { - float32x4_t result; - __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" + float64_t result; + __asm__ ("faddp %d0,%1.2d" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmulq_n_f64 (float64x2_t a, float64_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t a) { - float64x2_t result; - __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" + int16x4_t result; + __asm__ ("saddlp %0.4h,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmulq_n_s16 (int16x8_t a, int16_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t a) { - int16x8_t result; - __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + int32x2_t result; + __asm__ ("saddlp %0.2s,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmulq_n_s32 (int32x4_t a, int32_t b) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t a) { - int32x4_t result; - __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + int64x1_t result; + __asm__ ("saddlp %0.1d,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; 
} -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmulq_n_u16 (uint16x8_t a, uint16_t b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t a) { - uint16x8_t result; - __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + uint16x4_t result; + __asm__ ("uaddlp %0.4h,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmulq_n_u32 (uint32x4_t a, uint32_t b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpaddl_u16 (uint16x4_t a) { - uint32x4_t result; - __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + uint32x2_t result; + __asm__ ("uaddlp %0.2s,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -#define vmuls_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32_t a_ = (a); \ - float32_t result; \ - __asm__ ("fmul %s0,%s1,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmulx_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t a) { - float32x2_t result; - __asm__ ("fmulx %0.2s,%1.2s,%2.2s" + uint64x1_t result; + __asm__ ("uaddlp %0.1d,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -#define vmulx_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vmulxd_f64 (float64_t a, float64_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vpaddlq_s8 (int8x16_t a) { - float64_t result; - __asm__ ("fmulx %d0, %d1, %d2" + int16x8_t result; + __asm__ ("saddlp %0.8h,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmulxq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t a) { - float32x4_t result; - __asm__ ("fmulx %0.4s,%1.4s,%2.4s" + int32x4_t result; + __asm__ ("saddlp %0.4s,%1.8h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmulxq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmulx %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmulxq_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulxq_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmulxs_f32 (float32_t a, float32_t b) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t a) { - float32_t result; - __asm__ ("fmulx %s0, %s1, %s2" + int64x2_t result; + __asm__ ("saddlp %0.2d,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vmvn_p8 (poly8x8_t a) +__extension__ static __inline 
uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t a) { - poly8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" + uint16x8_t result; + __asm__ ("uaddlp %0.8h,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmvn_s8 (int8x8_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t a) { - int8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" + uint32x4_t result; + __asm__ ("uaddlp %0.4s,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmvn_s16 (int16x4_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t a) { - int16x4_t result; - __asm__ ("mvn %0.8b,%1.8b" + uint64x2_t result; + __asm__ ("uaddlp %0.2d,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmvn_s32 (int32x2_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpaddq_f32 (float32x4_t a, float32x4_t b) { - int32x2_t result; - __asm__ ("mvn %0.8b,%1.8b" + float32x4_t result; + __asm__ ("faddp %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmvn_u8 (uint8x8_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpaddq_f64 (float64x2_t a, float64x2_t b) { - uint8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" + float64x2_t result; + __asm__ ("faddp %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmvn_u16 (uint16x4_t a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 
+vpaddq_s8 (int8x16_t a, int8x16_t b) { - uint16x4_t result; - __asm__ ("mvn %0.8b,%1.8b" + int8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmvn_u32 (uint32x2_t a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddq_s16 (int16x8_t a, int16x8_t b) { - uint32x2_t result; - __asm__ ("mvn %0.8b,%1.8b" + int16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vmvnq_p8 (poly8x16_t a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddq_s32 (int32x4_t a, int32x4_t b) { - poly8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" + int32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmvnq_s8 (int8x16_t a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddq_s64 (int64x2_t a, int64x2_t b) { - int8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" + int64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmvnq_s16 (int16x8_t a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpaddq_u8 (uint8x16_t a, uint8x16_t b) { - int16x8_t result; - __asm__ ("mvn %0.16b,%1.16b" + uint8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmvnq_s32 (int32x4_t a) 
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddq_u16 (uint16x8_t a, uint16x8_t b) { - int32x4_t result; - __asm__ ("mvn %0.16b,%1.16b" + uint16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmvnq_u8 (uint8x16_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddq_u32 (uint32x4_t a, uint32x4_t b) { - uint8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" + uint32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmvnq_u16 (uint16x8_t a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddq_u64 (uint64x2_t a, uint64x2_t b) { - uint16x8_t result; - __asm__ ("mvn %0.16b,%1.16b" + uint64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmvnq_u32 (uint32x4_t a) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpadds_f32 (float32x2_t a) { - uint32x4_t result; - __asm__ ("mvn %0.16b,%1.16b" + float32_t result; + __asm__ ("faddp %s0,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); @@ -11087,252 +11189,252 @@ vmvnq_u32 (uint32x4_t a) } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vneg_f32 (float32x2_t a) +vpmax_f32 (float32x2_t a, float32x2_t b) { float32x2_t result; - __asm__ ("fneg %0.2s,%1.2s" + __asm__ ("fmaxp %0.2s, %1.2s, %2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vneg_s8 (int8x8_t a) +vpmax_s8 (int8x8_t 
a, int8x8_t b) { int8x8_t result; - __asm__ ("neg %0.8b,%1.8b" + __asm__ ("smaxp %0.8b, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vneg_s16 (int16x4_t a) +vpmax_s16 (int16x4_t a, int16x4_t b) { int16x4_t result; - __asm__ ("neg %0.4h,%1.4h" + __asm__ ("smaxp %0.4h, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vneg_s32 (int32x2_t a) +vpmax_s32 (int32x2_t a, int32x2_t b) { int32x2_t result; - __asm__ ("neg %0.2s,%1.2s" + __asm__ ("smaxp %0.2s, %1.2s, %2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vnegq_f32 (float32x4_t a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t a, uint8x8_t b) { - float32x4_t result; - __asm__ ("fneg %0.4s,%1.4s" + uint8x8_t result; + __asm__ ("umaxp %0.8b, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vnegq_f64 (float64x2_t a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t a, uint16x4_t b) { - float64x2_t result; - __asm__ ("fneg %0.2d,%1.2d" + uint16x4_t result; + __asm__ ("umaxp %0.4h, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vnegq_s8 (int8x16_t a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t a, uint32x2_t b) { - int8x16_t result; - __asm__ ("neg %0.16b,%1.16b" + uint32x2_t result; + __asm__ ("umaxp %0.2s, %1.2s, %2.2s" : "=w"(result) - : 
"w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vnegq_s16 (int16x8_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmaxnm_f32 (float32x2_t a, float32x2_t b) { - int16x8_t result; - __asm__ ("neg %0.8h,%1.8h" + float32x2_t result; + __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vnegq_s32 (int32x4_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxnmq_f32 (float32x4_t a, float32x4_t b) { - int32x4_t result; - __asm__ ("neg %0.4s,%1.4s" + float32x4_t result; + __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vnegq_s64 (int64x2_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxnmq_f64 (float64x2_t a, float64x2_t b) { - int64x2_t result; - __asm__ ("neg %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpadal_s8 (int16x4_t a, int8x8_t b) -{ - int16x4_t result; - __asm__ ("sadalp %0.4h,%2.8b" + float64x2_t result; + __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpadal_s16 (int32x2_t a, int16x4_t b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxnmqd_f64 (float64x2_t a) { - int32x2_t result; - __asm__ ("sadalp %0.2s,%2.4h" + float64_t result; + __asm__ ("fmaxnmp %d0,%1.2d" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vpadal_s32 (int64x1_t a, int32x2_t b) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxnms_f32 (float32x2_t a) { - int64x1_t result; - __asm__ ("sadalp %0.1d,%2.2s" + float32_t result; + __asm__ ("fmaxnmp %s0,%1.2s" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpadal_u8 (uint16x4_t a, uint8x8_t b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxq_f32 (float32x4_t a, float32x4_t b) { - uint16x4_t result; - __asm__ ("uadalp %0.4h,%2.8b" + float32x4_t result; + __asm__ ("fmaxp %0.4s, %1.4s, %2.4s" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpadal_u16 (uint32x2_t a, uint16x4_t b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxq_f64 (float64x2_t a, float64x2_t b) { - uint32x2_t result; - __asm__ ("uadalp %0.2s,%2.4h" + float64x2_t result; + __asm__ ("fmaxp %0.2d, %1.2d, %2.2d" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vpadal_u32 (uint64x1_t a, uint32x2_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpmaxq_s8 (int8x16_t a, int8x16_t b) { - uint64x1_t result; - __asm__ ("uadalp %0.1d,%2.2s" + int8x16_t result; + __asm__ ("smaxp %0.16b, %1.16b, %2.16b" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpadalq_s8 (int16x8_t a, int8x16_t b) +vpmaxq_s16 (int16x8_t a, int16x8_t b) { int16x8_t result; - __asm__ ("sadalp %0.8h,%2.16b" + __asm__ ("smaxp %0.8h, %1.8h, %2.8h" : 
"=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpadalq_s16 (int32x4_t a, int16x8_t b) +vpmaxq_s32 (int32x4_t a, int32x4_t b) { int32x4_t result; - __asm__ ("sadalp %0.4s,%2.8h" + __asm__ ("smaxp %0.4s, %1.4s, %2.4s" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vpadalq_s32 (int64x2_t a, int32x4_t b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpmaxq_u8 (uint8x16_t a, uint8x16_t b) { - int64x2_t result; - __asm__ ("sadalp %0.2d,%2.4s" + uint8x16_t result; + __asm__ ("umaxp %0.16b, %1.16b, %2.16b" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpadalq_u8 (uint16x8_t a, uint8x16_t b) +vpmaxq_u16 (uint16x8_t a, uint16x8_t b) { uint16x8_t result; - __asm__ ("uadalp %0.8h,%2.16b" + __asm__ ("umaxp %0.8h, %1.8h, %2.8h" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpadalq_u16 (uint32x4_t a, uint16x8_t b) +vpmaxq_u32 (uint32x4_t a, uint32x4_t b) { uint32x4_t result; - __asm__ ("uadalp %0.4s,%2.8h" + __asm__ ("umaxp %0.4s, %1.4s, %2.4s" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vpadalq_u32 (uint64x2_t a, uint32x4_t b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxqd_f64 (float64x2_t a) { - uint64x2_t result; - __asm__ ("uadalp %0.2d,%2.4s" + float64_t result; + __asm__ ("fmaxp %d0,%1.2d" : "=w"(result) - : "0"(a), "w"(b) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ 
static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxs_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fmaxp %s0,%1.2s" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpadd_f32 (float32x2_t a, float32x2_t b) +vpmin_f32 (float32x2_t a, float32x2_t b) { float32x2_t result; - __asm__ ("faddp %0.2s,%1.2s,%2.2s" + __asm__ ("fminp %0.2s, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11340,192 +11442,131 @@ vpadd_f32 (float32x2_t a, float32x2_t b) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpadd_s8 (int8x8_t __a, int8x8_t __b) -{ - return __builtin_aarch64_addpv8qi (__a, __b); -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpadd_s16 (int16x4_t __a, int16x4_t __b) -{ - return __builtin_aarch64_addpv4hi (__a, __b); -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpadd_s32 (int32x2_t __a, int32x2_t __b) -{ - return __builtin_aarch64_addpv2si (__a, __b); -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpadd_u8 (uint8x8_t __a, uint8x8_t __b) -{ - return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, - (int8x8_t) __b); -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpadd_u16 (uint16x4_t __a, uint16x4_t __b) -{ - return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, - (int16x4_t) __b); -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpadd_u32 (uint32x2_t __a, uint32x2_t __b) -{ - return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, - (int32x2_t) __b); -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpaddd_f64 (float64x2_t a) +vpmin_s8 (int8x8_t a, int8x8_t b) { - float64_t result; - __asm__ ("faddp %d0,%1.2d" + int8x8_t result; + __asm__ ("sminp %0.8b, %1.8b, 
%2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpaddl_s8 (int8x8_t a) +vpmin_s16 (int16x4_t a, int16x4_t b) { int16x4_t result; - __asm__ ("saddlp %0.4h,%1.8b" + __asm__ ("sminp %0.4h, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpaddl_s16 (int16x4_t a) +vpmin_s32 (int32x2_t a, int32x2_t b) { int32x2_t result; - __asm__ ("saddlp %0.2s,%1.4h" + __asm__ ("sminp %0.2s, %1.2s, %2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vpaddl_s32 (int32x2_t a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t a, uint8x8_t b) { - int64x1_t result; - __asm__ ("saddlp %0.1d,%1.2s" + uint8x8_t result; + __asm__ ("uminp %0.8b, %1.8b, %2.8b" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpaddl_u8 (uint8x8_t a) +vpmin_u16 (uint16x4_t a, uint16x4_t b) { uint16x4_t result; - __asm__ ("uaddlp %0.4h,%1.8b" + __asm__ ("uminp %0.4h, %1.4h, %2.4h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpaddl_u16 (uint16x4_t a) +vpmin_u32 (uint32x2_t a, uint32x2_t b) { uint32x2_t result; - __asm__ ("uaddlp %0.2s,%1.4h" + __asm__ ("uminp %0.2s, %1.2s, %2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vpaddl_u32 (uint32x2_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpminnm_f32 (float32x2_t a, float32x2_t b) 
{ - uint64x1_t result; - __asm__ ("uaddlp %0.1d,%1.2s" + float32x2_t result; + __asm__ ("fminnmp %0.2s,%1.2s,%2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpaddlq_s8 (int8x16_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminnmq_f32 (float32x4_t a, float32x4_t b) { - int16x8_t result; - __asm__ ("saddlp %0.8h,%1.16b" + float32x4_t result; + __asm__ ("fminnmp %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpaddlq_s16 (int16x8_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminnmq_f64 (float64x2_t a, float64x2_t b) { - int32x4_t result; - __asm__ ("saddlp %0.4s,%1.8h" + float64x2_t result; + __asm__ ("fminnmp %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vpaddlq_s32 (int32x4_t a) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminnmqd_f64 (float64x2_t a) { - int64x2_t result; - __asm__ ("saddlp %0.2d,%1.4s" + float64_t result; + __asm__ ("fminnmp %d0,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpaddlq_u8 (uint8x16_t a) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpminnms_f32 (float32x2_t a) { - uint16x8_t result; - __asm__ ("uaddlp %0.8h,%1.16b" + float32_t result; + __asm__ ("fminnmp %s0,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpaddlq_u16 (uint16x8_t a) -{ - uint32x4_t result; - __asm__ ("uaddlp %0.4s,%1.8h" - : "=w"(result) - : 
"w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vpaddlq_u32 (uint32x4_t a) -{ - uint64x2_t result; - __asm__ ("uaddlp %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpaddq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminq_f32 (float32x4_t a, float32x4_t b) { float32x4_t result; - __asm__ ("faddp %0.4s,%1.4s,%2.4s" + __asm__ ("fminp %0.4s, %1.4s, %2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11533,10 +11574,10 @@ vpaddq_f32 (float32x4_t a, float32x4_t b) } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpaddq_f64 (float64x2_t a, float64x2_t b) +vpminq_f64 (float64x2_t a, float64x2_t b) { float64x2_t result; - __asm__ ("faddp %0.2d,%1.2d,%2.2d" + __asm__ ("fminp %0.2d, %1.2d, %2.2d" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11544,10 +11585,10 @@ vpaddq_f64 (float64x2_t a, float64x2_t b) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vpaddq_s8 (int8x16_t a, int8x16_t b) +vpminq_s8 (int8x16_t a, int8x16_t b) { int8x16_t result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" + __asm__ ("sminp %0.16b, %1.16b, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11555,10 +11596,10 @@ vpaddq_s8 (int8x16_t a, int8x16_t b) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpaddq_s16 (int16x8_t a, int16x8_t b) +vpminq_s16 (int16x8_t a, int16x8_t b) { int16x8_t result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" + __asm__ ("sminp %0.8h, %1.8h, %2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11566,21 +11607,10 @@ vpaddq_s16 (int16x8_t a, int16x8_t b) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpaddq_s32 (int32x4_t a, int32x4_t b) 
+vpminq_s32 (int32x4_t a, int32x4_t b) { int32x4_t result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vpaddq_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" + __asm__ ("sminp %0.4s, %1.4s, %2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11588,10 +11618,10 @@ vpaddq_s64 (int64x2_t a, int64x2_t b) } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vpaddq_u8 (uint8x16_t a, uint8x16_t b) +vpminq_u8 (uint8x16_t a, uint8x16_t b) { uint8x16_t result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" + __asm__ ("uminp %0.16b, %1.16b, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11599,10 +11629,10 @@ vpaddq_u8 (uint8x16_t a, uint8x16_t b) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpaddq_u16 (uint16x8_t a, uint16x8_t b) +vpminq_u16 (uint16x8_t a, uint16x8_t b) { uint16x8_t result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" + __asm__ ("uminp %0.8h, %1.8h, %2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11610,197 +11640,197 @@ vpaddq_u16 (uint16x8_t a, uint16x8_t b) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpaddq_u32 (uint32x4_t a, uint32x4_t b) +vpminq_u32 (uint32x4_t a, uint32x4_t b) { uint32x4_t result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" + __asm__ ("uminp %0.4s, %1.4s, %2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vpaddq_u64 (uint64x2_t a, uint64x2_t b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminqd_f64 (float64x2_t a) { - uint64x2_t result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" + float64_t result; + __asm__ ("fminp %d0,%1.2d" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); 
return result; } __extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpadds_f32 (float32x2_t a) +vpmins_f32 (float32x2_t a) { float32_t result; - __asm__ ("faddp %s0,%1.2s" + __asm__ ("fminp %s0,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmax_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t a, int16_t b) { - float32x2_t result; - __asm__ ("fmaxp %0.2s, %1.2s, %2.2s" + int16x4_t result; + __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpmax_s8 (int8x8_t a, int8x8_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t a, int32_t b) { - int8x8_t result; - __asm__ ("smaxp %0.8b, %1.8b, %2.8b" + int32x2_t result; + __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpmax_s16 (int16x4_t a, int16x4_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t a, int16_t b) { - int16x4_t result; - __asm__ ("smaxp %0.4h, %1.4h, %2.4h" + int16x8_t result; + __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpmax_s32 (int32x2_t a, int32x2_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t a, int32_t b) { - int32x2_t result; - __asm__ ("smaxp %0.2s, %1.2s, %2.2s" + int32x4_t result; + __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } 
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpmax_u8 (uint8x8_t a, uint8x8_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_s16 (int8x8_t a, int16x8_t b) { - uint8x8_t result; - __asm__ ("umaxp %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpmax_u16 (uint16x4_t a, uint16x4_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_s32 (int16x4_t a, int32x4_t b) { - uint16x4_t result; - __asm__ ("umaxp %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpmax_u32 (uint32x2_t a, uint32x2_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqmovn_high_s64 (int32x2_t a, int64x2_t b) { - uint32x2_t result; - __asm__ ("umaxp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmaxnm_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) { - float32x2_t result; - __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.16b, %1.8h" + 
: "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpmaxnmq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) { - float32x4_t result; - __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpmaxnmq_f64 (float64x2_t a, float64x2_t b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) { - float64x2_t result; - __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpmaxnmqd_f64 (float64x2_t a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovun_high_s16 (uint8x8_t a, int16x8_t b) { - float64_t result; - __asm__ ("fmaxnmp %d0,%1.2d" - : "=w"(result) - : "w"(a) + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmaxnms_f32 (float32x2_t a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovun_high_s32 (uint16x4_t a, int32x4_t b) { - float32_t result; - __asm__ ("fmaxnmp %s0,%1.2s" - : "=w"(result) - : "w"(a) + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C 
(0x0))); + __asm__ ("sqxtun2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpmaxq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovun_high_s64 (uint32x2_t a, int64x2_t b) { - float32x4_t result; - __asm__ ("fmaxp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpmaxq_f64 (float64x2_t a, float64x2_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t a, int16_t b) { - float64x2_t result; - __asm__ ("fmaxp %0.2d, %1.2d, %2.2d" + int16x4_t result; + __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vpmaxq_s8 (int8x16_t a, int8x16_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t a, int32_t b) { - int8x16_t result; - __asm__ ("smaxp %0.16b, %1.16b, %2.16b" + int32x2_t result; + __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); @@ -11808,318 +11838,599 @@ vpmaxq_s8 (int8x16_t a, int8x16_t b) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpmaxq_s16 (int16x8_t a, int16x8_t b) +vqrdmulhq_n_s16 (int16x8_t a, int16_t b) { int16x8_t result; - __asm__ ("smaxp %0.8h, %1.8h, %2.8h" + __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
-vpmaxq_s32 (int32x4_t a, int32x4_t b) +vqrdmulhq_n_s32 (int32x4_t a, int32_t b) { int32x4_t result; - __asm__ ("smaxp %0.4s, %1.4s, %2.4s" + __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vpmaxq_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("umaxp %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} +#define vqrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpmaxq_u16 (uint16x8_t a, uint16x8_t b) +#define vqrshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u32(a, b, c) \ + 
__extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s32(a, b, c) \ + 
__extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ 
\ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrbit_s8 (int8x8_t a) { - uint16x8_t result; - __asm__ ("umaxp %0.8h, %1.8h, %2.8h" + int8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpmaxq_u32 (uint32x4_t a, uint32x4_t b) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrbit_u8 (uint8x8_t a) { - uint32x4_t result; - __asm__ ("umaxp %0.4s, %1.4s, %2.4s" + uint8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpmaxqd_f64 (float64x2_t a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrbitq_s8 (int8x16_t a) { - float64_t result; - __asm__ ("fmaxp %d0,%1.2d" + int8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmaxs_f32 (float32x2_t a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrbitq_u8 (uint8x16_t a) { - float32_t result; - __asm__ ("fmaxp %s0,%1.2s" + uint8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" 
: "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmin_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t a) { - float32x2_t result; - __asm__ ("fminp %0.2s, %1.2s, %2.2s" + uint32x2_t result; + __asm__ ("urecpe %0.2s,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpmin_s8 (int8x8_t a, int8x8_t b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t a) { - int8x8_t result; - __asm__ ("sminp %0.8b, %1.8b, %2.8b" + uint32x4_t result; + __asm__ ("urecpe %0.4s,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpmin_s16 (int16x4_t a, int16x4_t b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t a) { - int16x4_t result; - __asm__ ("sminp %0.4h, %1.4h, %2.4h" + poly8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpmin_s32 (int32x2_t a, int32x2_t b) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t a) { - int32x2_t result; - __asm__ ("sminp %0.2s, %1.2s, %2.2s" + int8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpmin_u8 (uint8x8_t a, uint8x8_t b) +vrev16_u8 (uint8x8_t a) { uint8x8_t result; - __asm__ ("uminp %0.8b, %1.8b, %2.8b" + __asm__ ("rev16 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) 
: /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpmin_u16 (uint16x4_t a, uint16x4_t b) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t a) { - uint16x4_t result; - __asm__ ("uminp %0.4h, %1.4h, %2.4h" + poly8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpmin_u32 (uint32x2_t a, uint32x2_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t a) { - uint32x2_t result; - __asm__ ("uminp %0.2s, %1.2s, %2.2s" + int8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpminnm_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t a) { - float32x2_t result; - __asm__ ("fminnmp %0.2s,%1.2s,%2.2s" + uint8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpminnmq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t a) { - float32x4_t result; - __asm__ ("fminnmp %0.4s,%1.4s,%2.4s" + poly8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpminnmq_f64 (float64x2_t a, float64x2_t b) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t a) { - float64x2_t result; - __asm__ 
("fminnmp %0.2d,%1.2d,%2.2d" + poly16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpminnmqd_f64 (float64x2_t a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t a) { - float64_t result; - __asm__ ("fminnmp %d0,%1.2d" + int8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpminnms_f32 (float32x2_t a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t a) { - float32_t result; - __asm__ ("fminnmp %s0,%1.2s" + int16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpminq_f32 (float32x4_t a, float32x4_t b) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t a) { - float32x4_t result; - __asm__ ("fminp %0.4s, %1.4s, %2.4s" + uint8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpminq_f64 (float64x2_t a, float64x2_t b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t a) { - float64x2_t result; - __asm__ ("fminp %0.2d, %1.2d, %2.2d" + uint16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vpminq_s8 (int8x16_t a, int8x16_t b) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t a) { - int8x16_t 
result; - __asm__ ("sminp %0.16b, %1.16b, %2.16b" + poly8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpminq_s16 (int16x8_t a, int16x8_t b) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t a) { - int16x8_t result; - __asm__ ("sminp %0.8h, %1.8h, %2.8h" + poly16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpminq_s32 (int32x4_t a, int32x4_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t a) { - int32x4_t result; - __asm__ ("sminp %0.4s, %1.4s, %2.4s" + int8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vpminq_u8 (uint8x16_t a, uint8x16_t b) +vrev32q_u8 (uint8x16_t a) { uint8x16_t result; - __asm__ ("uminp %0.16b, %1.16b, %2.16b" + __asm__ ("rev32 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpminq_u16 (uint16x8_t a, uint16x8_t b) +vrev32q_u16 (uint16x8_t a) { uint16x8_t result; - __asm__ ("uminp %0.8h, %1.8h, %2.8h" + __asm__ ("rev32 %0.8h,%1.8h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpminq_u32 
(uint32x4_t a, uint32x4_t b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t a) { - uint32x4_t result; - __asm__ ("uminp %0.4s, %1.4s, %2.4s" + float32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpminqd_f64 (float64x2_t a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev64_p8 (poly8x8_t a) { - float64_t result; - __asm__ ("fminp %d0,%1.2d" + poly8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmins_f32 (float32x2_t a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t a) { - float32_t result; - __asm__ ("fminp %s0,%1.2s" + poly16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); @@ -12127,1667 +12438,927 @@ vpmins_f32 (float32x2_t a) } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vqdmulh_n_s16 (int16x4_t a, int16_t b) +vrev64_s16 (int16x4_t a) { int16x4_t result; - __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" + __asm__ ("rev64 %0.4h,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vqdmulh_n_s32 (int32x2_t a, int32_t b) +vrev64_s32 (int32x2_t a) { int32x2_t result; - __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" + __asm__ ("rev64 %0.2s,%1.2s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } 
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vqdmulhq_n_s16 (int16x8_t a, int16_t b) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t a) { - int16x8_t result; - __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" + uint8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmulhq_n_s32 (int32x4_t a, int32_t b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t a) { - int32x4_t result; - __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" + uint16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqmovn_high_s16 (int8x8_t a, int16x8_t b) -{ - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vqmovn_high_s32 (int16x4_t a, int32x4_t b) -{ - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.8h, %1.4s" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqmovn_high_s64 (int32x2_t a, int64x2_t b) -{ - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t a) { - uint8x16_t result = 
vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) + uint32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t a) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.8h, %1.4s" - : "+w"(result) - : "w"(b) + float32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t a) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) + poly8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqmovun_high_s16 (uint8x8_t a, int16x8_t b) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t a) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) + poly16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vqmovun_high_s32 (uint16x4_t a, int32x4_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t a) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); - __asm__ ("sqxtun2 
%0.8h, %1.4s" - : "+w"(result) - : "w"(b) + int8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vqmovun_high_s64 (uint32x2_t a, int64x2_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t a) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) + int16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vqrdmulh_n_s16 (int16x4_t a, int16_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t a) { - int16x4_t result; - __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" + int32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vqrdmulh_n_s32 (int32x2_t a, int32_t b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t a) { - int32x2_t result; - __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" + uint8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vqrdmulhq_n_s16 (int16x8_t a, int16_t b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t a) { - int16x8_t result; - __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" + uint16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
-vqrdmulhq_n_s32 (int32x4_t a, int32_t b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t a) { - int32x4_t result; - __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" + uint32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a) : /* No clobbers */); return result; } -#define vqrshrn_high_n_s16(a, b, c) \ +#define vrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrn_high_n_s32(a, b, c) \ +#define vrshrn_high_n_s32(a, b, c) \ __extension__ \ ({ \ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrn_high_n_s64(a, b, c) \ +#define vrshrn_high_n_s64(a, b, c) \ __extension__ \ ({ \ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrn_high_n_u16(a, b, c) \ +#define vrshrn_high_n_u16(a, b, c) \ __extension__ \ ({ \ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), 
"i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrn_high_n_u32(a, b, c) \ +#define vrshrn_high_n_u32(a, b, c) \ __extension__ \ ({ \ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrn_high_n_u64(a, b, c) \ +#define vrshrn_high_n_u64(a, b, c) \ __extension__ \ ({ \ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vqrshrun_high_n_s32(a, b, c) \ +#define vrshrn_n_s16(a, b) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + int16x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqrshrun_high_n_s64(a, b, c) \ +#define vrshrn_n_s32(a, b) \ __extension__ \ ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + int32x4_t a_ = 
(a); \ + int16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqshrn_high_n_s16(a, b, c) \ +#define vrshrn_n_s64(a, b) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + int64x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqshrn_high_n_s32(a, b, c) \ +#define vrshrn_n_u16(a, b) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + uint16x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqshrn_high_n_s64(a, b, c) \ +#define vrshrn_n_u32(a, b) \ __extension__ \ ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + uint32x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqshrn_high_n_u16(a, b, c) \ +#define vrshrn_n_u64(a, b) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + uint64x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) -#define vqshrn_high_n_u32(a, b, 
c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrun_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrun_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrbit_s8 (int8x8_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t a) { - int8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" + float32x2_t result; + __asm__ ("frsqrte %0.2s,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x8_t __attribute__ 
((__always_inline__)) -vrbit_u8 (uint8x8_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrte_f64 (float64x2_t a) { - uint8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" + float64x2_t result; + __asm__ ("frsqrte %0.2d,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrbitq_s8 (int8x16_t a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t a) { - int8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" + uint32x2_t result; + __asm__ ("ursqrte %0.2s,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrbitq_u8 (uint8x16_t a) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrted_f64 (float64_t a) { - uint8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" + float64_t result; + __asm__ ("frsqrte %d0,%d1" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vrecpe_u32 (uint32x2_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t a) { - uint32x2_t result; - __asm__ ("urecpe %0.2s,%1.2s" + float32x4_t result; + __asm__ ("frsqrte %0.4s,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vrecpeq_u32 (uint32x4_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrteq_f64 (float64x2_t a) { - uint32x4_t result; - __asm__ ("urecpe %0.4s,%1.4s" + float64x2_t result; + __asm__ ("frsqrte %0.2d,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev16_p8 (poly8x8_t a) +__extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t a) { - poly8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" + uint32x4_t result; + __asm__ ("ursqrte %0.4s,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev16_s8 (int8x8_t a) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtes_f32 (float32_t a) { - int8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" + float32_t result; + __asm__ ("frsqrte %s0,%s1" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev16_u8 (uint8x8_t a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t a, float32x2_t b) { - uint8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" + float32x2_t result; + __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev16q_p8 (poly8x16_t a) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrtsd_f64 (float64_t a, float64_t b) { - poly8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" + float64_t result; + __asm__ ("frsqrts %d0,%d1,%d2" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev16q_s8 (int8x16_t a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t a, float32x4_t b) { - int8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" + float32x4_t result; + __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev16q_u8 (uint8x16_t a) +__extension__ static 
__inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrtsq_f64 (float64x2_t a, float64x2_t b) { - uint8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev32_p8 (poly8x8_t a) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtss_f32 (float32_t a, float32_t b) { - poly8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" + float32_t result; + __asm__ ("frsqrts %s0,%s1,%s2" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vrev32_p16 (poly16x4_t a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsrtsq_f64 (float64x2_t a, float64x2_t b) { - poly16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev32_s8 (int8x8_t a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vrev32_s16 (int16x4_t a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) + int16x8_t result = 
vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev32_u8 (uint8x8_t a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - uint8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vrev32_u16 (uint16x4_t a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev32q_p8 (poly8x16_t a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - poly8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vrev32q_p16 (poly16x8_t a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - poly16x8_t result; 
- __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) : /* No clobbers */); return result; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev32q_s8 (int8x16_t a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t a, int16x8_t b) { - int8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" + int8x8_t result; + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vrev32q_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev32q_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vrev32q_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrev64_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev64_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vrev64_p16 (poly16x4_t a) -{ - poly16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" - : 
"=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev64_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vrev64_s16 (int16x4_t a) +vrsubhn_s32 (int32x4_t a, int32x4_t b) { int16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vrev64_s32 (int32x2_t a) +vrsubhn_s64 (int64x2_t a, int64x2_t b) { int32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" + __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev64_u8 (uint8x8_t a) +vrsubhn_u16 (uint16x8_t a, uint16x8_t b) { uint8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vrev64_u16 (uint16x4_t a) +vrsubhn_u32 (uint32x4_t a, uint32x4_t b) { uint16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vrev64_u32 (uint32x2_t a) +vrsubhn_u64 (uint64x2_t a, uint64x2_t b) { uint32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrev64q_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ 
("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev64q_p8 (poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vrev64q_p16 (poly16x8_t a) -{ - poly16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev64q_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vrev64q_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vrev64q_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev64q_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vrev64q_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vrev64q_u32 (uint32x4_t a) -{ - uint32x4_t result; - __asm__ ("rev64 %0.4s,%1.4s" + __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" : "=w"(result) - : "w"(a) + : "w"(a), "w"(b) : /* No clobbers 
*/); return result; } -#define vrshrn_high_n_s16(a, b, c) \ +#define vset_lane_f32(a, b, c) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + float32x2_t b_ = (b); \ + float32_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_high_n_s32(a, b, c) \ +#define vset_lane_f64(a, b, c) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + float64x1_t b_ = (b); \ + float64_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_high_n_s64(a, b, c) \ +#define vset_lane_p8(a, b, c) \ __extension__ \ ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + poly8x8_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_high_n_u16(a, b, c) \ +#define vset_lane_p16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + poly16x4_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define 
vrshrn_high_n_u32(a, b, c) \ +#define vset_lane_s8(a, b, c) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + int8x8_t b_ = (b); \ + int8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_high_n_u64(a, b, c) \ +#define vset_lane_s16(a, b, c) \ __extension__ \ ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ + int16x4_t b_ = (b); \ + int16_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_s16(a, b) \ +#define vset_lane_s32(a, b, c) \ __extension__ \ ({ \ - int16x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("rshrn %0.8b,%1.8h,%2" \ + int32x2_t b_ = (b); \ + int32_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_s32(a, b) \ +#define vset_lane_s64(a, b, c) \ __extension__ \ ({ \ - int32x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("rshrn %0.4h,%1.4s,%2" \ + int64x1_t b_ = (b); \ + int64_t a_ = (a); \ + int64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_s64(a, b) \ +#define vset_lane_u8(a, b, c) \ __extension__ \ ({ \ - int64x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("rshrn %0.2s,%1.2d,%2" \ + uint8x8_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ : "=w"(result) \ - : 
"w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_u16(a, b) \ +#define vset_lane_u16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("rshrn %0.8b,%1.8h,%2" \ + uint16x4_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_u32(a, b) \ +#define vset_lane_u32(a, b, c) \ __extension__ \ ({ \ - uint32x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("rshrn %0.4h,%1.4s,%2" \ + uint32x2_t b_ = (b); \ + uint32_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vrshrn_n_u64(a, b) \ +#define vset_lane_u64(a, b, c) \ __extension__ \ ({ \ - uint64x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("rshrn %0.2s,%1.2d,%2" \ + uint64x1_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ : "=w"(result) \ - : "w"(a_), "i"(b) \ + : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrsqrte_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frsqrte %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrsqrte_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frsqrte %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_f64(a, b, c) \ + 
__extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vrsqrte_u32 (uint32x2_t a) -{ - uint32x2_t result; - __asm__ ("ursqrte %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrsqrted_f64 (float64_t a) -{ - float64_t result; - __asm__ ("frsqrte %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrsqrteq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frsqrte %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrsqrteq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frsqrte %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + 
int16x8_t b_ = (b); \ + int16_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vrsqrteq_u32 (uint32x4_t a) -{ - uint32x4_t result; - __asm__ ("ursqrte %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} +#define vsetq_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrsqrtes_f32 (float32_t a) -{ - float32_t result; - __asm__ ("frsqrte %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrsqrts_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrsqrtsd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("frsqrts %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrsqrtsq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrsqrtsq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static 
__inline float32_t __attribute__ ((__always_inline__)) -vrsqrtss_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("frsqrts %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrsrtsq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) -{ - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) -{ - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) -{ - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) -{ - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) -{ - uint16x8_t result = 
vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) -{ - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); - __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" - : "+w"(result) - : "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrsubhn_s16 (int16x8_t a, int16x8_t b) -{ - int8x8_t result; - __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vrsubhn_s32 (int32x4_t a, int32x4_t b) -{ - int16x4_t result; - __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vrsubhn_s64 (int64x2_t a, int64x2_t b) -{ - int32x2_t result; - __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrsubhn_u16 (uint16x8_t a, uint16x8_t b) -{ - uint8x8_t result; - __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vrsubhn_u32 (uint32x4_t a, uint32x4_t b) -{ - uint16x4_t result; - __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vrsubhn_u64 (uint64x2_t a, uint64x2_t b) -{ - uint32x2_t result; - __asm__ ("rsubhn %0.2s, %1.2d, 
%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vset_lane_f32(a, b, c) \ +#define vsetq_lane_s64(a, b, c) \ __extension__ \ ({ \ - float32x2_t b_ = (b); \ - float32_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ + int64x2_t b_ = (b); \ + int64_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ : "=w"(result) \ : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vset_lane_f64(a, b, c) \ +#define vsetq_lane_u8(a, b, c) \ __extension__ \ ({ \ - float64x1_t b_ = (b); \ - float64_t a_ = (a); \ - float64x1_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ + uint8x16_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ : "=w"(result) \ : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vset_lane_p8(a, b, c) \ +#define vsetq_lane_u16(a, b, c) \ __extension__ \ ({ \ - poly8x8_t b_ = (b); \ - poly8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("ins %0.b[%3], %w1" \ + uint16x8_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ : "=w"(result) \ : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vset_lane_p16(a, b, c) \ +#define vsetq_lane_u32(a, b, c) \ __extension__ \ ({ \ - poly16x4_t b_ = (b); \ - poly16_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ + uint32x4_t b_ = (b); \ + uint32_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ : "=w"(result) \ : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define vset_lane_s8(a, b, c) \ +#define vsetq_lane_u64(a, b, c) \ __extension__ \ ({ \ - int8x8_t b_ = (b); \ - int8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("ins %0.b[%3], %w1" \ + uint64x2_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ : "=w"(result) \ : "r"(a_), "0"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) -#define 
vset_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x1_t b_ = (b); \ - int64_t a_ = (a); \ - int64x1_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x8_t b_ = (b); \ - uint8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("ins %0.b[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vset_lane_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x1_t b_ = (b); \ - uint64_t a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) 
\ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("ins %0.b[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - poly16_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x16_t b_ = (b); \ - int8_t a_ = (a); \ - int8x16_t result; \ - __asm__ ("ins %0.b[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int64_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x16_t b_ = (b); \ - uint8_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("ins 
%0.b[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("ins %0.h[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("ins %0.s[%3], %w1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsetq_lane_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint64_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("ins %0.d[%3], %x1" \ - : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_s16(a, b, c) \ +#define vshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13801,7 +13372,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13815,7 +13387,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13829,7 +13402,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, 
vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13843,7 +13417,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13857,7 +13432,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14309,7 +13885,7 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -14320,7 +13896,7 @@ vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -14331,7 +13907,7 @@ vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ 
("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -14342,7 +13918,7 @@ vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -14353,7 +13929,7 @@ vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -14364,7 +13940,7 @@ vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -16154,3627 +15730,4174 @@ __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q) __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q) __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q) -#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ - __attribute__ ((__always_inline__)) \ - vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ - rettype b, const int c) \ - { \ - rettype result; \ - __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ - "ld4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%3], %2\n\t" \ - "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \ - : "=Q"(result) \ - : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ - : "memory", "v16", "v17", "v18", "v19"); \ - return result; \ - } +#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17", "v18", "v19"); \ + return result; \ + } + +__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,) +__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) 
+__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + intype b, const int c) \ + { \ + __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*(intype *) ptr) \ + : "Q"(b), "i"(c) \ + : "memory", "v16", "v17"); \ + } + +__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) +__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) + +#define 
__ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + intype b, const int c) \ + { \ + __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*(intype *) ptr) \ + : "Q"(b), "i"(c) \ + : "memory", "v16", "v17", "v18"); \ + } + +__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) +__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) + +#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst4 ## Q ## _lane_ ## funcsuffix 
(const ptrtype *ptr, \ + intype b, const int c) \ + { \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*(intype *) ptr) \ + : "Q"(b), "i"(c) \ + : "memory", "v16", "v17", "v18", "v19"); \ + } + +__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) +__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddlv_s32 (int32x2_t a) +{ + int64_t result; + __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); + return result; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddlv_u32 (uint32x2_t a) +{ + uint64_t result; + __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : 
"w"(a) : ); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddd_s64 (int64x2_t __a) +{ + return __builtin_aarch64_addpdi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); +} + +/* Table intrinsics. 
*/ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl1_p8 (poly8x16_t a, uint8x8_t b) +{ + poly8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl1_s8 (int8x16_t a, uint8x8_t b) +{ + int8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl1_u8 (uint8x16_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) +{ + poly8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_s8 (int8x16_t a, uint8x16_t b) +{ + int8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl2_u8 
(uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t 
result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t 
result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) +{ + uint8x8_t 
result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline 
poly8x8_t __attribute__ ((__always_inline__)) +vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", 
"v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, 
%1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +/* V7 legacy table intrinsics. 
*/ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + 
return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
+vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x8_t tmp1; + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("movi %0.8b, 8\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {%2.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "w"(temp), "w"(idx), "w"(r) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x8_t tmp1; + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("movi %0.8b, 8\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {%2.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "w"(temp), "w"(idx), "w"(r) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x8_t tmp1; + poly8x16_t temp = 
vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("movi %0.8b, 8\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {%2.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "w"(temp), "w"(idx), "w"(r) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x8_t tmp1; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" + "movi %0.8b, 24\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "Q"(temp), "w"(idx), "w"(r) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ 
((__always_inline__)) +vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x8_t tmp1; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" + "movi %0.8b, 24\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "Q"(temp), "w"(idx), "w"(r) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x8_t tmp1; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" + "movi %0.8b, 24\n\t" + "cmhs %0.8b, %3.8b, %0.8b\n\t" + "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" + "bsl %0.8b, %4.8b, %1.8b\n\t" + : "+w"(result), "=w"(tmp1) + : "Q"(temp), "w"(idx), "w"(r) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx 
%0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} -__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,) -__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) -__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) -__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) -__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) -__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) -__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) -__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) -__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) -__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) -__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) -__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) -__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) -__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) -__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) -__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) -__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) -__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) -__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) -__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) -__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) -__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) -__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) -__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} -#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, 
funcsuffix, Q) \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ - intype b, const int c) \ - { \ - __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ - "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*(intype *) ptr) \ - : "Q"(b), "i"(c) \ - : "memory", "v16", "v17"); \ - } +/* End of temporary inline asm. */ -__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) -__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) -__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) -__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) -__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) -__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) -__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) -__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) -__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) -__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) -__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) -__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) -__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) -__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) -__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) -__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) -__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) -__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) -__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) -__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) -__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) -__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) -__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) -__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) +/* Start of optimal implementations in approved order. 
*/ -#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ - intype b, const int c) \ - { \ - __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ - "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*(intype *) ptr) \ - : "Q"(b), "i"(c) \ - : "memory", "v16", "v17", "v18"); \ - } +/* vabs */ -__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) -__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) -__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) -__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) -__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) -__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) -__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) -__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) -__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) -__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) -__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) -__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) -__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) -__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) -__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) -__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) -__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) -__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) -__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) -__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) -__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) -__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) -__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) -__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return __builtin_aarch64_absv2sf (__a); +} -#define 
__ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ - intype b, const int c) \ - { \ - __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ - "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*(intype *) ptr) \ - : "Q"(b), "i"(c) \ - : "memory", "v16", "v17", "v18", "v19"); \ - } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} -__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) -__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) -__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) -__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) -__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) -__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) -__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) -__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) -__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) -__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) -__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) -__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) -__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) -__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) -__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) -__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) -__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) -__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) -__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) -__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) -__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) -__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) -__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) -__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) +__extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vaddlv_s32 (int32x2_t a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) { - int64_t result; - __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); - return result; + return __builtin_aarch64_absv4hi (__a); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vaddlv_u32 (uint32x2_t a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) { - uint64_t result; - __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); - return result; + return __builtin_aarch64_absv2si (__a); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vpaddd_s64 (int64x2_t __a) +vabs_s64 (int64x1_t __a) { - return __builtin_aarch64_addpdi (__a); + return __builtin_llabs (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_absv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabsq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_absv2df (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
+vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + +/* vadd */ + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vaddd_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); + return __a + __b; } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vaddd_u64 (uint64x1_t __a, uint64x1_t __b) { - return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); + return __a + __b; } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +/* vaddv */ + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddv_s8 (int8x8_t __a) { - return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); + return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddv_s16 (int16x4_t __a) { - return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); + return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddv_s32 (int32x2_t __a) { - return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); + return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +__extension__ static __inline uint8_t __attribute__ 
((__always_inline__)) +vaddv_u8 (uint8x8_t __a) { - return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddv_u16 (uint16x4_t __a) { - return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddv_u32 (uint32x2_t __a) { - return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0); } -/* Table intrinsics. 
*/ - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl1_p8 (poly8x16_t a, uint8x8_t b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddvq_s8 (int8x16_t __a) { - poly8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl1_s8 (int8x16_t a, int8x8_t b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddvq_s16 (int16x8_t __a) { - int8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl1_u8 (uint8x16_t a, uint8x8_t b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddvq_s32 (int32x4_t __a) { - uint8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddvq_s64 (int64x2_t __a) { - poly8x16_t result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl1q_s8 (int8x16_t a, int8x16_t b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddvq_u8 (uint8x16_t __a) { - int8x16_t result; - __asm__ ("tbl %0.16b, 
{%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddvq_u16 (uint16x8_t __a) { - uint8x16_t result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddvq_u32 (uint32x4_t __a) { - int8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddvq_u64 (uint64x2_t __a) { - uint8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return vgetq_lane_u64 ((uint64x2_t) + __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddv_f32 (float32x2_t __a) { - poly8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl 
%0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a); + return vget_lane_f32 (t, 0); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddvq_f32 (float32x4_t __a) { - int8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a); + return vgetq_lane_f32 (t, 0); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vaddvq_f64 (float64x2_t __a) { - uint8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a); + return vgetq_lane_f64 (t, 0); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +/* vcage */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcages_f32 (float32_t __a, float32_t __b) { - poly8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? 
-1 : 0; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) { - int8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return vabs_f32 (__a) >= vabs_f32 (__b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) { - uint8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return vabsq_f32 (__a) >= vabsq_f32 (__b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcaged_f64 (float64_t __a, float64_t __b) { - poly8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return __builtin_fabs (__a) >= __builtin_fabs (__b) ? 
-1 : 0; } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcageq_f64 (float64x2_t __a, float64x2_t __b) { - int8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return vabsq_f64 (__a) >= vabsq_f64 (__b); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) -{ - uint8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; +/* vcagt */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcagts_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? 
-1 : 0; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) { - poly8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return vabs_f32 (__a) > vabs_f32 (__b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) { - int8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return vabsq_f32 (__a) > vabsq_f32 (__b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcagtd_f64 (float64_t __a, float64_t __b) { - uint8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return __builtin_fabs (__a) > __builtin_fabs (__b) ? 
-1 : 0; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcagtq_f64 (float64x2_t __a, float64x2_t __b) { - poly8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return vabsq_f64 (__a) > vabsq_f64 (__b); } +/* vcale */ -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) { - int8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return vabs_f32 (__a) <= vabs_f32 (__b); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) { - uint8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return vabsq_f32 (__a) <= vabsq_f32 (__b); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaleq_f64 (float64x2_t __a, float64x2_t __b) { - poly8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + 
return vabsq_f64 (__a) <= vabsq_f64 (__b); } +/* vcalt */ -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) { - int8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; + return vabs_f32 (__a) < vabs_f32 (__b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) { - uint8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; + return vabsq_f32 (__a) < vabsq_f32 (__b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaltq_f64 (float64x2_t __a, float64x2_t __b) { - poly8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; + return vabsq_f64 (__a) < vabsq_f64 (__b); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx) -{ - int8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +/* vceq - vector. 
*/ -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) { - uint8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_f64 (float64x1_t __a, float64x1_t __b) { - poly8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; + return __a == __b ? -1ll : 0ll; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) { - int8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +vceq_s8 (int8x8_t __a, int8x8_t __b) { - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, 
uint8x8_t idx) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) { - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); } +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_s64 (int64x1_t __a, int64x1_t __b) { - int8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return __a == __b ? 
-1ll : 0ll; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) { - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) { - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); } +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_u64 (uint64x1_t __a, uint64x1_t __b) { - int8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return __a == __b ? 
-1ll : 0ll; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) { - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_f64 (float64x2_t __a, float64x2_t __b) { - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); } - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) { - int8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) +vceqq_s8 (int8x16_t __a, int8x16_t __b) { - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - 
return result; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) { - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_s64 (int64x2_t __a, int64x2_t __b) { - int8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) { - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx4_p8 (poly8x8_t r, poly8x16x4_t 
tab, uint8x8_t idx) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) { - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_u64 (uint64x2_t __a, uint64x2_t __b) { - int8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +/* vceq - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqs_f32 (float32_t __a, float32_t __b) { - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return __a == __b ? 
-1 : 0; } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_s64 (int64x1_t __a, int64x1_t __b) { - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; + return __a == __b ? -1ll : 0ll; } -/* V7 legacy table intrinsics. */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbl1_s8 (int8x8_t tab, int8x8_t idx) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqd_f64 (float64_t __a, float64_t __b) { - int8x8_t result; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return __a == __b ? -1ll : 0ll; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) +/* vceqz - vector. 
*/ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_f32 (float32x2_t __a) { - uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_f64 (float64x1_t __a) { - poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return __a == 0.0 ? -1ll : 0ll; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_p8 (poly8x8_t __a) { - int8x8_t result; - int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) +vceqz_s8 (int8x8_t __a) { - uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbl2_p8 (poly8x8x2_t tab, 
uint8x8_t idx) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_s16 (int16x4_t __a) { - poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_s32 (int32x2_t __a) { - int8x8_t result; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_s64 (int64x1_t __a) { - uint8x8_t result; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + return __a == 0ll ? 
-1ll : 0ll; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_u8 (uint8x8_t __a) { - poly8x8_t result; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_u16 (uint16x4_t __a) { - int8x8_t result; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_u32 (uint32x2_t __a) { - uint8x8_t result; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + uint32x2_t __b = {0, 0}; + return (uint32x2_t) 
__builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_u64 (uint64x1_t __a) { - poly8x8_t result; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "=w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + return __a == 0ll ? -1ll : 0ll; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_f32 (float32x4_t __a) { - int8x8_t result; - int8x8_t tmp1; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_f64 (float64x2_t __a) { - uint8x8_t result; - uint8x8_t tmp1; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) 
__builtin_aarch64_cmeqv2df (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_p8 (poly8x16_t __a) { - poly8x8_t result; - poly8x8_t tmp1; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_s8 (int8x16_t __a) { - int8x8_t result = r; - int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_s16 (int16x8_t __a) { - uint8x8_t result = r; - uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx2_p8 
(poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_s32 (int32x4_t __a) { - poly8x8_t result = r; - poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_s64 (int64x2_t __a) { - int8x8_t result; - int8x8_t tmp1; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_u8 (uint8x16_t __a) { - uint8x8_t result; - uint8x8_t tmp1; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_u16 (uint16x8_t __a) { - poly8x8_t result; - poly8x8_t tmp1; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_u32 (uint32x4_t __a) { - int8x8_t result = r; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "+w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_u64 (uint64x2_t __a) { - uint8x8_t result = r; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - 
temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "+w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx) +/* vceqz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqzs_f32 (float32_t __a) { - poly8x8_t result = r; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); - __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" - "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" - : "+w"(result) - : "Q"(temp), "w"(idx) - : "v16", "v17", "memory"); - return result; + return __a == 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_s64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; } -/* End of temporary inline asm. */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_u64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} -/* Start of optimal implementations in approved order. */ +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqzd_f64 (float64_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} -/* vabs */ +/* vcge - vector. 
*/ -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vabs_f32 (float32x2_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_absv2sf (__a); + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); } -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vabs_f64 (float64x1_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_f64 (float64x1_t __a, float64x1_t __b) { - return __builtin_fabs (__a); + return __a >= __b ? -1ll : 0ll; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vabs_s8 (int8x8_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_p8 (poly8x8_t __a, poly8x8_t __b) { - return __builtin_aarch64_absv8qi (__a); + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vabs_s16 (int16x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_absv4hi (__a); + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vabs_s32 (int32x2_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_absv2si (__a); + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vabs_s64 (int64x1_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_llabs (__a); + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); } -__extension__ static __inline 
float32x4_t __attribute__ ((__always_inline__)) -vabsq_f32 (float32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_absv4sf (__a); + return __a >= __b ? -1ll : 0ll; } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vabsq_f64 (float64x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_absv2df (__a); + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vabsq_s8 (int8x16_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_absv16qi (__a); + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vabsq_s16 (int16x8_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_absv8hi (__a); + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vabsq_s32 (int32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_u64 (uint64x1_t __a, uint64x1_t __b) { - return __builtin_aarch64_absv4si (__a); + return __a >= __b ? 
-1ll : 0ll; } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vabsq_s64 (int64x2_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_absv2di (__a); + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); } -/* vadd */ +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vaddd_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) { - return __a + __b; + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vaddd_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) { - return __a + __b; + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); } -/* vaddv */ +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddv_s8 (int8x8_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) { - return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); } -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddv_s16 (int16x4_t __a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_s64 (int64x2_t __a, 
int64x2_t __b) { - return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddv_s32 (int32x2_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) { - return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddv_u8 (uint8x8_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) { - return vget_lane_u8 ((uint8x8_t) - __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0); + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddv_u16 (uint16x4_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) { - return vget_lane_u16 ((uint16x4_t) - __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0); + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddv_u32 (uint32x2_t __a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) { - return vget_lane_u32 ((uint32x2_t) - __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0); + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); } -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddvq_s8 (int8x16_t __a) +/* vcge - scalar. 
*/ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcges_f32 (float32_t __a, float32_t __b) { - return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0); + return __a >= __b ? -1 : 0; } -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddvq_s16 (int16x8_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_s64 (int64x1_t __a, int64x1_t __b) { - return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0); + return __a >= __b ? -1ll : 0ll; } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddvq_s32 (int32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_u64 (uint64x1_t __a, uint64x1_t __b) { - return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); + return __a >= __b ? -1ll : 0ll; } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddvq_s64 (int64x2_t __a) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcged_f64 (float64_t __a, float64_t __b) { - return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); + return __a >= __b ? -1ll : 0ll; } -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddvq_u8 (uint8x16_t __a) +/* vcgez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_f32 (float32x2_t __a) { - return vgetq_lane_u8 ((uint8x16_t) - __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0); + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); } -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddvq_u16 (uint16x8_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_f64 (float64x1_t __a) { - return vgetq_lane_u16 ((uint16x8_t) - __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0); + return __a >= 0.0 ? 
-1ll : 0ll; } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddvq_u32 (uint32x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_p8 (poly8x8_t __a) { - return vgetq_lane_u32 ((uint32x4_t) - __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0); + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddvq_u64 (uint64x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_s8 (int8x8_t __a) { - return vgetq_lane_u64 ((uint64x2_t) - __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0); + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vaddv_f32 (float32x2_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_s16 (int16x4_t __a) { - float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a); - return vget_lane_f32 (t, 0); + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vaddvq_f32 (float32x4_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_s32 (int32x2_t __a) { - float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a); - return vgetq_lane_f32 (t, 0); + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vaddvq_f64 (float64x2_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_s64 (int64x1_t __a) { - float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a); - return vgetq_lane_f64 (t, 0); + return __a >= 0ll ? 
-1ll : 0ll; } -/* vcage */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcages_f32 (float32_t __a, float32_t __b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_u16 (uint16x4_t __a) { - return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcage_f32 (float32x2_t __a, float32x2_t __b) +vcgez_u32 (uint32x2_t __a) { - return vabs_f32 (__a) >= vabs_f32 (__b); + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcageq_f32 (float32x4_t __a, float32x4_t __b) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_u64 (uint64x1_t __a) { - return vabsq_f32 (__a) >= vabsq_f32 (__b); + return __a >= 0ll ? -1ll : 0ll; } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcaged_f64 (float64_t __a, float64_t __b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_f32 (float32x4_t __a) { - return __builtin_fabs (__a) >= __builtin_fabs (__b) ? 
-1 : 0; + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcageq_f64 (float64x2_t __a, float64x2_t __b) +vcgezq_f64 (float64x2_t __a) { - return vabsq_f64 (__a) >= vabsq_f64 (__b); + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); } -/* vcagt */ - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcagts_f32 (float32_t __a, float32_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_p8 (poly8x16_t __a) { - return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0; + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcagt_f32 (float32x2_t __a, float32x2_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_s8 (int8x16_t __a) { - return vabs_f32 (__a) > vabs_f32 (__b); + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcagtq_f32 (float32x4_t __a, float32x4_t __b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_s16 (int16x8_t __a) { - return vabsq_f32 (__a) > vabsq_f32 (__b); + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcagtd_f64 (float64_t __a, float64_t __b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_s32 (int32x4_t __a) { - return __builtin_fabs (__a) > __builtin_fabs (__b) ? 
-1 : 0; + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcagtq_f64 (float64x2_t __a, float64x2_t __b) +vcgezq_s64 (int64x2_t __a) { - return vabsq_f64 (__a) > vabsq_f64 (__b); + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); } -/* vcale */ +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcale_f32 (float32x2_t __a, float32x2_t __b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_u16 (uint16x8_t __a) { - return vabs_f32 (__a) <= vabs_f32 (__b); + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcaleq_f32 (float32x4_t __a, float32x4_t __b) +vcgezq_u32 (uint32x4_t __a) { - return vabsq_f32 (__a) <= vabsq_f32 (__b); + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcaleq_f64 (float64x2_t __a, float64x2_t __b) +vcgezq_u64 (uint64x2_t __a) { - return vabsq_f64 (__a) <= vabsq_f64 (__b); + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); } -/* vcalt */ +/* vcgez - scalar. 
*/ -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcalt_f32 (float32x2_t __a, float32x2_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgezs_f32 (float32_t __a) { - return vabs_f32 (__a) < vabs_f32 (__b); + return __a >= 0.0f ? -1 : 0; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcaltq_f32 (float32x4_t __a, float32x4_t __b) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_s64 (int64x1_t __a) { - return vabsq_f32 (__a) < vabsq_f32 (__b); + return __a >= 0 ? -1ll : 0ll; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcaltq_f64 (float64x2_t __a, float64x2_t __b) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_u64 (int64x1_t __a) { - return vabsq_f64 (__a) < vabsq_f64 (__b); + return __a >= 0 ? -1ll : 0ll; } -/* vceq - vector. */ +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgezd_f64 (float64_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +/* vcgt - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceq_f32 (float32x2_t __a, float32x2_t __b) +vcgt_f32 (float32x2_t __a, float32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceq_f64 (float64x1_t __a, float64x1_t __b) +vcgt_f64 (float64x1_t __a, float64x1_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceq_p8 (poly8x8_t __a, poly8x8_t __b) +vcgt_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceq_s8 (int8x8_t __a, int8x8_t __b) +vcgt_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vceq_s16 (int16x4_t __a, int16x4_t __b) +vcgt_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceq_s32 (int32x2_t __a, int32x2_t __b) +vcgt_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceq_s64 (int64x1_t __a, int64x1_t __b) +vcgt_s64 (int64x1_t __a, int64x1_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceq_u8 (uint8x8_t __a, uint8x8_t __b) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vceq_u16 (uint16x4_t __a, uint16x4_t __b) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceq_u32 (uint32x2_t __a, uint32x2_t __b) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceq_u64 (uint64x1_t __a, uint64x1_t __b) +vcgt_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqq_f32 (float32x4_t __a, float32x4_t __b) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqq_f64 (float64x2_t __a, float64x2_t __b) +vcgtq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqq_s8 (int8x16_t __a, int8x16_t __b) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vceqq_s16 (int16x8_t __a, int16x8_t __b) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqq_s32 (int32x4_t __a, int32x4_t __b) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqq_s64 (int64x2_t __a, int64x2_t __b) +vcgtq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmeqv2di 
(__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqq_u64 (uint64x2_t __a, uint64x2_t __b) +vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, (int64x2_t) __b); } -/* vceq - scalar. */ +/* vcgt - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vceqs_f32 (float32_t __a, float32_t __b) +vcgts_f32 (float32_t __a, float32_t __b) { - return __a == __b ? -1 : 0; + return __a > __b ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqd_s64 (int64x1_t __a, int64x1_t __b) +vcgtd_s64 (int64x1_t __a, int64x1_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? 
-1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqd_u64 (uint64x1_t __a, uint64x1_t __b) +vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vceqd_f64 (float64_t __a, float64_t __b) +vcgtd_f64 (float64_t __a, float64_t __b) { - return __a == __b ? -1ll : 0ll; + return __a > __b ? -1ll : 0ll; } -/* vceqz - vector. */ +/* vcgtz - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceqz_f32 (float32x2_t __a) +vcgtz_f32 (float32x2_t __a) { float32x2_t __b = {0.0f, 0.0f}; - return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqz_f64 (float64x1_t __a) +vcgtz_f64 (float64x1_t __a) { - return __a == 0.0 ? -1ll : 0ll; + return __a > 0.0 ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceqz_p8 (poly8x8_t __a) +vcgtz_p8 (poly8x8_t __a) { poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceqz_s8 (int8x8_t __a) +vcgtz_s8 (int8x8_t __a) { int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vceqz_s16 (int16x4_t __a) +vcgtz_s16 (int16x4_t __a) { int16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceqz_s32 (int32x2_t __a) +vcgtz_s32 (int32x2_t __a) { int32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqz_s64 (int64x1_t __a) +vcgtz_s64 (int64x1_t __a) { - return __a == 0ll ? -1ll : 0ll; + return __a > 0ll ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vceqz_u8 (uint8x8_t __a) +vcgtz_u8 (uint8x8_t __a) { uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vceqz_u16 (uint16x4_t __a) +vcgtz_u16 (uint16x4_t __a) { uint16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceqz_u32 (uint32x2_t __a) +vcgtz_u32 (uint32x2_t __a) { uint32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqz_u64 (uint64x1_t __a) +vcgtz_u64 (uint64x1_t __a) { - return __a == 0ll ? -1ll : 0ll; + return __a > 0ll ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqzq_f32 (float32x4_t __a) +vcgtzq_f32 (float32x4_t __a) { float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; - return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqzq_f64 (float64x2_t __a) +vcgtzq_f64 (float64x2_t __a) { float64x2_t __b = {0.0, 0.0}; - return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqzq_p8 (poly8x16_t __a) +vcgtzq_p8 (poly8x16_t __a) { poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqzq_s8 (int8x16_t __a) +vcgtzq_s8 (int8x16_t __a) { int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vceqzq_s16 (int16x8_t __a) +vcgtzq_s16 (int16x8_t __a) { int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqzq_s32 (int32x4_t __a) +vcgtzq_s32 (int32x4_t __a) { int32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqzq_s64 (int64x2_t __a) 
+vcgtzq_s64 (int64x2_t __a) { int64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vceqzq_u8 (uint8x16_t __a) +vcgtzq_u8 (uint8x16_t __a) { uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vceqzq_u16 (uint16x8_t __a) +vcgtzq_u16 (uint16x8_t __a) { uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqzq_u32 (uint32x4_t __a) +vcgtzq_u32 (uint32x4_t __a) { uint32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqzq_u64 (uint64x2_t __a) +vcgtzq_u64 (uint64x2_t __a) { uint64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, (int64x2_t) __b); } -/* vceqz - scalar. */ +/* vcgtz - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vceqzs_f32 (float32_t __a) +vcgtzs_f32 (float32_t __a) { - return __a == 0.0f ? -1 : 0; + return __a > 0.0f ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqzd_s64 (int64x1_t __a) +vcgtzd_s64 (int64x1_t __a) { - return __a == 0 ? -1ll : 0ll; + return __a > 0 ? 
-1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceqzd_u64 (int64x1_t __a) +vcgtzd_u64 (int64x1_t __a) { - return __a == 0 ? -1ll : 0ll; + return __a > 0 ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vceqzd_f64 (float64_t __a) +vcgtzd_f64 (float64_t __a) { - return __a == 0.0 ? -1ll : 0ll; + return __a > 0.0 ? -1ll : 0ll; } -/* vcge - vector. */ +/* vcle - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcge_f32 (float32x2_t __a, float32x2_t __b) +vcle_f32 (float32x2_t __a, float32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcge_f64 (float64x1_t __a, float64x1_t __b) +vcle_f64 (float64x1_t __a, float64x1_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcge_p8 (poly8x8_t __a, poly8x8_t __b) +vcle_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, - (int8x8_t) __b); + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, + (int8x8_t) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcge_s8 (int8x8_t __a, int8x8_t __b) +vcle_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcge_s16 (int16x4_t __a, int16x4_t __b) +vcle_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcge_s32 (int32x2_t __a, int32x2_t __b) 
+vcle_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcge_s64 (int64x1_t __a, int64x1_t __b) +vcle_s64 (int64x1_t __a, int64x1_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcge_u8 (uint8x8_t __a, uint8x8_t __b) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, + (int8x8_t) __a); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcge_u16 (uint16x4_t __a, uint16x4_t __b) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, + (int16x4_t) __a); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcge_u32 (uint32x2_t __a, uint32x2_t __b) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, - (int32x2_t) __b); + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, + (int32x2_t) __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcge_u64 (uint64x1_t __a, uint64x1_t __b) +vcle_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgeq_f32 (float32x4_t __a, float32x4_t __b) +vcleq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgeq_f64 (float64x2_t __a, float64x2_t __b) +vcleq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) +vcleq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, - (int8x16_t) __b); + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, + (int8x16_t) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgeq_s8 (int8x16_t __a, int8x16_t __b) +vcleq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgeq_s16 (int16x8_t __a, int16x8_t __b) +vcleq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgeq_s32 (int32x4_t __a, int32x4_t __b) +vcleq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgeq_s64 (int64x2_t __a, int64x2_t __b) +vcleq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) 
__builtin_aarch64_cmgev2di (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, + (int8x16_t) __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, + (int16x8_t) __a); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, - (int32x4_t) __b); + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, + (int32x4_t) __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) +vcleq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, - (int64x2_t) __b); + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, + (int64x2_t) __a); } -/* vcge - scalar. */ +/* vcle - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcges_f32 (float32_t __a, float32_t __b) +vcles_f32 (float32_t __a, float32_t __b) { - return __a >= __b ? -1 : 0; + return __a <= __b ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcged_s64 (int64x1_t __a, int64x1_t __b) +vcled_s64 (int64x1_t __a, int64x1_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? 
-1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcged_u64 (uint64x1_t __a, uint64x1_t __b) +vcled_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcged_f64 (float64_t __a, float64_t __b) +vcled_f64 (float64_t __a, float64_t __b) { - return __a >= __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } -/* vcgez - vector. */ +/* vclez - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgez_f32 (float32x2_t __a) +vclez_f32 (float32x2_t __a) { float32x2_t __b = {0.0f, 0.0f}; - return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgez_f64 (float64x1_t __a) +vclez_f64 (float64x1_t __a) { - return __a >= 0.0 ? -1ll : 0ll; + return __a <= 0.0 ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgez_p8 (poly8x8_t __a) +vclez_p8 (poly8x8_t __a) { poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgez_s8 (int8x8_t __a) +vclez_s8 (int8x8_t __a) { int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); + return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgez_s16 (int16x4_t __a) +vclez_s16 (int16x4_t __a) { int16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); + return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgez_s32 (int32x2_t __a) +vclez_s32 (int32x2_t __a) { int32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgez_s64 (int64x1_t __a) -{ - return __a >= 0ll ? 
-1ll : 0ll; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgez_u8 (uint8x8_t __a) -{ - uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, - (int8x8_t) __b); -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgez_u16 (uint16x4_t __a) -{ - uint16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgez_u32 (uint32x2_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_s64 (int64x1_t __a) { - uint32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __a <= 0ll ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgez_u64 (uint64x1_t __a) +vclez_u64 (uint64x1_t __a) { - return __a >= 0ll ? -1ll : 0ll; + return __a <= 0ll ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgezq_f32 (float32x4_t __a) +vclezq_f32 (float32x4_t __a) { float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; - return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgezq_f64 (float64x2_t __a) +vclezq_f64 (float64x2_t __a) { float64x2_t __b = {0.0, 0.0}; - return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgezq_p8 (poly8x16_t __a) +vclezq_p8 (poly8x16_t __a) { poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgezq_s8 (int8x16_t __a) +vclezq_s8 (int8x16_t __a) { int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgezq_s16 (int16x8_t __a) +vclezq_s16 (int16x8_t __a) { int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgezq_s32 (int32x4_t __a) +vclezq_s32 (int32x4_t __a) { int32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgezq_s64 (int64x2_t __a) 
+vclezq_s64 (int64x2_t __a) { int64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgezq_u8 (uint8x16_t __a) -{ - uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, - (int8x16_t) __b); -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgezq_u16 (uint16x8_t __a) -{ - uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, - (int16x8_t) __b); -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgezq_u32 (uint32x4_t __a) -{ - uint32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, - (int32x4_t) __b); -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgezq_u64 (uint64x2_t __a) -{ - uint64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, - (int64x2_t) __b); + return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); } -/* vcgez - scalar. */ +/* vclez - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcgezs_f32 (float32_t __a) +vclezs_f32 (float32_t __a) { - return __a >= 0.0f ? -1 : 0; + return __a <= 0.0f ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgezd_s64 (int64x1_t __a) +vclezd_s64 (int64x1_t __a) { - return __a >= 0 ? -1ll : 0ll; + return __a <= 0 ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgezd_u64 (int64x1_t __a) +vclezd_u64 (int64x1_t __a) { - return __a >= 0 ? -1ll : 0ll; + return __a <= 0 ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcgezd_f64 (float64_t __a) +vclezd_f64 (float64_t __a) { - return __a >= 0.0 ? -1ll : 0ll; + return __a <= 0.0 ? 
-1ll : 0ll; } -/* vcgt - vector. */ +/* vclt - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgt_f32 (float32x2_t __a, float32x2_t __b) +vclt_f32 (float32x2_t __a, float32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgt_f64 (float64x1_t __a, float64x1_t __b) +vclt_f64 (float64x1_t __a, float64x1_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgt_p8 (poly8x8_t __a, poly8x8_t __b) +vclt_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, + (int8x8_t) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgt_s8 (int8x8_t __a, int8x8_t __b) +vclt_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgt_s16 (int16x4_t __a, int16x4_t __b) +vclt_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgt_s32 (int32x2_t __a, int32x2_t __b) +vclt_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgt_s64 (int64x1_t __a, int64x1_t __b) +vclt_s64 (int64x1_t __a, int64x1_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? 
-1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, + (int8x8_t) __a); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, + (int16x4_t) __a); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, - (int32x2_t) __b); + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, + (int32x2_t) __a); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgt_u64 (uint64x1_t __a, uint64x1_t __b) +vclt_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtq_f32 (float32x4_t __a, float32x4_t __b) +vcltq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtq_f64 (float64x2_t __a, float64x2_t __b) +vcltq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) +vcltq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, + (int8x16_t) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtq_s8 (int8x16_t __a, int8x16_t __b) +vcltq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgtq_s16 (int16x8_t __a, int16x8_t __b) +vcltq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtq_s32 (int32x4_t __a, int32x4_t __b) +vcltq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtq_s64 (int64x2_t __a, int64x2_t __b) +vcltq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) 
__builtin_aarch64_cmgtv2di (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, + (int8x16_t) __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, + (int16x8_t) __a); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, - (int32x4_t) __b); + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, + (int32x4_t) __a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) +vcltq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, - (int64x2_t) __b); + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, + (int64x2_t) __a); } -/* vcgt - scalar. */ +/* vclt - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcgts_f32 (float32_t __a, float32_t __b) +vclts_f32 (float32_t __a, float32_t __b) { - return __a > __b ? -1 : 0; + return __a < __b ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtd_s64 (int64x1_t __a, int64x1_t __b) +vcltd_s64 (int64x1_t __a, int64x1_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? 
-1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) +vcltd_u64 (uint64x1_t __a, uint64x1_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcgtd_f64 (float64_t __a, float64_t __b) +vcltd_f64 (float64_t __a, float64_t __b) { - return __a > __b ? -1ll : 0ll; + return __a < __b ? -1ll : 0ll; } -/* vcgtz - vector. */ +/* vcltz - vector. */ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgtz_f32 (float32x2_t __a) +vcltz_f32 (float32x2_t __a) { float32x2_t __b = {0.0f, 0.0f}; - return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtz_f64 (float64x1_t __a) -{ - return __a > 0.0 ? -1ll : 0ll; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgtz_p8 (poly8x8_t __a) -{ - poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, - (int8x8_t) __b); -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgtz_s8 (int8x8_t __a) -{ - int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgtz_s16 (int16x4_t __a) -{ - int16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgtz_s32 (int32x2_t __a) -{ - int32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); + return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtz_s64 (int64x1_t __a) +vcltz_f64 (float64x1_t __a) { - return __a > 0ll ? 
-1ll : 0ll; + return __a < 0.0 ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcgtz_u8 (uint8x8_t __a) +vcltz_p8 (poly8x8_t __a) { - uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, (int8x8_t) __b); } +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); +} + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcgtz_u16 (uint16x4_t __a) +vcltz_s16 (int16x4_t __a) { - uint16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, - (int16x4_t) __b); + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgtz_u32 (uint32x2_t __a) +vcltz_s32 (int32x2_t __a) { - uint32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, - (int32x2_t) __b); + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtz_u64 (uint64x1_t __a) +vcltz_s64 (int64x1_t __a) { - return __a > 0ll ? -1ll : 0ll; + return __a < 0ll ? 
-1ll : 0ll; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtzq_f32 (float32x4_t __a) +vcltzq_f32 (float32x4_t __a) { float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; - return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtzq_f64 (float64x2_t __a) +vcltzq_f64 (float64x2_t __a) { float64x2_t __b = {0.0, 0.0}; - return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); + return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtzq_p8 (poly8x16_t __a) +vcltzq_p8 (poly8x16_t __a) { poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtzq_s8 (int8x16_t __a) +vcltzq_s8 (int8x16_t __a) { int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); + return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgtzq_s16 (int16x8_t __a) +vcltzq_s16 (int16x8_t __a) { int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); + return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtzq_s32 (int32x4_t __a) +vcltzq_s32 (int32x4_t __a) { int32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); + return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtzq_s64 (int64x2_t __a) 
+vcltzq_s64 (int64x2_t __a) { int64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcgtzq_u8 (uint8x16_t __a) -{ - uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, - (int8x16_t) __b); -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcgtzq_u16 (uint16x8_t __a) -{ - uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, - (int16x8_t) __b); -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtzq_u32 (uint32x4_t __a) -{ - uint32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, - (int32x4_t) __b); -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtzq_u64 (uint64x2_t __a) -{ - uint64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, - (int64x2_t) __b); + return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); } -/* vcgtz - scalar. */ +/* vcltz - scalar. */ __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcgtzs_f32 (float32_t __a) +vcltzs_f32 (float32_t __a) { - return __a > 0.0f ? -1 : 0; + return __a < 0.0f ? -1 : 0; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtzd_s64 (int64x1_t __a) +vcltzd_s64 (int64x1_t __a) { - return __a > 0 ? -1ll : 0ll; + return __a < 0 ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgtzd_u64 (int64x1_t __a) +vcltzd_u64 (int64x1_t __a) { - return __a > 0 ? -1ll : 0ll; + return __a < 0 ? -1ll : 0ll; } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcgtzd_f64 (float64_t __a) -{ - return __a > 0.0 ? -1ll : 0ll; -} - -/* vcle - vector. 
*/ - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcle_f32 (float32x2_t __a, float32x2_t __b) -{ - return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcle_f64 (float64x1_t __a, float64x1_t __b) +vcltzd_f64 (float64_t __a) { - return __a <= __b ? -1ll : 0ll; + return __a < 0.0 ? -1ll : 0ll; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcle_p8 (poly8x8_t __a, poly8x8_t __b) -{ - return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, - (int8x8_t) __a); -} +/* vcvt (double -> float). */ -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcle_s8 (int8x8_t __a, int8x8_t __b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_f64 (float64x2_t __a) { - return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); + return __builtin_aarch64_float_truncate_lo_v2sf (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcle_s16 (int16x4_t __a, int16x4_t __b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) { - return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); + return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcle_s32 (int32x2_t __a, int32x2_t __b) -{ - return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); -} +/* vcvt (float -> double). */ -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcle_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_f64_f32 (float32x2_t __a) { - return __a <= __b ? 
-1ll : 0ll; -} -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcle_u8 (uint8x8_t __a, uint8x8_t __b) -{ - return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, - (int8x8_t) __a); + return __builtin_aarch64_float_extend_lo_v2df (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcle_u16 (uint16x4_t __a, uint16x4_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_high_f64_f32 (float32x4_t __a) { - return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, - (int16x4_t) __a); + return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcle_u32 (uint32x2_t __a, uint32x2_t __b) -{ - return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, - (int32x2_t) __a); -} +/* vcvt (int -> float) */ -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcle_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_s64 (int64_t __a) { - return __a <= __b ? 
-1ll : 0ll; + return (float64_t) __a; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcleq_f32 (float32x4_t __a, float32x4_t __b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_u64 (uint64_t __a) { - return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); + return (float64_t) __a; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcleq_f64 (float64x2_t __a, float64x2_t __b) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_s32 (int32_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); + return (float32_t) __a; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcleq_p8 (poly8x16_t __a, poly8x16_t __b) +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_u32 (uint32_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, - (int8x16_t) __a); + return (float32_t) __a; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcleq_s8 (int8x16_t __a, int8x16_t __b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); + return __builtin_aarch64_floatv2siv2sf (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcleq_s16 (int16x8_t __a, int16x8_t __b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) { - return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); + return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcleq_s32 (int32x4_t __a, int32x4_t __b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) { - return (uint32x4_t) 
__builtin_aarch64_cmgev4si (__b, __a); + return __builtin_aarch64_floatv4siv4sf (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcleq_s64 (int64x2_t __a, int64x2_t __b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); + return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_s64 (int64x2_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, - (int8x16_t) __a); + return __builtin_aarch64_floatv2div2df (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_u64 (uint64x2_t __a) { - return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, - (int16x8_t) __a); + return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcleq_u32 (uint32x4_t __a, uint32x4_t __b) -{ - return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, - (int32x4_t) __a); -} +/* vcvt (float -> int) */ -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcleq_u64 (uint64x2_t __a, uint64x2_t __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtd_s64_f64 (float64_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, - (int64x2_t) __a); + return (int64_t) __a; } -/* vcle - scalar. 
*/ - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcles_f32 (float32_t __a, float32_t __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtd_u64_f64 (float64_t __a) { - return __a <= __b ? -1 : 0; + return (uint64_t) __a; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcled_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvts_s32_f32 (float32_t __a) { - return __a <= __b ? -1ll : 0ll; + return (int32_t) __a; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcled_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvts_u32_f32 (float32_t __a) { - return __a <= __b ? -1ll : 0ll; + return (uint32_t) __a; } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcled_f64 (float64_t __a, float64_t __b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) { - return __a <= __b ? -1ll : 0ll; + return __builtin_aarch64_lbtruncv2sfv2si (__a); } -/* vclez - vector. */ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclez_f32 (float32x2_t __a) +vcvt_u32_f32 (float32x2_t __a) { - float32x2_t __b = {0.0f, 0.0f}; - return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclez_f64 (float64x1_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) { - return __a <= 0.0 ? 
-1ll : 0ll; + return __builtin_aarch64_lbtruncv4sfv4si (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclez_p8 (poly8x8_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) { - poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, - (int8x8_t) __b); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclez_s8 (int8x8_t __a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtq_s64_f64 (float64x2_t __a) { - int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); + return __builtin_aarch64_lbtruncv2dfv2di (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vclez_s16 (int16x4_t __a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtq_u64_f64 (float64x2_t __a) { - int16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclez_s32 (int32x2_t __a) +/* vcvta */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtad_s64_f64 (float64_t __a) { - int32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); + return __builtin_aarch64_lrounddfdi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclez_s64 (int64x1_t __a) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtad_u64_f64 (float64_t __a) { - return __a <= 0ll ? 
-1ll : 0ll; + return __builtin_aarch64_lroundudfdi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclez_u64 (uint64x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtas_s32_f32 (float32_t __a) { - return __a <= 0ll ? -1ll : 0ll; + return __builtin_aarch64_lroundsfsi (__a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vclezq_f32 (float32x4_t __a) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtas_u32_f32 (float32_t __a) { - float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; - return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); + return __builtin_aarch64_lroundusfsi (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vclezq_f64 (float64x2_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvta_s32_f32 (float32x2_t __a) { - float64x2_t __b = {0.0, 0.0}; - return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); + return __builtin_aarch64_lroundv2sfv2si (__a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vclezq_p8 (poly8x16_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvta_u32_f32 (float32x2_t __a) { - poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, - (int8x16_t) __b); + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vclezq_s8 (int8x16_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtaq_s32_f32 (float32x4_t __a) { - int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); + return __builtin_aarch64_lroundv4sfv4si (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vclezq_s16 (int16x8_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtaq_u32_f32 (float32x4_t __a) { - int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vclezq_s32 (int32x4_t __a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtaq_s64_f64 (float64x2_t __a) { - int32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); + return __builtin_aarch64_lroundv2dfv2di (__a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vclezq_s64 (int64x2_t __a) +vcvtaq_u64_f64 (float64x2_t __a) { - int64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); } -/* vclez - scalar. */ +/* vcvtm */ -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vclezs_f32 (float32_t __a) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtmd_s64_f64 (float64_t __a) { - return __a <= 0.0f ? 
-1 : 0; + return __builtin_lfloor (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclezd_s64 (int64x1_t __a) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtmd_u64_f64 (float64_t __a) { - return __a <= 0 ? -1ll : 0ll; + return __builtin_aarch64_lfloorudfdi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclezd_u64 (int64x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtms_s32_f32 (float32_t __a) { - return __a <= 0 ? -1ll : 0ll; + return __builtin_ifloorf (__a); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vclezd_f64 (float64_t __a) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtms_u32_f32 (float32_t __a) { - return __a <= 0.0 ? -1ll : 0ll; + return __builtin_aarch64_lfloorusfsi (__a); } -/* vclt - vector. */ +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtm_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfloorv2sfv2si (__a); +} __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclt_f32 (float32x2_t __a, float32x2_t __b) +vcvtm_u32_f32 (float32x2_t __a) { - return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclt_f64 (float64x1_t __a, float64x1_t __b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtmq_s32_f32 (float32x4_t __a) { - return __a < __b ? 
-1ll : 0ll; + return __builtin_aarch64_lfloorv4sfv4si (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclt_p8 (poly8x8_t __a, poly8x8_t __b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtmq_u32_f32 (float32x4_t __a) { - return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, - (int8x8_t) __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclt_s8 (int8x8_t __a, int8x8_t __b) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtmq_s64_f64 (float64x2_t __a) { - return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); + return __builtin_aarch64_lfloorv2dfv2di (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vclt_s16 (int16x4_t __a, int16x4_t __b) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtmq_u64_f64 (float64x2_t __a) { - return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclt_s32 (int32x2_t __a, int32x2_t __b) +/* vcvtn */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtnd_s64_f64 (float64_t __a) { - return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); + return __builtin_aarch64_lfrintndfdi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclt_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtnd_u64_f64 (float64_t __a) { - return __a < __b ? 
-1ll : 0ll; + return __builtin_aarch64_lfrintnudfdi (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclt_u8 (uint8x8_t __a, uint8x8_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtns_s32_f32 (float32_t __a) { - return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, - (int8x8_t) __a); + return __builtin_aarch64_lfrintnsfsi (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vclt_u16 (uint16x4_t __a, uint16x4_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtns_u32_f32 (float32_t __a) { - return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, - (int16x4_t) __a); + return __builtin_aarch64_lfrintnusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtn_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfrintnv2sfv2si (__a); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclt_u32 (uint32x2_t __a, uint32x2_t __b) +vcvtn_u32_f32 (float32x2_t __a) { - return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, - (int32x2_t) __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtnq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfrintnv4sfv4si (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclt_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtnq_u32_f32 (float32x4_t __a) { - return __a < __b ? -1ll : 0ll; + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltq_f32 (float32x4_t __a, float32x4_t __b) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtnq_s64_f64 (float64x2_t __a) { - return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); + return __builtin_aarch64_lfrintnv2dfv2di (__a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltq_f64 (float64x2_t __a, float64x2_t __b) +vcvtnq_u64_f64 (float64x2_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcltq_p8 (poly8x16_t __a, poly8x16_t __b) +/* vcvtp */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtpd_s64_f64 (float64_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, - (int8x16_t) __a); + return __builtin_lceil (__a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcltq_s8 (int8x16_t __a, int8x16_t __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtpd_u64_f64 (float64_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); + return __builtin_aarch64_lceiludfdi (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcltq_s16 (int16x8_t __a, int16x8_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtps_s32_f32 (float32_t __a) { - return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); + return __builtin_iceilf (__a); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltq_s32 (int32x4_t __a, int32x4_t __b) +__extension__ static __inline uint32_t __attribute__ 
((__always_inline__)) +vcvtps_u32_f32 (float32_t __a) { - return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); + return __builtin_aarch64_lceilusfsi (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltq_s64 (int64x2_t __a, int64x2_t __b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtp_s32_f32 (float32x2_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); + return __builtin_aarch64_lceilv2sfv2si (__a); } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtp_u32_f32 (float32x2_t __a) { - return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, - (int8x16_t) __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtpq_s32_f32 (float32x4_t __a) { - return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, - (int16x8_t) __a); + return __builtin_aarch64_lceilv4sfv4si (__a); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +vcvtpq_u32_f32 (float32x4_t __a) { - return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, - (int32x4_t) __a); + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtpq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceilv2dfv2di (__a); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltq_u64 (uint64x2_t __a, uint64x2_t __b) +vcvtpq_u64_f64 (float64x2_t __a) { - return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, - (int64x2_t) __a); + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); } -/* vclt - scalar. */ +/* vdup_n */ -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vclts_f32 (float32_t __a, float32_t __b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) { - return __a < __b ? -1 : 0; + return (float32x2_t) {__a, __a}; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcltd_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_n_f64 (float64_t __a) { - return __a < __b ? -1ll : 0ll; + return __a; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcltd_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) { - return __a < __b ? -1ll : 0ll; + return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcltd_f64 (float64_t __a, float64_t __b) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) { - return __a < __b ? -1ll : 0ll; + return (poly16x4_t) {__a, __a, __a, __a}; } -/* vcltz - vector. 
*/ +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcltz_f32 (float32x2_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) { - float32x2_t __b = {0.0f, 0.0f}; - return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); + return (int16x4_t) {__a, __a, __a, __a}; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcltz_f64 (float64x1_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) { - return __a < 0.0 ? -1ll : 0ll; + return (int32x2_t) {__a, __a}; } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcltz_p8 (poly8x8_t __a) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) { - poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcltz_s8 (int8x8_t __a) +vdup_n_u8 (uint8_t __a) { - int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); + return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vcltz_s16 (int16x4_t __a) +vdup_n_u16 (uint16_t __a) { - int16x4_t __b = {0, 0, 0, 0}; - return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); + return (uint16x4_t) {__a, __a, __a, __a}; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcltz_s32 (int32x2_t __a) +vdup_n_u32 (uint32_t __a) { - int32x2_t __b = {0, 0}; - return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); + return (uint32x2_t) {__a, __a}; } __extension__ static 
__inline uint64x1_t __attribute__ ((__always_inline__)) -vcltz_s64 (int64x1_t __a) +vdup_n_u64 (uint64_t __a) { - return __a < 0ll ? -1ll : 0ll; + return __a; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltzq_f32 (float32x4_t __a) -{ - float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; - return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); -} +/* vdupq_n */ -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltzq_f64 (float64x2_t __a) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) { - float64x2_t __b = {0.0, 0.0}; - return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); + return (float32x4_t) {__a, __a, __a, __a}; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcltzq_p8 (poly8x16_t __a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_n_f64 (float64_t __a) { - poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return (float64x2_t) {__a, __a}; } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcltzq_s8 (int8x16_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (uint32_t __a) { - int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); + return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vcltzq_s16 (int16x8_t __a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (uint32_t __a) { - int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; - return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); + return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, 
__a}; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltzq_s32 (int32x4_t __a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int32_t __a) { - int32x4_t __b = {0, 0, 0, 0}; - return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); + return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltzq_s64 (int64x2_t __a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int32_t __a) { - int64x2_t __b = {0, 0}; - return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); + return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -/* vcltz - scalar. */ - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcltzs_f32 (float32_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) { - return __a < 0.0f ? -1 : 0; + return (int32x4_t) {__a, __a, __a, __a}; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcltzd_s64 (int64x1_t __a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) { - return __a < 0 ? -1ll : 0ll; + return (int64x2_t) {__a, __a}; } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcltzd_u64 (int64x1_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint32_t __a) { - return __a < 0 ? -1ll : 0ll; + return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcltzd_f64 (float64_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint32_t __a) { - return __a < 0.0 ? 
-1ll : 0ll; + return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -/* vcvt (double -> float). */ - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_f64 (float64x2_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) { - return __builtin_aarch64_float_truncate_lo_v2sf (__a); + return (uint32x4_t) {__a, __a, __a, __a}; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) { - return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); + return (uint64x2_t) {__a, __a}; } -/* vcvt (float -> double). */ +/* vdup_lane */ -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvt_f64_f32 (float32x2_t __a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) { - - return __builtin_aarch64_float_extend_lo_v2df (__a); + return __aarch64_vdup_lane_f32 (__a, __b); } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvt_high_f64_f32 (float32x4_t __a) +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_lane_f64 (float64x1_t __a, const int __b) { - return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); + return __aarch64_vdup_lane_f64 (__a, __b); } -/* vcvt (int -> float) */ - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtd_f64_s64 (int64_t __a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) { - return (float64_t) __a; + return __aarch64_vdup_lane_p8 (__a, __b); } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtd_f64_u64 (uint64_t __a) +__extension__ static __inline poly16x4_t 
__attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) { - return (float64_t) __a; + return __aarch64_vdup_lane_p16 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvts_f32_s32 (int32_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) { - return (float32_t) __a; + return __aarch64_vdup_lane_s8 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvts_f32_u32 (uint32_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) { - return (float32_t) __a; + return __aarch64_vdup_lane_s16 (__a, __b); } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_s32 (int32x2_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) { - return __builtin_aarch64_floatv2siv2sf (__a); + return __aarch64_vdup_lane_s32 (__a, __b); } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_u32 (uint32x2_t __a) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) { - return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); + return __aarch64_vdup_lane_s64 (__a, __b); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvtq_f32_s32 (int32x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) { - return __builtin_aarch64_floatv4siv4sf (__a); + return __aarch64_vdup_lane_u8 (__a, __b); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvtq_f32_u32 (uint32x4_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int 
__b) { - return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); + return __aarch64_vdup_lane_u16 (__a, __b); } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvtq_f64_s64 (int64x2_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_floatv2div2df (__a); + return __aarch64_vdup_lane_u32 (__a, __b); } -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvtq_f64_u64 (uint64x2_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) { - return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); + return __aarch64_vdup_lane_u64 (__a, __b); } -/* vcvt (float -> int) */ +/* vdup_laneq */ -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtd_s64_f64 (float64_t __a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_f32 (float32x4_t __a, const int __b) { - return (int64_t) __a; + return __aarch64_vdup_laneq_f32 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtd_u64_f64 (float64_t __a) +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_f64 (float64x2_t __a, const int __b) { - return (uint64_t) __a; + return __aarch64_vdup_laneq_f64 (__a, __b); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvts_s32_f32 (float32_t __a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_p8 (poly8x16_t __a, const int __b) { - return (int32_t) __a; + return __aarch64_vdup_laneq_p8 (__a, __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvts_u32_f32 (float32_t __a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_p16 (poly16x8_t __a, const int __b) { - 
return (uint32_t) __a; + return __aarch64_vdup_laneq_p16 (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvt_s32_f32 (float32x2_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_s8 (int8x16_t __a, const int __b) { - return __builtin_aarch64_lbtruncv2sfv2si (__a); + return __aarch64_vdup_laneq_s8 (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvt_u32_f32 (float32x2_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_s16 (int16x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); + return __aarch64_vdup_laneq_s16 (__a, __b); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtq_s32_f32 (float32x4_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_lbtruncv4sfv4si (__a); + return __aarch64_vdup_laneq_s32 (__a, __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtq_u32_f32 (float32x4_t __a) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_s64 (int64x2_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. 
*/ - return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); + return __aarch64_vdup_laneq_s64 (__a, __b); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtq_s64_f64 (float64x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_lbtruncv2dfv2di (__a); + return __aarch64_vdup_laneq_u8 (__a, __b); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtq_u64_f64 (float64x2_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_u16 (uint16x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); + return __aarch64_vdup_laneq_u16 (__a, __b); } -/* vcvta */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u32 (__a, __b); +} -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtad_s64_f64 (float64_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_lrounddfdi (__a); + return __aarch64_vdup_laneq_u64 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtad_u64_f64 (float64_t __a) +/* vdupq_lane */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) { - return __builtin_aarch64_lroundudfdi (__a); + return __aarch64_vdupq_lane_f32 (__a, __b); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvtas_s32_f32 (float32_t __a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_f64 (float64x1_t __a, const int __b) { - return 
__builtin_aarch64_lroundsfsi (__a); + return __aarch64_vdupq_lane_f64 (__a, __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvtas_u32_f32 (float32_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) { - return __builtin_aarch64_lroundusfsi (__a); + return __aarch64_vdupq_lane_p8 (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvta_s32_f32 (float32x2_t __a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) { - return __builtin_aarch64_lroundv2sfv2si (__a); + return __aarch64_vdupq_lane_p16 (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvta_u32_f32 (float32x2_t __a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); + return __aarch64_vdupq_lane_s8 (__a, __b); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtaq_s32_f32 (float32x4_t __a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) { - return __builtin_aarch64_lroundv4sfv4si (__a); + return __aarch64_vdupq_lane_s16 (__a, __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtaq_u32_f32 (float32x4_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. 
*/ - return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); + return __aarch64_vdupq_lane_s32 (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtaq_s64_f64 (float64x2_t __a) +vdupq_lane_s64 (int64x1_t __a, const int __b) { - return __builtin_aarch64_lroundv2dfv2di (__a); + return __aarch64_vdupq_lane_s64 (__a, __b); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtaq_u64_f64 (float64x2_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); + return __aarch64_vdupq_lane_u8 (__a, __b); } -/* vcvtm */ - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtmd_s64_f64 (float64_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) { - return __builtin_lfloor (__a); + return __aarch64_vdupq_lane_u16 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtmd_u64_f64 (float64_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_lfloorudfdi (__a); + return __aarch64_vdupq_lane_u32 (__a, __b); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvtms_s32_f32 (float32_t __a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) { - return __builtin_ifloorf (__a); + return __aarch64_vdupq_lane_u64 (__a, __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvtms_u32_f32 (float32_t __a) +/* vdupq_laneq */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_f32 
(float32x4_t __a, const int __b) { - return __builtin_aarch64_lfloorusfsi (__a); + return __aarch64_vdupq_laneq_f32 (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtm_s32_f32 (float32x2_t __a) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_f64 (float64x2_t __a, const int __b) { - return __builtin_aarch64_lfloorv2sfv2si (__a); + return __aarch64_vdupq_laneq_f64 (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtm_u32_f32 (float32x2_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_p8 (poly8x16_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a); + return __aarch64_vdupq_laneq_p8 (__a, __b); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtmq_s32_f32 (float32x4_t __a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_p16 (poly16x8_t __a, const int __b) { - return __builtin_aarch64_lfloorv4sfv4si (__a); + return __aarch64_vdupq_laneq_p16 (__a, __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtmq_u32_f32 (float32x4_t __a) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_s8 (int8x16_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. 
*/ - return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a); + return __aarch64_vdupq_laneq_s8 (__a, __b); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtmq_s64_f64 (float64x2_t __a) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_lfloorv2dfv2di (__a); + return __aarch64_vdupq_laneq_s16 (__a, __b); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtmq_u64_f64 (float64x2_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_s32 (int32x4_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a); + return __aarch64_vdupq_laneq_s32 (__a, __b); } -/* vcvtn */ - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtnd_s64_f64 (float64_t __a) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_lfrintndfdi (__a); + return __aarch64_vdupq_laneq_s64 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtnd_u64_f64 (float64_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_lfrintnudfdi (__a); + return __aarch64_vdupq_laneq_u8 (__a, __b); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvtns_s32_f32 (float32_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_u16 (uint16x8_t __a, const int __b) { - return __builtin_aarch64_lfrintnsfsi (__a); + return __aarch64_vdupq_laneq_u16 (__a, __b); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvtns_u32_f32 (float32_t __a) 
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_lfrintnusfsi (__a); + return __aarch64_vdupq_laneq_u32 (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtn_s32_f32 (float32x2_t __a) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_lfrintnv2sfv2si (__a); + return __aarch64_vdupq_laneq_u64 (__a, __b); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtn_u32_f32 (float32x2_t __a) +/* vdupb_lane */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_lane_p8 (poly8x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a); + return __aarch64_vget_lane_p8 (__a, __b); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtnq_s32_f32 (float32x4_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_lane_s8 (int8x8_t __a, const int __b) { - return __builtin_aarch64_lfrintnv4sfv4si (__a); + return __aarch64_vget_lane_s8 (__a, __b); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtnq_u32_f32 (float32x4_t __a) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_lane_u8 (uint8x8_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. 
*/ - return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a); + return __aarch64_vget_lane_u8 (__a, __b); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtnq_s64_f64 (float64x2_t __a) +/* vduph_lane */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_lane_p16 (poly16x4_t __a, const int __b) { - return __builtin_aarch64_lfrintnv2dfv2di (__a); + return __aarch64_vget_lane_p16 (__a, __b); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtnq_u64_f64 (float64x2_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_lane_s16 (int16x4_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a); + return __aarch64_vget_lane_s16 (__a, __b); } -/* vcvtp */ - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtpd_s64_f64 (float64_t __a) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_lane_u16 (uint16x4_t __a, const int __b) { - return __builtin_lceil (__a); + return __aarch64_vget_lane_u16 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtpd_u64_f64 (float64_t __a) +/* vdups_lane */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_lane_f32 (float32x2_t __a, const int __b) { - return __builtin_aarch64_lceiludfdi (__a); + return __aarch64_vget_lane_f32 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvtps_s32_f32 (float32_t __a) +vdups_lane_s32 (int32x2_t __a, const int __b) { - return __builtin_iceilf (__a); + return __aarch64_vget_lane_s32 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvtps_u32_f32 (float32_t __a) +vdups_lane_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_lceilusfsi 
(__a); + return __aarch64_vget_lane_u32 (__a, __b); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtp_s32_f32 (float32x2_t __a) +/* vdupd_lane */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b) { - return __builtin_aarch64_lceilv2sfv2si (__a); + return __a; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtp_u32_f32 (float32x2_t __a) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a); + return __a; } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtpq_s32_f32 (float32x4_t __a) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b) { - return __builtin_aarch64_lceilv4sfv4si (__a); + return __a; } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtpq_u32_f32 (float32x4_t __a) +/* vdupb_laneq */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_laneq_p8 (poly8x16_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. 
*/ - return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a); + return __aarch64_vgetq_lane_p8 (__a, __b); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtpq_s64_f64 (float64x2_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) { - return __builtin_aarch64_lceilv2dfv2di (__a); + return __aarch64_vgetq_lane_s8 (__a, __b); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtpq_u64_f64 (float64x2_t __a) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_laneq_u8 (uint8x16_t __a, const int __b) { - /* TODO: This cast should go away when builtins have - their correct types. */ - return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); + return __aarch64_vgetq_lane_u8 (__a, __b); } -/* vdup */ +/* vduph_laneq */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p16 (__a, __b); +} -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vdupb_lane_s8 (int8x16_t a, int const b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_laneq_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_dup_lane_scalarv16qi (a, b); + return __aarch64_vgetq_lane_s16 (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vdupb_lane_u8 (uint8x16_t a, int const b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_laneq_u16 (uint16x8_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b); + return __aarch64_vgetq_lane_u16 (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vduph_lane_s16 (int16x8_t a, int const b) +/* vdups_laneq */ +__extension__ static __inline 
float32_t __attribute__ ((__always_inline__)) +vdups_laneq_f32 (float32x4_t __a, const int __b) { - return __builtin_aarch64_dup_lane_scalarv8hi (a, b); + return __aarch64_vgetq_lane_f32 (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vduph_lane_u16 (uint16x8_t a, int const b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_laneq_s32 (int32x4_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b); + return __aarch64_vgetq_lane_s32 (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vdups_lane_s32 (int32x4_t a, int const b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vdups_laneq_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_dup_lane_scalarv4si (a, b); + return __aarch64_vgetq_lane_u32 (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vdups_lane_u32 (uint32x4_t a, int const b) +/* vdupd_laneq */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_laneq_f64 (float64x2_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b); + return __aarch64_vgetq_lane_f64 (__a, __b); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vdupd_lane_s64 (int64x2_t a, int const b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_laneq_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_dup_lane_scalarv2di (a, b); + return __aarch64_vgetq_lane_s64 (__a, __b); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vdupd_lane_u64 (uint64x2_t a, int const b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_laneq_u64 (uint64x2_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, 
b); + return __aarch64_vgetq_lane_u64 (__a, __b); } /* vld1 */ @@ -21425,7 +21548,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); } @@ -21476,7 +21599,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); } @@ -21553,7 +21676,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); } @@ -21604,7 +21727,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); } @@ -21729,7 +21852,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); } @@ -21778,7 +21901,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); } @@ -24365,8 +24488,8 @@ vst2_s64 (int64_t * __a, int64x1x2_t val) { __builtin_aarch64_simd_oi __o; int64x2x2_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ -24377,8 +24500,8 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t val) { __builtin_aarch64_simd_oi __o; uint64x2x2_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ 
-24389,8 +24512,8 @@ vst2_f64 (float64_t * __a, float64x1x2_t val) { __builtin_aarch64_simd_oi __o; float64x2x2_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); @@ -24401,8 +24524,8 @@ vst2_s8 (int8_t * __a, int8x8x2_t val) { __builtin_aarch64_simd_oi __o; int8x16x2_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24413,8 +24536,8 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t val) { __builtin_aarch64_simd_oi __o; poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24425,8 +24548,8 @@ vst2_s16 (int16_t * __a, int16x4x2_t val) { __builtin_aarch64_simd_oi __o; 
int16x8x2_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24437,8 +24560,8 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t val) { __builtin_aarch64_simd_oi __o; poly16x8x2_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24449,8 +24572,8 @@ vst2_s32 (int32_t * __a, int32x2x2_t val) { __builtin_aarch64_simd_oi __o; int32x4x2_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24461,8 +24584,8 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t val) { __builtin_aarch64_simd_oi __o; uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - 
temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24473,8 +24596,8 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t val) { __builtin_aarch64_simd_oi __o; uint16x8x2_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24485,8 +24608,8 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val) { __builtin_aarch64_simd_oi __o; uint32x4x2_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24497,8 +24620,8 @@ vst2_f32 (float32_t * __a, float32x2x2_t val) { __builtin_aarch64_simd_oi __o; float32x4x2_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); + 
temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); @@ -24617,9 +24740,9 @@ vst3_s64 (int64_t * __a, int64x1x3_t val) { __builtin_aarch64_simd_ci __o; int64x2x3_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24631,9 +24754,9 @@ vst3_u64 (uint64_t * __a, uint64x1x3_t val) { __builtin_aarch64_simd_ci __o; uint64x2x3_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24645,9 +24768,9 @@ vst3_f64 
(float64_t * __a, float64x1x3_t val) { __builtin_aarch64_simd_ci __o; float64x2x3_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24659,9 +24782,9 @@ vst3_s8 (int8_t * __a, int8x8x3_t val) { __builtin_aarch64_simd_ci __o; int8x16x3_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24673,9 +24796,9 @@ vst3_p8 (poly8_t * __a, poly8x8x3_t val) { __builtin_aarch64_simd_ci __o; poly8x16x3_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], 
vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24687,9 +24810,9 @@ vst3_s16 (int16_t * __a, int16x4x3_t val) { __builtin_aarch64_simd_ci __o; int16x8x3_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24701,9 +24824,9 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t val) { __builtin_aarch64_simd_ci __o; poly16x8x3_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24715,9 +24838,9 @@ vst3_s32 (int32_t * __a, int32x2x3_t val) { 
__builtin_aarch64_simd_ci __o; int32x4x3_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24729,9 +24852,9 @@ vst3_u8 (uint8_t * __a, uint8x8x3_t val) { __builtin_aarch64_simd_ci __o; uint8x16x3_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24743,9 +24866,9 @@ vst3_u16 (uint16_t * __a, uint16x4x3_t val) { __builtin_aarch64_simd_ci __o; uint16x8x3_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C 
(0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24757,9 +24880,9 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val) { __builtin_aarch64_simd_ci __o; uint32x4x3_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24771,9 +24894,9 @@ vst3_f32 (float32_t * __a, float32x2x3_t val) { __builtin_aarch64_simd_ci __o; float32x4x3_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -24905,10 +25028,10 @@ vst4_s64 (int64_t * __a, int64x1x4_t val) { 
__builtin_aarch64_simd_xi __o; int64x2x4_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); - temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24921,10 +25044,10 @@ vst4_u64 (uint64_t * __a, uint64x1x4_t val) { __builtin_aarch64_simd_xi __o; uint64x2x4_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); - temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24937,10 +25060,10 @@ vst4_f64 (float64_t * __a, float64x1x4_t val) { __builtin_aarch64_simd_xi __o; float64x2x4_t temp; - temp.val[0] = vcombine_f64 (val.val[0], 
vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); - temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24953,10 +25076,10 @@ vst4_s8 (int8_t * __a, int8x8x4_t val) { __builtin_aarch64_simd_xi __o; int8x16x4_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); - temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24969,10 +25092,10 @@ vst4_p8 (poly8_t * __a, poly8x8x4_t val) { __builtin_aarch64_simd_xi __o; poly8x16x4_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = 
vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); - temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24985,10 +25108,10 @@ vst4_s16 (int16_t * __a, int16x4x4_t val) { __builtin_aarch64_simd_xi __o; int16x8x4_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); - temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25001,10 +25124,10 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t val) { __builtin_aarch64_simd_xi __o; poly16x8x4_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); - temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 
(UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25017,10 +25140,10 @@ vst4_s32 (int32_t * __a, int32x2x4_t val) { __builtin_aarch64_simd_xi __o; int32x4x4_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); - temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -25033,10 +25156,10 @@ vst4_u8 (uint8_t * __a, uint8x8x4_t val) { __builtin_aarch64_simd_xi __o; uint8x16x4_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); - temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = 
vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -25049,10 +25172,10 @@ vst4_u16 (uint16_t * __a, uint16x4x4_t val) { __builtin_aarch64_simd_xi __o; uint16x8x4_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); - temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25065,10 +25188,10 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val) { __builtin_aarch64_simd_xi __o; uint32x4x4_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); - temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 
(val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -25081,10 +25204,10 @@ vst4_f32 (float32_t * __a, float32x2x4_t val) { __builtin_aarch64_simd_xi __o; float32x4x4_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); - temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -25581,4 +25704,81 @@ __INTERLEAVE_LIST (zip) /* End of optimal implementations in approved order. 
*/ +#undef __aarch64_vget_lane_any +#undef __aarch64_vget_lane_f32 +#undef __aarch64_vget_lane_f64 +#undef __aarch64_vget_lane_p8 +#undef __aarch64_vget_lane_p16 +#undef __aarch64_vget_lane_s8 +#undef __aarch64_vget_lane_s16 +#undef __aarch64_vget_lane_s32 +#undef __aarch64_vget_lane_s64 +#undef __aarch64_vget_lane_u8 +#undef __aarch64_vget_lane_u16 +#undef __aarch64_vget_lane_u32 +#undef __aarch64_vget_lane_u64 + +#undef __aarch64_vgetq_lane_f32 +#undef __aarch64_vgetq_lane_f64 +#undef __aarch64_vgetq_lane_p8 +#undef __aarch64_vgetq_lane_p16 +#undef __aarch64_vgetq_lane_s8 +#undef __aarch64_vgetq_lane_s16 +#undef __aarch64_vgetq_lane_s32 +#undef __aarch64_vgetq_lane_s64 +#undef __aarch64_vgetq_lane_u8 +#undef __aarch64_vgetq_lane_u16 +#undef __aarch64_vgetq_lane_u32 +#undef __aarch64_vgetq_lane_u64 + +#undef __aarch64_vdup_lane_any +#undef __aarch64_vdup_lane_f32 +#undef __aarch64_vdup_lane_f64 +#undef __aarch64_vdup_lane_p8 +#undef __aarch64_vdup_lane_p16 +#undef __aarch64_vdup_lane_s8 +#undef __aarch64_vdup_lane_s16 +#undef __aarch64_vdup_lane_s32 +#undef __aarch64_vdup_lane_s64 +#undef __aarch64_vdup_lane_u8 +#undef __aarch64_vdup_lane_u16 +#undef __aarch64_vdup_lane_u32 +#undef __aarch64_vdup_lane_u64 +#undef __aarch64_vdup_laneq_f32 +#undef __aarch64_vdup_laneq_f64 +#undef __aarch64_vdup_laneq_p8 +#undef __aarch64_vdup_laneq_p16 +#undef __aarch64_vdup_laneq_s8 +#undef __aarch64_vdup_laneq_s16 +#undef __aarch64_vdup_laneq_s32 +#undef __aarch64_vdup_laneq_s64 +#undef __aarch64_vdup_laneq_u8 +#undef __aarch64_vdup_laneq_u16 +#undef __aarch64_vdup_laneq_u32 +#undef __aarch64_vdup_laneq_u64 +#undef __aarch64_vdupq_lane_f32 +#undef __aarch64_vdupq_lane_f64 +#undef __aarch64_vdupq_lane_p8 +#undef __aarch64_vdupq_lane_p16 +#undef __aarch64_vdupq_lane_s8 +#undef __aarch64_vdupq_lane_s16 +#undef __aarch64_vdupq_lane_s32 +#undef __aarch64_vdupq_lane_s64 +#undef __aarch64_vdupq_lane_u8 +#undef __aarch64_vdupq_lane_u16 +#undef __aarch64_vdupq_lane_u32 +#undef 
__aarch64_vdupq_lane_u64 +#undef __aarch64_vdupq_laneq_f32 +#undef __aarch64_vdupq_laneq_f64 +#undef __aarch64_vdupq_laneq_p8 +#undef __aarch64_vdupq_laneq_p16 +#undef __aarch64_vdupq_laneq_s8 +#undef __aarch64_vdupq_laneq_s16 +#undef __aarch64_vdupq_laneq_s32 +#undef __aarch64_vdupq_laneq_s64 +#undef __aarch64_vdupq_laneq_u8 +#undef __aarch64_vdupq_laneq_u16 +#undef __aarch64_vdupq_laneq_u32 +#undef __aarch64_vdupq_laneq_u64 + #endif diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3ec889f28fd..ffe125b5583 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -134,9 +134,15 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +;; Vector modes for Q and H types. +(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) + ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for Q, H and S types. +(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + ;; Vector and scalar integer modes for H and S (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI]) @@ -377,7 +383,7 @@ (V4HI "V8HI") (V8HI "V8HI") (V2SI "V4SI") (V4SI "V4SI") (DI "V2DI") (V2DI "V2DI") - (V2SF "V2SF") (V4SF "V4SF") + (V2SF "V4SF") (V4SF "V4SF") (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI")]) @@ -453,6 +459,15 @@ (V2SF "s") (V4SF "s") (V2DF "d")]) +;; Corresponding core element mode for each vector mode. This is a +;; variation on mapping FP modes to GP regs. +(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "w") (V4SF "w") + (V2DF "x")]) + ;; Double vector types for ALLX. 
(define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) @@ -512,6 +527,20 @@ (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) +(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") + (V4HI "V8HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "DI") + (V2SF "V4SF") (V4SF "V2SF") + (DF "V2DF") (V2DF "DF")]) + +(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64") + (V4HI "to_128") (V8HI "to_64") + (V2SI "to_128") (V4SI "to_64") + (DI "to_128") (V2DI "to_64") + (V2SF "to_128") (V4SF "to_64") + (DF "to_128") (V2DF "to_64")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 3e2b6b34357..dbc90826665 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -26,6 +26,11 @@ && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) ) +(define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + (define_predicate "aarch64_reg_or_zero" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 2975850dcb9..9f8d8cd6e0d 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -35,6 +35,11 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/aarch64-builtins.c +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) 
$(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + comma=, MULTILIB_OPTIONS = $(patsubst %, mabi=%, $(subst $(comma), ,$(TM_MULTILIB_CONFIG))) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 5f5b33e347b..a8fb92964eb 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -2659,6 +2659,7 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) cmp_mode = cmp_mode == DImode ? DFmode : DImode; op0 = gen_lowpart (cmp_mode, tem); op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); local_fast_math = 1; } diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h index fbf4a07eb45..da5842fda85 100644 --- a/gcc/config/alpha/linux.h +++ b/gcc/config/alpha/linux.h @@ -59,16 +59,18 @@ along with GCC; see the file COPYING3. If not see #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* Determine whether the entire c99 runtime is present in the - runtime library. */ -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #define TARGET_POSIX_IO diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h new file mode 100644 index 00000000000..97768fce0ca --- /dev/null +++ b/gcc/config/arm/aarch-common-protos.h @@ -0,0 +1,36 @@ +/* Function prototypes for instruction scheduling dependeoncy routines, + defined in aarch-common.c + + Copyright (C) 1991-2013 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH_COMMON_PROTOS_H +#define GCC_AARCH_COMMON_PROTOS_H + +extern int arm_early_load_addr_dep (rtx, rtx); +extern int arm_early_store_addr_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); +extern int arm_mac_accumulator_is_result (rtx, rtx); +extern int arm_no_early_alu_shift_dep (rtx, rtx); +extern int arm_no_early_alu_shift_value_dep (rtx, rtx); +extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_no_early_store_addr_dep (rtx, rtx); + +#endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c new file mode 100644 index 00000000000..69366af9bd5 --- /dev/null +++ b/gcc/config/arm/aarch-common.c @@ -0,0 +1,278 @@ +/* Dependency checks for instruction scheduling, shared between ARM and + AARCH64. 
+ + Copyright (C) 1991-2013 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* Return nonzero if the CONSUMER instruction (a load) does need + PRODUCER's value to calculate the address. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "rtl.h" +#include "tree.h" +#include "c-family/c-common.h" +#include "rtl.h" + +int +arm_early_load_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + { + if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN) + addr = XVECEXP (addr, 0, 1); + else + addr = XVECEXP (addr, 0, 0); + } + addr = XEXP (addr, 1); + + return reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value or amount dependency on the + result of PRODUCER. 
*/ + +int +arm_no_early_alu_shift_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the whole shift + operation. */ + if (REG_P (early_op)) + early_op = op; + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value dependency on the result of + PRODUCER. */ + +int +arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the value being + shifted, in either case. */ + if (!REG_P (early_op)) + early_op = XEXP (early_op, 0); + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER (a mul or mac op) does not + have an early register mult dependency on the result of + PRODUCER. 
*/ + +int +arm_no_early_mul_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a store) does not need + PRODUCER's value to calculate the address. */ + +int +arm_no_early_store_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + addr = XEXP (addr, 0); + + return !reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (a store) does need + PRODUCER's value to calculate the address. */ + +int +arm_early_store_addr_dep (rtx producer, rtx consumer) +{ + return !arm_no_early_store_addr_dep (producer, consumer); +} + +/* Return non-zero iff the consumer (a multiply-accumulate or a + multiple-subtract instruction) has an accumulator dependency on the + result of the producer and no other dependency on that result. It + does not check if the producer is multiply-accumulate instruction. 
*/ +int +arm_mac_accumulator_is_result (rtx producer, rtx consumer) +{ + rtx result; + rtx op0, op1, acc; + + producer = PATTERN (producer); + consumer = PATTERN (consumer); + + if (GET_CODE (producer) == COND_EXEC) + producer = COND_EXEC_CODE (producer); + if (GET_CODE (consumer) == COND_EXEC) + consumer = COND_EXEC_CODE (consumer); + + if (GET_CODE (producer) != SET) + return 0; + + result = XEXP (producer, 0); + + if (GET_CODE (consumer) != SET) + return 0; + + /* Check that the consumer is of the form + (set (...) (plus (mult ...) (...))) + or + (set (...) (minus (...) (mult ...))). */ + if (GET_CODE (XEXP (consumer, 1)) == PLUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); + acc = XEXP (XEXP (consumer, 1), 1); + } + else if (GET_CODE (XEXP (consumer, 1)) == MINUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); + acc = XEXP (XEXP (consumer, 1), 0); + } + else + return 0; + + return (reg_overlap_mentioned_p (result, acc) + && !reg_overlap_mentioned_p (result, op0) + && !reg_overlap_mentioned_p (result, op1)); +} + +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. */ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). 
*/ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md index dc8e7ac8c14..3972a850990 100644 --- a/gcc/config/arm/arm-fixed.md +++ b/gcc/config/arm/arm-fixed.md @@ -25,7 +25,8 @@ "TARGET_32BIT" "add%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no")]) + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) (define_insn "add3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -34,7 +35,8 @@ "TARGET_INT_SIMD" "sadd%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "usadd3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -43,7 +45,8 @@ "TARGET_INT_SIMD" "uqadd%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "ssadd3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -52,7 +55,8 @@ "TARGET_INT_SIMD" "qadd%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "sub3" [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") @@ -61,7 +65,8 @@ "TARGET_32BIT" "sub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no")]) + (set_attr "predicable_short_it" 
"yes,no") + (set_attr "type" "alu_reg")]) (define_insn "sub3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -70,7 +75,8 @@ "TARGET_INT_SIMD" "ssub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "ussub3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -80,7 +86,8 @@ "TARGET_INT_SIMD" "uqsub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "sssub3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -89,7 +96,8 @@ "TARGET_INT_SIMD" "qsub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) ;; Fractional multiplies. @@ -246,6 +254,7 @@ return ""; } [(set_attr "conds" "clob") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (if_then_else (match_test "arm_restrict_it") @@ -305,6 +314,7 @@ return ""; } [(set_attr "conds" "clob") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (if_then_else (match_test "arm_restrict_it") @@ -406,7 +416,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "1") - (set_attr "type" "arlo_shift")]) + (set_attr "type" "alu_shift_imm")]) (define_insn "arm_usatsihi" [(set (match_operand:HI 0 "s_register_operand" "=r") @@ -414,5 +424,6 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_imm")] ) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index ef94bbcea25..f694dfdaae2 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -97,14 +97,6 @@ extern bool 
arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); extern int neon_vector_mem_operand (rtx, int, bool); extern int neon_struct_mem_operand (rtx); -extern int arm_no_early_store_addr_dep (rtx, rtx); -extern int arm_early_store_addr_dep (rtx, rtx); -extern int arm_early_load_addr_dep (rtx, rtx); -extern int arm_no_early_alu_shift_dep (rtx, rtx); -extern int arm_no_early_alu_shift_value_dep (rtx, rtx); -extern int arm_no_early_mul_dep (rtx, rtx); -extern int arm_mac_accumulator_is_result (rtx, rtx); -extern int arm_mac_accumulator_is_mul_result (rtx, rtx); extern int tls_mentioned_p (rtx); extern int symbol_mentioned_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 294de80a73b..f9027ddd2e7 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -4544,7 +4544,9 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) { pcum->aapcs_vfp_reg_alloc = mask << regno; - if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + if (mode == BLKmode + || (mode == TImode && ! TARGET_NEON) + || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode)) { int i; int rcount = pcum->aapcs_vfp_rcount; @@ -8662,8 +8664,14 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) instruction we depend on is another ALU instruction, then we may have to account for an additional stall. 
*/ if (shift_opnum != 0 - && (attr_type == TYPE_ARLO_SHIFT - || attr_type == TYPE_ARLO_SHIFT_REG + && (attr_type == TYPE_ALU_SHIFT_IMM + || attr_type == TYPE_ALUS_SHIFT_IMM + || attr_type == TYPE_LOGIC_SHIFT_IMM + || attr_type == TYPE_LOGICS_SHIFT_IMM + || attr_type == TYPE_ALU_SHIFT_REG + || attr_type == TYPE_ALUS_SHIFT_REG + || attr_type == TYPE_LOGIC_SHIFT_REG + || attr_type == TYPE_LOGICS_SHIFT_REG || attr_type == TYPE_MOV_SHIFT || attr_type == TYPE_MVN_SHIFT || attr_type == TYPE_MOV_SHIFT_REG @@ -8950,9 +8958,17 @@ cortexa7_older_only (rtx insn) switch (get_attr_type (insn)) { - case TYPE_ARLO_REG: + case TYPE_ALU_REG: + case TYPE_ALUS_REG: + case TYPE_LOGIC_REG: + case TYPE_LOGICS_REG: + case TYPE_ADC_REG: + case TYPE_ADCS_REG: + case TYPE_ADR: + case TYPE_BFM: + case TYPE_REV: case TYPE_MVN_REG: - case TYPE_SHIFT: + case TYPE_SHIFT_IMM: case TYPE_SHIFT_REG: case TYPE_LOAD_BYTE: case TYPE_LOAD1: @@ -8961,7 +8977,7 @@ cortexa7_older_only (rtx insn) case TYPE_FADDS: case TYPE_FFARITHD: case TYPE_FADDD: - case TYPE_FCPYS: + case TYPE_FMOV: case TYPE_F_CVT: case TYPE_FCMPS: case TYPE_FCMPD: @@ -8973,7 +8989,8 @@ cortexa7_older_only (rtx insn) case TYPE_FMACD: case TYPE_FDIVS: case TYPE_FDIVD: - case TYPE_F_2_R: + case TYPE_F_MRC: + case TYPE_F_MRRC: case TYPE_F_FLAG: case TYPE_F_LOADS: case TYPE_F_STORES: @@ -8996,7 +9013,10 @@ cortexa7_younger (FILE *file, int verbose, rtx insn) switch (get_attr_type (insn)) { - case TYPE_ARLO_IMM: + case TYPE_ALU_IMM: + case TYPE_ALUS_IMM: + case TYPE_LOGIC_IMM: + case TYPE_LOGICS_IMM: case TYPE_EXTEND: case TYPE_MVN_IMM: case TYPE_MOV_IMM: @@ -14360,6 +14380,16 @@ thumb2_reorg (void) && IN_RANGE (INTVAL (op1), -7, 7)) action = CONV; } + /* ADCS , */ + else if (GET_CODE (XEXP (src, 0)) == PLUS + && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst) + && low_register_operand (XEXP (XEXP (src, 0), 1), + SImode) + && COMPARISON_P (op1) + && cc_register (XEXP (op1, 0), VOIDmode) + && maybe_get_arm_condition_code (op1) == ARM_CS + && XEXP 
(op1, 1) == const0_rtx) + action = CONV; break; case MINUS: @@ -16801,123 +16831,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, } } -/* Generate and emit a pattern that will be recognized as STRD pattern. If even - number of registers are being pushed, multiple STRD patterns are created for - all register pairs. If odd number of registers are pushed, emit a - combination of STRDs and STR for the prologue saves. */ +/* Generate and emit a sequence of insns equivalent to PUSH, but using + STR and STRD. If an even number of registers are being pushed, one + or more STRD patterns are created for each register pair. If an + odd number of registers are pushed, emit an initial STR followed by + as many STRD instructions as are needed. This works best when the + stack is initially 64-bit aligned (the normal case), since it + ensures that each STRD is also 64-bit aligned. */ static void thumb2_emit_strd_push (unsigned long saved_regs_mask) { int num_regs = 0; - int i, j; + int i; + int regno; rtx par = NULL_RTX; - rtx insn = NULL_RTX; rtx dwarf = NULL_RTX; - rtx tmp, reg, tmp1; - - for (i = 0; i <= LAST_ARM_REGNUM; i++) - if (saved_regs_mask & (1 << i)) - num_regs++; + rtx tmp; + bool first = true; - gcc_assert (num_regs && num_regs <= 16); + num_regs = bit_count (saved_regs_mask); - /* Pre-decrement the stack pointer, based on there being num_regs 4-byte - registers to push. */ - tmp = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp) = 1; - insn = emit_insn (tmp); + /* Must be at least one register to save, and can't save SP or PC. */ + gcc_assert (num_regs > 0 && num_regs <= 14); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); - /* Create sequence for DWARF info. */ + /* Create sequence for DWARF info. All the frame-related data for + debugging is held in this wrapper. 
*/ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); - /* RTLs cannot be shared, hence create new copy for dwarf. */ - tmp1 = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, 0) = tmp1; + /* Describe the stack adjustment. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; - gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); - gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + /* Find the first register. */ + for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++) + ; - /* Var j iterates over all the registers to gather all the registers in - saved_regs_mask. Var i gives index of register R_j in stack frame. - A PARALLEL RTX of register-pair is created here, so that pattern for - STRD can be matched. If num_regs is odd, 1st register will be pushed - using STR and remaining registers will be pushed with STRD in pairs. - If num_regs is even, all registers are pushed with STRD in pairs. - Hence, skip first element for odd num_regs. */ - for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--) - if (saved_regs_mask & (1 << j)) - { - /* Create RTX for store. New RTX is created for dwarf as - they are not sharable. */ - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); + i = 0; - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp) = 1; - RTX_FRAME_RELATED_P (tmp1) = 1; - - if (((i - (num_regs % 2)) % 2) == 1) - /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to - be created. Hence create it first. 
The STRD pattern we are - generating is : - [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1)) - (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ] - where the target registers need not be consecutive. */ - par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + /* If there's an odd number of registers to push. Start off by + pushing a single register. This ensures that subsequent strd + operations are dword aligned (assuming that SP was originally + 64-bit aligned). */ + if ((num_regs & 1) != 0) + { + rtx reg, mem, insn; - /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is - even, the reg_j is added as 0th element and if it is odd, reg_i is - added as 1st element of STRD pattern shown above. */ - XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; + reg = gen_rtx_REG (SImode, regno); + if (num_regs == 1) + mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + else + mem = gen_frame_mem (Pmode, + gen_rtx_PRE_MODIFY + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * num_regs))); - if (((i - (num_regs % 2)) % 2) == 0) - /* When (i - (num_regs % 2)) is even, RTXs for both the registers - to be loaded are generated in above given STRD pattern, and the - pattern can be emitted now. */ - emit_insn (par); + tmp = gen_rtx_SET (VOIDmode, mem, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + insn = emit_insn (tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + i++; + regno++; + XVECEXP (dwarf, 0, i) = tmp; + first = false; + } - i--; - } + while (i < num_regs) + if (saved_regs_mask & (1 << regno)) + { + rtx reg1, reg2, mem1, mem2; + rtx tmp0, tmp1, tmp2; + int regno2; - if ((num_regs % 2) == 1) - { - /* If odd number of registers are pushed, generate STR pattern to store - lone register. 
*/ - for (; (saved_regs_mask & (1 << j)) == 0; j--); + /* Find the register to pair with this one. */ + for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0; + regno2++) + ; - tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, - stack_pointer_rtx, 4 * i)); - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, tmp1, reg); - RTX_FRAME_RELATED_P (tmp) = 1; + reg1 = gen_rtx_REG (SImode, regno); + reg2 = gen_rtx_REG (SImode, regno2); - emit_insn (tmp); + if (first) + { + rtx insn; + + first = false; + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * num_regs)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * (num_regs - 1))); + tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * (num_regs))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp0) = 1; + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3)); + XVECEXP (par, 0, 0) = tmp0; + XVECEXP (par, 0, 1) = tmp1; + XVECEXP (par, 0, 2) = tmp2; + insn = emit_insn (par); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = tmp1; + XVECEXP (par, 0, 1) = tmp2; + emit_insn (par); + } - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; - } + /* Create unwind information. 
This is an approximation. */ + tmp1 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)), + reg1); + tmp2 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))), + reg2); + + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp1; + XVECEXP (dwarf, 0, i + 2) = tmp2; + i += 2; + regno = regno2 + 1; + } + else + regno++; - add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); - RTX_FRAME_RELATED_P (insn) = 1; return; } @@ -25394,163 +25466,6 @@ arm_setup_incoming_varargs (cumulative_args_t pcum_v, *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; } -/* Return nonzero if the CONSUMER instruction (a store) does not need - PRODUCER's value to calculate the address. */ - -int -arm_no_early_store_addr_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx addr = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (addr) == COND_EXEC) - addr = COND_EXEC_CODE (addr); - if (GET_CODE (addr) == PARALLEL) - addr = XVECEXP (addr, 0, 0); - addr = XEXP (addr, 0); - - return !reg_overlap_mentioned_p (value, addr); -} - -/* Return nonzero if the CONSUMER instruction (a store) does need - PRODUCER's value to calculate the address. */ - -int -arm_early_store_addr_dep (rtx producer, rtx consumer) -{ - return !arm_no_early_store_addr_dep (producer, consumer); -} - -/* Return nonzero if the CONSUMER instruction (a load) does need - PRODUCER's value to calculate the address. 
*/ - -int -arm_early_load_addr_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx addr = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (addr) == COND_EXEC) - addr = COND_EXEC_CODE (addr); - if (GET_CODE (addr) == PARALLEL) - { - if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN) - addr = XVECEXP (addr, 0, 1); - else - addr = XVECEXP (addr, 0, 0); - } - addr = XEXP (addr, 1); - - return reg_overlap_mentioned_p (value, addr); -} - -/* Return nonzero if the CONSUMER instruction (an ALU op) does not - have an early register shift value or amount dependency on the - result of PRODUCER. */ - -int -arm_no_early_alu_shift_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - rtx early_op; - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - early_op = XEXP (op, 0); - /* This is either an actual independent shift, or a shift applied to - the first operand of another operation. We want the whole shift - operation. */ - if (REG_P (early_op)) - early_op = op; - - return !reg_overlap_mentioned_p (value, early_op); -} - -/* Return nonzero if the CONSUMER instruction (an ALU op) does not - have an early register shift value dependency on the result of - PRODUCER. 
*/ - -int -arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - rtx early_op; - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - early_op = XEXP (op, 0); - - /* This is either an actual independent shift, or a shift applied to - the first operand of another operation. We want the value being - shifted, in either case. */ - if (!REG_P (early_op)) - early_op = XEXP (early_op, 0); - - return !reg_overlap_mentioned_p (value, early_op); -} - -/* Return nonzero if the CONSUMER (a mul or mac op) does not - have an early register mult dependency on the result of - PRODUCER. */ - -int -arm_no_early_mul_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) - { - if (GET_CODE (XEXP (op, 0)) == MULT) - return !reg_overlap_mentioned_p (value, XEXP (op, 0)); - else - return !reg_overlap_mentioned_p (value, XEXP (op, 1)); - } - - return 0; -} - /* We can't rely on the caller doing the proper promotion when using APCS or ATPCS. */ @@ -25600,95 +25515,6 @@ arm_cxx_guard_type (void) return TARGET_AAPCS_BASED ? 
integer_type_node : long_long_integer_type_node; } -/* Return non-zero iff the consumer (a multiply-accumulate or a - multiple-subtract instruction) has an accumulator dependency on the - result of the producer and no other dependency on that result. It - does not check if the producer is multiply-accumulate instruction. */ -int -arm_mac_accumulator_is_result (rtx producer, rtx consumer) -{ - rtx result; - rtx op0, op1, acc; - - producer = PATTERN (producer); - consumer = PATTERN (consumer); - - if (GET_CODE (producer) == COND_EXEC) - producer = COND_EXEC_CODE (producer); - if (GET_CODE (consumer) == COND_EXEC) - consumer = COND_EXEC_CODE (consumer); - - if (GET_CODE (producer) != SET) - return 0; - - result = XEXP (producer, 0); - - if (GET_CODE (consumer) != SET) - return 0; - - /* Check that the consumer is of the form - (set (...) (plus (mult ...) (...))) - or - (set (...) (minus (...) (mult ...))). */ - if (GET_CODE (XEXP (consumer, 1)) == PLUS) - { - if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) - return 0; - - op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); - op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); - acc = XEXP (XEXP (consumer, 1), 1); - } - else if (GET_CODE (XEXP (consumer, 1)) == MINUS) - { - if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) - return 0; - - op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); - op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); - acc = XEXP (XEXP (consumer, 1), 0); - } - else - return 0; - - return (reg_overlap_mentioned_p (result, acc) - && !reg_overlap_mentioned_p (result, op0) - && !reg_overlap_mentioned_p (result, op1)); -} - -/* Return non-zero if the consumer (a multiply-accumulate instruction) - has an accumulator dependency on the result of the producer (a - multiplication instruction) and no other dependency on that result. 
*/ -int -arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) -{ - rtx mul = PATTERN (producer); - rtx mac = PATTERN (consumer); - rtx mul_result; - rtx mac_op0, mac_op1, mac_acc; - - if (GET_CODE (mul) == COND_EXEC) - mul = COND_EXEC_CODE (mul); - if (GET_CODE (mac) == COND_EXEC) - mac = COND_EXEC_CODE (mac); - - /* Check that mul is of the form (set (...) (mult ...)) - and mla is of the form (set (...) (plus (mult ...) (...))). */ - if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) - || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS - || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) - return 0; - - mul_result = XEXP (mul, 0); - mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); - mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); - mac_acc = XEXP (XEXP (mac, 1), 1); - - return (reg_overlap_mentioned_p (mul_result, mac_acc) - && !reg_overlap_mentioned_p (mul_result, mac_op0) - && !reg_overlap_mentioned_p (mul_result, mac_op1)); -} - /* The EABI says test the least significant bit of a guard variable. */ diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 387d2717431..1781b75b34b 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -645,6 +645,8 @@ extern int prefer_neon_for_64bits; #define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + /* XXX Blah -- this macro is used directly by libobjc. Since it supports no vector modes, cut out the complexity and fall back on BIGGEST_FIELD_ALIGNMENT. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index fceb04c1272..8a482b570ec 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -245,411 +245,13 @@ (set_attr "length" "4") (set_attr "pool_range" "250")]) -; TYPE attribute is used to classify instructions for use in scheduling. -; -; Instruction classification: -; -; arlo_imm any arithmetic or logical instruction that doesn't have -; a shifted operand and has an immediate operand. 
This -; excludes MOV, MVN and RSB(S) immediate. -; arlo_reg any arithmetic or logical instruction that doesn't have -; a shifted or an immediate operand. This excludes -; MOV and MVN but includes MOVT. This is also the default. -; arlo_shift any arithmetic or logical instruction that has a source -; operand shifted by a constant. This excludes -; simple shifts. -; arlo_shift_reg as arlo_shift, with the shift amount specified in a -; register. -; block blockage insn, this blocks all functional units. -; branch branch. -; call subroutine call. -; clz count leading zeros (CLZ). -; extend extend instruction (SXTB, SXTH, UXTB, UXTH). -; f_2_r transfer from float to core (no memory needed). -; f_cvt conversion between float and integral. -; f_flag transfer of co-processor flags to the CPSR. -; f_load[d,s] double/single load from memory. Used for VFP unit. -; f_minmax[d,s] double/single floating point minimum/maximum. -; f_rint[d,s] double/single floating point rount to integral. -; f_sel[d,s] double/single floating byte select. -; f_store[d,s] double/single store to memory. Used for VFP unit. -; fadd[d,s] double/single floating-point scalar addition. -; fcmp[d,s] double/single floating-point compare. -; fconst[d,s] double/single load immediate. -; fcpys single precision floating point cpy. -; fdiv[d,s] double/single precision floating point division. -; ffarith[d,s] double/single floating point abs/neg/cpy. -; ffma[d,s] double/single floating point fused multiply-accumulate. -; float floating point arithmetic operation. -; fmac[d,s] double/single floating point multiply-accumulate. -; fmul[d,s] double/single floating point multiply. -; load_byte load byte(s) from memory to arm registers. -; load1 load 1 word from memory to arm registers. -; load2 load 2 words from memory to arm registers. -; load3 load 3 words from memory to arm registers. -; load4 load 4 words from memory to arm registers. -; mla integer multiply accumulate. 
-; mlas integer multiply accumulate, flag setting. -; mov_imm simple MOV instruction that moves an immediate to -; register. This includes MOVW, but not MOVT. -; mov_reg simple MOV instruction that moves a register to another -; register. This includes MOVW, but not MOVT. -; mov_shift simple MOV instruction, shifted operand by a constant. -; mov_shift_reg simple MOV instruction, shifted operand by a register. -; mul integer multiply. -; muls integer multiply, flag setting. -; mvn_imm inverting move instruction, immediate. -; mvn_reg inverting move instruction, register. -; mvn_shift inverting move instruction, shifted operand by a constant. -; mvn_shift_reg inverting move instruction, shifted operand by a register. -; r_2_f transfer from core to float. -; sdiv signed division. -; shift simple shift operation (LSL, LSR, ASR, ROR) with an -; immediate. -; shift_reg simple shift by a register. -; smlad signed multiply accumulate dual. -; smladx signed multiply accumulate dual reverse. -; smlal signed multiply accumulate long. -; smlald signed multiply accumulate long dual. -; smlals signed multiply accumulate long, flag setting. -; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. -; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. -; smlawy signed multiply accumulate wide, 32x16-bit, -; 32-bit accumulate. -; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. -; smlsd signed multiply subtract dual. -; smlsdx signed multiply subtract dual reverse. -; smlsld signed multiply subtract long dual. -; smmla signed most significant word multiply accumulate. -; smmul signed most significant word multiply. -; smmulr signed most significant word multiply, rounded. -; smuad signed dual multiply add. -; smuadx signed dual multiply add reverse. -; smull signed multiply long. -; smulls signed multiply long, flag setting. -; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. -; smulxy signed multiply, 16x16-bit, 32-bit accumulate. 
-; smusd signed dual multiply subtract. -; smusdx signed dual multiply subtract reverse. -; store1 store 1 word to memory from arm registers. -; store2 store 2 words to memory from arm registers. -; store3 store 3 words to memory from arm registers. -; store4 store 4 (or more) words to memory from arm registers. -; udiv unsigned division. -; umaal unsigned multiply accumulate accumulate long. -; umlal unsigned multiply accumulate long. -; umlals unsigned multiply accumulate long, flag setting. -; umull unsigned multiply long. -; umulls unsigned multiply long, flag setting. -; -; The classification below is for instructions used by the Wireless MMX -; Technology. Each attribute value is used to classify an instruction of the -; same name or family. -; -; wmmx_tandc -; wmmx_tbcst -; wmmx_textrc -; wmmx_textrm -; wmmx_tinsr -; wmmx_tmcr -; wmmx_tmcrr -; wmmx_tmia -; wmmx_tmiaph -; wmmx_tmiaxy -; wmmx_tmrc -; wmmx_tmrrc -; wmmx_tmovmsk -; wmmx_torc -; wmmx_torvsc -; wmmx_wabs -; wmmx_wdiff -; wmmx_wacc -; wmmx_wadd -; wmmx_waddbhus -; wmmx_waddsubhx -; wmmx_waligni -; wmmx_walignr -; wmmx_wand -; wmmx_wandn -; wmmx_wavg2 -; wmmx_wavg4 -; wmmx_wcmpeq -; wmmx_wcmpgt -; wmmx_wmac -; wmmx_wmadd -; wmmx_wmax -; wmmx_wmerge -; wmmx_wmiawxy -; wmmx_wmiaxy -; wmmx_wmin -; wmmx_wmov -; wmmx_wmul -; wmmx_wmulw -; wmmx_wldr -; wmmx_wor -; wmmx_wpack -; wmmx_wqmiaxy -; wmmx_wqmulm -; wmmx_wqmulwm -; wmmx_wror -; wmmx_wsad -; wmmx_wshufh -; wmmx_wsll -; wmmx_wsra -; wmmx_wsrl -; wmmx_wstr -; wmmx_wsub -; wmmx_wsubaddhx -; wmmx_wunpckeh -; wmmx_wunpckel -; wmmx_wunpckih -; wmmx_wunpckil -; wmmx_wxor - -(define_attr "type" - "arlo_imm,\ - arlo_reg,\ - arlo_shift,\ - arlo_shift_reg,\ - block,\ - branch,\ - call,\ - clz,\ - extend,\ - f_2_r,\ - f_cvt,\ - f_flag,\ - f_loadd,\ - f_loads,\ - f_minmaxd,\ - f_minmaxs,\ - f_rintd,\ - f_rints,\ - f_seld,\ - f_sels,\ - f_stored,\ - f_stores,\ - faddd,\ - fadds,\ - fcmpd,\ - fcmps,\ - fconstd,\ - fconsts,\ - fcpys,\ - fdivd,\ - fdivs,\ - 
ffarithd,\ - ffariths,\ - ffmad,\ - ffmas,\ - float,\ - fmacd,\ - fmacs,\ - fmuld,\ - fmuls,\ - load_byte,\ - load1,\ - load2,\ - load3,\ - load4,\ - mla,\ - mlas,\ - mov_imm,\ - mov_reg,\ - mov_shift,\ - mov_shift_reg,\ - mul,\ - muls,\ - mvn_imm,\ - mvn_reg,\ - mvn_shift,\ - mvn_shift_reg,\ - r_2_f,\ - sdiv,\ - shift,\ - shift_reg,\ - smlad,\ - smladx,\ - smlal,\ - smlald,\ - smlals,\ - smlalxy,\ - smlawx,\ - smlawy,\ - smlaxy,\ - smlsd,\ - smlsdx,\ - smlsld,\ - smmla,\ - smmul,\ - smmulr,\ - smuad,\ - smuadx,\ - smull,\ - smulls,\ - smulwy,\ - smulxy,\ - smusd,\ - smusdx,\ - store1,\ - store2,\ - store3,\ - store4,\ - udiv,\ - umaal,\ - umlal,\ - umlals,\ - umull,\ - umulls,\ - wmmx_tandc,\ - wmmx_tbcst,\ - wmmx_textrc,\ - wmmx_textrm,\ - wmmx_tinsr,\ - wmmx_tmcr,\ - wmmx_tmcrr,\ - wmmx_tmia,\ - wmmx_tmiaph,\ - wmmx_tmiaxy,\ - wmmx_tmrc,\ - wmmx_tmrrc,\ - wmmx_tmovmsk,\ - wmmx_torc,\ - wmmx_torvsc,\ - wmmx_wabs,\ - wmmx_wabsdiff,\ - wmmx_wacc,\ - wmmx_wadd,\ - wmmx_waddbhus,\ - wmmx_waddsubhx,\ - wmmx_waligni,\ - wmmx_walignr,\ - wmmx_wand,\ - wmmx_wandn,\ - wmmx_wavg2,\ - wmmx_wavg4,\ - wmmx_wcmpeq,\ - wmmx_wcmpgt,\ - wmmx_wmac,\ - wmmx_wmadd,\ - wmmx_wmax,\ - wmmx_wmerge,\ - wmmx_wmiawxy,\ - wmmx_wmiaxy,\ - wmmx_wmin,\ - wmmx_wmov,\ - wmmx_wmul,\ - wmmx_wmulw,\ - wmmx_wldr,\ - wmmx_wor,\ - wmmx_wpack,\ - wmmx_wqmiaxy,\ - wmmx_wqmulm,\ - wmmx_wqmulwm,\ - wmmx_wror,\ - wmmx_wsad,\ - wmmx_wshufh,\ - wmmx_wsll,\ - wmmx_wsra,\ - wmmx_wsrl,\ - wmmx_wstr,\ - wmmx_wsub,\ - wmmx_wsubaddhx,\ - wmmx_wunpckeh,\ - wmmx_wunpckel,\ - wmmx_wunpckih,\ - wmmx_wunpckil,\ - wmmx_wxor" - (const_string "arlo_reg")) - -; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
-(define_attr "mul32" "no,yes" - (if_then_else - (eq_attr "type" - "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ - smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") - (const_string "yes") - (const_string "no"))) - -; Is this an (integer side) multiply with a 64-bit result? -(define_attr "mul64" "no,yes" - (if_then_else - (eq_attr "type" - "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") - (const_string "yes") - (const_string "no"))) +;; Instruction classification types +(include "types.md") ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) -;; Classification of NEON instructions for scheduling purposes. -(define_attr "neon_type" - "neon_int_1,\ - neon_int_2,\ - neon_int_3,\ - neon_int_4,\ - neon_int_5,\ - neon_vqneg_vqabs,\ - neon_vmov,\ - neon_vaba,\ - neon_vsma,\ - neon_vaba_qqq,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ - neon_mla_qqq_32_qqd_32_scalar,\ - neon_mul_ddd_16_scalar_32_16_long_scalar,\ - neon_mul_qqd_32_scalar,\ - neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ - neon_shift_1,\ - neon_shift_2,\ - neon_shift_3,\ - neon_vshl_ddd,\ - neon_vqshl_vrshl_vqrshl_qqq,\ - neon_vsra_vrsra,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vsum,\ - neon_fp_vmul_ddd,\ - neon_fp_vmul_qqd,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vmla_ddd_scalar,\ - neon_fp_vmla_qqq_scalar,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq,\ - neon_bp_simple,\ - neon_bp_2cycle,\ - neon_bp_3cycle,\ - neon_ldr,\ - neon_str,\ - neon_vld1_1_2_regs,\ - neon_vld1_3_4_regs,\ - neon_vld2_2_regs_vld1_vld2_all_lanes,\ - 
neon_vld2_4_regs,\ - neon_vld3_vld4,\ - neon_vst1_1_2_regs_vst2_2_regs,\ - neon_vst1_3_4_regs,\ - neon_vst2_4_regs_vst3_vst4,\ - neon_vst3_vst4,\ - neon_vld1_vld2_lane,\ - neon_vld3_vld4_lane,\ - neon_vst1_vst2_lane,\ - neon_vst3_vst4_lane,\ - neon_vld3_vld4_all_lanes,\ - neon_mcr,\ - neon_mcr_2_mcrr,\ - neon_mrc,\ - neon_mrrc,\ - neon_ldm_2,\ - neon_stm_2,\ - none" - (const_string "none")) - ; condition codes: this one is used by final_prescan_insn to speed up ; conditionalizing instructions. It saves having to scan the rtl to see if ; it uses or alters the condition codes. @@ -675,9 +277,34 @@ (ior (eq_attr "is_thumb1" "yes") (eq_attr "type" "call")) (const_string "clob") - (if_then_else (eq_attr "neon_type" "none") - (const_string "nocond") - (const_string "unconditional")))) + (if_then_else (eq_attr "type" + "!neon_int_1, neon_int_2, neon_int_3, neon_int_4, neon_int_5,\ + neon_vqneg_vqabs, neon_vmov, neon_vaba, neon_vsma, neon_vaba_qqq,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar, neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, neon_shift_1,\ + neon_shift_2, neon_shift_3, neon_vshl_ddd,\ + neon_vqshl_vrshl_vqrshl_qqq, neon_vsra_vrsra,\ + neon_fp_vadd_ddd_vabs_dd, neon_fp_vadd_qqq_vabs_qq, neon_fp_vsum,\ + neon_fp_vmul_ddd, neon_fp_vmul_qqd, neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq, neon_fp_vmla_ddd_scalar, neon_fp_vmla_qqq_scalar,\ + neon_fp_vrecps_vrsqrts_ddd, neon_fp_vrecps_vrsqrts_qqq,\ + neon_bp_simple, neon_bp_2cycle, neon_bp_3cycle, neon_ldr, neon_str,\ + neon_vld1_1_2_regs, neon_vld1_3_4_regs,\ + neon_vld2_2_regs_vld1_vld2_all_lanes, neon_vld2_4_regs,\ + neon_vld3_vld4, neon_vst1_1_2_regs_vst2_2_regs, 
neon_vst1_3_4_regs,\ + neon_vst2_4_regs_vst3_vst4, neon_vst3_vst4, neon_vld1_vld2_lane,\ + neon_vld3_vld4_lane, neon_vst1_vst2_lane, neon_vst3_vst4_lane,\ + neon_vld3_vld4_all_lanes, neon_mcr, neon_mcr_2_mcrr, neon_mrc,\ + neon_mrrc, neon_ldm_2, neon_stm_2") + (const_string "nocond") + (const_string "unconditional")))) ; Predicable means that the insn can be conditionally executed based on ; an automatically added predicate (additional patterns are generated by @@ -703,8 +330,11 @@ ; than one on the main cpu execution unit. (define_attr "core_cycles" "single,multi" (if_then_else (eq_attr "type" - "arlo_imm, arlo_reg,\ - extend, shift, arlo_shift, float, fdivd, fdivs,\ + "adc_imm, adc_reg, adcs_imm, adcs_reg, adr, alu_ext, alu_imm, alu_reg,\ + alu_shift_imm, alu_shift_reg, alus_ext, alus_imm, alus_reg,\ + alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\ + logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\ + logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\ wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\ wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\ wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\ @@ -830,7 +460,8 @@ ] "TARGET_THUMB1" "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_adddi3" @@ -858,7 +489,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*adddi_sesidi_di" @@ -887,7 +519,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*adddi_zesidi_di" @@ -914,7 +547,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - 
(set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "addsi3" @@ -989,8 +623,8 @@ (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no") (set_attr "arch" "t2,t2,t2,t2,*,*,*,t2,t2,*,*,a,t2,t2,*") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "arlo_imm") - (const_string "arlo_reg"))) + (const_string "alu_imm") + (const_string "alu_reg"))) ] ) @@ -1040,7 +674,9 @@ operands[3] = GEN_INT (offset); operands[2] = GEN_INT (INTVAL (operands[2]) - offset); } - [(set_attr "length" "2,2,2,2,2,2,2,4,4,4")] + [(set_attr "length" "2,2,2,2,2,2,2,4,4,4") + (set_attr "type" "alus_imm,alus_imm,alus_reg,alus_reg,alus_reg, + alus_reg,alus_reg,multiple,multiple,multiple")] ) ;; Reloading and elimination of the frame pointer can @@ -1071,7 +707,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_compare0_scratch" @@ -1087,8 +723,7 @@ cmn%?\\t%0, %1" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "arlo_imm,arlo_imm,*") - ] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_negsi_si" @@ -1102,7 +737,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*") (set_attr "length" "2,4") - (set_attr "predicable_short_it" "yes,no")] + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alus_reg")] ) ;; This is the canonicalization of addsi3_compare0_for_combiner when the @@ -1119,7 +755,8 @@ "@ add%.\\t%0, %1, %3 sub%.\\t%0, %1, #%n3" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "alus_reg")] ) ;; Convert the sequence @@ -1177,7 +814,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_compare_op2" @@ -1194,7 +831,7 @@ 
add%.\\t%0, %1, %2 sub%.\\t%0, %1, #%n2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_addsi2_op0" @@ -1215,7 +852,7 @@ (set_attr "arch" "t2,t2,*,*,*") (set_attr "predicable_short_it" "yes,yes,no,no,no") (set_attr "length" "2,2,4,4,4") - (set_attr "type" "arlo_imm,*,arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_addsi2_op1" @@ -1236,8 +873,7 @@ (set_attr "arch" "t2,t2,*,*,*") (set_attr "predicable_short_it" "yes,yes,no,no,no") (set_attr "length" "2,2,4,4,4") - (set_attr "type" - "arlo_imm,*,arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_carryin_" @@ -1254,7 +890,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*,*") (set_attr "length" "4") - (set_attr "predicable_short_it" "yes,no,no")] + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] ) (define_insn "*addsi3_carryin_alt2_" @@ -1271,7 +908,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*,*") (set_attr "length" "4") - (set_attr "predicable_short_it" "yes,no,no")] + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] ) (define_insn "*addsi3_carryin_shift_" @@ -1288,8 +926,8 @@ (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*addsi3_carryin_clobercc_" @@ -1300,7 +938,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" "adc%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] ) (define_insn "*subsi3_carryin" @@ -1315,7 +954,8 @@ [(set_attr "conds" "use") (set_attr "arch" "*,a") (set_attr "predicable" 
"yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "adc_reg,adc_imm")] ) (define_insn "*subsi3_carryin_const" @@ -1325,7 +965,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbc\\t%0, %1, #%B2" - [(set_attr "conds" "use")] + [(set_attr "conds" "use") + (set_attr "type" "adc_imm")] ) (define_insn "*subsi3_carryin_compare" @@ -1338,7 +979,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbcs\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] ) (define_insn "*subsi3_carryin_compare_const" @@ -1351,7 +993,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbcs\\t%0, %1, #%B2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_imm")] ) (define_insn "*subsi3_carryin_shift" @@ -1367,8 +1010,8 @@ [(set_attr "conds" "use") (set_attr "predicable" "yes") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*rsbsi3_carryin_shift" @@ -1384,8 +1027,8 @@ [(set_attr "conds" "use") (set_attr "predicable" "yes") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) ; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. 
@@ -1458,7 +1101,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb_subdi3" @@ -1468,7 +1112,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB1" "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_di_zesidi" @@ -1493,7 +1138,8 @@ operands[5] = GEN_INT (~0); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_di_sesidi" @@ -1519,7 +1165,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_zesidi_di" @@ -1545,7 +1192,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_sesidi_di" @@ -1574,7 +1222,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_zesidi_zesidi" @@ -1597,7 +1246,8 @@ operands[0] = gen_lowpart (SImode, operands[0]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "subsi3" @@ -1628,7 +1278,9 @@ "TARGET_THUMB1" "sub\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) ; ??? 
Check Thumb-2 split length (define_insn_and_split "*arm_subsi3_insn" @@ -1658,7 +1310,7 @@ (set_attr "arch" "t2,t2,t2,t2,*,*,*,*,*") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no") - (set_attr "type" "*,*,*,*,arlo_imm,arlo_imm,*,*,arlo_imm")] + (set_attr "type" "alu_reg,alu_reg,alu_reg,alu_reg,alu_imm,alu_imm,alu_reg,alu_reg,multiple")] ) (define_peephole2 @@ -1688,7 +1340,7 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*,*")] + (set_attr "type" "alus_imm,alus_reg,alus_reg")] ) (define_insn "subsi3_compare" @@ -1703,7 +1355,7 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*,*")] + (set_attr "type" "alus_imm,alus_reg,alus_reg")] ) (define_expand "subsf3" @@ -1725,6 +1377,20 @@ ;; Multiplication insns +(define_expand "mulhi3" + [(set (match_operand:HI 0 "s_register_operand" "") + (mult:HI (match_operand:HI 1 "s_register_operand" "") + (match_operand:HI 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" + " + { + rtx result = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, result)); + DONE; + }" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "s_register_operand" "") (mult:SI (match_operand:SI 2 "s_register_operand" "") @@ -2496,7 +2162,7 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + [(set_attr "type" "neon_int_1,neon_int_1,multiple,multiple,multiple,multiple,neon_int_1,neon_int_1") (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "length" "*,*,8,8,8,8,*,*") @@ -2521,7 +2187,8 @@ operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); }" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) 
(define_insn "*anddi_sesdi_di" @@ -2531,7 +2198,8 @@ (match_operand:DI 1 "s_register_operand" "0,r")))] "TARGET_32BIT" "#" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "andsi3" @@ -2638,8 +2306,7 @@ [(set_attr "length" "4,4,4,4,16") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no,no") - (set_attr "type" - "arlo_imm,arlo_imm,*,*,arlo_imm")] + (set_attr "type" "logic_imm,logic_imm,logic_reg,logic_reg,logic_imm")] ) (define_insn "*thumb1_andsi3_insn" @@ -2649,7 +2316,7 @@ "TARGET_THUMB1" "and\\t%0, %2" [(set_attr "length" "2") - (set_attr "type" "arlo_imm") + (set_attr "type" "logic_imm") (set_attr "conds" "set")]) (define_insn "*andsi3_compare0" @@ -2666,7 +2333,7 @@ bic%.\\t%0, %1, #%B2 and%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "logics_imm,logics_imm,logics_reg")] ) (define_insn "*andsi3_compare0_scratch" @@ -2682,7 +2349,7 @@ bic%.\\t%2, %0, #%B1 tst%?\\t%0, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "logics_imm,logics_imm,logics_reg")] ) (define_insn "*zeroextractsi_compare0_scratch" @@ -2706,7 +2373,7 @@ [(set_attr "conds" "set") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "arlo_imm")] + (set_attr "type" "logics_imm")] ) (define_insn_and_split "*ne_zeroextractsi" @@ -2743,7 +2410,8 @@ (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 12) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "multiple")] ) (define_insn_and_split "*ne_zeroextractsi_shifted" @@ -2768,7 +2436,8 @@ operands[2] = GEN_INT (32 - INTVAL (operands[2])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*ite_ne_zeroextractsi" @@ -2806,7 +2475,8 @@ << INTVAL (operands[3])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + 
(set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*ite_ne_zeroextractsi_shifted" @@ -2833,7 +2503,8 @@ operands[2] = GEN_INT (32 - INTVAL (operands[2])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_split @@ -3134,7 +2805,8 @@ "bfc%?\t%0, %2, %1" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) (define_insn "insv_t2" @@ -3146,7 +2818,8 @@ "bfi%?\t%0, %3, %2, %1" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) ; constants for op 2 will never be given to these patterns. @@ -3171,7 +2844,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_insn_and_split "*anddi_notzesidi_di" @@ -3199,7 +2873,8 @@ }" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) (define_insn_and_split "*anddi_notsesidi_di" @@ -3223,7 +2898,8 @@ }" [(set_attr "length" "8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) (define_insn "andsi_notsi_si" @@ -3233,7 +2909,8 @@ "TARGET_32BIT" "bic%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "thumb1_bicsi3" @@ -3243,7 +2920,9 @@ "TARGET_THUMB1" "bic\\t%0, %1" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) (define_insn "andsi_not_shiftsi_si" [(set (match_operand:SI 0 
"s_register_operand" "=r") @@ -3256,8 +2935,8 @@ [(set_attr "predicable" "yes") (set_attr "shift" "2") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "logic_shift_imm") + (const_string "logic_shift_reg")))] ) (define_insn "*andsi_notsi_si_compare0" @@ -3270,7 +2949,8 @@ (and:SI (not:SI (match_dup 2)) (match_dup 1)))] "TARGET_32BIT" "bic%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] ) (define_insn "*andsi_notsi_si_compare0_scratch" @@ -3282,7 +2962,8 @@ (clobber (match_scratch:SI 0 "=r"))] "TARGET_32BIT" "bic%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] ) (define_expand "iordi3" @@ -3331,7 +3012,7 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + [(set_attr "type" "neon_int_1,neon_int_1,multiple,multiple,multiple,multiple,neon_int_1,neon_int_1") (set_attr "length" "*,*,8,8,8,8,*,*") (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) @@ -3347,7 +3028,8 @@ #" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg,multiple")] ) (define_insn "*iordi_sesidi_di" @@ -3358,7 +3040,8 @@ "TARGET_32BIT" "#" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_expand "iorsi3" @@ -3416,7 +3099,7 @@ (set_attr "arch" "32,t2,t2,32,32") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no,no") - (set_attr "type" "arlo_imm,*,arlo_imm,*,*")] + (set_attr "type" "logic_imm,logic_reg,logic_imm,logic_reg,logic_reg")] ) (define_insn "*thumb1_iorsi3_insn" @@ -3426,7 +3109,8 @@ "TARGET_THUMB1" 
"orr\\t%0, %2" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "logics_reg")]) (define_peephole2 [(match_scratch:SI 3 "r") @@ -3451,7 +3135,7 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_insn "*iorsi3_compare0_scratch" @@ -3463,7 +3147,7 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_expand "xordi3" @@ -3510,7 +3194,7 @@ }" [(set_attr "length" "*,8,8,8,8,*") - (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1") + (set_attr "type" "neon_int_1,multiple,multiple,multiple,multiple,neon_int_1") (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) @@ -3525,7 +3209,8 @@ #" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "*xordi_sesidi_di" @@ -3536,7 +3221,8 @@ "TARGET_32BIT" "#" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_expand "xorsi3" @@ -3589,7 +3275,7 @@ [(set_attr "length" "4,4,4,16") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no") - (set_attr "type" "arlo_imm,*,*,*")] + (set_attr "type" "logic_imm,logic_reg,logic_reg,multiple")] ) (define_insn "*thumb1_xorsi3_insn" @@ -3600,7 +3286,7 @@ "eor\\t%0, %2" [(set_attr "length" "2") (set_attr "conds" "set") - (set_attr "type" "arlo_imm")] + (set_attr "type" "logics_reg")] ) (define_insn "*xorsi3_compare0" @@ -3613,7 +3299,7 @@ "TARGET_32BIT" "eor%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_insn "*xorsi3_compare0_scratch" @@ -3624,7 +3310,7 @@ "TARGET_32BIT" "teq%?\\t%0, %1" [(set_attr "conds" "set") - 
(set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) ; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), @@ -3658,7 +3344,8 @@ [(set_attr "length" "8") (set_attr "ce_count" "2") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) ; ??? Are these four splitters still beneficial when the Thumb-2 bitfield @@ -3795,7 +3482,8 @@ "TARGET_32BIT" "bic%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn "*smax_m1" @@ -3805,7 +3493,8 @@ "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn_and_split "*arm_smax_insn" @@ -3826,7 +3515,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_expand "sminsi3" @@ -3854,7 +3544,8 @@ "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn_and_split "*arm_smin_insn" @@ -3875,7 +3566,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple,multiple")] ) (define_expand "umaxsi3" @@ -3907,7 +3599,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] ) (define_expand "uminsi3" @@ -3939,7 +3632,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] ) (define_insn "*store_minmaxsi" @@ -4008,7 +3702,8 @@ 
(set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 14) - (const_int 12)))] + (const_int 12))) + (set_attr "type" "multiple")] ) ; Reject the frame pointer in operand[1], since reloading this after @@ -4034,8 +3729,7 @@ (match_dup 2)))) (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)]) (set (match_dup 0) - (minus:SI (match_dup 1) - (match_dup 3))))] + (match_dup 6)))] { enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), operands[2], operands[3]); @@ -4048,12 +3742,17 @@ else rc = reverse_condition (rc); operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]); + if (CONST_INT_P (operands[3])) + operands[6] = plus_constant (SImode, operands[1], -INTVAL (operands[3])); + else + operands[6] = gen_rtx_MINUS (SImode, operands[1], operands[3]); } [(set_attr "conds" "clob") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 14) - (const_int 12)))] + (const_int 12))) + (set_attr "type" "multiple")] ) (define_code_iterator SAT [smin smax]) @@ -4082,7 +3781,8 @@ return "usat%?\t%0, %1, %3"; } [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "alus_imm")] ) (define_insn "*satsi__shift" @@ -4110,7 +3810,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "3") - (set_attr "type" "arlo_shift")]) + (set_attr "type" "logic_shift_reg")]) ;; Shift and rotation insns @@ -4188,7 +3888,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "ashlsi3" @@ -4213,7 +3914,7 @@ "TARGET_THUMB1" "lsl\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "ashrdi3" @@ -4285,7 +3986,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - 
(set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*rrx" @@ -4318,7 +4020,7 @@ "TARGET_THUMB1" "asr\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "lshrdi3" @@ -4390,7 +4092,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "lshrsi3" @@ -4415,7 +4118,7 @@ "TARGET_THUMB1" "lsr\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "rotlsi3" @@ -4477,7 +4180,7 @@ (set_attr "predicable_short_it" "yes,no,no") (set_attr "length" "4") (set_attr "shift" "1") - (set_attr "type" "arlo_shift_reg,arlo_shift,arlo_shift_reg")] + (set_attr "type" "alu_shift_reg,alu_shift_imm,alu_shift_reg")] ) (define_insn "*shiftsi3_compare" @@ -4492,7 +4195,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift,arlo_shift_reg")] + (set_attr "type" "alus_shift_imm,alus_shift_reg")] ) (define_insn "*shiftsi3_compare0" @@ -4507,7 +4210,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift,arlo_shift_reg")] + (set_attr "type" "alus_shift_imm,alus_shift_reg")] ) (define_insn "*shiftsi3_compare0_scratch" @@ -4521,7 +4224,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "shift,shift_reg")] + (set_attr "type" "shift_imm,shift_reg")] ) (define_insn "*not_shiftsi" @@ -4863,7 +4566,8 @@ "sbfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) (define_insn "extzv_t2" @@ 
-4875,7 +4579,8 @@ "ubfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) @@ -4941,7 +4646,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb1_negdi2" @@ -4950,7 +4656,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB1" "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_expand "negsi2" @@ -4968,7 +4675,8 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no") (set_attr "arch" "t2,*") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb1_negsi2" @@ -4976,7 +4684,8 @@ (neg:SI (match_operand:SI 1 "register_operand" "l")))] "TARGET_THUMB1" "neg\\t%0, %1" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "alu_imm")] ) (define_expand "negsf2" @@ -5034,7 +4743,8 @@ DONE; } [(set_attr "length" "8,8,4,4") - (set_attr "arch" "a,a,t2,t2")] + (set_attr "arch" "a,a,t2,t2") + (set_attr "type" "multiple")] ) (define_insn_and_split "*negdi_zero_extendsidi" @@ -5056,7 +4766,8 @@ operands[0] = gen_lowpart (SImode, operands[0]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] ;; length in thumb is 4 + (set_attr "length" "8") + (set_attr "type" "multiple")] ;; length in thumb is 4 ) ;; abssi2 doesn't really clobber the condition codes if a different register @@ -5141,7 +4852,8 @@ [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb1_abssi2" @@ -5155,7 +4867,8 @@ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 
2)))] "" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_neg_abssi2" @@ -5211,7 +4924,8 @@ [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb1_neg_abssi2" @@ -5225,7 +4939,8 @@ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1))) (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] "" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_expand "abssf2" @@ -5274,7 +4989,7 @@ }" [(set_attr "length" "*,8,8,*") (set_attr "predicable" "no,yes,yes,no") - (set_attr "neon_type" "neon_int_1,*,*,neon_int_1") + (set_attr "type" "neon_int_1,multiple,multiple,neon_int_1") (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] ) @@ -5461,7 +5176,8 @@ (set_attr "ce_count" "2") (set_attr "shift" "1") (set_attr "predicable" "yes") - (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")] + (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits") + (set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")] ) ;; Splits for all extensions to DImode @@ -5597,7 +5313,7 @@ "@ # ldr%(h%)\\t%0, %1" - [(set_attr "type" "arlo_shift,load_byte") + [(set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes")] ) @@ -5618,7 +5334,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uxtah%?\\t%0, %2, %1" - [(set_attr "type" "arlo_shift") + [(set_attr "type" "alu_shift_reg") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no")] ) @@ -5668,7 +5384,7 @@ # ldrb\\t%0, %1" [(set_attr "length" "4,2") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "pool_range" "*,32")] ) @@ -5691,7 +5407,7 @@ # ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" [(set_attr "length" "8,4") - (set_attr "type" "arlo_shift,load_byte") 
+ (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes")] ) @@ -5714,7 +5430,7 @@ "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_reg")] ) (define_split @@ -5766,7 +5482,8 @@ "tst%?\\t%0, #255" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_imm")] ) (define_expand "extendhisi2" @@ -5936,7 +5653,7 @@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5963,6 +5680,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg")] ) (define_expand "extendqihi2" @@ -6037,7 +5755,7 @@ # ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -6063,7 +5781,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sxtab%?\\t%0, %2, %1" - [(set_attr "type" "arlo_shift") + [(set_attr "type" "alu_shift_reg") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no")] ) @@ -6283,7 +6001,7 @@ } " [(set_attr "length" "8,12,16,8,8") - (set_attr "type" "*,*,*,load2,store2") + (set_attr "type" "multiple,multiple,multiple,load2,store2") (set_attr "arm_pool_range" "*,*,*,1020,*") (set_attr "arm_neg_pool_range" "*,*,*,1004,*") (set_attr "thumb2_pool_range" "*,*,*,4094,*") @@ -6423,7 +6141,7 @@ } }" [(set_attr "length" "4,4,6,2,2,6,4,4") - (set_attr "type" "*,mov_reg,*,load2,store2,load2,store2,mov_reg") + (set_attr "type" "multiple,multiple,multiple,load2,store2,load2,store2,multiple") 
(set_attr "pool_range" "*,*,*,*,*,1018,*,*")] ) @@ -6521,7 +6239,8 @@ "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "mov_imm")] ) (define_insn "*arm_movsi_insn" @@ -6593,7 +6312,7 @@ str\\t%1, %0 mov\\t%0, %1" [(set_attr "length" "2,2,4,4,2,2,2,2,2") - (set_attr "type" "*,*,*,*,load1,store1,load1,store1,*") + (set_attr "type" "mov_reg,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg") (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*") (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")]) @@ -6749,7 +6468,8 @@ INTVAL (operands[2])); return \"add\\t%0, %|pc\"; " - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "pic_add_dot_plus_eight" @@ -6764,7 +6484,8 @@ INTVAL (operands[2])); return \"add%?\\t%0, %|pc, %1\"; " - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "alu_reg")] ) (define_insn "tls_load_dot_plus_eight" @@ -6779,7 +6500,8 @@ INTVAL (operands[2])); return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\"; " - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "load1")] ) ;; PIC references to local variables can generate pic_add_dot_plus_eight @@ -6840,7 +6562,7 @@ cmp%?\\t%0, #0 sub%.\\t%0, %1, #0" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm")] + (set_attr "type" "alus_imm,alus_imm")] ) ;; Subroutine to store a half word from a register into memory. 
@@ -7186,7 +6908,7 @@ return \"ldrh %0, %1\"; }" [(set_attr "length" "2,4,2,2,2,2") - (set_attr "type" "*,load1,store1,*,*,*") + (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) @@ -7434,7 +7156,7 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "arlo_imm,load1,store1,mov_reg,mov_imm,mov_imm") + (set_attr "type" "alu_imm,load1,store1,mov_reg,mov_imm,mov_imm") (set_attr "pool_range" "*,32,*,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) @@ -7499,7 +7221,7 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "load1,store1,mov_reg,mov_reg") + (set_attr "type" "load1,store1,mov_reg,multiple") (set_attr "length" "4,4,4,8") (set_attr "predicable" "yes")] ) @@ -7612,7 +7334,7 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "*,load1,store1,load1,store1,mov_reg,mov_reg") + (set_attr "type" "alus_imm,load1,store1,load1,store1,mov_reg,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] ) @@ -7700,7 +7422,7 @@ } " [(set_attr "length" "8,12,16,8,8") - (set_attr "type" "*,*,*,load2,store2") + (set_attr "type" "multiple,multiple,multiple,load2,store2") (set_attr "arm_pool_range" "*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*") (set_attr "arm_neg_pool_range" "*,*,*,1004,*") @@ -7744,7 +7466,7 @@ } " [(set_attr "length" "4,2,2,6,4,4") - (set_attr "type" "*,load2,store2,load2,store2,mov_reg") + (set_attr "type" "multiple,load2,store2,load2,store2,multiple") (set_attr "pool_range" "*,*,*,1018,*,*")] ) @@ -8052,7 +7774,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "cbranchsi4_scratch" @@ -8088,7 +7811,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) 
(pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*negated_cbranchsi4" @@ -8123,7 +7847,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tbit_cbranch" @@ -8167,7 +7892,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tlobits_cbranch" @@ -8211,7 +7937,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tstsi3_cbranch" @@ -8248,7 +7975,8 @@ (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) (le (minus (match_dup 2) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*cbranchne_decr1" @@ -8351,7 +8079,8 @@ (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) (le (minus (match_dup 4) (pc)) (const_int 2048))) (const_int 8) - (const_int 10)))])] + (const_int 10)))]) + (set_attr "type" "multiple")] ) (define_insn "*addsi3_cbranch" @@ -8432,7 +8161,8 @@ (and (ge (minus (match_dup 5) (pc)) (const_int -2038)) (le (minus (match_dup 5) (pc)) (const_int 2048))) (const_int 8) - (const_int 10)))))] + (const_int 10))))) + (set_attr "type" "multiple")] ) (define_insn "*addsi3_cbranch_scratch" @@ -8500,7 +8230,8 @@ (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) (le (minus (match_dup 4) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) @@ -8520,34 +8251,34 @@ (set_attr "arch" "t2,t2,any,any") (set_attr "length" "2,2,4,4") (set_attr "predicable" "yes") - (set_attr "type" 
"*,*,*,arlo_imm")] + (set_attr "type" "alus_reg,alus_reg,alus_reg,alus_imm")] ) (define_insn "*cmpsi_shiftsi" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r,r") (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r,r") - (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")])))] "TARGET_32BIT" "cmp%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) (define_insn "*cmpsi_shiftsi_swp" [(set (reg:CC_SWP CC_REGNUM) (compare:CC_SWP (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r,r") - (match_operand:SI 2 "shift_amount_operand" "M,rM")]) - (match_operand:SI 0 "s_register_operand" "r,r")))] + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")]) + (match_operand:SI 0 "s_register_operand" "r,r,r")))] "TARGET_32BIT" "cmp%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) (define_insn "*arm_cmpsi_negshiftsi_si" [(set (reg:CC_Z CC_REGNUM) @@ -8560,8 +8291,8 @@ "cmn%?\\t%0, %2%S1" [(set_attr "conds" "set") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg"))) + (const_string "alus_shift_imm") + (const_string "alus_shift_reg"))) (set_attr "predicable" "yes")] ) @@ -8603,7 +8334,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); } [(set_attr "conds" "set") - (set_attr "length" "8")] + (set_attr 
"length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_cmpdi_unsigned" @@ -8631,7 +8363,8 @@ [(set_attr "conds" "set") (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "arch" "t2,t2,*") - (set_attr "length" "6,6,8")] + (set_attr "length" "6,6,8") + (set_attr "type" "multiple")] ) (define_insn "*arm_cmpdi_zero" @@ -8641,7 +8374,8 @@ (clobber (match_scratch:SI 1 "=r"))] "TARGET_32BIT" "orr%.\\t%1, %Q0, %R0" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_reg")] ) (define_insn "*thumb_cmpdi_zero" @@ -8652,7 +8386,8 @@ "TARGET_THUMB1" "orr\\t%1, %Q0, %R0" [(set_attr "conds" "set") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "logics_reg")] ) ; This insn allows redundant compares to be removed by cse, nothing should @@ -8666,7 +8401,8 @@ "TARGET_32BIT" "\\t%@ deleted compare" [(set_attr "conds" "set") - (set_attr "length" "0")] + (set_attr "length" "0") + (set_attr "type" "no_insn")] ) @@ -8767,7 +8503,8 @@ (const_int 0)))] "" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*mov_negscc" @@ -8785,7 +8522,8 @@ operands[3] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*mov_notscc" @@ -8804,7 +8542,8 @@ operands[4] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "cstoresi4" @@ -9009,7 +8748,8 @@ "@ neg\\t%0, %1\;adc\\t%0, %0, %1 neg\\t%2, %1\;adc\\t%0, %1, %2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn "*cstoresi_ne0_thumb1_insn" @@ -9029,7 +8769,8 @@ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))] "TARGET_THUMB1" "cmp\\t%1, %2\;sbc\\t%0, %0, %0" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" 
"multiple")] ) (define_insn_and_split "cstoresi_ltu_thumb1" @@ -9043,7 +8784,8 @@ (neg:SI (ltu:SI (match_dup 1) (match_dup 2)))) (set (match_dup 0) (neg:SI (match_dup 3)))] "operands[3] = gen_reg_rtx (SImode);" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) ;; Used as part of the expansion of thumb les sequence. @@ -9055,7 +8797,8 @@ (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))] "TARGET_THUMB1" "cmp\\t%3, %4\;adc\\t%0, %1, %2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) @@ -9273,7 +9016,8 @@ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) (le (minus (match_dup 0) (pc)) (const_int 2048)))) (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "branch")] ) (define_insn "*thumb_jump" @@ -9295,7 +9039,8 @@ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) (le (minus (match_dup 0) (pc)) (const_int 2048))) (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "branch")] ) (define_expand "call" @@ -9779,7 +9524,8 @@ "TARGET_ARM" "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc" [(set_attr "length" "8") - (set_attr "conds" "set")] + (set_attr "conds" "set") + (set_attr "type" "multiple")] ) ;; Call subroutine returning any type. 
@@ -9970,7 +9716,8 @@ return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_expand "thumb1_casesi_internal_pic" @@ -10001,7 +9748,8 @@ (clobber (reg:SI LR_REGNUM))])] "TARGET_THUMB1" "* return thumb1_output_casesi(operands);" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_expand "indirect_jump" @@ -10027,7 +9775,8 @@ (match_operand:SI 0 "s_register_operand" "r"))] "TARGET_ARM" "mov%?\\t%|pc, %0\\t%@ indirect register jump" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "branch")] ) (define_insn "*load_indirect_jump" @@ -10048,7 +9797,8 @@ "TARGET_THUMB1" "mov\\tpc, %0" [(set_attr "conds" "clob") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "branch")] ) @@ -10067,7 +9817,8 @@ [(set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "mov_reg")] ) @@ -10103,7 +9854,7 @@ (if_then_else (match_operand:SI 3 "mult_operator" "") (const_string "no") (const_string "yes"))]) - (set_attr "type" "arlo_shift,arlo_shift,arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) (define_split [(set (match_operand:SI 0 "s_register_operand" "") @@ -10140,7 +9891,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*arith_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -10157,7 +9908,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*sub_shiftsi" [(set (match_operand:SI 0 "s_register_operand" "=r,r") @@ -10170,41 
+9921,41 @@ [(set_attr "predicable" "yes") (set_attr "shift" "3") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*sub_shiftsi_compare0" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV - (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") (match_operator:SI 2 "shift_operator" - [(match_operand:SI 3 "s_register_operand" "r,r") - (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r,r") + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") (minus:SI (match_dup 1) (match_op_dup 2 [(match_dup 3) (match_dup 4)])))] "TARGET_32BIT" "sub%.\\t%0, %1, %3%S2" [(set_attr "conds" "set") (set_attr "shift" "3") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) (define_insn "*sub_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV - (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") (match_operator:SI 2 "shift_operator" - [(match_operand:SI 3 "s_register_operand" "r,r") - (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) (const_int 0))) - (clobber (match_scratch:SI 0 "=r,r"))] + (clobber (match_scratch:SI 0 "=r,r,r"))] "TARGET_32BIT" "sub%.\\t%0, %1, %3%S2" [(set_attr "conds" "set") (set_attr "shift" "3") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) (define_insn_and_split "*and_scc" @@ 
-10232,7 +9983,7 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "type" "mov_reg") + (set_attr "type" "multiple") (set_attr "length" "8")] ) @@ -10266,7 +10017,8 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "4,8")] + (set_attr "length" "4,8") + (set_attr "type" "logic_imm,multiple")] ) ; A series of splitters for the compare_scc pattern below. Note that @@ -10368,7 +10120,9 @@ else rc = reverse_condition (rc); operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); -}) +} + [(set_attr "type" "multiple")] +) ;; Attempt to improve the sequence generated by the compare_scc splitters ;; not to use conditional execution. @@ -10411,7 +10165,7 @@ (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] ) -;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 and optimising for speed. ;; sub Rd, Reg1, reg2 ;; clz Rd, Rd ;; lsr Rd, Rd, #5 @@ -10423,14 +10177,15 @@ (set (match_operand:SI 0 "register_operand" "") (const_int 0))) (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) (set (match_dup 0) (const_int 1)))] - "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM) + && !(TARGET_THUMB2 && optimize_insn_for_size_p ())" [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (clz:SI (match_dup 0))) (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] ) -;; Rd = (eq (reg1) (reg2/imm)) // ! ARMv5 +;; Rd = (eq (reg1) (reg2)) // ! ARMv5 or optimising for size. 
;; sub T1, Reg1, reg2 ;; negs Rd, T1 ;; adc Rd, Rd, T1 @@ -10444,7 +10199,7 @@ (set (match_dup 0) (const_int 1))) (match_scratch:SI 3 "r")] "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" - [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2))) + [(set (match_dup 3) (match_dup 4)) (parallel [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (match_dup 3))) @@ -10452,7 +10207,12 @@ (set (match_dup 0) (plus:SI (plus:SI (match_dup 0) (match_dup 3)) (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] -) + " + if (CONST_INT_P (operands[2])) + operands[4] = plus_constant (SImode, operands[1], -INTVAL (operands[2])); + else + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[2]); + ") (define_insn "*cond_move" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") @@ -10479,7 +10239,7 @@ return \"\"; " [(set_attr "conds" "use") - (set_attr "type" "mov_reg") + (set_attr "type" "mov_reg,mov_reg,multiple") (set_attr "length" "4,4,8")] ) @@ -10506,7 +10266,8 @@ return \"%i5%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*cond_sub" @@ -10524,7 +10285,8 @@ return \"sub%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*cmp_ite0" @@ -10588,6 +10350,7 @@ }" [(set_attr "conds" "set") (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr "type" "multiple") (set_attr_alternative "length" [(const_int 6) (const_int 8) @@ -10687,7 +10450,8 @@ (const_int 10)) (if_then_else (eq_attr "is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn "*cmp_and" @@ -10768,7 +10532,8 @@ (const_int 10)) (if_then_else (eq_attr "is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn "*cmp_ior" @@ -10849,7 +10614,8 @@ (const_int 10)) (if_then_else (eq_attr 
"is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn_and_split "*ior_scc_scc" @@ -10878,7 +10644,9 @@ DOM_CC_X_OR_Y), CC_REGNUM);" [(set_attr "conds" "clob") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ; If the above pattern is followed by a CMP insn, then the compare is ; redundant, since we can rework the conditional instruction that follows. @@ -10906,7 +10674,9 @@ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] "" [(set_attr "conds" "set") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) (define_insn_and_split "*and_scc_scc" [(set (match_operand:SI 0 "s_register_operand" "=Ts") @@ -10936,7 +10706,9 @@ DOM_CC_X_AND_Y), CC_REGNUM);" [(set_attr "conds" "clob") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ; If the above pattern is followed by a CMP insn, then the compare is ; redundant, since we can rework the conditional instruction that follows. 
@@ -10964,7 +10736,9 @@ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] "" [(set_attr "conds" "set") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ;; If there is no dominance in the comparison, then we can still save an ;; instruction in the AND case, since we can know that the second compare @@ -10998,7 +10772,9 @@ operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4], operands[5]);" [(set_attr "conds" "clob") - (set_attr "length" "20")]) + (set_attr "length" "20") + (set_attr "type" "multiple")] +) (define_split [(set (reg:CC_NOOV CC_REGNUM) @@ -11109,7 +10885,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn_and_split "movcond_addsi" @@ -11147,7 +10924,8 @@ } " [(set_attr "conds" "clob") - (set_attr "enabled_for_depr_it" "no,yes,yes")] + (set_attr "enabled_for_depr_it" "no,yes,yes") + (set_attr "type" "multiple")] ) (define_insn "movcond" @@ -11210,7 +10988,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "multiple")] ) ;; ??? The patterns below need checking for Thumb-2 usefulness. 
@@ -11228,7 +11007,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_plus_move" @@ -11250,11 +11030,11 @@ (set_attr "length" "4,4,8,8") (set_attr_alternative "type" [(if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_imm" ) - (const_string "*")) - (const_string "arlo_imm") - (const_string "*") - (const_string "*")])] + (const_string "alu_imm" ) + (const_string "alu_reg")) + (const_string "alu_imm") + (const_string "alu_reg") + (const_string "alu_reg")])] ) (define_insn "*ifcompare_move_plus" @@ -11270,7 +11050,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_plus" @@ -11290,13 +11071,7 @@ sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,8") - (set_attr_alternative "type" - [(if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_imm" ) - (const_string "*")) - (const_string "arlo_imm") - (const_string "*") - (const_string "*")])] + (set_attr "type" "alu_reg,alu_imm,multiple,multiple")] ) (define_insn "*ifcompare_arith_arith" @@ -11314,7 +11089,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_arith" @@ -11330,7 +11106,8 @@ "TARGET_ARM" "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*ifcompare_arith_move" @@ -11371,7 +11148,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_move" @@ -11388,7 +11166,7 @@ %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,8") - (set_attr "type" 
"*,*")] + (set_attr "type" "alu_shift_reg,multiple")] ) (define_insn "*ifcompare_move_arith" @@ -11430,7 +11208,8 @@ return \"%I7%D6\\t%0, %2, %3\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_arith" @@ -11448,7 +11227,7 @@ %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,8") - (set_attr "type" "*,*")] + (set_attr "type" "alu_shift_reg,multiple")] ) (define_insn "*ifcompare_move_not" @@ -11464,7 +11243,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_not" @@ -11481,7 +11261,8 @@ mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") (set_attr "type" "mvn_reg") - (set_attr "length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "mvn_reg,multiple,multiple")] ) (define_insn "*ifcompare_not_move" @@ -11497,7 +11278,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_not_move" @@ -11513,7 +11295,7 @@ mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "mvn_reg,multiple,multiple") (set_attr "length" "4,8,8")] ) @@ -11531,7 +11313,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_shift_move" @@ -11551,9 +11334,7 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "mov_shift") - (const_string "mov_shift_reg")))] + (set_attr "type" "mov_shift_reg,multiple,multiple")] ) (define_insn "*ifcompare_move_shift" @@ -11570,7 +11351,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr 
"length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_shift" @@ -11590,9 +11372,7 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "mov_shift") - (const_string "mov_shift_reg")))] + (set_attr "type" "mov_shift_reg,multiple,multiple")] ) (define_insn "*ifcompare_shift_shift" @@ -11611,7 +11391,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_shift_shift" @@ -11651,7 +11432,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_not_arith" @@ -11684,7 +11466,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_not" @@ -11699,7 +11482,7 @@ "TARGET_ARM" "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "multiple") (set_attr "length" "8")] ) @@ -11715,7 +11498,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_neg_move" @@ -11731,7 +11515,8 @@ mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0 mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0" [(set_attr "conds" "use") - (set_attr "length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] ) (define_insn "*ifcompare_move_neg" @@ -11746,7 +11531,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_neg" @@ -11762,7 +11548,8 @@ mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0 mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0" [(set_attr "conds" "use") - (set_attr 
"length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] ) (define_insn "*arith_adjacentmem" @@ -11960,7 +11747,8 @@ [(unspec_volatile [(const_int 0)] VUNSPEC_THUMB1_INTERWORK)] "TARGET_THUMB1" "* return thumb1_output_interwork ();" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) ;; Note - although unspec_volatile's USE all hard registers, @@ -12147,7 +11935,7 @@ mvn%D4\\t%0, %2 mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "mvn_reg,multiple") (set_attr "length" "4,8")] ) @@ -12167,7 +11955,8 @@ return \"mvnne\\t%0, #0\"; " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*not_signextract_onebit" @@ -12185,7 +11974,8 @@ return \"movne\\t%0, #0\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) ;; ??? 
The above patterns need auditing for Thumb-2 @@ -12247,7 +12037,8 @@ UNSPEC_PRLG_STK))] "" "" - [(set_attr "length" "0")] + [(set_attr "length" "0") + (set_attr "type" "block")] ) ;; Pop (as used in epilogue RTL) @@ -12377,6 +12168,7 @@ assemble_align (32); return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "align_8" @@ -12386,6 +12178,7 @@ assemble_align (64); return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "consttable_end" @@ -12395,6 +12188,7 @@ making_const_table = FALSE; return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "consttable_1" @@ -12406,7 +12200,8 @@ assemble_zeros (3); return \"\"; " - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_2" @@ -12419,7 +12214,8 @@ assemble_zeros (2); return \"\"; " - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_4" @@ -12455,7 +12251,8 @@ } return \"\"; }" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_8" @@ -12479,7 +12276,8 @@ } return \"\"; }" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "no_insn")] ) (define_insn "consttable_16" @@ -12503,7 +12301,8 @@ } return \"\"; }" - [(set_attr "length" "16")] + [(set_attr "length" "16") + (set_attr "type" "no_insn")] ) ;; Miscellaneous Thumb patterns @@ -12531,7 +12330,8 @@ (use (label_ref (match_operand 1 "" "")))] "TARGET_THUMB1" "mov\\t%|pc, %0" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "no_insn")] ) ;; V5 Instructions, @@ -12573,7 +12373,9 @@ (match_operand:SI 1 "" "") (match_operand:SI 2 "" ""))] "TARGET_32BIT && arm_arch5e" - "pld\\t%a0") + "pld\\t%a0" + [(set_attr "type" "load1")] +) ;; General predication pattern @@ -12590,7 +12392,8 @@ [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_REGISTER_USE)] "" "%@ %0 needed" - [(set_attr "length" "0")] + [(set_attr "length" "0") 
+ (set_attr "type" "no_insn")] ) @@ -12638,6 +12441,7 @@ thumb_set_return_address (operands[0], operands[1]); DONE; }" + [(set_attr "type" "mov_reg")] ) @@ -12648,7 +12452,8 @@ (unspec:SI [(const_int 0)] UNSPEC_TLS))] "TARGET_HARD_TP" "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "mrs")] ) ;; Doesn't clobber R1-R3. Must use r0 for the first operand. @@ -12659,7 +12464,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_SOFT_TP" "bl\\t__aeabi_read_tp\\t@ load_tp_soft" - [(set_attr "conds" "clob")] + [(set_attr "conds" "clob") + (set_attr "type" "branch")] ) ;; tls descriptor call @@ -12678,7 +12484,8 @@ return "bl\\t%c0(tlscall)"; } [(set_attr "conds" "clob") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "branch")] ) ;; For thread pointer builtin @@ -12704,7 +12511,8 @@ "movt%?\t%0, %L1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "mov_imm")] ) (define_insn "*arm_rev" @@ -12716,7 +12524,8 @@ rev%?\t%0, %1 rev%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_expand "arm_legacy_rev" @@ -12816,7 +12625,8 @@ revsh%?\t%0, %1 revsh%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_insn "*arm_rev16" @@ -12828,7 +12638,8 @@ rev16%?\t%0, %1 rev16%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_expand "bswaphi2" diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index 317e4cd4ad6..7df84d52481 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -66,14 +66,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "1020alu_op" 1 (and (eq_attr "tune" 
"arm1020e,arm1022e") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "1020alu_shift_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-register operand @@ -82,7 +89,9 @@ ;; the execute stage. (define_insn_reservation "1020alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "1020a_e*2,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -270,7 +279,7 @@ ;; first execute state. We model this by using 1020a_e in the first cycle. 
(define_insn_reservation "v10_ffarith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) "1020a_e+v10_fmac") (define_insn_reservation "v10_farith" 5 @@ -280,7 +289,7 @@ (define_insn_reservation "v10_cvt" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvti2f,f_cvtf2i")) "1020a_e+v10_fmac") (define_insn_reservation "v10_fmul" 6 @@ -290,12 +299,12 @@ (define_insn_reservation "v10_fdivs" 18 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "1020a_e+v10_ds*14") (define_insn_reservation "v10_fdivd" 32 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "1020a_e+v10_fmac+v10_ds*28") (define_insn_reservation "v10_floads" 4 @@ -316,7 +325,7 @@ (define_insn_reservation "v10_c2v" 4 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "1020a_e+1020l_e+v10_ls1,v10_ls2") (define_insn_reservation "v10_fstores" 1 @@ -331,7 +340,7 @@ (define_insn_reservation "v10_v2c" 1 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "1020a_e+1020l_e,1020l_m,1020l_w") (define_insn_reservation "v10_to_cpsr" 2 diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md index 9112122d67b..f5a0447f5da 100644 --- a/gcc/config/arm/arm1026ejs.md +++ b/gcc/config/arm/arm1026ejs.md @@ -66,14 +66,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "alu_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-constant 
operand (define_insn_reservation "alu_shift_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-register operand @@ -82,7 +89,9 @@ ;; the execute stage. (define_insn_reservation "alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "a_e*2,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md index f83b9d14f2b..f6e0b8da8b6 100644 --- a/gcc/config/arm/arm1136jfs.md +++ b/gcc/config/arm/arm1136jfs.md @@ -75,14 +75,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "11_alu_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "11_alu_shift_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-register operand @@ -91,7 +98,9 @@ ;; the shift stage. 
(define_insn_reservation "11_alu_shift_reg_op" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "e_1*2,e_2,e_3,e_wb") ;; alu_ops can start sooner, if there is no shifter dependency diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md index 8c38e86ce66..d2b0e9e3cf8 100644 --- a/gcc/config/arm/arm926ejs.md +++ b/gcc/config/arm/arm926ejs.md @@ -58,9 +58,16 @@ ;; ALU operations with no shifted operand (define_insn_reservation "9_alu_op" 1 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,arlo_shift,\ + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + shift_imm,shift_reg,extend,\ mov_imm,mov_reg,mov_shift,\ - mvn_imm,mvn_reg,mvn_shift")) + mvn_imm,mvn_reg,mvn_shift,\ + multiple,no_insn")) "e,m,w") ;; ALU operations with a shift-by-register operand @@ -69,7 +76,9 @@ ;; the execute stage. 
(define_insn_reservation "9_alu_shift_reg_op" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a15-neon.md b/gcc/config/arm/cortex-a15-neon.md index bfa2f5e8818..6eb8268321a 100644 --- a/gcc/config/arm/cortex-a15-neon.md +++ b/gcc/config/arm/cortex-a15-neon.md @@ -93,389 +93,345 @@ (define_insn_reservation "cortex_a15_neon_int_1" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_2" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_3" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_4" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_5" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vqneg_vqabs" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vmov" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vaba" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vaba")) + 
(eq_attr "type" "neon_vaba")) "ca15_issue1,ca15_cx_ialu_with_acc") (define_insn_reservation "cortex_a15_neon_vaba_qqq" 8 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "ca15_issue2,ca15_cx_ialu_with_acc*2") (define_insn_reservation "cortex_a15_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_qqq_8_16")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mla_ddd_32_qqd_16_ddd_32_scalar_\ - qdd_64_32_long_scalar_qdd_64_32_long" 7 + qdd_64_32_lotype_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mla_qqq_32_qqd_32_scalar" 7 (and (eq_attr "tune" 
"cortexa15") - (eq_attr "neon_type" - "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mul_qqd_32_scalar" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_shift_1" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_shift_2" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_shift_3" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") (define_insn_reservation "cortex_a15_neon_vshl_ddd" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_vqshl_vrshl_vqrshl_qqq" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) 
"ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") (define_insn_reservation "cortex_a15_neon_vsra_vrsra" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "ca15_issue1,ca15_cx_ishf_with_acc") (define_insn_reservation "cortex_a15_neon_fp_vadd_ddd_vabs_dd" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "ca15_issue1,ca15_cx_falu") (define_insn_reservation "cortex_a15_neon_fp_vadd_qqq_vabs_qq" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "ca15_issue2,ca15_cx_falu_2") (define_insn_reservation "cortex_a15_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "ca15_issue1,ca15_cx_fmul") (define_insn_reservation "cortex_a15_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) "ca15_issue2,ca15_cx_fmul_2") (define_insn_reservation "cortex_a15_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vmla_qqq" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "ca15_issue2,ca15_cx_fmac_2") (define_insn_reservation "cortex_a15_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vmla_qqq_scalar" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "ca15_issue2,ca15_cx_fmac_2") 
(define_insn_reservation "cortex_a15_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vrecps_vrsqrts_qqq" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "ca15_issue2,ca15_cx_fmac_2") (define_insn_reservation "cortex_a15_neon_bp_simple" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "ca15_issue3,ca15_ls+ca15_cx_perm_2,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_bp_2cycle" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "ca15_issue1,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_bp_3cycle" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "ca15_issue3,ca15_cx_ialu+ca15_cx_perm_2,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_vld1_1_2_regs" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "ca15_issue2,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld1_3_4_regs" 8 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "ca15_issue3,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld2_2_regs_vld1_vld2_all_lanes" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "ca15_issue3,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld2_4_regs" 12 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld2_4_regs")) + (eq_attr "type" 
"neon_vld2_4_regs")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") (define_insn_reservation "cortex_a15_neon_vld3_vld4" 12 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") (define_insn_reservation "cortex_a15_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") (define_insn_reservation "cortex_a15_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_str*3") (define_insn_reservation "cortex_a15_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "ca15_issue3,ca15_issue3+ca15_cx_perm_2+ca15_ls1+ca15_ls2,\ ca15_issue3+ca15_str,ca15_str*3") (define_insn_reservation "cortex_a15_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "ca15_issue3,ca15_issue3+ca15_cx_perm_2+ca15_ls1+ca15_ls2,ca15_str*4") (define_insn_reservation "cortex_a15_neon_vld1_vld2_lane" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "ca15_issue3,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld3_vld4_lane" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "ca15_issue3,ca15_issue3+ca15_ls,ca15_issue3+ca15_ldr") (define_insn_reservation "cortex_a15_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst1_vst2_lane")) + (eq_attr "type" 
"neon_vst1_vst2_lane")) "ca15_issue3,ca15_cx_perm+ca15_ls,ca15_str") (define_insn_reservation "cortex_a15_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") (define_insn_reservation "cortex_a15_neon_vld3_vld4_all_lanes" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "ca15_issue3,ca15_issue3+ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_ldm_2" 20 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_ldm_2")) + (eq_attr "type" "neon_ldm_2")) "ca15_issue3*6") (define_insn_reservation "cortex_a15_neon_stm_2" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_stm_2")) + (eq_attr "type" "neon_stm_2")) "ca15_issue3*6") (define_insn_reservation "cortex_a15_neon_mcr" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mcr")) + (eq_attr "type" "neon_mcr")) "ca15_issue2,ca15_ls,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_mcr_2_mcrr" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "ca15_issue2,ca15_ls1+ca15_ls2") (define_insn_reservation "cortex_a15_neon_mrc" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mrc")) + (eq_attr "type" "neon_mrc")) "ca15_issue1,ca15_ls") (define_insn_reservation "cortex_a15_neon_mrrc" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "ca15_issue2,ca15_ls1+ca15_ls2") (define_insn_reservation "cortex_a15_vfp_const" 4 @@ -515,7 +471,7 @@ (define_insn_reservation "cortex_a15_vfp_cvt" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "ca15_issue1,ca15_cx_vfp") (define_insn_reservation "cortex_a15_vfp_cmpd" 8 @@ -535,7 +491,7 @@ 
(define_insn_reservation "cortex_a15_vfp_cpys" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fcpys")) + (eq_attr "type" "fmov")) "ca15_issue1,ca15_cx_perm") (define_insn_reservation "cortex_a15_vfp_ariths" 7 @@ -545,12 +501,12 @@ (define_insn_reservation "cortex_a15_vfp_divs" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ca15_issue1,ca15_cx_ik") (define_insn_reservation "cortex_a15_vfp_divd" 18 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "ca15_issue1,ca15_cx_ik") ;; Define bypasses. diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index 4ad87121d6d..ccad6207608 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -61,25 +61,32 @@ ;; Simple ALU without shift (define_insn_reservation "cortex_a15_alu" 2 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") ;; ALU ops with immediate shift (define_insn_reservation "cortex_a15_alu_shift" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "extend,arlo_shift,,mov_shift,mvn_shift") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + mov_shift,mvn_shift")) "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ;; ALU ops with register controlled shift (define_insn_reservation "cortex_a15_alu_shift_reg" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg") 
- (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") @@ -89,15 +96,13 @@ ;; 32-bit multiplies (define_insn_reservation "cortex_a15_mult32" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "mul32" "yes") - (eq_attr "neon_type" "none"))) + (eq_attr "mul32" "yes")) "ca15_issue1,ca15_mx") ;; 64-bit multiplies (define_insn_reservation "cortex_a15_mult64" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "mul64" "yes") - (eq_attr "neon_type" "none"))) + (eq_attr "mul64" "yes")) "ca15_issue1,ca15_mx*2") ;; Integer divide @@ -114,8 +119,7 @@ ;; Block all issue pipes for a cycle (define_insn_reservation "cortex_a15_block" 1 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "block") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "block")) "ca15_issue3") ;; Branch execution Unit @@ -124,8 +128,7 @@ ;; No latency as there is no result (define_insn_reservation "cortex_a15_branch" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "branch") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "branch")) "ca15_issue1,ca15_bx") ;; Load-store execution Unit @@ -133,29 +136,25 @@ ;; Loads of up to two words. (define_insn_reservation "cortex_a15_load1" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "load_byte,load1,load2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load_byte,load1,load2")) "ca15_issue1,ca15_ls,ca15_ldr,nothing") ;; Loads of three or four words. (define_insn_reservation "cortex_a15_load3" 5 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "load3,load4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load3,load4")) "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") ;; Stores of up to two words. 
(define_insn_reservation "cortex_a15_store1" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "store1,store2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store1,store2")) "ca15_issue1,ca15_ls,ca15_str") ;; Stores of three or four words. (define_insn_reservation "cortex_a15_store3" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "store3,store4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store3,store4")) "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") ;; We include Neon.md here to ensure that the branch can block the Neon units. @@ -165,8 +164,7 @@ ;; pipeline. The result however is available the next cycle. (define_insn_reservation "cortex_a15_call" 1 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "call") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "call")) "ca15_issue3,\ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx_ij+ca15_cx_ik+ca15_ls1+ca15_ls2+\ ca15_cx_imac1+ca15_cx_ialu1+ca15_cx_ialu2+ca15_cx_ishf+\ diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md index 1400c47d95a..22e0a08f38e 100644 --- a/gcc/config/arm/cortex-a5.md +++ b/gcc/config/arm/cortex-a5.md @@ -58,13 +58,22 @@ (define_insn_reservation "cortex_a5_alu" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "cortex_a5_ex1") (define_insn_reservation "cortex_a5_alu_shift" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "cortex_a5_ex1") @@ -159,7 +168,8 @@ 
(define_insn_reservation "cortex_a5_fpalu" 4 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ fcmps, fcmpd")) "cortex_a5_ex1+cortex_a5_fpadd_pipe") @@ -223,14 +233,14 @@ (define_insn_reservation "cortex_a5_fdivs" 14 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13") ;; ??? Similarly for fdivd. (define_insn_reservation "cortex_a5_fdivd" 29 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -243,12 +253,12 @@ (define_insn_reservation "cortex_a5_r2f" 4 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a5_ex1") (define_insn_reservation "cortex_a5_f2r" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a5_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md index 2f9107994c9..48d0d03853f 100644 --- a/gcc/config/arm/cortex-a53.md +++ b/gcc/config/arm/cortex-a53.md @@ -67,14 +67,22 @@ (define_insn_reservation "cortex_a53_alu" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,csel,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "cortex_a53_slot_any") (define_insn_reservation "cortex_a53_alu_shift" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "arlo_shift,arlo_shift_reg,\ - 
mov_shift,mov_shift_reg,\ + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "cortex_a53_slot_any") @@ -130,12 +138,12 @@ (define_insn_reservation "cortex_a53_load1" 3 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "load_byte,load1")) + (eq_attr "type" "load_byte,load1,load_acq")) "cortex_a53_slot_any+cortex_a53_ls") (define_insn_reservation "cortex_a53_store1" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "store1")) + (eq_attr "type" "store1,store_rel")) "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store") (define_insn_reservation "cortex_a53_load2" 3 @@ -201,8 +209,9 @@ (define_insn_reservation "cortex_a53_fpalu" 4 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ - fcmps, fcmpd")) + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd, fcsel")) "cortex_a53_slot0+cortex_a53_fpadd_pipe") (define_insn_reservation "cortex_a53_fconst" 2 @@ -230,12 +239,12 @@ (define_insn_reservation "cortex_a53_fdivs" 14 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13") (define_insn_reservation "cortex_a53_fdivd" 29 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -244,12 +253,12 @@ (define_insn_reservation "cortex_a53_r2f" 4 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a53_slot0") (define_insn_reservation "cortex_a53_f2r" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a53_slot0") 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md index e14413d5083..a72a88d90af 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -67,8 +67,7 @@ (define_insn_reservation "cortex_a7_branch" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "branch") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "branch")) "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") ;; Call cannot dual-issue as an older instruction. It can dual-issue @@ -77,8 +76,7 @@ ;; cycle. (define_insn_reservation "cortex_a7_call" 1 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "call") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "call")) "(cortex_a7_ex2|cortex_a7_both)+cortex_a7_branch") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -88,27 +86,31 @@ ;; ALU instruction with an immediate operand can dual-issue. (define_insn_reservation "cortex_a7_alu_imm" 2 (and (eq_attr "tune" "cortexa7") - (and (ior (eq_attr "type" "arlo_imm,mov_imm,mvn_imm") - (ior (eq_attr "type" "extend") - (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") - (not (eq_attr "length" "8"))))) - (eq_attr "neon_type" "none"))) + (ior (eq_attr "type" "adr,alu_imm,alus_imm,logic_imm,logics_imm,\ + mov_imm,mvn_imm,extend") + (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") + (not (eq_attr "length" "8"))))) "cortex_a7_ex2|cortex_a7_ex1") ;; ALU instruction with register operands can dual-issue ;; with a younger immediate-based instruction. 
(define_insn_reservation "cortex_a7_alu_reg" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "arlo_reg,shift,shift_reg,mov_reg,mvn_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + bfm,rev,\ + shift_imm,shift_reg,mov_reg,mvn_reg")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_alu_shift" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "arlo_shift,arlo_shift_reg,\ - mov_shift,mov_shift_reg,\ - mvn_shift,mvn_shift_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) "cortex_a7_ex1") ;; Forwarding path for unshifted operands. @@ -129,9 +131,8 @@ (define_insn_reservation "cortex_a7_mul" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "neon_type" "none") - (ior (eq_attr "mul32" "yes") - (eq_attr "mul64" "yes")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a7_both") ;; Forward the result of a multiply operation to the accumulator @@ -156,50 +157,42 @@ (define_insn_reservation "cortex_a7_load1" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load_byte,load1") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load_byte,load1")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_store1" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store1") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store1")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_load2" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load2")) "cortex_a7_both") (define_insn_reservation "cortex_a7_store2" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store2")) 
"cortex_a7_both") (define_insn_reservation "cortex_a7_load3" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load3") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load3")) "cortex_a7_both, cortex_a7_ex1") (define_insn_reservation "cortex_a7_store3" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store4")) "cortex_a7_both, cortex_a7_ex1") (define_insn_reservation "cortex_a7_load4" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load4")) "cortex_a7_both, cortex_a7_both") (define_insn_reservation "cortex_a7_store4" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store3") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store3")) "cortex_a7_both, cortex_a7_both") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -211,9 +204,8 @@ (define_insn_reservation "cortex_a7_fpalu" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\ - f_cvt, fcmps, fcmpd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\ + f_cvt, f_cvtf2i, f_cvti2f, fcmps, fcmpd")) "cortex_a7_ex1+cortex_a7_fpadd_pipe") ;; For fconsts and fconstd, 8-bit immediate data is passed directly from @@ -221,8 +213,7 @@ (define_insn_reservation "cortex_a7_fconst" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fconsts,fconstd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fconsts,fconstd")) "cortex_a7_ex1+cortex_a7_fpadd_pipe") ;; We should try not to attempt to issue a single-precision multiplication in @@ -231,13 +222,12 @@ (define_insn_reservation "cortex_a7_fpmuls" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmuls") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmuls")) "cortex_a7_ex1+cortex_a7_fpmul_pipe") (define_insn_reservation "cortex_a7_neon_mul" 4 (and (eq_attr "tune" "cortexa7") - 
(eq_attr "neon_type" + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ neon_mul_qqq_8_16_32_ddd_32,\ neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ @@ -249,13 +239,12 @@ (define_insn_reservation "cortex_a7_fpmacs" 8 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmacs,ffmas") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmacs,ffmas")) "cortex_a7_ex1+cortex_a7_fpmul_pipe") (define_insn_reservation "cortex_a7_neon_mla" 8 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ neon_mla_qqq_8_16,\ neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ @@ -276,20 +265,17 @@ (define_insn_reservation "cortex_a7_fpmuld" 7 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmuld") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmuld")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") (define_insn_reservation "cortex_a7_fpmacd" 11 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmacd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmacd")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") (define_insn_reservation "cortex_a7_fpfmad" 8 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "ffmad") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "ffmad")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") (define_bypass 7 "cortex_a7_fpmacd" @@ -302,14 +288,12 @@ (define_insn_reservation "cortex_a7_fdivs" 16 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fdivs") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") (define_insn_reservation "cortex_a7_fdivd" 31 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fdivd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -320,14 +304,12 @@ (define_insn_reservation "cortex_a7_r2f" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "r_2_f") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a7_both") (define_insn_reservation "cortex_a7_f2r" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_2_r") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a7_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -339,8 +321,7 @@ (define_insn_reservation "cortex_a7_f_flags" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_flag") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_flag")) "cortex_a7_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -349,26 +330,22 @@ (define_insn_reservation "cortex_a7_f_loads" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_loads") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_loads")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_f_loadd" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_loadd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_loadd")) "cortex_a7_both") (define_insn_reservation "cortex_a7_f_stores" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_stores") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_stores")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_f_stored" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_stored") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_stored")) "cortex_a7_both") ;; Load-to-use for floating-point values has a penalty of one cycle, @@ -389,22 +366,21 @@ (define_insn_reservation "cortex_a7_neon" 4 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" - "!none,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ - 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ - neon_mla_qqq_32_qqd_32_scalar,\ - neon_mul_ddd_16_scalar_32_16_long_scalar,\ - neon_mul_qqd_32_scalar,\ - neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ - neon_fp_vmul_ddd,\ - neon_fp_vmul_qqd,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vmla_ddd_scalar,\ - neon_fp_vmla_qqq_scalar")) + (eq_attr "type" + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar")) "cortex_a7_both*2") diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md index 2f0cc7b3a5a..b7773891669 100644 --- a/gcc/config/arm/cortex-a8-neon.md +++ b/gcc/config/arm/cortex-a8-neon.md @@ -159,12 +159,12 @@ (define_insn_reservation "cortex_a8_vfp_divs" 37 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a8_vfp,cortex_a8_vfplite*36") (define_insn_reservation "cortex_a8_vfp_divd" 65 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a8_vfp,cortex_a8_vfplite*64") ;; Comparisons can actually take 7 cycles sometimes instead of four, @@ -172,24 +172,24 @@ ;; take four cycles, we pick that latency. 
(define_insn_reservation "cortex_a8_vfp_farith" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) "cortex_a8_vfp,cortex_a8_vfplite*3") (define_insn_reservation "cortex_a8_vfp_cvt" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_a8_vfp,cortex_a8_vfplite*6") ;; NEON -> core transfers. (define_insn_reservation "cortex_a8_neon_mrc" 20 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mrc")) + (eq_attr "type" "neon_mrc")) "cortex_a8_neon_ls") (define_insn_reservation "cortex_a8_neon_mrrc" 21 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "cortex_a8_neon_ls_2") ;; The remainder of this file is auto-generated by neon-schedgen. @@ -198,48 +198,48 @@ ;; produce a result at N3. (define_insn_reservation "cortex_a8_neon_int_1" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N3. (define_insn_reservation "cortex_a8_neon_int_2" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a8_neon_int_3" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N4. 
(define_insn_reservation "cortex_a8_neon_int_4" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N4. (define_insn_reservation "cortex_a8_neon_int_5" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "cortex_a8_neon_dp") ;; Instructions using this reservation produce a result at N3. (define_insn_reservation "cortex_a8_neon_vmov" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -247,7 +247,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_vaba" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vaba")) + (eq_attr "type" "neon_vaba")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -255,35 +255,35 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. 
(define_insn_reservation "cortex_a8_neon_vsma" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vsma")) + (eq_attr "type" "neon_vsma")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -291,7 +291,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -299,7 +299,7 @@ ;; produce a result at N6 on cycle 2. 
(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_qqq_8_16")) + (eq_attr "type" "neon_mla_qqq_8_16")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -307,7 +307,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -315,21 +315,21 @@ ;; produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "cortex_a8_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6. (define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "cortex_a8_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -337,84 +337,84 @@ ;; produce a result at N6. 
(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a8_neon_shift_1" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a8_neon_shift_2" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3 on cycle 2. (define_insn_reservation "cortex_a8_neon_shift_3" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N1. (define_insn_reservation "cortex_a8_neon_vshl_ddd" 1 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4 on cycle 2. (define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. 
(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "cortex_a8_neon_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vsum" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vsum")) + (eq_attr "type" "neon_fp_vsum")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -422,7 +422,7 @@ ;; produce a result at N9. 
(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -430,7 +430,7 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -438,7 +438,7 @@ ;; produce a result at N9. (define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -446,152 +446,152 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9. (define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9 on cycle 2. 
(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2. (define_insn_reservation "cortex_a8_neon_bp_simple" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "cortex_a8_neon_perm") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "cortex_a8_neon_perm_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "cortex_a8_neon_perm_3") ;; Instructions using this reservation produce a result at N1. (define_insn_reservation "cortex_a8_neon_ldr" 1 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_ldr")) + (eq_attr "type" "neon_ldr")) "cortex_a8_neon_ls") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_str" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_str")) + (eq_attr "type" "neon_str")) "cortex_a8_neon_ls") ;; Instructions using this reservation produce a result at N1 on cycle 2. (define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N1 on cycle 3. 
(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld2_4_regs")) + (eq_attr "type" "neon_vld2_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 4. (define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1. 
(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 5. (define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "cortex_a8_neon_ls_5") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_vst2_lane")) + (eq_attr "type" "neon_vst1_vst2_lane")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
(define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a8_neon_mcr" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mcr")) + (eq_attr "type" "neon_mcr")) "cortex_a8_neon_perm") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "cortex_a8_neon_perm_2") ;; Exceptions to the default latencies. diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md index 1113a45ff0e..1eade5e1244 100644 --- a/gcc/config/arm/cortex-a8.md +++ b/gcc/config/arm/cortex-a8.md @@ -85,19 +85,25 @@ ;; (source read in E2 and destination available at the end of that cycle). (define_insn_reservation "cortex_a8_alu" 2 (and (eq_attr "tune" "cortexa8") - (ior (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") - (eq_attr "neon_type" "none")) - (eq_attr "type" "clz"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,\ + shift_imm,shift_reg,\ + multiple,no_insn")) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "extend,arlo_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend")) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift_reg" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "arlo_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) "cortex_a8_default") ;; Move instructions. 
@@ -105,7 +111,8 @@ (define_insn_reservation "cortex_a8_mov" 1 (and (eq_attr "tune" "cortexa8") (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs")) "cortex_a8_default") ;; Exceptions to the default latencies for data processing instructions. diff --git a/gcc/config/arm/cortex-a9-neon.md b/gcc/config/arm/cortex-a9-neon.md index 9688edc8f72..2c9d5db5bd8 100644 --- a/gcc/config/arm/cortex-a9-neon.md +++ b/gcc/config/arm/cortex-a9-neon.md @@ -109,12 +109,12 @@ ;; NEON -> core transfers. (define_insn_reservation "ca9_neon_mrc" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mrc")) + (eq_attr "type" "neon_mrc")) "ca9_issue_vfp_neon + cortex_a9_neon_mcr") (define_insn_reservation "ca9_neon_mrrc" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "ca9_issue_vfp_neon + cortex_a9_neon_mcr") ;; The remainder of this file is auto-generated by neon-schedgen. @@ -123,48 +123,48 @@ ;; produce a result at N3. (define_insn_reservation "cortex_a9_neon_int_1" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N3. (define_insn_reservation "cortex_a9_neon_int_2" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a9_neon_int_3" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N4. 
(define_insn_reservation "cortex_a9_neon_int_4" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N4. (define_insn_reservation "cortex_a9_neon_int_5" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "cortex_a9_neon_dp") ;; Instructions using this reservation produce a result at N3. (define_insn_reservation "cortex_a9_neon_vmov" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -172,7 +172,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_vaba" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vaba")) + (eq_attr "type" "neon_vaba")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -180,35 +180,35 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. 
(define_insn_reservation "cortex_a9_neon_vsma" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vsma")) + (eq_attr "type" "neon_vsma")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -216,7 +216,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -224,7 +224,7 @@ ;; produce a result at N6 on cycle 2. 
(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_qqq_8_16")) + (eq_attr "type" "neon_mla_qqq_8_16")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -232,7 +232,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -240,21 +240,21 @@ ;; produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "cortex_a9_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6. (define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "cortex_a9_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -262,84 +262,84 @@ ;; produce a result at N6. 
(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a9_neon_shift_1" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a9_neon_shift_2" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3 on cycle 2. (define_insn_reservation "cortex_a9_neon_shift_3" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N1. (define_insn_reservation "cortex_a9_neon_vshl_ddd" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4 on cycle 2. (define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. 
(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "cortex_a9_neon_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "cortex_a9_neon_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vsum" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vsum")) + (eq_attr "type" "neon_fp_vsum")) "cortex_a9_neon_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -347,7 +347,7 @@ ;; produce a result at N9. 
(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -355,7 +355,7 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -363,7 +363,7 @@ ;; produce a result at N9. (define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -371,152 +371,152 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9. (define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9 on cycle 2. 
(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2. (define_insn_reservation "cortex_a9_neon_bp_simple" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "cortex_a9_neon_perm") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "cortex_a9_neon_perm_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "cortex_a9_neon_perm_3") ;; Instructions using this reservation produce a result at N1. (define_insn_reservation "cortex_a9_neon_ldr" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_ldr")) + (eq_attr "type" "neon_ldr")) "cortex_a9_neon_ls") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_str" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_str")) + (eq_attr "type" "neon_str")) "cortex_a9_neon_ls") ;; Instructions using this reservation produce a result at N1 on cycle 2. (define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation produce a result at N1 on cycle 3. 
(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld2_4_regs")) + (eq_attr "type" "neon_vld2_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 4. (define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation read their source operands at N1. 
(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 5. (define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "cortex_a9_neon_ls_5") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_vst2_lane")) + (eq_attr "type" "neon_vst1_vst2_lane")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a9_neon_mcr" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mcr")) + (eq_attr "type" "neon_mcr")) "cortex_a9_neon_perm") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "cortex_a9_neon_perm_2") ;; Exceptions to the default latencies. diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md index 11dc0b32c38..7c62d8489ae 100644 --- a/gcc/config/arm/cortex-a9.md +++ b/gcc/config/arm/cortex-a9.md @@ -80,17 +80,24 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") ;; which can go down E2 without any problem. (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") - (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mov_shift_reg,mov_shift") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ + mrs,multiple,no_insn")) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. 
(define_insn_reservation "cortex_a9_dp_shift" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "arlo_shift_reg,extend,arlo_shift,\ - mvn_shift,mvn_shift_reg")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mvn_shift,mvn_shift_reg")) "cortex_a9_p0_shift | cortex_a9_p1_shift") ;; Loads have a latency of 4 cycles. @@ -200,7 +207,7 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; Pipelining for VFP instructions. ;; Issue happens either along load store unit or the VFP / Neon unit. ;; Pipeline Instruction Classification. -;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r +;; FPS - fmov, ffariths, ffarithd,f_mcr,f_mcrr,f_mrc,f_mrrc ;; FP_ADD - fadds, faddd, fcmps (1) ;; FPMUL - fmul{s,d}, fmac{s,d}, ffma{s,d} ;; FPDIV - fdiv{s,d} @@ -213,7 +220,8 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. (define_insn_reservation "cortex_a9_fps" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) + (eq_attr "type" "fmov, fconsts, fconstd, ffariths, ffarithd,\ + f_mcr, f_mcrr, f_mrc, f_mrrc, f_flag")) "ca9_issue_vfp_neon + ca9fps") (define_bypass 1 @@ -225,7 +233,7 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") (define_insn_reservation "cortex_a9_fadd" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fadds, faddd, f_cvt")) + (eq_attr "type" "fadds, faddd, f_cvt, f_cvtf2i, f_cvti2f")) "ca9fp_add") (define_insn_reservation "cortex_a9_fcmp" 1 @@ -263,12 +271,12 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; Division pipeline description. 
(define_insn_reservation "cortex_a9_fdivs" 15 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") (define_insn_reservation "cortex_a9_fdivd" 25 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") ;; Include Neon pipeline description diff --git a/gcc/config/arm/cortex-m4-fpu.md b/gcc/config/arm/cortex-m4-fpu.md index 4ce3f10f0de..2190938b65c 100644 --- a/gcc/config/arm/cortex-m4-fpu.md +++ b/gcc/config/arm/cortex-m4-fpu.md @@ -30,17 +30,17 @@ ;; Integer instructions following VDIV or VSQRT complete out-of-order. (define_insn_reservation "cortex_m4_fdivs" 15 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_m4_ex_v,cortex_m4_v*13") (define_insn_reservation "cortex_m4_vmov_1" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "fcpys,fconsts")) + (eq_attr "type" "fmov,fconsts")) "cortex_m4_ex_v") (define_insn_reservation "cortex_m4_vmov_2" 2 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "f_2_r,r_2_f")) + (eq_attr "type" "f_mrc,f_mrrc,f_mcr,f_mcrr")) "cortex_m4_ex_v*2") (define_insn_reservation "cortex_m4_fmuls" 2 @@ -77,7 +77,7 @@ (define_insn_reservation "cortex_m4_f_cvt" 2 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_m4_ex_v") (define_insn_reservation "cortex_m4_f_load" 2 diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md index 53bd60cd98f..9ae4cc3143b 100644 --- a/gcc/config/arm/cortex-m4.md +++ b/gcc/config/arm/cortex-m4.md @@ -31,10 +31,18 @@ ;; ALU and multiply is one cycle. 
(define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (ior (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,\ - arlo_shift,arlo_shift_reg,\ + (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg") + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn") (ior (eq_attr "mul32" "yes") (eq_attr "mul64" "yes")))) "cortex_m4_ex") diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 597774dbd89..7a3ceeb15d7 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -78,7 +78,11 @@ ;; for the purposes of the dual-issue constraints above. (define_insn_reservation "cortex_r4_alu" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,mvn_imm,mvn_reg")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_mov" 2 @@ -88,12 +92,17 @@ (define_insn_reservation "cortex_r4_alu_shift" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg,\ + mrs,multiple,no_insn")) 
"cortex_r4_alu_shift_reg") ;; An ALU instruction followed by an ALU instruction with no early dep. diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md index 0c0bae0cd74..1bc4249d4d1 100644 --- a/gcc/config/arm/cortex-r4f.md +++ b/gcc/config/arm/cortex-r4f.md @@ -48,7 +48,7 @@ (define_insn_reservation "cortex_r4_fcpys" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fcpys")) + (eq_attr "type" "fmov")) "cortex_r4_issue_ab") (define_insn_reservation "cortex_r4_ffariths" 2 @@ -68,7 +68,7 @@ (define_insn_reservation "cortex_r4_fdivs" 17 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") (define_insn_reservation "cortex_r4_floads" 2 @@ -83,12 +83,12 @@ (define_insn_reservation "cortex_r4_mcr" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_r4_issue_ab") (define_insn_reservation "cortex_r4_mrc" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_r4_issue_ab") ;; Bypasses for normal (not early) regs. @@ -131,7 +131,7 @@ ;; out of order. Chances are this is not a pipelined operation. 
(define_insn_reservation "cortex_r4_fdivd" 97 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_r4_single_issue*3") (define_insn_reservation "cortex_r4_ffarithd" 2 @@ -146,7 +146,7 @@ (define_insn_reservation "cortex_r4_f_cvt" 8 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_r4_single_issue*3") (define_insn_reservation "cortex_r4_f_memd" 8 diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md index 9ec92d60dc5..401abd3c0a0 100644 --- a/gcc/config/arm/fa526.md +++ b/gcc/config/arm/fa526.md @@ -62,13 +62,22 @@ ;; ALU operations (define_insn_reservation "526_alu_op" 1 (and (eq_attr "tune" "fa526") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "fa526_core") (define_insn_reservation "526_alu_shift_op" 2 (and (eq_attr "tune" "fa526") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fa526_core") diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md index e61242886d7..88347bc2d96 100644 --- a/gcc/config/arm/fa606te.md +++ b/gcc/config/arm/fa606te.md @@ -62,10 +62,18 @@ ;; ALU operations (define_insn_reservation "606te_alu_op" 1 (and (eq_attr "tune" "fa606te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg, - extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + 
shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) "fa606te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md index 04d2a5cf33f..e6790a21215 100644 --- a/gcc/config/arm/fa626te.md +++ b/gcc/config/arm/fa626te.md @@ -68,13 +68,22 @@ ;; ALU operations (define_insn_reservation "626te_alu_op" 1 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "fa626te_core") (define_insn_reservation "626te_alu_shift_op" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fa626te_core") diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md index 342b9bf5d33..d0a03981eec 100644 --- a/gcc/config/arm/fa726te.md +++ b/gcc/config/arm/fa726te.md @@ -86,7 +86,12 @@ ;; Other ALU instructions 2 cycles. 
(define_insn_reservation "726te_alu_op" 1 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mrs,multiple,no_insn")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;; ALU operations with a shift-by-register operand. @@ -95,12 +100,14 @@ ;; it takes 3 cycles. (define_insn_reservation "726te_alu_shift_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "extend,arlo_shift")) + (eq_attr "type" "extend,alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") (define_insn_reservation "726te_alu_shift_reg_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "arlo_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiplication Instructions diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md index 944645b9ead..ffb68570e37 100644 --- a/gcc/config/arm/fmp626.md +++ b/gcc/config/arm/fmp626.md @@ -63,13 +63,19 @@ ;; ALU operations (define_insn_reservation "mp626_alu_op" 1 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ mov_imm,mov_reg,mvn_imm,mvn_reg")) "fmp626_core") (define_insn_reservation "mp626_alu_shift_op" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ 
mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fmp626_core") diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index d84929f3d1f..c7d7079b9de 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -391,7 +391,7 @@ (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") (V8HI "x") (V4SI "t") (V4SF "t")]) -;; Predicates used for setting neon_type +;; Predicates used for setting type for neon instructions (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") (V4HI "false") (V8HI "false") diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md index f1f0a5c5150..62cdae21e3f 100644 --- a/gcc/config/arm/iwmmxt.md +++ b/gcc/config/arm/iwmmxt.md @@ -155,7 +155,8 @@ (const_int 8) (const_int 4))] (const_int 4))) - (set_attr "type" "*,*,*,load2,store2,wmmx_wmov,wmmx_tmcrr,wmmx_tmrrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,f_mcrr,f_mrrc,\ + ffarithd,f_loadd,f_stored") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*") (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")] ) @@ -187,7 +188,8 @@ default: gcc_unreachable (); }" - [(set_attr "type" "*,*,*,*,load1,store1,wmmx_tmcr,wmmx_tmrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,fcpys,f_loads,f_stores") + [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,f_mcr,f_mrc,\ + fmov,f_loads,f_stores") (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*") diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index cb0aad19c34..232c38d28ff 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -85,7 +85,7 @@ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) #undef ASAN_CC1_SPEC -#define ASAN_CC1_SPEC "%{fsanitize=*:-funwind-tables}" +#define ASAN_CC1_SPEC 
"%{%:sanitize(address):-funwind-tables}" #undef CC1_SPEC #define CC1_SPEC \ @@ -99,7 +99,7 @@ #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h index 488efa4ba15..475e22079fc 100644 --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -44,9 +44,9 @@ #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ #undef MULTILIB_DEFAULTS -#define MULTILIB_DEFAULTS \ - { "marm", "mlittle-endian", "mfloat-abi=hard", "mno-thumb-interwork" } /* Now we define the strings used to build the spec file. */ #undef LIB_SPEC diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md index 0e2c443721e..880789600e0 100644 --- a/gcc/config/arm/marvell-pj4.md +++ b/gcc/config/arm/marvell-pj4.md @@ -53,26 +53,42 @@ (define_insn_reservation "pj4_alu" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") (not (eq_attr "conds" "set"))) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") (eq_attr "conds" "set")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" 
"arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (not (eq_attr "conds" "set")) (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (eq_attr "conds" "set") (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") @@ -80,14 +96,20 @@ (define_insn_reservation "pj4_alu_shift" 1 (and (eq_attr "tune" "marvell_pj4") (not (eq_attr "conds" "set")) - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") (eq_attr "conds" "set") - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") @@ -171,11 +193,11 @@ (define_insn_reservation "pj4_vfp_divs" 20 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fdivs")) "pj4_is,nothing*2,vissue,vdiv*18,nothing") + (eq_attr "type" "fdivs, fsqrts")) 
"pj4_is,nothing*2,vissue,vdiv*18,nothing") (define_insn_reservation "pj4_vfp_divd" 34 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fdivd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") + (eq_attr "type" "fdivd, fsqrtd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") (define_insn_reservation "pj4_vfp_mac" 9 (and (eq_attr "tune" "marvell_pj4") @@ -186,8 +208,9 @@ (define_insn_reservation "pj4_vfp_cpy" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,\ - fcmps,fcmpd,f_cvt")) "pj4_is,nothing*2,vissue,vfast,nothing*2") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,\ + fcmps,fcmpd,f_cvt,f_cvtf2i,f_cvti2f")) +"pj4_is,nothing*2,vissue,vfast,nothing*2") ;; Enlarge latency, and wish that more nondependent insns are ;; scheduled immediately after VFP load. @@ -201,9 +224,9 @@ (define_insn_reservation "pj4_vfp_to_core" 7 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "f_2_r,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") + (eq_attr "type" "f_mrc,f_mrrc,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") (define_insn_reservation "pj4_core_to_vfp" 2 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "r_2_f")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") + (eq_attr "type" "f_mcr,f_mcrr")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml index 7dacbab2625..b3699563d48 100644 --- a/gcc/config/arm/neon-schedgen.ml +++ b/gcc/config/arm/neon-schedgen.ml @@ -480,7 +480,7 @@ let emit_insn_reservations core = Printf.printf "(define_insn_reservation \"%s_%s\" %d\n" corestring producer latency; Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring; - Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer; + Printf.printf " (eq_attr \"type\" \"%s\"))\n" producer; let str = match reservation with Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 
e8d2285fa81..ae83dba5f89 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -20,7 +20,7 @@ ;; Attribute used to permit string comparisons against in -;; neon_type attribute definitions. +;; type attribute definitions. (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) (define_insn "*neon_mov" @@ -60,8 +60,8 @@ default: return output_move_double (operands, true, NULL); } } - [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,f_stored,*,f_loadd,*,*,mov_reg,load2,store2") + [(set_attr "type" "neon_int_1,f_stored,neon_vmov,f_loadd,neon_mrrc,\ + neon_mcr_2_mcrr,mov_reg,load2,store2") (set_attr "length" "4,4,4,4,4,4,8,8,8") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -104,9 +104,8 @@ default: return output_move_quad (operands); } } - [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ - neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,*,*,*,*,*,mov_reg,load4,store4") + [(set_attr "type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ + neon_mrrc,neon_mcr_2_mcrr,mov_reg,load4,store4") (set_attr "length" "4,8,4,8,8,8,16,8,16") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -150,7 +149,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2") + [(set_attr "type" "neon_int_1,neon_stm_2,neon_ldm_2") (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) (define_split @@ -242,7 +241,7 @@ [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" { /* This pattern is not permitted to fail during expansion: if both arguments are non-registers (e.g. 
memory := constant, which can be created by the @@ -256,35 +255,35 @@ [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vst1.\t{%P1}, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign_neon_load" [(set (match_operand:VDX 0 "s_register_operand" "=w") (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" " Um")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vld1.\t{%P0}, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + [(set_attr "type" "neon_vld1_1_2_regs")]) (define_insn "*movmisalign_neon_store" [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vst1.\t{%q1}, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign_neon_load" [(set (match_operand:VQX 0 "s_register_operand" "=w") (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" " Um")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vld1.\t{%q0}, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + [(set_attr "type" "neon_vld1_1_2_regs")]) (define_insn "vec_set_internal" [(set (match_operand:VD 0 "s_register_operand" "=w,w") @@ -305,7 +304,7 @@ else return "vmov.\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]) + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")]) (define_insn "vec_set_internal" [(set 
(match_operand:VQ 0 "s_register_operand" "=w,w") @@ -333,7 +332,7 @@ else return "vmov.\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")] ) (define_insn "vec_setv2di_internal" @@ -355,7 +354,7 @@ else return "vmov\t%P0, %Q1, %R1"; } - [(set_attr "neon_type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")] + [(set_attr "type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")] ) (define_expand "vec_set" @@ -389,7 +388,7 @@ else return "vmov.\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extract" @@ -415,7 +414,7 @@ else return "vmov.\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extractv2di" @@ -434,7 +433,7 @@ else return "vmov\t%Q0, %R0, %P1 @ v2di"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_int_1")] ) (define_expand "vec_init" @@ -457,7 +456,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" "vadd.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -484,7 +483,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*") + [(set_attr "type" "neon_int_1,*,*,neon_int_1,*,*,*") (set_attr "conds" "*,clob,clob,*,clob,clob,clob") (set_attr "length" "*,8,8,*,8,8,8") (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] @@ -496,7 +495,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" "vsub.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -521,7 +520,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2") + [(set_attr "type" "neon_int_2,*,*,*,neon_int_2") (set_attr "conds" "*,clob,clob,clob,*") (set_attr "length" "*,8,8,8,*") (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] @@ -533,7 +532,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" "vmul.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -555,7 +554,7 @@ (match_operand:VDQ 1 "s_register_operand" "0")))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" "vmla.\t%0, %2, %3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") @@ -577,7 +576,7 @@ (match_operand:VDQ 3 "s_register_operand" "w"))))] "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" "vmls.\t%0, %2, %3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") @@ -604,7 +603,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" "vfma%?.\\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -617,7 +616,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA" "vfma%?.\\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -630,7 +629,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" "vfms%?.\\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -643,7 +642,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA" "vfms%?.\\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -656,7 +655,7 @@ NEON_VRINT))] "TARGET_NEON && TARGET_FPU_ARMV8" "vrint%?.f32\\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -676,7 +675,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -698,7 +697,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "orn3_neon" @@ -707,7 +706,7 @@ 
(match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vorn\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; TODO: investigate whether we should disable @@ -745,7 +744,7 @@ DONE; } }" - [(set_attr "neon_type" "neon_int_1,*,*,*") + [(set_attr "type" "neon_int_1,*,*,*") (set_attr "length" "*,16,8,8") (set_attr "arch" "any,a,t2,t2")] ) @@ -756,7 +755,7 @@ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vbic\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; Compare to *anddi_notdi_di. @@ -769,7 +768,7 @@ vbic\t%P0, %P1, %P2 # #" - [(set_attr "neon_type" "neon_int_1,*,*") + [(set_attr "type" "neon_int_1,*,*") (set_attr "length" "*,8,8")] ) @@ -779,7 +778,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" "veor\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "one_cmpl2" @@ -787,7 +786,7 @@ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vmvn\t%0, %1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "abs2" @@ -795,7 +794,7 @@ (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" "vabs.\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -808,7 +807,7 @@ (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" "vneg.\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -861,7 +860,7 @@ (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmin.\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*umax3_neon" @@ -870,7 +869,7 @@ (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmax.\t%0, 
%1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*smin3_neon" @@ -879,7 +878,7 @@ (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmin.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -891,7 +890,7 @@ (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmax.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -917,7 +916,7 @@ default: gcc_unreachable (); } } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -933,7 +932,7 @@ mode, VALID_NEON_QREG_MODE (mode), false); } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -949,7 +948,7 @@ mode, VALID_NEON_QREG_MODE (mode), false); } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -967,7 +966,7 @@ UNSPEC_ASHIFT_SIGNED))] "TARGET_NEON" "vshl.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -983,7 +982,7 @@ UNSPEC_ASHIFT_UNSIGNED))] "TARGET_NEON" "vshl.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -1038,7 +1037,7 @@ "@ vld1.32\t{%P0[0]}, %A1 vmov.32\t%P0[0], %1" - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")] ) (define_insn "ashldi3_neon_noclobber" @@ -1051,7 +1050,7 @@ "@ vshl.u64\t%P0, %P1, %2 vshl.u64\t%P0, %P1, %P2" - [(set_attr "neon_type" 
"neon_vshl_ddd,neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd,neon_vshl_ddd")] ) (define_insn_and_split "ashldi3_neon" @@ -1113,7 +1112,7 @@ UNSPEC_ASHIFT_SIGNED))] "TARGET_NEON && reload_completed" "vshl.s64\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) ; The shift amount needs to be negated for right-shifts @@ -1124,7 +1123,7 @@ UNSPEC_ASHIFT_UNSIGNED))] "TARGET_NEON && reload_completed" "vshl.u64\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) (define_insn "ashrdi3_neon_imm_noclobber" @@ -1134,7 +1133,7 @@ "TARGET_NEON && reload_completed && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" "vshr.s64\t%P0, %P1, %2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) (define_insn "lshrdi3_neon_imm_noclobber" @@ -1144,7 +1143,7 @@ "TARGET_NEON && reload_completed && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" "vshr.u64\t%P0, %P1, %2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) ;; ashrdi3_neon @@ -1215,7 +1214,7 @@ (match_operand: 2 "s_register_operand" "w")))] "TARGET_NEON" "vaddw.\t%q0, %q2, %P1" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "widen_usum3" @@ -1225,7 +1224,7 @@ (match_operand: 2 "s_register_operand" "w")))] "TARGET_NEON" "vaddw.\t%q0, %q2, %P1" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit @@ -1309,7 +1308,7 @@ "TARGET_NEON" ".32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1324,7 +1323,7 @@ "TARGET_NEON && flag_unsafe_math_optimizations" ".f32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr 
"vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1341,7 +1340,7 @@ "TARGET_NEON" ".16\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1362,7 +1361,7 @@ "TARGET_NEON" ".8\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1423,7 +1422,7 @@ UNSPEC_VPADD))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vadd.i64\t%e0, %e1, %f1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; NEON does not distinguish between signed and unsigned addition except on @@ -1547,7 +1546,7 @@ "TARGET_NEON" "vpadd.\t%P0, %P1, %P2" ;; Assume this schedules like vadd. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -1563,7 +1562,7 @@ "TARGET_NEON" "vpmin.\t%P0, %P1, %P2" ;; Assume this schedules like vmin. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -1577,7 +1576,7 @@ "TARGET_NEON" "vpmax.\t%P0, %P1, %P2" ;; Assume this schedules like vmax. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -1591,7 +1590,7 @@ "TARGET_NEON" "vpmin.\t%P0, %P1, %P2" ;; Assume this schedules like umin. - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vpumax" @@ -1602,7 +1601,7 @@ "TARGET_NEON" "vpmax.\t%P0, %P1, %P2" ;; Assume this schedules like umax. 
- [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; Saturating arithmetic @@ -1619,7 +1618,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqadd.\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "*us_add_neon" @@ -1628,7 +1627,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqadd.\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "*ss_sub_neon" @@ -1637,7 +1636,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqsub.\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*us_sub_neon" @@ -1646,7 +1645,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqsub.\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; Conditional instructions. These are comparisons with conditional moves for @@ -1671,6 +1670,7 @@ ? 3 : 1; rtx magic_rtx = GEN_INT (magic_word); int inverse = 0; + int use_zero_form = 0; int swap_bsl_operands = 0; rtx mask = gen_reg_rtx (mode); rtx tmp = gen_reg_rtx (mode); @@ -1681,12 +1681,16 @@ switch (GET_CODE (operands[3])) { case GE: + case GT: case LE: + case LT: case EQ: - if (!REG_P (operands[5]) - && (operands[5] != CONST0_RTX (mode))) - operands[5] = force_reg (mode, operands[5]); - break; + if (operands[5] == CONST0_RTX (mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ default: if (!REG_P (operands[5])) operands[5] = force_reg (mode, operands[5]); @@ -1737,7 +1741,26 @@ a GT b -> a GT b a LE b -> b GE a a LT b -> b GT a - a EQ b -> a EQ b */ + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. 
*/ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_neon_vclt; + break; + case LE: + base_comparison = gen_neon_vcle; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } if (!inverse) emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); @@ -1914,7 +1937,7 @@ UNSPEC_VADD))] "TARGET_NEON" "vadd.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -1934,7 +1957,7 @@ UNSPEC_VADDL))] "TARGET_NEON" "vaddl.%T3%#\t%q0, %P1, %P2" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "neon_vaddw" @@ -1945,7 +1968,7 @@ UNSPEC_VADDW))] "TARGET_NEON" "vaddw.%T3%#\t%q0, %q1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) ; vhadd and vrhadd. @@ -1958,7 +1981,7 @@ UNSPEC_VHADD))] "TARGET_NEON" "v%O3hadd.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vqadd" @@ -1969,7 +1992,7 @@ UNSPEC_VQADD))] "TARGET_NEON" "vqadd.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vaddhn" @@ -1980,7 +2003,7 @@ UNSPEC_VADDHN))] "TARGET_NEON" "v%O3addhn.\t%P0, %q1, %q2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) ;; We cannot replace this unspec with mul3 because of the odd @@ -1993,7 +2016,7 @@ UNSPEC_VMUL))] "TARGET_NEON" "vmul.%F3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2061,7 +2084,7 @@ UNSPEC_VMLA))] "TARGET_NEON" "vmla.\t%0, %2, %3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string 
"neon_fp_vmla_ddd") @@ -2085,7 +2108,7 @@ UNSPEC_VMLAL))] "TARGET_NEON" "vmlal.%T4%#\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2118,7 +2141,7 @@ UNSPEC_VMLS))] "TARGET_NEON" "vmls.\t%0, %2, %3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd") @@ -2143,7 +2166,7 @@ UNSPEC_VMLSL))] "TARGET_NEON" "vmlsl.%T4%#\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2157,7 +2180,7 @@ UNSPEC_VQDMULH))] "TARGET_NEON" "vq%O3dmulh.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") @@ -2176,7 +2199,7 @@ UNSPEC_VQDMLAL))] "TARGET_NEON" "vqdmlal.\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2191,7 +2214,7 @@ UNSPEC_VQDMLSL))] "TARGET_NEON" "vqdmlsl.\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2205,7 +2228,7 @@ UNSPEC_VMULL))] "TARGET_NEON" "vmull.%T3%#\t%q0, %P1, %P2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") (const_string 
"neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -2219,7 +2242,7 @@ UNSPEC_VQDMULL))] "TARGET_NEON" "vqdmull.\t%q0, %P1, %P2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -2249,7 +2272,7 @@ UNSPEC_VSUB))] "TARGET_NEON" "vsub.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2265,7 +2288,7 @@ UNSPEC_VSUBL))] "TARGET_NEON" "vsubl.%T3%#\t%q0, %P1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) (define_insn "neon_vsubw" @@ -2276,7 +2299,7 @@ UNSPEC_VSUBW))] "TARGET_NEON" "vsubw.%T3%#\t%q0, %q1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) (define_insn "neon_vqsub" @@ -2287,7 +2310,7 @@ UNSPEC_VQSUB))] "TARGET_NEON" "vqsub.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vhsub" @@ -2298,7 +2321,7 @@ UNSPEC_VHSUB))] "TARGET_NEON" "vhsub.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vsubhn" @@ -2309,7 +2332,7 @@ UNSPEC_VSUBHN))] "TARGET_NEON" "v%O3subhn.\t%P0, %q1, %q2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vceq" @@ -2323,7 +2346,7 @@ "@ vceq.\t%0, %1, %2 vceq.\t%0, %1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2342,7 +2365,7 @@ "@ vcge.%T3%#\t%0, %1, %2 vcge.%T3%#\t%0, %1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2359,7 +2382,7 @@ UNSPEC_VCGEU))] "TARGET_NEON" 
"vcge.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vcgt" @@ -2373,7 +2396,7 @@ "@ vcgt.%T3%#\t%0, %1, %2 vcgt.%T3%#\t%0, %1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2390,7 +2413,7 @@ UNSPEC_VCGTU))] "TARGET_NEON" "vcgt.%T3%#\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; VCLE and VCLT only support comparisons with immediate zero (register @@ -2405,7 +2428,7 @@ UNSPEC_VCLE))] "TARGET_NEON" "vcle.%T3%#\t%0, %1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2422,7 +2445,7 @@ UNSPEC_VCLT))] "TARGET_NEON" "vclt.%T3%#\t%0, %1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2438,7 +2461,7 @@ UNSPEC_VCAGE))] "TARGET_NEON" "vacge.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2452,7 +2475,7 @@ UNSPEC_VCAGT))] "TARGET_NEON" "vacgt.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2466,7 +2489,7 @@ UNSPEC_VTST))] "TARGET_NEON" "vtst.\t%0, %1, %2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vabd" @@ -2477,7 +2500,7 @@ UNSPEC_VABD))] "TARGET_NEON" "vabd.%T3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2493,7 +2516,7 @@ UNSPEC_VABDL))] "TARGET_NEON" "vabdl.%T3%#\t%q0, %P1, %P2" - 
[(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vaba" @@ -2505,7 +2528,7 @@ (match_operand:VDQIW 1 "s_register_operand" "0")))] "TARGET_NEON" "vaba.%T4%#\t%0, %2, %3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vaba") (const_string "neon_vaba_qqq")))] ) @@ -2519,7 +2542,7 @@ (match_operand: 1 "s_register_operand" "0")))] "TARGET_NEON" "vabal.%T4%#\t%q0, %P2, %P3" - [(set_attr "neon_type" "neon_vaba")] + [(set_attr "type" "neon_vaba")] ) (define_insn "neon_vmax" @@ -2530,7 +2553,7 @@ UNSPEC_VMAX))] "TARGET_NEON" "vmax.%T3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2546,7 +2569,7 @@ UNSPEC_VMIN))] "TARGET_NEON" "vmin.%T3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2574,7 +2597,7 @@ "TARGET_NEON" "vpaddl.%T2%#\t%0, %1" ;; Assume this schedules like vaddl. - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "neon_vpadal" @@ -2586,7 +2609,7 @@ "TARGET_NEON" "vpadal.%T3%#\t%0, %2" ;; Assume this schedules like vpadd. - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "neon_vpmax" @@ -2598,7 +2621,7 @@ "TARGET_NEON" "vpmax.%T3%#\t%0, %1, %2" ;; Assume this schedules like vmax. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -2613,7 +2636,7 @@ "TARGET_NEON" "vpmin.%T3%#\t%0, %1, %2" ;; Assume this schedules like vmin. 
- [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -2627,7 +2650,7 @@ UNSPEC_VRECPS))] "TARGET_NEON" "vrecps.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vrecps_vrsqrts_ddd") (const_string "neon_fp_vrecps_vrsqrts_qqq")))] @@ -2641,7 +2664,7 @@ UNSPEC_VRSQRTS))] "TARGET_NEON" "vrsqrts.\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vrecps_vrsqrts_ddd") (const_string "neon_fp_vrecps_vrsqrts_qqq")))] @@ -2664,7 +2687,7 @@ UNSPEC_VQABS))] "TARGET_NEON" "vqabs.\t%0, %1" - [(set_attr "neon_type" "neon_vqneg_vqabs")] + [(set_attr "type" "neon_vqneg_vqabs")] ) (define_expand "neon_vneg" @@ -2684,7 +2707,7 @@ UNSPEC_VQNEG))] "TARGET_NEON" "vqneg.\t%0, %1" - [(set_attr "neon_type" "neon_vqneg_vqabs")] + [(set_attr "type" "neon_vqneg_vqabs")] ) (define_insn "neon_vcls" @@ -2694,7 +2717,7 @@ UNSPEC_VCLS))] "TARGET_NEON" "vcls.\t%0, %1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "clz2" @@ -2702,7 +2725,7 @@ (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" "vclz.\t%0, %1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vclz" @@ -2720,7 +2743,7 @@ (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] "TARGET_NEON" "vcnt.\t%0, %1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vcnt" @@ -2740,7 +2763,7 @@ UNSPEC_VRECPE))] "TARGET_NEON" "vrecpe.\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2753,7 +2776,7 @@ UNSPEC_VRSQRTE))] "TARGET_NEON" "vrsqrte.\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test 
"") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2785,7 +2808,7 @@ } return "vmov.s\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane_zext_internal" @@ -2804,7 +2827,7 @@ } return "vmov.u\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane_sext_internal" @@ -2831,7 +2854,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane_zext_internal" @@ -2858,7 +2881,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vget_lane" @@ -2991,7 +3014,7 @@ "TARGET_NEON" "vdup.\t%0, %1" ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vdup_n" @@ -3002,7 +3025,7 @@ vdup.\t%0, %1 vdup.\t%0, %y1" ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vdup_ndi" @@ -3023,7 +3046,7 @@ vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 vmov\t%e0, %P1\;vmov\t%f0, %P1" [(set_attr "length" "8") - (set_attr "neon_type" "neon_bp_simple")] + (set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vdup_lane_internal" @@ -3046,7 +3069,7 @@ return "vdup.\t%q0, %P1[%c2]"; } ;; Assume this schedules like vmov. 
- [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vdup_lane" @@ -3101,7 +3124,7 @@ (set (match_dup 1) (match_dup 0))] "TARGET_NEON && reload_completed" "vswp\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_bp_simple") (const_string "neon_bp_2cycle")))] @@ -3155,7 +3178,7 @@ (float: (match_operand:VCVTI 1 "s_register_operand" "w")))] "TARGET_NEON && !flag_rounding_math" "vcvt.f32.s32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3166,7 +3189,7 @@ (unsigned_float: (match_operand:VCVTI 1 "s_register_operand" "w")))] "TARGET_NEON && !flag_rounding_math" "vcvt.f32.u32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3177,7 +3200,7 @@ (fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] "TARGET_NEON" "vcvt.s32.f32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3188,7 +3211,7 @@ (unsigned_fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] "TARGET_NEON" "vcvt.u32.f32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3201,7 +3224,7 @@ UNSPEC_VCVT))] "TARGET_NEON" "vcvt.%T2%#32.f32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3214,7 +3237,7 @@ UNSPEC_VCVT))] "TARGET_NEON" "vcvt.f32.%T2%#32\t%0, %1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string 
"neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3226,7 +3249,7 @@ UNSPEC_VCVT))] "TARGET_NEON && TARGET_FP16" "vcvt.f32.f16\t%q0, %P1" - [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] + [(set_attr "type" "neon_fp_vadd_ddd_vabs_dd")] ) (define_insn "neon_vcvtv4hfv4sf" @@ -3235,7 +3258,7 @@ UNSPEC_VCVT))] "TARGET_NEON && TARGET_FP16" "vcvt.f16.f32\t%P0, %q1" - [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] + [(set_attr "type" "neon_fp_vadd_ddd_vabs_dd")] ) (define_insn "neon_vcvt_n" @@ -3249,7 +3272,7 @@ neon_const_bounds (operands[2], 1, 33); return "vcvt.%T3%#32.f32\t%0, %1, %2"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3266,7 +3289,7 @@ neon_const_bounds (operands[2], 1, 33); return "vcvt.f32.%T3%#32\t%0, %1, %2"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3279,7 +3302,7 @@ UNSPEC_VMOVN))] "TARGET_NEON" "vmovn.\t%P0, %q1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vqmovn" @@ -3289,7 +3312,7 @@ UNSPEC_VQMOVN))] "TARGET_NEON" "vqmovn.%T2%#\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqmovun" @@ -3299,7 +3322,7 @@ UNSPEC_VQMOVUN))] "TARGET_NEON" "vqmovun.\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vmovl" @@ -3309,7 +3332,7 @@ UNSPEC_VMOVL))] "TARGET_NEON" "vmovl.%T2%#\t%q0, %P1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vmul_lane" @@ -3325,7 +3348,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vmul.\t%P0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") 
(const_string "neon_fp_vmul_ddd") (if_then_else (match_test "") @@ -3346,7 +3369,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vmul.\t%q0, %q1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmul_qqd") (if_then_else (match_test "") @@ -3367,7 +3390,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vmull.%T4%#\t%q0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3386,7 +3409,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vqdmull.\t%q0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3405,7 +3428,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") (const_string "neon_mul_qqd_32_scalar")))] @@ -3424,7 +3447,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3444,7 +3467,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmla.\t%P0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd_scalar") (if_then_else (match_test "") @@ -3466,7 +3489,7 @@ 
neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmla.\t%q0, %q2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_qqq_scalar") (if_then_else (match_test "") @@ -3488,7 +3511,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmlal.%T5%#\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3508,7 +3531,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vqdmlal.\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3528,7 +3551,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmls.\t%P0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_ddd_scalar") (if_then_else (match_test "") @@ -3550,7 +3573,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmls.\t%q0, %q2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_fp_vmla_qqq_scalar") (if_then_else (match_test "") @@ -3572,7 +3595,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3592,7 +3615,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; } - [(set (attr 
"neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3820,7 +3843,7 @@ neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); return "vext.\t%0, %1, %2, %3"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_bp_simple") (const_string "neon_bp_2cycle")))] @@ -3833,7 +3856,7 @@ UNSPEC_VREV64))] "TARGET_NEON" "vrev64.\t%0, %1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vrev32" @@ -3843,7 +3866,7 @@ UNSPEC_VREV32))] "TARGET_NEON" "vrev32.\t%0, %1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vrev16" @@ -3853,7 +3876,7 @@ UNSPEC_VREV16))] "TARGET_NEON" "vrev16.\t%0, %1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register @@ -3875,7 +3898,7 @@ vbsl\t%0, %2, %3 vbit\t%0, %2, %1 vbif\t%0, %3, %1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vbsl" @@ -3898,7 +3921,7 @@ UNSPEC_VSHL))] "TARGET_NEON" "v%O3shl.%T3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -3912,7 +3935,7 @@ UNSPEC_VQSHL))] "TARGET_NEON" "vq%O3shl.%T3%#\t%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_shift_2") (const_string "neon_vqshl_vrshl_vqrshl_qqq")))] @@ -3929,7 +3952,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (mode) + 1); return "v%O3shr.%T3%#\t%0, %1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vshrn_n" @@ -3943,7 +3966,7 @@ neon_const_bounds 
(operands[2], 1, neon_element_bits (mode) / 2 + 1); return "v%O3shrn.\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vqshrn_n" @@ -3957,7 +3980,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); return "vq%O3shrn.%T3%#\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqshrun_n" @@ -3971,7 +3994,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); return "vq%O3shrun.%T3%#\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vshl_n" @@ -3985,7 +4008,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (mode)); return "vshl.\t%0, %1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vqshl_n" @@ -3999,7 +4022,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (mode)); return "vqshl.%T3%#\t%0, %1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqshlu_n" @@ -4013,7 +4036,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (mode)); return "vqshlu.%T3%#\t%0, %1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vshll_n" @@ -4028,7 +4051,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1); return "vshll.%T3%#\t%q0, %P1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vsra_n" @@ -4043,7 +4066,7 @@ neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); return "v%O4sra.%T4%#\t%0, %2, %3"; } - [(set_attr "neon_type" "neon_vsra_vrsra")] + [(set_attr "type" "neon_vsra_vrsra")] ) (define_insn "neon_vsri_n" @@ -4057,7 +4080,7 @@ neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); return "vsri.\t%0, %2, %3"; } - [(set (attr "neon_type") + [(set 
(attr "type") (if_then_else (match_test "") (const_string "neon_shift_1") (const_string "neon_shift_3")))] @@ -4074,7 +4097,7 @@ neon_const_bounds (operands[3], 0, neon_element_bits (mode)); return "vsli.\t%0, %2, %3"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_shift_1") (const_string "neon_shift_3")))] @@ -4087,7 +4110,7 @@ UNSPEC_VTBL))] "TARGET_NEON" "vtbl.8\t%P0, {%P1}, %P2" - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbl2v8qi" @@ -4108,7 +4131,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbl3v8qi" @@ -4130,7 +4153,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_insn "neon_vtbl4v8qi" @@ -4153,7 +4176,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) ;; These three are used by the vec_perm infrastructure for V16QImode. 
@@ -4241,7 +4264,7 @@ UNSPEC_VTBX))] "TARGET_NEON" "vtbx.8\t%P0, {%P2}, %P3" - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbx2v8qi" @@ -4263,7 +4286,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbx3v8qi" @@ -4286,7 +4309,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_insn "neon_vtbx4v8qi" @@ -4310,7 +4333,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_expand "neon_vtrn_internal" @@ -4336,7 +4359,7 @@ UNSPEC_VTRN2))] "TARGET_NEON" "vtrn.\t%0, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4376,7 +4399,7 @@ UNSPEC_VZIP2))] "TARGET_NEON" "vzip.\t%0, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4416,7 +4439,7 @@ UNSPEC_VUZP2))] "TARGET_NEON" "vuzp.\t%0, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4535,7 +4558,7 @@ UNSPEC_VLD1))] "TARGET_NEON" "vld1.\t%h0, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")] + [(set_attr "type" "neon_vld1_1_2_regs")] ) (define_insn "neon_vld1_lane" @@ -4555,7 +4578,7 @@ else return "vld1.\t{%P0[%c3]}, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_int 2)) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld1_vld2_lane")))] @@ -4586,26 +4609,27 @@ else return "vld1.\t{%P0[%c3]}, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_int 2)) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld1_vld2_lane")))] ) (define_insn "neon_vld1_dup" - [(set 
(match_operand:VDX 0 "s_register_operand" "=w") - (vec_duplicate:VDX (match_operand: 1 "neon_struct_operand" "Um")))] + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_duplicate:VD (match_operand: 1 "neon_struct_operand" "Um")))] "TARGET_NEON" -{ - if (GET_MODE_NUNITS (mode) > 1) - return "vld1.\t{%P0[]}, %A1"; - else - return "vld1.\t%h0, %A1"; -} - [(set (attr "neon_type") - (if_then_else (gt (const_string "") (const_string "1")) - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") - (const_string "neon_vld1_1_2_regs")))] + "vld1.\t{%P0[]}, %A1" + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] +) + +;; Special case for DImode. Treat it exactly like a simple load. +(define_expand "neon_vld1_dupdi" + [(set (match_operand:DI 0 "s_register_operand" "") + (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] + UNSPEC_VLD1))] + "TARGET_NEON" + "" ) (define_insn "neon_vld1_dup" @@ -4615,7 +4639,7 @@ { return "vld1.\t{%e0[], %f0[]}, %A1"; } - [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] ) (define_insn_and_split "neon_vld1_dupv2di" @@ -4632,7 +4656,7 @@ DONE; } [(set_attr "length" "8") - (set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] + (set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] ) (define_expand "vec_store_lanes" @@ -4647,7 +4671,7 @@ UNSPEC_VST1))] "TARGET_NEON" "vst1.\t%h1, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "neon_vst1_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") @@ -4666,7 +4690,7 @@ else return "vst1.\t{%P1[%c2]}, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_int 1)) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst1_vst2_lane")))]) @@ -4696,7 +4720,7 @@ else return "vst1.\t{%P1[%c2]}, %A0"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] 
+ [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_expand "vec_load_lanesti" @@ -4718,7 +4742,7 @@ else return "vld2.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] @@ -4738,7 +4762,7 @@ UNSPEC_VLD2))] "TARGET_NEON" "vld2.\t%h0, %A1" - [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) (define_insn "neon_vld2_lane" [(set (match_operand:TI 0 "s_register_operand" "=w") @@ -4762,7 +4786,7 @@ output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } - [(set_attr "neon_type" "neon_vld1_vld2_lane")] + [(set_attr "type" "neon_vld1_vld2_lane")] ) (define_insn "neon_vld2_lane" @@ -4792,7 +4816,7 @@ output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } - [(set_attr "neon_type" "neon_vld1_vld2_lane")] + [(set_attr "type" "neon_vld1_vld2_lane")] ) (define_insn "neon_vld2_dup" @@ -4807,7 +4831,7 @@ else return "vld1.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "") (const_string "1")) (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") (const_string "neon_vld1_1_2_regs")))] @@ -4832,7 +4856,7 @@ else return "vst2.\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] @@ -4852,7 +4876,7 @@ UNSPEC_VST2))] "TARGET_NEON" "vst2.\t%h1, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")] ) (define_insn "neon_vst2_lane" @@ -4877,7 +4901,7 @@ output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] + [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_insn "neon_vst2_lane" 
@@ -4907,7 +4931,7 @@ output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] + [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_expand "vec_load_lanesei" @@ -4929,7 +4953,7 @@ else return "vld3.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld3_vld4")))] @@ -4976,7 +5000,7 @@ output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld3qb" @@ -4996,7 +5020,7 @@ output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld3_lane" @@ -5023,7 +5047,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld3_lane" @@ -5055,7 +5079,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld3_dup" @@ -5079,7 +5103,7 @@ else return "vld1.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "") (const_string "1")) (const_string "neon_vld3_vld4_all_lanes") (const_string "neon_vld1_1_2_regs")))]) @@ -5103,7 +5127,7 @@ else return "vst3.\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst2_4_regs_vst3_vst4")))]) @@ -5149,7 +5173,7 @@ output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst3qb" @@ -5168,7 +5192,7 @@ output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); return 
""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst3_lane" @@ -5195,7 +5219,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_insn "neon_vst3_lane" @@ -5227,7 +5251,7 @@ ops); return ""; } -[(set_attr "neon_type" "neon_vst3_vst4_lane")]) +[(set_attr "type" "neon_vst3_vst4_lane")]) (define_expand "vec_load_lanesoi" [(set (match_operand:OI 0 "s_register_operand") @@ -5248,7 +5272,7 @@ else return "vld4.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld3_vld4")))] @@ -5296,7 +5320,7 @@ output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld4qb" @@ -5317,7 +5341,7 @@ output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld4_lane" @@ -5345,7 +5369,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld4_lane" @@ -5378,7 +5402,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld4_dup" @@ -5404,7 +5428,7 @@ else return "vld1.\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "") (const_string "1")) (const_string "neon_vld3_vld4_all_lanes") (const_string "neon_vld1_1_2_regs")))] @@ -5429,7 +5453,7 @@ else return "vst4.\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string 
"neon_vst2_4_regs_vst3_vst4")))] @@ -5477,7 +5501,7 @@ output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst4qb" @@ -5497,7 +5521,7 @@ output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst4_lane" @@ -5525,7 +5549,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_insn "neon_vst4_lane" @@ -5558,7 +5582,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_expand "neon_vand" @@ -5623,7 +5647,7 @@ (match_operand:VU 2 "vect_par_constant_low" ""))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovl. %q0, %e1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vec_unpack_hi_" @@ -5633,7 +5657,7 @@ (match_operand:VU 2 "vect_par_constant_high" ""))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovl. %q0, %f1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_unpack_hi_" @@ -5683,7 +5707,7 @@ (match_dup 2)))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmull. %q0, %e1, %e3" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_mult_lo_" @@ -5717,7 +5741,7 @@ (match_dup 2)))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmull. %q0, %f1, %f3" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_mult_hi_" @@ -5750,7 +5774,7 @@ { return "vshll. 
%q0, %P1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_shiftl_lo_" @@ -5786,7 +5810,7 @@ (SE: (match_operand:VDI 1 "register_operand" "w")))] "TARGET_NEON" "vmovl. %q0, %P1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_unpack_lo_" @@ -5823,7 +5847,7 @@ (match_operand:VDI 2 "register_operand" "w"))))] "TARGET_NEON" "vmull. %q0, %P1, %P2" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_mult_hi_" @@ -5897,7 +5921,7 @@ (match_operand:VN 2 "register_operand" "w"))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovn.i\t%e0, %q1\;vmovn.i\t%f0, %q2" - [(set_attr "neon_type" "neon_shift_1") + [(set_attr "type" "neon_shift_1") (set_attr "length" "8")] ) @@ -5907,7 +5931,7 @@ (truncate: (match_operand:VN 1 "register_operand" "w")))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovn.i\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_pack_trunc_" @@ -5930,7 +5954,7 @@ (match_operand:VDQ 2 "s_register_operand" "w"))))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" "vabd. %0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (ne (symbol_ref "") (const_int 0)) (if_then_else (ne (symbol_ref "") (const_int 0)) (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -5945,7 +5969,7 @@ UNSPEC_VSUB)))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" "vabd. 
%0, %1, %2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (ne (symbol_ref "") (const_int 0)) (if_then_else (ne (symbol_ref "") (const_int 0)) (const_string "neon_fp_vadd_ddd_vabs_dd") diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 246f0f5b540..20e79ef2680 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -78,6 +78,11 @@ $(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ $(srcdir)/config/arm/arm-tables.opt +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ insn-config.h conditions.h output.h dumpfile.h \ diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi index 2f2f8ffa5e2..07e32b38de8 100644 --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -18,6 +18,8 @@ # We do not build a Thumb multilib for Linux because the definition of # CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +# If you set MULTILIB_OPTIONS to a non-empty value you should also set +# MULTILIB_DEFAULTS in linux-elf.h. 
MULTILIB_OPTIONS = MULTILIB_DIRNAMES = diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 8b184a80c2e..3b5944a014a 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -36,7 +36,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_imm")] ) ;; We use the '0' constraint for operand 1 because reload should @@ -58,7 +58,8 @@ "" [(set_attr "conds" "clob") (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_sminsi3" @@ -78,7 +79,8 @@ "" [(set_attr "conds" "clob") (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb32_umaxsi3" @@ -98,7 +100,8 @@ "" [(set_attr "conds" "clob") (set_attr "length" "6,6,10") - (set_attr "enabled_for_depr_it" "yes,yes,no")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_uminsi3" @@ -118,7 +121,8 @@ "" [(set_attr "conds" "clob") (set_attr "length" "6,6,10") - (set_attr "enabled_for_depr_it" "yes,yes,no")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] ) ;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. 
@@ -143,7 +147,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_abssi2" @@ -200,7 +205,8 @@ (set_attr "predicable_short_it" "no") (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "ce_count" "2") - (set_attr "length" "8,6,10")] + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_neg_abssi2" @@ -257,7 +263,8 @@ (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "predicable_short_it" "no") (set_attr "ce_count" "2") - (set_attr "length" "8,6,10")] + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] ) ;; We have two alternatives here for memory loads (and similarly for stores) @@ -282,7 +289,7 @@ ldr%?\\t%0, %1 str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,arlo_imm,arlo_imm,arlo_imm,*,load1,load1,store1,store1") + [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") (set_attr "length" "2,4,2,4,4,4,4,4,4") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") @@ -303,7 +310,8 @@ INTVAL (operands[3])); return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\"; " - [(set_attr "length" "4,4,6,6")] + [(set_attr "length" "4,4,6,6") + (set_attr "type" "multiple")] ) ;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot @@ -319,12 +327,27 @@ movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" - [(set_attr "type" "*,*,store1,load1") + [(set_attr "type" "mov_imm,mov_reg,store1,load1") (set_attr "predicable" "yes") (set_attr "pool_range" "*,*,*,4094") (set_attr "neg_pool_range" "*,*,*,250")] ) +(define_insn "*thumb2_storewb_pairsi" + [(set (match_operand:SI 0 "register_operand" "=&kr") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (mem:SI (plus:SI (match_dup 0) (match_dup 
2))) + (match_operand:SI 3 "register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 5 "const_int_operand" "n"))) + (match_operand:SI 4 "register_operand" "r"))] + "TARGET_THUMB2 + && INTVAL (operands[5]) == INTVAL (operands[2]) + 4" + "strd\\t%3, %4, [%0, %2]!" + [(set_attr "type" "store2")] +) + (define_insn "*thumb2_cmpsi_neg_shiftsi" [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 0 "s_register_operand" "r") @@ -335,7 +358,7 @@ "cmn%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alus_shift_imm")] ) (define_insn_and_split "*thumb2_mov_scc" @@ -352,7 +375,8 @@ "" [(set_attr "conds" "use") (set_attr "enabled_for_depr_it" "yes,no") - (set_attr "length" "8,10")] + (set_attr "length" "8,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_negscc" @@ -370,7 +394,8 @@ operands[3] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set_attr "length" "10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_negscc_strict_it" @@ -398,7 +423,8 @@ } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_notscc" @@ -417,7 +443,8 @@ operands[4] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set_attr "length" "10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_notscc_strict_it" @@ -439,7 +466,8 @@ VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_movsicc_insn" @@ -499,7 +527,8 @@ } [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") - (set_attr "conds" "use")] + (set_attr "conds" "use") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_movsfcc_soft_insn" @@ -513,7 
+542,8 @@ it\\t%D3\;mov%D3\\t%0, %2 it\\t%d3\;mov%d3\\t%0, %1" [(set_attr "length" "6,6") - (set_attr "conds" "use")] + (set_attr "conds" "use") + (set_attr "type" "multiple")] ) (define_insn "*call_reg_thumb2" @@ -542,7 +572,8 @@ (match_operand:SI 0 "register_operand" "l*r"))] "TARGET_THUMB2" "bx\\t%0" - [(set_attr "conds" "clob")] + [(set_attr "conds" "clob") + (set_attr "type" "branch")] ) ;; Don't define thumb2_load_indirect_jump because we can't guarantee label ;; addresses will have the thumb bit set correctly. @@ -570,6 +601,7 @@ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (match_test "arm_restrict_it") (const_int 8) (const_int 10)))] @@ -602,7 +634,8 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "6,10")] + (set_attr "length" "6,10") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_ior_scc_strict_it" @@ -615,7 +648,8 @@ it\\t%d2\;mov%d2\\t%0, #1\;it\\t%d2\;orr%d2\\t%0, %1 mov\\t%0, #1\;orr\\t%0, %1\;it\\t%D2\;mov%D2\\t%0, %1" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_move" @@ -664,7 +698,8 @@ return \"\"; " [(set_attr "conds" "use") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_arith" @@ -701,7 +736,8 @@ return \"%i5%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "14")] + (set_attr "length" "14") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_cond_arith_strict_it" @@ -770,7 +806,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_sub" @@ -801,7 +838,8 @@ return \"sub%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "10,14")] 
+ (set_attr "length" "10,14") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_negscc" @@ -869,7 +907,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "14")] + (set_attr "length" "14") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_movcond" @@ -952,7 +991,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "10,10,14")] + (set_attr "length" "10,10,14") + (set_attr "type" "multiple")] ) ;; Zero and sign extension instructions. @@ -1015,7 +1055,8 @@ "TARGET_THUMB2 && !flag_pic" "* return thumb2_output_casesi(operands);" [(set_attr "conds" "clob") - (set_attr "length" "16")] + (set_attr "length" "16") + (set_attr "type" "multiple")] ) (define_insn "thumb2_casesi_internal_pic" @@ -1033,7 +1074,8 @@ "TARGET_THUMB2 && flag_pic" "* return thumb2_output_casesi(operands);" [(set_attr "conds" "clob") - (set_attr "length" "20")] + (set_attr "length" "20") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_return" @@ -1070,7 +1112,8 @@ && GET_CODE(operands[3]) != MINUS" "%I3%!\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_shiftsi3_short" @@ -1087,8 +1130,8 @@ (set_attr "shift" "1") (set_attr "length" "2") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*thumb2_mov_shortim" @@ -1098,7 +1141,8 @@ "TARGET_THUMB2 && reload_completed" "mov%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "mov_imm")] ) (define_insn "*thumb2_addsi_short" @@ -1122,7 +1166,8 @@ return \"add%!\\t%0, %1, %2\"; " [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_subsi_short" @@ -1133,7 +1178,8 @@ "TARGET_THUMB2 && 
reload_completed" "sub%!\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_peephole2 @@ -1185,7 +1231,8 @@ return \"adds\\t%0, %1, %2\"; " [(set_attr "conds" "set") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_addsi3_compare0_scratch" @@ -1210,7 +1257,7 @@ " [(set_attr "conds" "set") (set_attr "length" "2,2,4,4") - (set_attr "type" "arlo_imm,*,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_reg")] ) (define_insn "*thumb2_mulsi_short" @@ -1269,7 +1316,8 @@ (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "branch,multiple")] ) (define_insn "*thumb2_cbnz" @@ -1292,7 +1340,8 @@ (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "branch,multiple")] ) (define_insn "*thumb2_one_cmplsi2_short" @@ -1302,7 +1351,8 @@ "TARGET_THUMB2 && reload_completed" "mvn%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "mvn_reg")] ) (define_insn "*thumb2_negsi2_short" @@ -1312,7 +1362,8 @@ "TARGET_THUMB2 && reload_completed" "neg%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*orsi_notsi_si" @@ -1322,7 +1373,8 @@ "TARGET_THUMB2" "orn%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "*orsi_not_shiftsi_si" @@ -1336,7 +1388,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_imm")] ) (define_peephole2 diff --git 
a/gcc/config/arm/types.md b/gcc/config/arm/types.md new file mode 100644 index 00000000000..7a96438fd48 --- /dev/null +++ b/gcc/config/arm/types.md @@ -0,0 +1,563 @@ +;; Instruction Classification for ARM for GNU compiler. + +;; Copyright (C) 1991-2013 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; TYPE attribute is used to classify instructions for use in scheduling. +; +; Instruction classification: +; +; adc_imm add/subtract with carry and with an immediate operand. +; adc_reg add/subtract with carry and no immediate operand. +; adcs_imm as adc_imm, setting condition flags. +; adcs_reg as adc_reg, setting condition flags. +; adr calculate address. +; alu_ext From ARMv8-A: any arithmetic instruction that has a +; sign/zero-extended. +; AArch64 Only. +; source operand +; alu_imm any arithmetic instruction that doesn't have a shifted +; operand and has an immediate operand. This +; excludes MOV, MVN and RSB(S) immediate. +; alu_reg any arithmetic instruction that doesn't have a shifted +; or an immediate operand. This excludes +; MOV and MVN but includes MOVT. This is also the default. +; alu_shift_imm any arithmetic instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; alu_shift_reg as alu_shift_imm, with the shift amount specified in a +; register. 
+; alus_ext From ARMv8-A: as alu_ext, setting condition flags. +; AArch64 Only. +; alus_imm as alu_imm, setting condition flags. +; alus_reg as alu_reg, setting condition flags. +; alus_shift_imm as alu_shift_imm, setting condition flags. +; alus_shift_reg as alu_shift_reg, setting condition flags. +; bfm bitfield move operation. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; clz count leading zeros (CLZ). +; csel From ARMv8-A: conditional select. +; extend extend instruction (SXTB, SXTH, UXTB, UXTH). +; f_cvt conversion between float representations. +; f_cvtf2i conversion between float and integral types. +; f_cvti2f conversion between integral and float types. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. +; f_mcr transfer arm to vfp reg. +; f_mcrr transfer two arm regs to vfp reg. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_mrc transfer vfp to arm reg. +; f_mrrc transfer vfp to two arm regs. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. +; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcsel From ARMv8-A: Floating-point conditional select. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmov floating point to floating point register move. +; fmul[d,s] double/single floating point multiply. +; fsqrt[d,s] double/single precision floating point square root. +; load_acq load-acquire. 
+; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; logic_imm any logical instruction that doesn't have a shifted +; operand and has an immediate operand. +; logic_reg any logical instruction that doesn't have a shifted +; operand or an immediate operand. +; logic_shift_imm any logical instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; logic_shift_reg as logic_shift_imm, with the shift amount specified in a +; register. +; logics_imm as logic_imm, setting condition flags. +; logics_reg as logic_reg, setting condition flags. +; logics_shift_imm as logic_shift_imm, setting condition flags. +; logics_shift_reg as logic_shift_reg, setting condition flags. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov_imm simple MOV instruction that moves an immediate to +; register. This includes MOVW, but not MOVT. +; mov_reg simple MOV instruction that moves a register to another +; register. This includes MOVW, but not MOVT. +; mov_shift simple MOV instruction, shifted operand by a constant. +; mov_shift_reg simple MOV instruction, shifted operand by a register. +; mrs system/special/co-processor register move. +; mul integer multiply. +; muls integer multiply, flag setting. +; multiple more than one instruction, candidate for future +; splitting, or better modeling. +; mvn_imm inverting move instruction, immediate. +; mvn_reg inverting move instruction, register. +; mvn_shift inverting move instruction, shifted operand by a constant. +; mvn_shift_reg inverting move instruction, shifted operand by a register. +; no_insn an insn which does not represent an instruction in the +; final output, thus having no impact on scheduling. +; rbit reverse bits. +; rev reverse bytes. 
+; sdiv signed division. +; shift_imm simple shift operation (LSL, LSR, ASR, ROR) with an +; immediate. +; shift_reg simple shift by a register. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. +; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store_rel store-release. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. +; untyped insn without type information - default, and error, +; case. 
+; +; The classification below is for instructions used by the Wireless MMX +; Technology. Each attribute value is used to classify an instruction of the +; same name or family. +; +; wmmx_tandc +; wmmx_tbcst +; wmmx_textrc +; wmmx_textrm +; wmmx_tinsr +; wmmx_tmcr +; wmmx_tmcrr +; wmmx_tmia +; wmmx_tmiaph +; wmmx_tmiaxy +; wmmx_tmrc +; wmmx_tmrrc +; wmmx_tmovmsk +; wmmx_torc +; wmmx_torvsc +; wmmx_wabs +; wmmx_wdiff +; wmmx_wacc +; wmmx_wadd +; wmmx_waddbhus +; wmmx_waddsubhx +; wmmx_waligni +; wmmx_walignr +; wmmx_wand +; wmmx_wandn +; wmmx_wavg2 +; wmmx_wavg4 +; wmmx_wcmpeq +; wmmx_wcmpgt +; wmmx_wmac +; wmmx_wmadd +; wmmx_wmax +; wmmx_wmerge +; wmmx_wmiawxy +; wmmx_wmiaxy +; wmmx_wmin +; wmmx_wmov +; wmmx_wmul +; wmmx_wmulw +; wmmx_wldr +; wmmx_wor +; wmmx_wpack +; wmmx_wqmiaxy +; wmmx_wqmulm +; wmmx_wqmulwm +; wmmx_wror +; wmmx_wsad +; wmmx_wshufh +; wmmx_wsll +; wmmx_wsra +; wmmx_wsrl +; wmmx_wstr +; wmmx_wsub +; wmmx_wsubaddhx +; wmmx_wunpckeh +; wmmx_wunpckel +; wmmx_wunpckih +; wmmx_wunpckil +; wmmx_wxor +; +; The classification below is for NEON instructions. 
+; +; neon_bp_2cycle +; neon_bp_3cycle +; neon_bp_simple +; neon_fp_vadd_ddd_vabs_dd +; neon_fp_vadd_qqq_vabs_qq +; neon_fp_vmla_ddd_scalar +; neon_fp_vmla_ddd +; neon_fp_vmla_qqq_scalar +; neon_fp_vmla_qqq +; neon_fp_vmul_ddd +; neon_fp_vmul_qqd +; neon_fp_vrecps_vrsqrts_ddd +; neon_fp_vrecps_vrsqrts_qqq +; neon_fp_vsum +; neon_int_1 +; neon_int_2 +; neon_int_3 +; neon_int_4 +; neon_int_5 +; neon_ldm_2 +; neon_ldr +; neon_mcr_2_mcrr +; neon_mcr +; neon_mla_ddd_16_scalar_qdd_32_16_long_scalar +; neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long +; neon_mla_ddd_8_16_qdd_16_8_long_32_16_long +; neon_mla_qqq_32_qqd_32_scalar +; neon_mla_qqq_8_16 +; neon_mrc +; neon_mrrc +; neon_mul_ddd_16_scalar_32_16_long_scalar +; neon_mul_ddd_8_16_qdd_16_8_long_32_16_long +; neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar +; neon_mul_qqd_32_scalar +; neon_mul_qqq_8_16_32_ddd_32 +; neon_shift_1 +; neon_shift_2 +; neon_shift_3 +; neon_stm_2 +; neon_str +; neon_vaba_qqq +; neon_vaba +; neon_vld1_1_2_regs +; neon_vld1_3_4_regs +; neon_vld1_vld2_lane +; neon_vld2_2_regs_vld1_vld2_all_lanes +; neon_vld2_4_regs +; neon_vld3_vld4_all_lanes +; neon_vld3_vld4_lane +; neon_vld3_vld4 +; neon_vmov +; neon_vqneg_vqabs +; neon_vqshl_vrshl_vqrshl_qqq +; neon_vshl_ddd +; neon_vsma +; neon_vsra_vrsra +; neon_vst1_1_2_regs_vst2_2_regs +; neon_vst1_3_4_regs +; neon_vst1_vst2_lane +; neon_vst2_4_regs_vst3_vst4 +; neon_vst3_vst4_lane +; neon_vst3_vst4 + +(define_attr "type" + "adc_imm,\ + adc_reg,\ + adcs_imm,\ + adcs_reg,\ + adr,\ + alu_ext,\ + alu_imm,\ + alu_reg,\ + alu_shift_imm,\ + alu_shift_reg,\ + alus_ext,\ + alus_imm,\ + alus_reg,\ + alus_shift_imm,\ + alus_shift_reg,\ + bfm,\ + block,\ + branch,\ + call,\ + clz,\ + no_insn,\ + csel,\ + extend,\ + f_cvt,\ + f_cvtf2i,\ + f_cvti2f,\ + f_flag,\ + f_loadd,\ + f_loads,\ + f_mcr,\ + f_mcrr,\ + f_minmaxd,\ + f_minmaxs,\ + f_mrc,\ + f_mrrc,\ + f_rintd,\ + f_rints,\ + f_seld,\ + f_sels,\ + f_stored,\ + f_stores,\ + 
faddd,\ + fadds,\ + fcmpd,\ + fcmps,\ + fconstd,\ + fconsts,\ + fcsel,\ + fdivd,\ + fdivs,\ + ffarithd,\ + ffariths,\ + ffmad,\ + ffmas,\ + float,\ + fmacd,\ + fmacs,\ + fmov,\ + fmuld,\ + fmuls,\ + fsqrts,\ + fsqrtd,\ + load_acq,\ + load_byte,\ + load1,\ + load2,\ + load3,\ + load4,\ + logic_imm,\ + logic_reg,\ + logic_shift_imm,\ + logic_shift_reg,\ + logics_imm,\ + logics_reg,\ + logics_shift_imm,\ + logics_shift_reg,\ + mla,\ + mlas,\ + mov_imm,\ + mov_reg,\ + mov_shift,\ + mov_shift_reg,\ + mrs,\ + mul,\ + muls,\ + multiple,\ + mvn_imm,\ + mvn_reg,\ + mvn_shift,\ + mvn_shift_reg,\ + nop,\ + rbit,\ + rev,\ + sdiv,\ + shift_imm,\ + shift_reg,\ + smlad,\ + smladx,\ + smlal,\ + smlald,\ + smlals,\ + smlalxy,\ + smlawx,\ + smlawy,\ + smlaxy,\ + smlsd,\ + smlsdx,\ + smlsld,\ + smmla,\ + smmul,\ + smmulr,\ + smuad,\ + smuadx,\ + smull,\ + smulls,\ + smulwy,\ + smulxy,\ + smusd,\ + smusdx,\ + store_rel,\ + store1,\ + store2,\ + store3,\ + store4,\ + udiv,\ + umaal,\ + umlal,\ + umlals,\ + umull,\ + umulls,\ + untyped,\ + wmmx_tandc,\ + wmmx_tbcst,\ + wmmx_textrc,\ + wmmx_textrm,\ + wmmx_tinsr,\ + wmmx_tmcr,\ + wmmx_tmcrr,\ + wmmx_tmia,\ + wmmx_tmiaph,\ + wmmx_tmiaxy,\ + wmmx_tmrc,\ + wmmx_tmrrc,\ + wmmx_tmovmsk,\ + wmmx_torc,\ + wmmx_torvsc,\ + wmmx_wabs,\ + wmmx_wabsdiff,\ + wmmx_wacc,\ + wmmx_wadd,\ + wmmx_waddbhus,\ + wmmx_waddsubhx,\ + wmmx_waligni,\ + wmmx_walignr,\ + wmmx_wand,\ + wmmx_wandn,\ + wmmx_wavg2,\ + wmmx_wavg4,\ + wmmx_wcmpeq,\ + wmmx_wcmpgt,\ + wmmx_wmac,\ + wmmx_wmadd,\ + wmmx_wmax,\ + wmmx_wmerge,\ + wmmx_wmiawxy,\ + wmmx_wmiaxy,\ + wmmx_wmin,\ + wmmx_wmov,\ + wmmx_wmul,\ + wmmx_wmulw,\ + wmmx_wldr,\ + wmmx_wor,\ + wmmx_wpack,\ + wmmx_wqmiaxy,\ + wmmx_wqmulm,\ + wmmx_wqmulwm,\ + wmmx_wror,\ + wmmx_wsad,\ + wmmx_wshufh,\ + wmmx_wsll,\ + wmmx_wsra,\ + wmmx_wsrl,\ + wmmx_wstr,\ + wmmx_wsub,\ + wmmx_wsubaddhx,\ + wmmx_wunpckeh,\ + wmmx_wunpckel,\ + wmmx_wunpckih,\ + wmmx_wunpckil,\ + wmmx_wxor,\ + neon_bp_2cycle,\ + neon_bp_3cycle,\ + neon_bp_simple,\ 
+ neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq_scalar,\ + neon_fp_vmla_qqq,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq,\ + neon_fp_vsum,\ + neon_int_1,\ + neon_int_2,\ + neon_int_3,\ + neon_int_4,\ + neon_int_5,\ + neon_ldm_2,\ + neon_ldr,\ + neon_mcr_2_mcrr,\ + neon_mcr,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mla_qqq_8_16,\ + neon_mrc,\ + neon_mrrc,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_shift_1,\ + neon_shift_2,\ + neon_shift_3,\ + neon_stm_2,\ + neon_str,\ + neon_vaba_qqq,\ + neon_vaba,\ + neon_vld1_1_2_regs,\ + neon_vld1_3_4_regs,\ + neon_vld1_vld2_lane,\ + neon_vld2_2_regs_vld1_vld2_all_lanes,\ + neon_vld2_4_regs,\ + neon_vld3_vld4_all_lanes,\ + neon_vld3_vld4_lane,\ + neon_vld3_vld4,\ + neon_vmov,\ + neon_vqneg_vqabs,\ + neon_vqshl_vrshl_vqrshl_qqq,\ + neon_vshl_ddd,\ + neon_vsma,\ + neon_vsra_vrsra,\ + neon_vst1_1_2_regs_vst2_2_regs,\ + neon_vst1_3_4_regs,\ + neon_vst1_vst2_lane,\ + neon_vst2_4_regs_vst3_vst4,\ + neon_vst3_vst4_lane,\ + neon_vst3_vst4" + (const_string "untyped")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) + +; Is this an (integer side) multiply with a 64-bit result? 
+(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index ef8777a900b..9318e49d9ea 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -53,8 +53,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") - (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -101,9 +100,8 @@ " [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") - (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") - (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -146,8 +144,7 @@ gcc_unreachable (); } " - [(set_attr "type" "*,*,*,*,load2,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") - (set_attr "neon_type" "*,*,*,*,*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8) (eq_attr "alternative" "2") (const_int 12) (eq_attr "alternative" "3") (const_int 16) @@ -195,8 +192,7 @@ gcc_unreachable (); } " - [(set_attr "type" 
"*,*,*,*,load2,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") - (set_attr "neon_type" "*,*,*,*,*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8) (eq_attr "alternative" "2") (const_int 12) (eq_attr "alternative" "3") (const_int 16) @@ -264,8 +260,8 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") - (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,\ + load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") (set_attr "length" "4,4,4,4,4,4,4,4,8")] ) @@ -315,7 +311,7 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "type" "load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") (set_attr "length" "4,4,4,4,4,4,8")] ) @@ -355,8 +351,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") - (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -393,8 +388,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") - (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) @@ -434,9 +428,8 @@ } } " - [(set_attr "type" - "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") - (set_attr "neon_type" 
"neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ + load2,store2,ffarithd,multiple") (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -480,9 +473,8 @@ } } " - [(set_attr "type" - "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") - (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ + f_stored,load2,store2,ffarithd,multiple") (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -517,8 +509,7 @@ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -541,8 +532,7 @@ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] ) (define_insn "*movdfcc_vfp" @@ -565,8 +555,7 @@ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] ) (define_insn "*thumb2_movdfcc_vfp" 
@@ -589,8 +578,7 @@ ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] ) @@ -1003,7 +991,7 @@ "ftosizs%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) (define_insn "*truncsidf2_vfp" @@ -1013,7 +1001,7 @@ "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) @@ -1024,7 +1012,7 @@ "ftouizs%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) (define_insn "fixuns_truncdfsi2" @@ -1034,7 +1022,7 @@ "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) @@ -1045,7 +1033,7 @@ "fsitos%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) (define_insn "*floatsidf2_vfp" @@ -1055,7 +1043,7 @@ "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) @@ -1066,7 +1054,7 @@ "fuitos%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) (define_insn "floatunssidf2" @@ -1076,7 +1064,7 @@ "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) @@ -1089,7 
+1077,7 @@ "fsqrts%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivs")] + (set_attr "type" "fsqrts")] ) (define_insn "*sqrtdf2_vfp" @@ -1099,7 +1087,7 @@ "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivd")] + (set_attr "type" "fsqrtd")] ) @@ -1241,7 +1229,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" "vcvt.f32.\\t%0, %1, %v2" [(set_attr "predicable" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) ;; Not the ideal way of implementing this. Ideally we would be able to split @@ -1258,7 +1246,7 @@ vmov.f32\\t%0, %1\;vcvt.f64.\\t%P0, %P0, %v2 vmov.f64\\t%P0, %1, %1\;vcvt.f64.\\t%P0, %P0, %v2" [(set_attr "predicable" "no") - (set_attr "type" "f_cvt") + (set_attr "type" "f_cvti2f") (set_attr "length" "8")] ) diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md index b027fe6c3cd..4cfa69efc24 100644 --- a/gcc/config/arm/vfp11.md +++ b/gcc/config/arm/vfp11.md @@ -51,12 +51,13 @@ (define_insn_reservation "vfp_ffarith" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) "fmac") (define_insn_reservation "vfp_farith" 8 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs,ffmas")) + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,f_cvtf2i,f_cvti2f,\ + fmuls,fmacs,ffmas")) "fmac") (define_insn_reservation "vfp_fmul" 9 @@ -66,23 +67,23 @@ (define_insn_reservation "vfp_fdivs" 19 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ds*15") (define_insn_reservation "vfp_fdivd" 33 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "fmac+ds*29") ;; Moves to/from arm regs also use the load/store pipeline. 
(define_insn_reservation "vfp_fload" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "f_loads,f_loadd,r_2_f")) + (eq_attr "type" "f_loads,f_loadd,f_mcr,f_mcrr")) "vfp_ls") (define_insn_reservation "vfp_fstore" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "f_stores,f_stored,f_2_r")) + (eq_attr "type" "f_stores,f_stored,f_mrc,f_mrrc")) "vfp_ls") (define_insn_reservation "vfp_to_cpsr" 4 diff --git a/gcc/config/avr/avr-stdint.h b/gcc/config/avr/avr-stdint.h index 8e7278f389a..4137b0689a5 100644 --- a/gcc/config/avr/avr-stdint.h +++ b/gcc/config/avr/avr-stdint.h @@ -34,11 +34,11 @@ along with GCC; see the file COPYING3. If not see #define SIG_ATOMIC_TYPE "char" #define INT8_TYPE "signed char" -#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "short int" : "long int") +#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "int" : "long int") #define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int") #define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0) #define UINT8_TYPE "unsigned char" -#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "short unsigned int" : "long unsigned int") +#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : "long unsigned int") #define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int") #define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0) diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 14a3eee7c72..f6d88856bec 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -7030,7 +7030,9 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) RTX_CODE shift = UNKNOWN; bool sign_in_carry = false; bool msb_in_carry = false; + bool lsb_in_tmp_reg = false; bool lsb_in_carry = false; + bool frac_rounded = false; const char *code_ashift = "lsl %0"; @@ -7038,6 +7040,7 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) /* Shorthand used below. 
*/ \ ((sign_bytes \ && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \ + || (offset && IN_RANGE (RR, dest.regno, dest.regno_msb)) \ || (reg_unused_after (insn, all_regs_rtx[RR]) \ && !IN_RANGE (RR, dest.regno, dest.regno_msb))) @@ -7112,13 +7115,119 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) else gcc_unreachable(); + /* If we need to round the fraction part, we might need to save/round it + before clobbering any of it in Step 1. Also, we might to want to do + the rounding now to make use of LD_REGS. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + bool overlap + = (src.regno <= + (offset ? dest.regno_msb - sign_bytes : dest.regno + zero_bytes - 1) + && dest.regno - offset -1 >= dest.regno); + unsigned s0 = dest.regno - offset -1; + bool use_src = true; + unsigned sn; + unsigned copied_msb = src.regno_msb; + bool have_carry = false; + + if (src.ibyte > dest.ibyte) + copied_msb -= src.ibyte - dest.ibyte; + + for (sn = s0; sn <= copied_msb; sn++) + if (!IN_RANGE (sn, dest.regno, dest.regno_msb) + && !reg_unused_after (insn, all_regs_rtx[sn])) + use_src = false; + if (use_src && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0)) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + sn = src.regno; + if (sn < s0) + { + if (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], sn)) + avr_asm_len ("cpi %0,1", &all_regs_rtx[sn], plen, 1); + else + avr_asm_len ("sec" CR_TAB "cpc %0,__zero_reg__", + &all_regs_rtx[sn], plen, 2); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + avr_asm_len (have_carry ? 
"sbci %0,128" : "subi %0,129", + &all_regs_rtx[s0], plen, 1); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("sbci %0,255", &all_regs_rtx[sn], plen, 1); + avr_asm_len ("\n0:", NULL, plen, 0); + frac_rounded = true; + } + else if (use_src && overlap) + { + avr_asm_len ("clr __tmp_reg__" CR_TAB + "sbrc %1,0" CR_TAB "dec __tmp_reg__", xop, plen, 1); + sn = src.regno; + if (sn < s0) + { + avr_asm_len ("add %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("adc %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __tmp_reg__,7" CR_TAB + "adc %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("lsr __tmp_reg" CR_TAB "add %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 2); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + frac_rounded = true; + } + else if (overlap) + { + bool use_src + = (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0) + && (IN_RANGE (s0, dest.regno, dest.regno_msb) + || reg_unused_after (insn, all_regs_rtx[s0]))); + xop[2] = all_regs_rtx[s0]; + unsigned sn = src.regno; + if (!use_src || sn == s0) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + /* We need to consider to-be-discarded bits + if the value is negative. */ + if (sn < s0) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + /* Test to-be-discarded bytes for any nozero bits. + ??? Could use OR or SBIW to test two registers at once. */ + if (sn < s0) + avr_asm_len ("cp %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + /* Set bit 0 in __tmp_reg__ if any of the lower bits was set. 
*/ + if (use_src) + avr_asm_len ("breq 0f" CR_TAB + "ori %2,1" "\n0:\t" "mov __tmp_reg__,%2", + xop, plen, 3); + else + avr_asm_len ("breq 0f" CR_TAB + "set" CR_TAB "bld __tmp_reg__,0\n0:", + xop, plen, 3); + } + lsb_in_tmp_reg = true; + } + } + /* Step 1: Clear bytes at the low end and copy payload bits from source ====== to destination. */ int step = offset < 0 ? 1 : -1; unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb; - // We leared at least that number of registers. + // We cleared at least that number of registers. int clr_n = 0; for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step) @@ -7208,6 +7317,7 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) unsigned src_lsb = dest.regno - offset -1; if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry + && !lsb_in_tmp_reg && (d0 == src_lsb || d0 + stepw == src_lsb)) { /* We are going to override the new LSB; store it into carry. */ @@ -7229,7 +7339,91 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) { unsigned s0 = dest.regno - offset -1; - if (MAY_CLOBBER (s0)) + /* n1169 4.1.4 says: + "Conversions from a fixed-point to an integer type round toward zero." + Hence, converting a fract type to integer only gives a non-zero result + for -1. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_FRACT_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + gcc_assert (s0 == src.regno_msb); + /* Check if the input is -1. We do that by checking if negating + the input causes an integer overflow. */ + unsigned sn = src.regno; + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + while (sn <= s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + + /* Overflow goes with set carry. Clear carry otherwise. */ + avr_asm_len ("brvs 0f" CR_TAB "clc\n0:", NULL, plen, 2); + } + /* Likewise, when converting from accumulator types to integer, we + need to round up negative values. 
*/ + else if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC + && !frac_rounded) + { + bool have_carry = false; + + xop[2] = all_regs_rtx[s0]; + if (!lsb_in_tmp_reg && !MAY_CLOBBER (s0)) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + if (!lsb_in_tmp_reg) + { + unsigned sn = src.regno; + if (sn < s0) + { + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn], + plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn], plen, 1); + lsb_in_tmp_reg = !MAY_CLOBBER (s0); + } + /* Add in C and the rounding value 127. */ + /* If the destination msb is a sign byte, and in LD_REGS, + grab it as a temporary. */ + if (sign_bytes + && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], + dest.regno_msb)) + { + xop[3] = all_regs_rtx[dest.regno_msb]; + avr_asm_len ("ldi %3,127", xop, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg ? "adc __tmp_reg__,%3" + : have_carry ? "adc %2,%3" + : lsb_in_tmp_reg ? "add __tmp_reg__,%3" + : "add %2,%3"), + xop, plen, 1); + } + else + { + /* Fall back to use __zero_reg__ as a temporary. */ + avr_asm_len ("dec __zero_reg__", NULL, plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __zero_reg__,7", NULL, plen, 2); + else + avr_asm_len ("lsr __zero_reg__", NULL, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg + ? "adc __tmp_reg__,__zero_reg__" + : have_carry ? "adc %2,__zero_reg__" + : lsb_in_tmp_reg ? "add __tmp_reg__,__zero_reg__" + : "add %2,__zero_reg__"), + xop, plen, 1); + avr_asm_len ("eor __zero_reg__,__zero_reg__", NULL, plen, 1); + } + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[d0], plen, 1); + avr_asm_len (lsb_in_tmp_reg + ? 
"\n0:\t" "lsl __tmp_reg__" : "\n0:\t" "lsl %2", + xop, plen, 1); + } + else if (MAY_CLOBBER (s0)) avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); else avr_asm_len ("mov __tmp_reg__,%0" CR_TAB diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index 4b990775b7f..9b0f782d385 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -78,3 +78,7 @@ The device has no SPH special function register. This option will be overridden Waddr-space-convert Warning C Report Var(avr_warn_addr_space_convert) Init(0) Warn if the address space of an address is changed. + +mfract-convert-truncate +Target Report Mask(FRACT_CONV_TRUNC) +Allow to use truncation instead of rounding towards 0 for fractional int types diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 7fab975a673..18457f8f7b6 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -46,6 +46,7 @@ #include "cgraph.h" #include "langhooks.h" #include "bfin-protos.h" +#include "tm_p.h" #include "tm-preds.h" #include "tm-constrs.h" #include "gt-bfin.h" diff --git a/gcc/config/bfin/uclinux.h b/gcc/config/bfin/uclinux.h index ca0f4ee8a35..63cba99cec6 100644 --- a/gcc/config/bfin/uclinux.h +++ b/gcc/config/bfin/uclinux.h @@ -44,3 +44,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define TARGET_SUPPORTS_SYNC_CALLS 1 #define SUBTARGET_FDPIC_NOT_SUPPORTED + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/c6x/uclinux-elf.h b/gcc/config/c6x/uclinux-elf.h index 5d61f4dc4ec..fa0937ed268 100644 --- a/gcc/config/c6x/uclinux-elf.h +++ b/gcc/config/c6x/uclinux-elf.h @@ -62,3 +62,5 @@ : "0" (_beg), "b" (_end), "b" (_scno)); \ } +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 70b7fb00959..36d16b9e57a 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -123,3 +123,4 @@ extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); extern void darwin_patch_builtins (void); extern void darwin_rename_builtins (void); +extern bool darwin_libc_has_function (enum function_class fn_class); diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index e07fa4c8324..6c5d9c00623 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -3357,6 +3357,19 @@ darwin_rename_builtins (void) } } +bool +darwin_libc_has_function (enum function_class fn_class) +{ + if (fn_class == function_sincos) + return false; + if (fn_class == function_c99_math_complex + || fn_class == function_c99_misc) + return (TARGET_64BIT + || strverscmp (darwin_macosx_version_min, "10.3") >= 0); + + return true; +} + static hashval_t cfstring_hash (const void *ptr) { diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 82a42c8598b..596c9ef11f0 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -178,10 +178,11 @@ extern GTY(()) int darwin_ms_struct; %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \ %{fopenmp|ftree-parallelize-loops=*: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \ - %{fsanitize=address: -lasan } \ %{fgnu-tm: \ 
%{static|static-libgcc|static-libstdc++|static-libgfortran: libitm.a%s; : -litm } } \ %{!nostdlib:%{!nodefaultlibs:\ + %{%:sanitize(address): -lasan } \ + %{%:sanitize(undefined): -lubsan } \ %(link_ssp) %(link_gcc_c_sequence)\ }}\ %{!nostdlib:%{!nostartfiles:%E}} %{T*} %{F*} }}}}}}}" @@ -874,10 +875,6 @@ void add_framework_path (char *); #define TARGET_POSIX_IO -/* All new versions of Darwin have C99 functions. */ - -#define TARGET_C99_FUNCTIONS 1 - #define WINT_TYPE "int" /* Every program on darwin links against libSystem which contains the pthread diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h index 438302345cf..9606fe0f85c 100644 --- a/gcc/config/elfos.h +++ b/gcc/config/elfos.h @@ -433,3 +433,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ default_elf_asm_output_external (FILE, DECL, NAME) #endif + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/epiphany/epiphany.c b/gcc/config/epiphany/epiphany.c index 1dcdc4b3808..fd4c01c49a4 100644 --- a/gcc/config/epiphany/epiphany.c +++ b/gcc/config/epiphany/epiphany.c @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #include "ggc.h" #include "tm-constrs.h" #include "tree-pass.h" /* for current_pass */ +#include "context.h" +#include "pass_manager.h" /* Which cpu we're compiling for. */ int epiphany_cpu_type; @@ -59,6 +61,9 @@ char epiphany_punct_chars[256]; /* The rounding mode that we generally use for floating point. */ int epiphany_normal_fp_rounding; +/* The pass instance, for use in epiphany_optimize_mode_switching. 
*/ +static opt_pass *pass_mode_switch_use; + static void epiphany_init_reg_tables (void); static int get_epiphany_condition_code (rtx); static tree epiphany_handle_interrupt_attribute (tree *, tree, tree, int, bool *); @@ -165,20 +170,26 @@ epiphany_init (void) pass because of the side offect of epiphany_mode_needed on MACHINE_FUNCTION(cfun)->unknown_mode_uses. But it must run before pass_resolve_sw_modes. */ - static struct register_pass_info insert_use_info - = { &pass_mode_switch_use.pass, "mode_sw", + pass_mode_switch_use = make_pass_mode_switch_use (g); + struct register_pass_info insert_use_info + = { pass_mode_switch_use, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw2_info - = { &pass_mode_switching.pass, "mode_sw", + opt_pass *mode_sw2 + = g->get_passes()->get_pass_mode_switching ()->clone (); + struct register_pass_info mode_sw2_info + = { mode_sw2, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw3_info - = { &pass_resolve_sw_modes.pass, "mode_sw", + opt_pass *mode_sw3 = make_pass_resolve_sw_modes (g); + struct register_pass_info mode_sw3_info + = { mode_sw3, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw4_info - = { &pass_split_all_insns.pass, "mode_sw", + opt_pass *mode_sw4 + = g->get_passes()->get_pass_split_all_insns ()->clone (); + struct register_pass_info mode_sw4_info + = { mode_sw4, "mode_sw", 1, PASS_POS_INSERT_AFTER }; static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; @@ -205,8 +216,10 @@ epiphany_init (void) (see http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html,) we need a second peephole2 pass to get reasonable code. 
*/ { - static struct register_pass_info peep2_2_info - = { &pass_peephole2.pass, "peephole2", + opt_pass *extra_peephole2 + = g->get_passes ()->get_pass_peephole2 ()->clone (); + struct register_pass_info peep2_2_info + = { extra_peephole2, "peephole2", 1, PASS_POS_INSERT_AFTER }; @@ -2256,7 +2269,7 @@ epiphany_optimize_mode_switching (int entity) return (MACHINE_FUNCTION (cfun)->sw_entities_processed & (1 << EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN)) != 0; case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: - return optimize == 0 || current_pass == &pass_mode_switch_use.pass; + return optimize == 0 || current_pass == pass_mode_switch_use; } gcc_unreachable (); } diff --git a/gcc/config/epiphany/epiphany.h b/gcc/config/epiphany/epiphany.h index bd84b5c793f..f16ab85dde9 100644 --- a/gcc/config/epiphany/epiphany.h +++ b/gcc/config/epiphany/epiphany.h @@ -929,8 +929,8 @@ enum }; extern int epiphany_normal_fp_rounding; -extern struct rtl_opt_pass pass_mode_switch_use; -extern struct rtl_opt_pass pass_resolve_sw_modes; +extern rtl_opt_pass *make_pass_mode_switch_use (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_resolve_sw_modes (gcc::context *ctxt); /* This will need to be adjusted when FP_CONTRACT_ON is properly implemented. */ diff --git a/gcc/config/epiphany/epiphany.md b/gcc/config/epiphany/epiphany.md index 1e2d2ab02ed..e8756ad8e23 100644 --- a/gcc/config/epiphany/epiphany.md +++ b/gcc/config/epiphany/epiphany.md @@ -587,7 +587,7 @@ ; After mode-switching, floating point operations, fp_sfuncs and calls ; must exhibit the use of the control register, lest the setting of the ; control register could be deleted or moved. OTOH a use of a hard register -; greatly coundounds optimizers like the rtl loop optimizers or combine. +; greatly counfounds optimizers like the rtl loop optimizers or combine. 
; Therefore, we put an extra pass immediately after the mode switching pass ; that inserts the USEs of the control registers, and sets a flag in struct ; machine_function that float_operation can henceforth only match with that @@ -1058,6 +1058,28 @@ (clobber (reg:CC CC_REGNUM))])] ) +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI + (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "prev_active_insn (peep2_next_insn (0)) + && get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0))) + && find_reg_note (insn, REG_EQUAL, NULL_RTX) != NULL_RTX + && GET_CODE (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0)) == MULT + && CONST_INT_P (XEXP (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0), + 1))" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC CC_REGNUM))])] +{ + operands[4] + = XEXP (XEXP (find_reg_note (curr_insn, REG_EQUAL, NULL_RTX), 0), 1); +}) + (define_expand "mulsi3" [(parallel [(set (match_operand:SI 0 "gpr_operand" "") @@ -2530,6 +2552,106 @@ [(set_attr "length" "8") (set_attr "type" "v2fp")]) +(define_expand "ashlv2si3" + [(parallel + [(set (match_operand:V2SI 0 "gpr_operand" "") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "") + (match_operand:SI 2 "general_operand"))) + (use (match_dup 3)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" +{ + if (const_int_operand (operands[2], VOIDmode)) + operands[3] + = copy_to_mode_reg (SImode, GEN_INT (1 << INTVAL (operands[2]))); + else + { + int o, i; + rtx xop[2], last_out = pc_rtx; + + for (o = 0; o <= UNITS_PER_WORD; o += UNITS_PER_WORD) + { + for (i = 0; i < 2; i++) + { + xop[i] + = (i == 2 ? 
operands[2] + : simplify_gen_subreg (SImode, operands[i], V2SImode, o)); + gcc_assert (!reg_overlap_mentioned_p (last_out, xop[i]) + /* ??? reg_overlap_mentioned_p doesn't understand + about multi-word SUBREGs. */ + || (GET_CODE (last_out) == SUBREG + && GET_CODE (xop[i]) == SUBREG + && SUBREG_REG (last_out) == SUBREG_REG (xop[i]) + && ((SUBREG_BYTE (last_out) & -UNITS_PER_WORD) + != (SUBREG_BYTE (xop[i]) & -UNITS_PER_WORD)))); + } + emit_insn (gen_ashlsi3 (xop[0], xop[1], operands[2])); + last_out = xop[0]; + } + DONE; + } +}) + +(define_insn_and_split "*ashlv2si3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SI 0 "gpr_operand" "=&r,*1*2") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "r,r") + (match_operand 2 "const_int_operand" "n,n"))) + (use (match_operand:SI 4 "gpr_operand" "r,r")) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed" + [(parallel + [(set (match_dup 5) (mult:SI (match_dup 6) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)]) + (parallel + [(set (match_dup 7) (mult:SI (match_dup 8) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)])] +{ + operands[5] = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + operands[6] = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + operands[7] = simplify_gen_subreg (SImode, operands[0], + V2SImode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (SImode, operands[1], + V2SImode, UNITS_PER_WORD); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[4])); + operands[9] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); + rtx insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[5], + gen_rtx_MULT (SImode, operands[6], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, 
CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[6], operands[2])); + insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[7], + gen_rtx_MULT (SImode, operands[8], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[7], operands[2])); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "fp_int")]) + (define_expand "mul3" [(parallel [(set (match_operand:DWV2MODE 0 "gpr_operand" "") diff --git a/gcc/config/epiphany/mode-switch-use.c b/gcc/config/epiphany/mode-switch-use.c index 66529636801..8e278583215 100644 --- a/gcc/config/epiphany/mode-switch-use.c +++ b/gcc/config/epiphany/mode-switch-use.c @@ -71,22 +71,39 @@ insert_uses (void) return 0; } -struct rtl_opt_pass pass_mode_switch_use = +namespace { + +const pass_data pass_data_mode_switch_use = { - { - RTL_PASS, - "mode_switch_use", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - insert_uses, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "mode_switch_use", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class pass_mode_switch_use : public rtl_opt_pass +{ +public: + pass_mode_switch_use(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mode_switch_use, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return insert_uses (); } + +}; 
// class pass_mode_switch_use + +} // anon namespace + +rtl_opt_pass * +make_pass_mode_switch_use (gcc::context *ctxt) +{ + return new pass_mode_switch_use (ctxt); +} diff --git a/gcc/config/epiphany/predicates.md b/gcc/config/epiphany/predicates.md index af60d7c73f7..b77867cc851 100644 --- a/gcc/config/epiphany/predicates.md +++ b/gcc/config/epiphany/predicates.md @@ -292,7 +292,11 @@ bool inserted = MACHINE_FUNCTION (cfun)->control_use_inserted; int i; - if (count == 2) + if (count == 2 + /* Vector ashift has an extra use for the constant factor required to + implement the shift as multiply. */ + || (count == 3 && GET_CODE (XVECEXP (op, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (op, 0, 0), 1)) == ASHIFT)) return !inserted; /* combine / recog will pass any old garbage here before checking the @@ -302,7 +306,7 @@ i = 1; if (count > 4) - for (i = 4; i < count; i++) + for (i = 2; i < count; i++) { rtx x = XVECEXP (op, 0, i); diff --git a/gcc/config/epiphany/resolve-sw-modes.c b/gcc/config/epiphany/resolve-sw-modes.c index 729a0ffc9b7..b43b4d953cd 100644 --- a/gcc/config/epiphany/resolve-sw-modes.c +++ b/gcc/config/epiphany/resolve-sw-modes.c @@ -161,23 +161,40 @@ resolve_sw_modes (void) return 0; } -struct rtl_opt_pass pass_resolve_sw_modes = +namespace { + +const pass_data pass_data_resolve_sw_modes = { - { - RTL_PASS, - "resolve_sw_modes", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_resolve_sw_modes, /* gate */ - resolve_sw_modes, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MODE_SWITCH, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - 0 /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "resolve_sw_modes", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MODE_SWITCH, /* tv_id */ + 0, /* properties_required */ + 0, /* 
properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ }; + +class pass_resolve_sw_modes : public rtl_opt_pass +{ +public: + pass_resolve_sw_modes(gcc::context *ctxt) + : rtl_opt_pass(pass_data_resolve_sw_modes, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_resolve_sw_modes (); } + unsigned int execute () { return resolve_sw_modes (); } + +}; // class pass_resolve_sw_modes + +} // anon namespace + +rtl_opt_pass * +make_pass_resolve_sw_modes (gcc::context *ctxt) +{ + return new pass_resolve_sw_modes (ctxt); +} diff --git a/gcc/config/freebsd.h b/gcc/config/freebsd.h index 87c0acf1d89..da66253e660 100644 --- a/gcc/config/freebsd.h +++ b/gcc/config/freebsd.h @@ -52,6 +52,9 @@ along with GCC; see the file COPYING3. If not see #define LINK_SSP_SPEC "%{fstack-protector|fstack-protector-all:-lssp_nonshared}" #endif +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Use --as-needed -lgcc_s for eh support. */ #ifdef HAVE_LD_AS_NEEDED #define USE_LD_AS_NEEDED 1 diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h index bcdf0e6cc5a..6f6915842b9 100644 --- a/gcc/config/gnu-user.h +++ b/gcc/config/gnu-user.h @@ -39,15 +39,21 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see the GNU userspace magical crtbegin.o file (see crtstuff.c) which provides part of the support for getting C++ file-scope static object constructed before entering `main'. 
*/ - + #if defined HAVE_LD_PIE #define GNU_USER_TARGET_STARTFILE_SPEC \ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \ - crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s}" #else #define GNU_USER_TARGET_STARTFILE_SPEC \ "%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \ - crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s}" #endif #undef STARTFILE_SPEC #define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC @@ -59,7 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see GNU userspace "finalizer" file, `crtn.o'. */ #define GNU_USER_TARGET_ENDFILE_SPEC \ - "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" #undef ENDFILE_SPEC #define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC @@ -73,10 +82,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef CPLUSPLUS_CPP_SPEC #define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" -#define GNU_USER_TARGET_LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ +#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ + "%{shared:-lc} \ %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}}" + +#define GNU_USER_TARGET_LIB_SPEC \ + "%{pthread:-lpthread} " \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + #undef LIB_SPEC #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC @@ -95,8 +108,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define TARGET_POSIX_IO -#define TARGET_C99_FUNCTIONS 1 -#define TARGET_HAS_SINCOS 1 +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function /* Link -lasan early on the command line. For -static-libasan, don't link it for -shared link, the executable should be compiled with -static-libasan diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 6cb53b8aafb..28e626ff3be 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -19,7 +19,7 @@ ;;; Unused letters: ;;; B H T -;;; h jk v +;;; h jk ;; Integer register constraints. ;; It is not necessary to define 'r' here. @@ -101,11 +101,11 @@ "First SSE register (@code{%xmm0}).") (define_register_constraint "Yi" - "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? SSE_REGS : NO_REGS" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? ALL_SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves to vector registers are enabled.") (define_register_constraint "Yj" - "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? SSE_REGS : NO_REGS" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? ALL_SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.") (define_register_constraint "Ym" @@ -138,6 +138,9 @@ "(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS" "@internal Any x87 register when 80387 FP arithmetic is enabled.") +(define_register_constraint "v" "TARGET_SSE ? ALL_SSE_REGS : NO_REGS" + "Any EVEX encodable SSE register (@code{%xmm0-%xmm31}).") + (define_constraint "z" "@internal Constant call address operand." 
(match_operand 0 "constant_call_address_operand")) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index c1e1eba12f1..aa91e1ab8d8 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -71,8 +71,12 @@ #define bit_AVX2 (1 << 5) #define bit_BMI2 (1 << 8) #define bit_RTM (1 << 11) +#define bit_AVX512F (1 << 16) #define bit_RDSEED (1 << 18) #define bit_ADX (1 << 19) +#define bit_AVX512PF (1 << 26) +#define bit_AVX512ER (1 << 27) +#define bit_AVX512CD (1 << 28) /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */ #define bit_XSAVEOPT (1 << 0) diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h index 27187641aad..9cb66d646be 100644 --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -171,6 +171,9 @@ along with GCC; see the file COPYING3. If not see #undef MATH_LIBRARY #define MATH_LIBRARY "" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + #define SIZE_TYPE (TARGET_64BIT ? "long long unsigned int" : "unsigned int") #define PTRDIFF_TYPE (TARGET_64BIT ? "long long int" : "int") diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h index 05f9dfda71d..cc420d0a6d6 100644 --- a/gcc/config/i386/djgpp.h +++ b/gcc/config/i386/djgpp.h @@ -117,6 +117,17 @@ along with GCC; see the file COPYING3. If not see #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) +/* Write the extra assembler code needed to declare a function properly. */ + +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \ + } \ + while (0) +#endif + /* This is how to tell assembler that a symbol is weak */ #undef ASM_WEAKEN_LABEL #define ASM_WEAKEN_LABEL(FILE,NAME) \ @@ -127,6 +138,9 @@ along with GCC; see the file COPYING3. If not see in libgcc, nor call one in main(). 
*/ #define HAS_INIT_SECTION +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Definitions for types and sizes. Wide characters are 16-bits long so Win32 compiler add-ons will be wide character compatible. */ #undef WCHAR_TYPE_SIZE diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index c8b71c8edf9..4cb9907b5ed 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -390,6 +390,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; + unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; + unsigned int has_avx512f = 0; bool arch; @@ -461,6 +463,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_fsgsbase = ebx & bit_FSGSBASE; has_rdseed = ebx & bit_RDSEED; has_adx = ebx & bit_ADX; + has_avx512f = ebx & bit_AVX512F; + has_avx512er = ebx & bit_AVX512ER; + has_avx512pf = ebx & bit_AVX512PF; + has_avx512cd = ebx & bit_AVX512CD; } if (max_level >= 13) @@ -638,13 +644,18 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Atom. */ cpu = "atom"; break; + case 0x0f: + /* Merom. */ + case 0x17: + case 0x1d: + /* Penryn. */ + cpu = "core2"; + break; case 0x1a: case 0x1e: case 0x1f: case 0x2e: /* Nehalem. */ - cpu = "corei7"; - break; case 0x25: case 0x2c: case 0x2f: @@ -656,20 +667,25 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Sandy Bridge. */ cpu = "corei7-avx"; break; - case 0x17: - case 0x1d: - /* Penryn. */ - cpu = "core2"; + case 0x3a: + case 0x3e: + /* Ivy Bridge. */ + cpu = "core-avx-i"; break; - case 0x0f: - /* Merom. */ - cpu = "core2"; + case 0x3c: + case 0x45: + case 0x46: + /* Haswell. */ + cpu = "core-avx2"; break; default: if (arch) { /* This is unknown family 0x6 CPU. 
*/ - if (has_avx) + if (has_avx2) + /* Assume Haswell. */ + cpu = "core-avx2"; + else if (has_avx) /* Assume Sandy Bridge. */ cpu = "corei7-avx"; else if (has_sse4_2) @@ -828,13 +844,18 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; + const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; + const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; + const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; + const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, - fxsr, xsave, xsaveopt, NULL); + fxsr, xsave, xsaveopt, avx512f, avx512er, + avx512cd, avx512pf, NULL); } done: diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 31dd28a94cb..14349be0af5 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -306,6 +306,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVX__"); if (isa_flag & OPTION_MASK_ISA_AVX2) def_or_undef (parse_in, "__AVX2__"); + if (isa_flag & OPTION_MASK_ISA_AVX512F) + def_or_undef (parse_in, "__AVX512F__"); + if (isa_flag & OPTION_MASK_ISA_AVX512ER) + def_or_undef (parse_in, "__AVX512ER__"); + if (isa_flag & OPTION_MASK_ISA_AVX512CD) + def_or_undef (parse_in, "__AVX512CD__"); + if (isa_flag & OPTION_MASK_ISA_AVX512PF) + def_or_undef (parse_in, "__AVX512PF__"); if (isa_flag & OPTION_MASK_ISA_FMA) def_or_undef (parse_in, "__FMA__"); if (isa_flag & OPTION_MASK_ISA_RTM) diff --git a/gcc/config/i386/i386-interix.h b/gcc/config/i386/i386-interix.h index c74e008b0d9..b99f4d9b908 
100644 --- a/gcc/config/i386/i386-interix.h +++ b/gcc/config/i386/i386-interix.h @@ -143,6 +143,9 @@ do { \ #undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* The following are needed for us to be able to use winnt.c, but are not otherwise meaningful to Interix. (The functions that use these are never called because we don't do DLLs.) */ diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 393cd4a23be..e0b8fc826ab 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -76,16 +76,19 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */ +VECTOR_MODES (INT, 128); /* V128QI V64HI V32SI V16DI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */ +VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF */ VECTOR_MODE (INT, TI, 1); /* V1TI */ VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, SI, 1); /* V1SI */ VECTOR_MODE (INT, QI, 2); /* V2QI */ INT_MODE (OI, 32); +INT_MODE (XI, 64); /* The symbol Pmode stands for one of the above machine modes (usually SImode). The tm.h file specifies which one. It is not a distinct mode. */ diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index bea1c257830..5fcbd6b5776 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -28,15 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Algorithm to expand string function with. 
*/ enum stringop_alg { - no_stringop, - libcall, - rep_prefix_1_byte, - rep_prefix_4_byte, - rep_prefix_8_byte, - loop_1_byte, - loop, - unrolled_loop, - vector_loop +#undef DEF_ENUM +#define DEF_ENUM + +#undef DEF_ALG +#define DEF_ALG(alg, name) alg, + +#include "stringop.def" +last_alg + +#undef DEF_ENUM +#undef DEF_ALG }; /* Available call abi. */ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 09667893910..3ab2f3a2ac8 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -173,6 +173,8 @@ extern int ix86_mode_after (int, int, rtx); extern int ix86_mode_entry (int); extern int ix86_mode_exit (int); +extern bool ix86_libc_has_function (enum function_class fn_class); + #ifdef HARD_CONST extern void ix86_emit_mode_set (int, int, HARD_REG_SET); #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0c546af00b2..e2fa71a369a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see #include "dumpfile.h" #include "tree-pass.h" #include "tree-flow.h" +#include "context.h" +#include "pass_manager.h" static rtx legitimize_dllimport_symbol (rtx, bool); static rtx legitimize_pe_coff_extern_decl (rtx, bool); @@ -85,6 +87,13 @@ static rtx legitimize_pe_coff_symbol (rtx, bool); #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}} +static stringop_algs ix86_size_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; +static stringop_algs ix86_size_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; + const struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of an add instruction */ @@ -138,10 +147,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FABS instruction. 
*/ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}, + ix86_size_memcpy, + ix86_size_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -156,6 +163,13 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ }; /* Processor costs (relative to an add) */ +static stringop_algs i386_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i386_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -209,10 +223,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (22), /* cost of FABS instruction. */ COSTS_N_INSNS (24), /* cost of FCHS instruction. */ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + i386_memcpy, + i386_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -226,6 +238,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */ 1, /* cond_not_taken_branch_cost. 
*/ }; +static stringop_algs i486_memcpy[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i486_memset[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -281,10 +300,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + i486_memcpy, + i486_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -298,6 +315,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */ 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs pentium_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium_memset[2] = { + {libcall, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -351,10 +375,8 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + pentium_memcpy, + pentium_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -368,6 +390,21 @@ struct processor_costs pentium_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes + (we ensure the alignment). For small blocks inline loop is still a + noticeable win, for bigger blocks either rep movsl or rep movsb is + way to go. Rep movsb has apparently more expensive startup time in CPU, + but after 4K the difference is down in the noise. */ +static stringop_algs pentiumpro_memcpy[2] = { + {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentiumpro_memset[2] = { + {rep_prefix_4_byte, {{1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -421,19 +458,8 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes - (we ensure the alignment). For small blocks inline loop is still a - noticeable win, for bigger blocks either rep movsl or rep movsb is - way to go. Rep movsb has apparently more expensive startup time in CPU, - but after 4K the difference is down in the noise. */ - {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, - {8192, rep_prefix_4_byte, false}, - {-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{1024, unrolled_loop, false}, - {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + pentiumpro_memcpy, + pentiumpro_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -447,6 +473,12 @@ struct processor_costs pentiumpro_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static stringop_algs geode_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs geode_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -501,10 +533,8 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + geode_memcpy, + geode_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -518,6 +548,12 @@ struct processor_costs geode_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs k6_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs k6_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs k6_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -574,10 +610,8 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + k6_memcpy, + k6_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -591,6 +625,15 @@ struct processor_costs k6_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +/* For some reason, Athlon deals better with REP prefix (relative to loops) + compared to K8. Alignment becomes important after 8 bytes for memcpy and + 128 bytes for memset. */ +static stringop_algs athlon_memcpy[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs athlon_memset[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs athlon_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -644,13 +687,8 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* For some reason, Athlon deals better with REP prefix (relative to loops) - compared to K8. Alignment becomes important after 8 bytes for memcpy and - 128 bytes for memset. */ - {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + athlon_memcpy, + athlon_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -664,6 +702,19 @@ struct processor_costs athlon_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* K8 has optimized REP instruction for medium sized blocks, but for very + small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs k8_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs k8_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs k8_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -722,17 +773,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* K8 has optimized REP instruction for medium sized blocks, but for very - small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + + k8_memcpy, + k8_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -746,6 +789,19 @@ struct processor_costs k8_cost = { 2, /* cond_not_taken_branch_cost. */ }; +/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs amdfam10_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs amdfam10_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; struct processor_costs amdfam10_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ @@ -812,17 +868,8 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + amdfam10_memcpy, + amdfam10_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -836,7 +883,21 @@ struct processor_costs amdfam10_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs bdver1_cost = { +/* BDVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs bdver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver1_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -902,17 +963,8 @@ struct processor_costs bdver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER1 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver1_memcpy, + bdver1_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -926,7 +978,22 @@ struct processor_costs bdver1_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs bdver2_cost = { +/* BDVER2 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. 
*/ + +static stringop_algs bdver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver2_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -992,17 +1059,8 @@ struct processor_costs bdver2_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER2 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver2_memcpy, + bdver2_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -1016,6 +1074,20 @@ struct processor_costs bdver2_cost = { 1, /* cond_not_taken_branch_cost. */ }; + + /* BDVER3 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs bdver3_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver3_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; struct processor_costs bdver3_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ @@ -1074,17 +1146,8 @@ struct processor_costs bdver3_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER3 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver3_memcpy, + bdver3_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -1098,7 +1161,20 @@ struct processor_costs bdver3_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs btver1_cost = { + /* BTVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs btver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver1_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1159,17 +1235,8 @@ struct processor_costs btver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* BTVER1 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + btver1_memcpy, + btver1_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -1183,7 +1250,17 @@ struct processor_costs btver1_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; -struct processor_costs btver2_cost = { +static stringop_algs btver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver2_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1243,15 +1320,8 @@ struct processor_costs btver2_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + btver2_memcpy, + btver2_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -1265,6 +1335,14 @@ struct processor_costs btver2_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static stringop_algs pentium4_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium4_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs pentium4_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1318,11 +1396,8 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ - {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{6, loop_1_byte, false}, {48, loop, false}, - {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + pentium4_memcpy, + pentium4_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1336,6 +1411,17 @@ struct processor_costs pentium4_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs nocona_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, + {100000, unrolled_loop, false}, {-1, libcall, false}}}}; + +static stringop_algs nocona_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {64, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; + static const struct processor_costs nocona_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1389,13 +1475,8 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (44), /* cost of FSQRT instruction. 
*/ - {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, - {100000, unrolled_loop, false}, {-1, libcall, false}}}}, - {{libcall, {{6, loop_1_byte, false}, {48, loop, false}, - {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {64, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + nocona_memcpy, + nocona_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1409,6 +1490,15 @@ struct processor_costs nocona_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs atom_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs atom_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs atom_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1462,13 +1552,8 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + atom_memcpy, + atom_memset, 1, /* scalar_stmt_cost. 
*/ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1482,6 +1567,15 @@ struct processor_costs atom_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs slm_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs slm_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs slm_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1535,13 +1629,8 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + slm_memcpy, + slm_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1556,6 +1645,15 @@ struct processor_costs slm_cost = { }; /* Generic64 should produce code tuned for Nocona and K8. 
*/ + +static stringop_algs generic64_memcpy[2] = { + DUMMY_STRINGOP_ALGS, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs generic64_memset[2] = { + DUMMY_STRINGOP_ALGS, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; static const struct processor_costs generic64_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1615,12 +1713,8 @@ struct processor_costs generic64_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + generic64_memcpy, + generic64_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1635,6 +1729,18 @@ struct processor_costs generic64_cost = { }; /* core_cost should produce code tuned for Core familly of CPUs. */ +static stringop_algs core_memcpy[2] = { + {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; +static stringop_algs core_memset[2] = { + {libcall, {{6, loop_1_byte, true}, + {24, loop, true}, + {8192, rep_prefix_4_byte, true}, + {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; + static const struct processor_costs core_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1693,15 +1799,8 @@ struct processor_costs core_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. 
*/ - {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, - {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, - {-1, libcall, false}}}}, - {{libcall, {{6, loop_1_byte, true}, - {24, loop, true}, - {8192, rep_prefix_4_byte, true}, - {-1, libcall, false}}}, - {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, - {-1, libcall, false}}}}, + core_memcpy, + core_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1717,6 +1816,14 @@ struct processor_costs core_cost = { /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Athlon and K8. */ +static stringop_algs generic32_memcpy[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs generic32_memset[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs generic32_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1770,12 +1877,8 @@ struct processor_costs generic32_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + generic32_memcpy, + generic32_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1833,287 +1936,23 @@ const struct processor_costs *ix86_cost = &pentium_cost; (PPro/PENT4/NOCONA/CORE2/Athlon/K8). 
*/ #define m_GENERIC (m_GENERIC32 | m_GENERIC64) +const char* ix86_tune_feature_names[X86_TUNE_LAST] = { +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) name, +#include "x86-tune.def" +#undef DEF_TUNE +}; + /* Feature tests against the various tunings. */ unsigned char ix86_tune_features[X86_TUNE_LAST]; /* Feature tests against the various tunings used to create ix86_tune_features based on the processor mask. */ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { - /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results - negatively, so enabling for Generic64 seems like good code size - tradeoff. We can't enable it for 32bit generic because it does not - work well with PPro base chips. */ - m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64, - - /* X86_TUNE_PUSH_MEMORY */ - m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_ZERO_EXTEND_WITH_AND */ - m_486 | m_PENT, - - /* X86_TUNE_UNROLL_STRLEN */ - m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based - on simulation result. But after P4 was made, no performance benefit - was observed with branch hints. It also increases the code size. - As a result, icc never generates branch hints. */ - 0, - - /* X86_TUNE_DOUBLE_WITH_ADD */ - ~m_386, - - /* X86_TUNE_USE_SAHF */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, - - /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid - partial dependencies. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial - register stalls on Generic32 compilation setting as well. 
However - in current implementation the partial register stalls are not eliminated - very well - they can be introduced via subregs synthesized by combine - and can happen in caller/callee saving sequences. Because this option - pays back little on PPro based chips and is in conflict with partial reg - dependencies used by Athlon/P4 based chips, it is better to leave it off - for generic32 for now. */ - m_PPRO, - - /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ - m_CORE_ALL | m_GENERIC, - - /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall - * on 16-bit immediate moves into memory on Core2 and Corei7. */ - m_CORE_ALL | m_GENERIC, - - /* X86_TUNE_USE_HIMODE_FIOP */ - m_386 | m_486 | m_K6_GEODE, - - /* X86_TUNE_USE_SIMODE_FIOP */ - ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_USE_MOV0 */ - m_K6, - - /* X86_TUNE_USE_CLTD */ - ~(m_PENT | m_ATOM | m_SLM | m_K6), - - /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ - m_PENT4, - - /* X86_TUNE_SPLIT_LONG_MOVES */ - m_PPRO, - - /* X86_TUNE_READ_MODIFY_WRITE */ - ~m_PENT, - - /* X86_TUNE_READ_MODIFY */ - ~(m_PENT | m_PPRO), - - /* X86_TUNE_PROMOTE_QIMODE */ - m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_FAST_PREFIX */ - ~(m_386 | m_486 | m_PENT), - - /* X86_TUNE_SINGLE_STRINGOP */ - m_386 | m_P4_NOCONA, - - /* X86_TUNE_QIMODE_MATH */ - ~0, - - /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial - register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option - might be considered for Generic32 if our scheme for avoiding partial - stalls was more effective. */ - ~m_PPRO, - - /* X86_TUNE_PROMOTE_QI_REGS */ - 0, - - /* X86_TUNE_PROMOTE_HI_REGS */ - m_PPRO, - - /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred - over esp addition. 
*/ - m_386 | m_486 | m_PENT | m_PPRO, - - /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred - over esp addition. */ - m_PENT, - - /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred - over esp subtraction. */ - m_386 | m_486 | m_PENT | m_K6_GEODE, - - /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred - over esp subtraction. */ - m_PENT | m_K6_GEODE, - - /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred - for DFmode copies */ - ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a - conflict here in between PPro/Pentium4 based chips that thread 128bit - SSE registers as single units versus K8 based chips that divide SSE - registers to two 64bit halves. This knob promotes all store destinations - to be 128bit to allow register renaming on 128bit SSE units, but usually - results in one extra microop on 64bit SSE units. Experimental results - shows that disabling this option on P4 brings over 20% SPECfp regression, - while enabling it on K8 brings roughly 2.4% regression that can be partly - masked by careful scheduling of moves. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC, - - /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */ - m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM, - - /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */ - m_COREI7 | m_BDVER | m_SLM, - - /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */ - m_BDVER , - - /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies - are resolved on SSE register parts instead of whole registers, so we may - maintain just lower part of scalar values in proper format leaving the - upper part undefined. 
*/ - m_ATHLON_K8, - - /* X86_TUNE_SSE_TYPELESS_STORES */ - m_AMD_MULTIPLE, - - /* X86_TUNE_SSE_LOAD0_BY_PXOR */ - m_PPRO | m_P4_NOCONA, - - /* X86_TUNE_MEMORY_MISMATCH_STALL */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PROLOGUE_USING_MOVE */ - m_PPRO | m_ATHLON_K8, - - /* X86_TUNE_EPILOGUE_USING_MOVE */ - m_PPRO | m_ATHLON_K8, - - /* X86_TUNE_SHIFT1 */ - ~m_486, - - /* X86_TUNE_USE_FFREEP */ - m_AMD_MULTIPLE, - - /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */ - ~(m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */ - ~m_ATHLON_K8, - - /* X86_TUNE_INTER_UNIT_CONVERSIONS */ - ~(m_AMDFAM10 | m_BDVER ), - - /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more - than 4 branch instructions in the 16 byte window. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_SCHEDULE */ - m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_USE_BT */ - m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_USE_INCDEC */ - ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC), - - /* X86_TUNE_PAD_RETURNS */ - m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */ - m_ATOM, - - /* X86_TUNE_EXT_80387_CONSTANTS */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, - - /* X86_TUNE_AVOID_VECTOR_DECODE */ - m_CORE_ALL | m_K8 | m_GENERIC64, - - /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode - and SImode multiply, but 386 and 486 do HImode multiply faster. */ - ~(m_386 | m_486), - - /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is - vector path on AMD machines. */ - m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64, - - /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD - machines. 
*/ - m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64, - - /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR - than a MOV. */ - m_PENT, - - /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is, - but one byte longer. */ - m_PENT, - - /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory - operand that cannot be represented using a modRM byte. The XOR - replacement is long decoded, so this split helps here as well. */ - m_K6, - - /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion - from FP to FP. */ - m_CORE_ALL | m_AMDFAM10 | m_GENERIC, - - /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion - from integer to FP. */ - m_AMDFAM10, - - /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction - with a subsequent conditional jump instruction into a single - compare-and-branch uop. */ - m_BDVER, - - /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag - will impact LEA instruction selection. */ - m_ATOM | m_SLM, - - /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector - instructions. */ - ~m_ATOM, - - /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching - at -O3. For the moment, the prefetching seems badly tuned for Intel - chips. */ - m_K6_GEODE | m_AMD_MULTIPLE, - - /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for - the auto-vectorizer. */ - m_BDVER | m_BTVER2, - - /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations - during reassociation of integer computation. */ - m_ATOM, - - /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations - during reassociation of fp computation. */ - m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2, - - /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE - regs instead of memory. */ - m_CORE_ALL, - - /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for - a conditional move. 
*/ - m_ATOM, - - /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for - fp converts to destination register. */ - m_SLM - +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) selector, +#include "x86-tune.def" +#undef DEF_TUNE }; /* Feature tests against the various architecture variations. */ @@ -2188,6 +2027,11 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = /* SSE REX registers */ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, + /* AVX-512 SSE registers */ + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, }; /* The "default" register map used in 32bit mode. */ @@ -2201,6 +2045,8 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ }; /* The "default" register map used in 64bit mode. */ @@ -2214,6 +2060,8 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 8,9,10,11,12,13,14,15, /* extended integer registers */ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ + 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */ + 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */ }; /* Define the register numbers to be used in Dwarf debugging information. 
@@ -2279,6 +2127,8 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ }; /* Define parameter passing and return registers. */ @@ -2471,7 +2321,6 @@ enum ix86_function_specific_strings static char *ix86_target_string (HOST_WIDE_INT, int, const char *, const char *, enum fpmath_unit, bool); -static void ix86_debug_options (void) ATTRIBUTE_UNUSED; static void ix86_function_specific_save (struct cl_target_option *); static void ix86_function_specific_restore (struct cl_target_option *); static void ix86_function_specific_print (FILE *, int, @@ -2578,7 +2427,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = static bool gate_insert_vzeroupper (void) { - return TARGET_AVX && TARGET_VZEROUPPER; + return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER; } static unsigned int @@ -2596,31 +2445,48 @@ rest_of_handle_insert_vzeroupper (void) ix86_optimize_mode_switching[AVX_U128] = 1; /* Call optimize_mode_switching. 
*/ - pass_mode_switching.pass.execute (); + g->get_passes ()->execute_pass_mode_switching (); return 0; } -struct rtl_opt_pass pass_insert_vzeroupper = +namespace { + +const pass_data pass_data_insert_vzeroupper = { - { - RTL_PASS, - "vzeroupper", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_insert_vzeroupper, /* gate */ - rest_of_handle_insert_vzeroupper, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "vzeroupper", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ }; +class pass_insert_vzeroupper : public rtl_opt_pass +{ +public: + pass_insert_vzeroupper(gcc::context *ctxt) + : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_insert_vzeroupper (); } + unsigned int execute () { return rest_of_handle_insert_vzeroupper (); } + +}; // class pass_insert_vzeroupper + +} // anon namespace + +rtl_opt_pass * +make_pass_insert_vzeroupper (gcc::context *ctxt) +{ + return new pass_insert_vzeroupper (ctxt); +} + /* Return true if a red-zone is in use. 
*/ static inline bool @@ -2651,6 +2517,10 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mfma", OPTION_MASK_ISA_FMA }, { "-mxop", OPTION_MASK_ISA_XOP }, { "-mlwp", OPTION_MASK_ISA_LWP }, + { "-mavx512f", OPTION_MASK_ISA_AVX512F }, + { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, + { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, + { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2883,7 +2753,7 @@ ix86_profile_before_prologue (void) /* Function that is callable from the debugger to print the current options. */ -void +void ATTRIBUTE_UNUSED ix86_debug_options (void) { char *opts = ix86_target_string (ix86_isa_flags, target_flags, @@ -2900,7 +2770,222 @@ ix86_debug_options (void) return; } + +static const char *stringop_alg_names[] = { +#define DEF_ENUM +#define DEF_ALG(alg, name) #name, +#include "stringop.def" +#undef DEF_ENUM +#undef DEF_ALG +}; + +/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. + The string is of the following form (or comma separated list of it): + + strategy_alg:max_size:[align|noalign] + + where the full size range for the strategy is either [0, max_size] or + [min_size, max_size], in which min_size is the max_size + 1 of the + preceding range. The last size range must have max_size == -1. + + Examples: + + 1. + -mmemcpy-strategy=libcall:-1:noalign + + this is equivalent to (for known size memcpy) -mstringop-strategy=libcall + + + 2. + -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign + + This is to tell the compiler to use the following strategy for memset + 1) when the expected size is between [1, 16], use rep_8byte strategy; + 2) when the size is between [17, 2048], use vector_loop; + 3) when the size is > 2048, use libcall. 
*/ + +struct stringop_size_range +{ + int max; + stringop_alg alg; + bool noalign; +}; + +static void +ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) +{ + const struct stringop_algs *default_algs; + stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; + char *curr_range_str, *next_range_str; + int i = 0, n = 0; + + if (is_memset) + default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; + else + default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; + + curr_range_str = strategy_str; + + do + { + int maxs; + stringop_alg alg; + char alg_name[128]; + char align[16]; + next_range_str = strchr (curr_range_str, ','); + if (next_range_str) + *next_range_str++ = '\0'; + + if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s", + alg_name, &maxs, align)) + { + error ("wrong arg %s to option %s", curr_range_str, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) + { + error ("size ranges of option %s should be increasing", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + for (i = 0; i < last_alg; i++) + { + if (!strcmp (alg_name, stringop_alg_names[i])) + { + alg = (stringop_alg) i; + break; + } + } + + if (i == last_alg) + { + error ("wrong stringop strategy name %s specified for option %s", + alg_name, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + input_ranges[n].max = maxs; + input_ranges[n].alg = alg; + if (!strcmp (align, "align")) + input_ranges[n].noalign = false; + else if (!strcmp (align, "noalign")) + input_ranges[n].noalign = true; + else + { + error ("unknown alignment %s specified for option %s", + align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + n++; + curr_range_str = next_range_str; + } + while (curr_range_str); + + if (input_ranges[n - 1].max != -1) + { + error ("the max value for the last size range should be -1" + " for option %s", + is_memset ? 
"-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > MAX_STRINGOP_ALGS) + { + error ("too many size ranges specified in option %s", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + /* Now override the default algs array. */ + for (i = 0; i < n; i++) + { + *const_cast(&default_algs->size[i].max) = input_ranges[i].max; + *const_cast(&default_algs->size[i].alg) + = input_ranges[i].alg; + *const_cast(&default_algs->size[i].noalign) + = input_ranges[i].noalign; + } +} + +/* parse -mtune-ctrl= option. When DUMP is true, + print the features that are explicitly set. */ + +static void +parse_mtune_ctrl_str (bool dump) +{ + if (!ix86_tune_ctrl_string) + return; + + char *next_feature_string = NULL; + char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); + char *orig = curr_feature_string; + int i; + do + { + bool clear = false; + + next_feature_string = strchr (curr_feature_string, ','); + if (next_feature_string) + *next_feature_string++ = '\0'; + if (*curr_feature_string == '^') + { + curr_feature_string++; + clear = true; + } + for (i = 0; i < X86_TUNE_LAST; i++) + { + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) + { + ix86_tune_features[i] = !clear; + if (dump) + fprintf (stderr, "Explicitly %s feature %s\n", + clear ? "clear" : "set", ix86_tune_feature_names[i]); + break; + } + } + if (i == X86_TUNE_LAST) + error ("Unknown parameter to option -mtune-ctrl: %s", + clear ? curr_feature_string - 1 : curr_feature_string); + curr_feature_string = next_feature_string; + } + while (curr_feature_string); + free (orig); +} + +/* Helper function to set ix86_tune_features. IX86_TUNE is the + processor type. 
*/ + +static void +set_ix86_tune_features (enum processor_type ix86_tune, bool dump) +{ + unsigned int ix86_tune_mask = 1u << ix86_tune; + int i; + + for (i = 0; i < X86_TUNE_LAST; ++i) + { + if (ix86_tune_no_default) + ix86_tune_features[i] = 0; + else + ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + } + + if (dump) + { + fprintf (stderr, "List of x86 specific tuning parameter names:\n"); + for (i = 0; i < X86_TUNE_LAST; i++) + fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], + ix86_tune_features[i] ? "on" : "off"); + } + + parse_mtune_ctrl_str (dump); +} + + /* Override various settings based on options. If MAIN_ARGS_P, the options are from the command line, otherwise they are from attributes. */ @@ -2955,6 +3040,10 @@ ix86_option_override_internal (bool main_args_p) #define PTA_FXSR (HOST_WIDE_INT_1 << 37) #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) +#define PTA_AVX512F (HOST_WIDE_INT_1 << 40) +#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) +#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) +#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) /* if this reaches 64, need to widen struct pta flags below */ @@ -3476,6 +3565,18 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_XSAVEOPT && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; + if (processor_alias_table[i].flags & PTA_AVX512F + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; + if (processor_alias_table[i].flags & PTA_AVX512ER + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; + if (processor_alias_table[i].flags & PTA_AVX512PF + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; + if (processor_alias_table[i].flags & PTA_AVX512CD + && !(ix86_isa_flags_explicit & 
OPTION_MASK_ISA_AVX512CD)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; @@ -3546,9 +3647,7 @@ ix86_option_override_internal (bool main_args_p) error ("bad value (%s) for %stune=%s %s", ix86_tune_string, prefix, suffix, sw); - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + set_ix86_tune_features (ix86_tune, ix86_dump_tunes); #ifndef USE_IX86_FRAME_POINTER #define USE_IX86_FRAME_POINTER 0 @@ -3784,6 +3883,7 @@ ix86_option_override_internal (bool main_args_p) gcc_unreachable (); } + ix86_tune_mask = 1u << ix86_tune; if ((!USE_IX86_FRAME_POINTER || (x86_accumulate_outgoing_args & ix86_tune_mask)) && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) @@ -3866,24 +3966,19 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_leave = gen_leave_rex64; if (Pmode == DImode) { - ix86_gen_monitor = gen_sse3_monitor64_di; ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; ix86_gen_tls_local_dynamic_base_64 = gen_tls_local_dynamic_base_64_di; } else { - ix86_gen_monitor = gen_sse3_monitor64_si; ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; ix86_gen_tls_local_dynamic_base_64 = gen_tls_local_dynamic_base_64_si; } } else - { - ix86_gen_leave = gen_leave; - ix86_gen_monitor = gen_sse3_monitor; - } + ix86_gen_leave = gen_leave; if (Pmode == DImode) { @@ -3895,6 +3990,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ix86_gen_probe_stack_range = gen_probe_stack_rangedi; + ix86_gen_monitor = gen_sse3_monitor_di; } else { @@ -3906,6 +4002,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; ix86_gen_adjust_stack_and_probe = 
gen_adjust_stack_and_probesi; ix86_gen_probe_stack_range = gen_probe_stack_rangesi; + ix86_gen_monitor = gen_sse3_monitor_si; } #ifdef USE_IX86_CLD @@ -3940,22 +4037,22 @@ ix86_option_override_internal (bool main_args_p) TARGET_AVX with -fexpensive-optimizations and split 32-byte AVX unaligned load/store. */ if (!optimize_size) - { - if (flag_expensive_optimizations - && !(target_flags_explicit & MASK_VZEROUPPER)) + { + if (flag_expensive_optimizations + && !(target_flags_explicit & MASK_VZEROUPPER)) target_flags |= MASK_VZEROUPPER; - if ((x86_avx256_split_unaligned_load & ix86_tune_mask) - && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + if ((x86_avx256_split_unaligned_load & ix86_tune_mask) + && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if ((x86_avx256_split_unaligned_store & ix86_tune_mask) - && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + if ((x86_avx256_split_unaligned_store & ix86_tune_mask) + && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; - /* Enable 128-bit AVX instruction generation - for the auto-vectorizer. */ - if (TARGET_AVX128_OPTIMAL - && !(target_flags_explicit & MASK_PREFER_AVX128)) + /* Enable 128-bit AVX instruction generation + for the auto-vectorizer. */ + if (TARGET_AVX128_OPTIMAL + && !(target_flags_explicit & MASK_PREFER_AVX128)) target_flags |= MASK_PREFER_AVX128; - } + } if (ix86_recip_name) { @@ -4021,6 +4118,21 @@ ix86_option_override_internal (bool main_args_p) /* Handle stack protector */ if (!global_options_set.x_ix86_stack_protector_guard) ix86_stack_protector_guard = TARGET_HAS_BIONIC ? 
SSP_GLOBAL : SSP_TLS; + + /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ + if (ix86_tune_memcpy_strategy) + { + char *str = xstrdup (ix86_tune_memcpy_strategy); + ix86_parse_stringop_strategy_string (str, false); + free (str); + } + + if (ix86_tune_memset_strategy) + { + char *str = xstrdup (ix86_tune_memset_strategy); + ix86_parse_stringop_strategy_string (str, true); + free (str); + } } /* Implement the TARGET_OPTION_OVERRIDE hook. */ @@ -4028,8 +4140,9 @@ ix86_option_override_internal (bool main_args_p) static void ix86_option_override (void) { + opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g); static struct register_pass_info insert_vzeroupper_info - = { &pass_insert_vzeroupper.pass, "reload", + = { pass_insert_vzeroupper, "reload", 1, PASS_POS_INSERT_AFTER }; @@ -4060,6 +4173,8 @@ ix86_conditional_register_usage (void) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } /* See the definition of CALL_USED_REGISTERS in i386.h. */ @@ -4100,6 +4215,11 @@ ix86_conditional_register_usage (void) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + /* If AVX512F is disabled, squash the registers. */ + if (! 
TARGET_AVX512F) + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } @@ -4133,7 +4253,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr) { enum processor_type old_tune = ix86_tune; enum processor_type old_arch = ix86_arch; - unsigned int ix86_arch_mask, ix86_tune_mask; + unsigned int ix86_arch_mask; int i; ix86_arch = (enum processor_type) ptr->arch; @@ -4157,12 +4277,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr) /* Recreate the tune optimization tests */ if (old_tune != ix86_tune) - { - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] - = !!(initial_ix86_tune_features[i] & ix86_tune_mask); - } + set_ix86_tune_features (ix86_tune, false); } /* Print the current options */ @@ -4244,6 +4359,10 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("aes", OPT_maes), IX86_ATTR_ISA ("avx", OPT_mavx), IX86_ATTR_ISA ("avx2", OPT_mavx2), + IX86_ATTR_ISA ("avx512f", OPT_mavx512f), + IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), + IX86_ATTR_ISA ("avx512er", OPT_mavx512er), + IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), IX86_ATTR_ISA ("mmx", OPT_mmmx), IX86_ATTR_ISA ("pclmul", OPT_mpclmul), IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), @@ -4728,10 +4847,7 @@ ix86_in_large_data_p (tree exp) RELOC indicates whether forming the initial value of DECL requires link-time relocations. 
*/ -static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT) - ATTRIBUTE_UNUSED; - -static section * +ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { @@ -4792,6 +4908,28 @@ x86_64_elf_select_section (tree decl, int reloc, return default_elf_select_section (decl, reloc, align); } +/* Select a set of attributes for section NAME based on the properties + of DECL and whether or not RELOC indicates that DECL's initializer + might contain runtime relocations. */ + +static unsigned int ATTRIBUTE_UNUSED +x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (decl == NULL_TREE + && (strcmp (name, ".ldata.rel.ro") == 0 + || strcmp (name, ".ldata.rel.ro.local") == 0)) + flags |= SECTION_RELRO; + + if (strcmp (name, ".lbss") == 0 + || strncmp (name, ".lbss.", 5) == 0 + || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) + flags |= SECTION_BSS; + + return flags; +} + /* Build up a unique section name, expressed as a STRING_CST node, and assign it to DECL_SECTION_NAME (decl). RELOC indicates whether the initial value of EXP requires @@ -5161,8 +5299,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, static tree ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED, tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, - bool *no_add_attrs) + int flags, bool *no_add_attrs) { tree alt; @@ -5651,6 +5788,14 @@ ix86_function_type_abi (const_tree fntype) return ix86_abi; } +/* We add this as a workaround in order to use libc_has_function + hook in i386.md. */ +bool +ix86_libc_has_function (enum function_class fn_class) +{ + return targetm.libc_has_function (fn_class); +} + static bool ix86_function_ms_hook_prologue (const_tree fn) { @@ -7078,8 +7223,7 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, appropriate for passing a pointer to that type. 
*/ static bool -ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, +ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode, const_tree type, bool named ATTRIBUTE_UNUSED) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); @@ -8562,6 +8706,10 @@ standard_sse_constant_opcode (rtx insn, rtx x) } case 2: + if (get_attr_mode (insn) == MODE_XI + || get_attr_mode (insn) == MODE_V8DF + || get_attr_mode (insn) == MODE_V16SF) + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; if (TARGET_AVX) return "vpcmpeqd\t%0, %0, %0"; else @@ -8796,7 +8944,7 @@ ix86_code_end (void) /* Emit code for the SET_GOT patterns. */ const char * -output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) +output_set_got (rtx dest, rtx label) { rtx xops[3]; @@ -12900,6 +13048,14 @@ ix86_tls_get_addr (void) ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); } + if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) + { + rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), + UNSPEC_PLTOFF); + return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_CONST (Pmode, unspec)); + } + return ix86_tls_symbol; } @@ -13807,21 +13963,29 @@ ix86_delegitimize_address (rtx x) x = replace_equiv_address_nv (orig_x, x); return x; } - if (GET_CODE (x) != CONST - || GET_CODE (XEXP (x, 0)) != UNSPEC - || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL - && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL) - || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)) - return ix86_delegitimize_tls_address (orig_x); - x = XVECEXP (XEXP (x, 0), 0, 0); - if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) + + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL + || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) + && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) { - x = simplify_gen_subreg (GET_MODE (orig_x), x, - GET_MODE (x), 0); - if (x == NULL_RTX) - return orig_x; 
+ x = XVECEXP (XEXP (x, 0), 0, 0); + if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) + { + x = simplify_gen_subreg (GET_MODE (orig_x), x, + GET_MODE (x), 0); + if (x == NULL_RTX) + return orig_x; + } + return x; } - return x; + + if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) + return ix86_delegitimize_tls_address (orig_x); + + /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic + and -mcmodel=medium -fpic. */ } if (GET_CODE (x) != PLUS @@ -13858,10 +14022,12 @@ ix86_delegitimize_address (rtx x) if (GET_CODE (x) == UNSPEC && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) - || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) + || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) + || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC + && !MEM_P (orig_x) && !addend))) result = XVECEXP (x, 0, 0); - if (TARGET_MACHO && darwin_local_data_pic (x) + if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) && !MEM_P (orig_x)) result = XVECEXP (x, 0, 0); @@ -14077,6 +14243,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, If CODE is 'q', pretend the mode is DImode. If CODE is 'x', pretend the mode is V4SFmode. If CODE is 't', pretend the mode is V8SFmode. + If CODE is 'g', pretend the mode is V16SFmode. If CODE is 'h', pretend the reg is the 'high' byte register. If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. If CODE is 'd', duplicate the operand for AVX instruction. 
@@ -14122,6 +14289,8 @@ print_reg (rtx x, int code, FILE *file) code = 16; else if (code == 't') code = 32; + else if (code == 'g') + code = 64; else code = GET_MODE_SIZE (GET_MODE (x)); @@ -14195,6 +14364,14 @@ print_reg (rtx x, int code, FILE *file) fputs (hi_reg_name[regno] + 1, file); return; } + case 64: + if (SSE_REG_P (x)) + { + gcc_assert (!duplicated); + putc ('z', file); + fputs (hi_reg_name[REGNO (x)] + 1, file); + return; + } break; default: gcc_unreachable (); @@ -14268,6 +14445,7 @@ get_some_local_dynamic_name (void) q -- likewise, print the DImode name of the register. x -- likewise, print the V4SFmode name of the register. t -- likewise, print the V8SFmode name of the register. + g -- likewise, print the V16SFmode name of the register. h -- print the QImode name for a "high" register, either ah, bh, ch or dh. y -- print "st(0)" instead of "st" as a register. d -- print duplicated register operand for AVX instruction. @@ -14497,6 +14675,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 'q': case 'h': case 't': + case 'g': case 'y': case 'x': case 'X': @@ -14805,6 +14984,7 @@ ix86_print_operand (FILE *file, rtx x, int code) size = "XMMWORD"; break; case 32: size = "YMMWORD"; break; + case 64: size = "ZMMWORD"; break; default: gcc_unreachable (); } @@ -17882,7 +18062,7 @@ ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) bool ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, - rtx operands[2] ATTRIBUTE_UNUSED) + rtx operands[2]) { /* If one of operands is memory, source and destination must match. 
*/ if ((MEM_P (operands[0]) @@ -22903,6 +23083,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: need_zero_guard = true; @@ -23093,6 +23274,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: case loop: @@ -23304,6 +23486,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop: need_zero_guard = true; @@ -23481,6 +23664,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: case loop: @@ -29546,8 +29730,8 @@ ix86_function_versions (tree fn1, tree fn2) error_at (DECL_SOURCE_LOCATION (fn2), "missing % attribute for multi-versioned %D", fn2); - error_at (DECL_SOURCE_LOCATION (fn1), - "previous declaration of %D", fn1); + inform (DECL_SOURCE_LOCATION (fn1), + "previous declaration of %D", fn1); /* Prevent diagnosing of the same error multiple times. */ DECL_ATTRIBUTES (fn2) = tree_cons (get_identifier ("target"), @@ -29830,7 +30014,7 @@ make_resolver_func (const tree default_decl, DECL_IGNORED_P (decl) = 0; /* IFUNC resolvers have to be externally visible. */ TREE_PUBLIC (decl) = 1; - DECL_UNINLINABLE (decl) = 0; + DECL_UNINLINABLE (decl) = 1; /* Resolver is not external, body is generated. */ DECL_EXTERNAL (decl) = 0; @@ -31972,9 +32156,8 @@ ix86_expand_vec_set_builtin (tree exp) IGNORE is nonzero if the value is to be ignored. 
*/ static rtx -ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) +ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + enum machine_mode mode, int ignore) { const struct builtin_description *d; size_t i; @@ -33798,7 +33981,7 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) alternative: if reload cannot do this, it will still use its choice. */ mode = GET_MODE (x); if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) - return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS; + return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; if (X87_FLOAT_MODE_P (mode)) { @@ -33822,7 +34005,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, if (TARGET_64BIT && MEM_P (x) && GET_MODE_SIZE (mode) > UNITS_PER_WORD - && rclass == GENERAL_REGS + && INTEGER_CLASS_P (rclass) && !offsettable_memref_p (x)) { sri->icode = (in_p @@ -33838,12 +34021,8 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, intermediate register on 32bit targets. */ if (!TARGET_64BIT && !in_p && mode == QImode - && (rclass == GENERAL_REGS - || rclass == LEGACY_REGS - || rclass == NON_Q_REGS - || rclass == SIREG - || rclass == DIREG - || rclass == INDEX_REGS)) + && INTEGER_CLASS_P (rclass) + && MAYBE_NON_Q_CLASS_P (rclass)) { int regno; @@ -34269,10 +34448,25 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) { /* We implement the move patterns for all vector modes into and out of SSE registers, even when no operation instructions - are available. OImode move is available only when AVX is - enabled. */ + are available. */ + + /* For AVX-512 we allow, regardless of regno: + - XI mode + - any of 512-bit wide vector mode + - any scalar mode. */ + if (TARGET_AVX512F + && (mode == XImode + || VALID_AVX512F_REG_MODE (mode) + || VALID_AVX512F_SCALAR_MODE (mode))) + return true; + + /* xmm16-xmm31 are only available for AVX-512. 
*/ + if (EXT_REX_SSE_REGNO_P (regno)) + return false; + + /* OImode move is available only when AVX is enabled. */ return ((TARGET_AVX && mode == OImode) - || VALID_AVX256_REG_MODE (mode) + || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || VALID_SSE_REG_MODE (mode) || VALID_SSE2_REG_MODE (mode) || VALID_MMX_REG_MODE (mode) @@ -34422,7 +34616,8 @@ ix86_set_reg_reg_cost (enum machine_mode mode) case MODE_VECTOR_INT: case MODE_VECTOR_FLOAT: - if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode)) + if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) @@ -35054,6 +35249,10 @@ x86_order_regs_for_local_alloc (void) for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) reg_alloc_order [pos++] = i; + /* Extended REX SSE registers. */ + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + reg_alloc_order [pos++] = i; + /* x87 registers. */ if (TARGET_SSE_MATH) for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) @@ -36051,9 +36250,9 @@ x86_emit_floatuns (rtx operands[2]) emit_label (donelab); } -/* AVX2 does support 32-byte integer vector operations, - thus the longest vector we are faced with is V32QImode. */ -#define MAX_VECT_LEN 32 +/* AVX512F does support 64-byte integer vector operations, + thus the longest vector we are faced with is V64QImode. */ +#define MAX_VECT_LEN 64 struct expand_vec_perm_d { @@ -42590,7 +42789,7 @@ ix86_spill_class (reg_class_t rclass, enum machine_mode mode) if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! 
TARGET_MMX && (mode == SImode || (TARGET_64BIT && mode == DImode)) && INTEGER_CLASS_P (rclass)) - return SSE_REGS; + return ALL_SSE_REGS; return NO_REGS; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7d940f98804..e820aa65ac5 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -51,6 +51,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_SSE4_2 TARGET_ISA_SSE4_2 #define TARGET_AVX TARGET_ISA_AVX #define TARGET_AVX2 TARGET_ISA_AVX2 +#define TARGET_AVX512F TARGET_ISA_AVX512F +#define TARGET_AVX512PF TARGET_ISA_AVX512PF +#define TARGET_AVX512ER TARGET_ISA_AVX512ER +#define TARGET_AVX512CD TARGET_ISA_AVX512CD #define TARGET_FMA TARGET_ISA_FMA #define TARGET_SSE4A TARGET_ISA_SSE4A #define TARGET_FMA4 TARGET_ISA_FMA4 @@ -170,7 +174,7 @@ struct processor_costs { const int fsqrt; /* cost of FSQRT instruction. */ /* Specify what algorithm to use for stringops on unknown size. */ - struct stringop_algs memcpy[2], memset[2]; + struct stringop_algs *memcpy, *memset; const int scalar_stmt_cost; /* Cost of any scalar operation, excluding load and store. */ const int scalar_load_cost; /* Cost of scalar load. */ @@ -261,81 +265,11 @@ extern const struct processor_costs ix86_size_cost; /* Feature tests against the various tunings. 
*/ enum ix86_tune_indices { - X86_TUNE_USE_LEAVE, - X86_TUNE_PUSH_MEMORY, - X86_TUNE_ZERO_EXTEND_WITH_AND, - X86_TUNE_UNROLL_STRLEN, - X86_TUNE_BRANCH_PREDICTION_HINTS, - X86_TUNE_DOUBLE_WITH_ADD, - X86_TUNE_USE_SAHF, - X86_TUNE_MOVX, - X86_TUNE_PARTIAL_REG_STALL, - X86_TUNE_PARTIAL_FLAG_REG_STALL, - X86_TUNE_LCP_STALL, - X86_TUNE_USE_HIMODE_FIOP, - X86_TUNE_USE_SIMODE_FIOP, - X86_TUNE_USE_MOV0, - X86_TUNE_USE_CLTD, - X86_TUNE_USE_XCHGB, - X86_TUNE_SPLIT_LONG_MOVES, - X86_TUNE_READ_MODIFY_WRITE, - X86_TUNE_READ_MODIFY, - X86_TUNE_PROMOTE_QIMODE, - X86_TUNE_FAST_PREFIX, - X86_TUNE_SINGLE_STRINGOP, - X86_TUNE_QIMODE_MATH, - X86_TUNE_HIMODE_MATH, - X86_TUNE_PROMOTE_QI_REGS, - X86_TUNE_PROMOTE_HI_REGS, - X86_TUNE_SINGLE_POP, - X86_TUNE_DOUBLE_POP, - X86_TUNE_SINGLE_PUSH, - X86_TUNE_DOUBLE_PUSH, - X86_TUNE_INTEGER_DFMODE_MOVES, - X86_TUNE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, - X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, - X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, - X86_TUNE_SSE_SPLIT_REGS, - X86_TUNE_SSE_TYPELESS_STORES, - X86_TUNE_SSE_LOAD0_BY_PXOR, - X86_TUNE_MEMORY_MISMATCH_STALL, - X86_TUNE_PROLOGUE_USING_MOVE, - X86_TUNE_EPILOGUE_USING_MOVE, - X86_TUNE_SHIFT1, - X86_TUNE_USE_FFREEP, - X86_TUNE_INTER_UNIT_MOVES_TO_VEC, - X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, - X86_TUNE_INTER_UNIT_CONVERSIONS, - X86_TUNE_FOUR_JUMP_LIMIT, - X86_TUNE_SCHEDULE, - X86_TUNE_USE_BT, - X86_TUNE_USE_INCDEC, - X86_TUNE_PAD_RETURNS, - X86_TUNE_PAD_SHORT_FUNCTION, - X86_TUNE_EXT_80387_CONSTANTS, - X86_TUNE_AVOID_VECTOR_DECODE, - X86_TUNE_PROMOTE_HIMODE_IMUL, - X86_TUNE_SLOW_IMUL_IMM32_MEM, - X86_TUNE_SLOW_IMUL_IMM8, - X86_TUNE_MOVE_M1_VIA_OR, - X86_TUNE_NOT_UNPAIRABLE, - X86_TUNE_NOT_VECTORMODE, - X86_TUNE_USE_VECTOR_FP_CONVERTS, - X86_TUNE_USE_VECTOR_CONVERTS, - X86_TUNE_FUSE_CMP_AND_BRANCH, - X86_TUNE_OPT_AGU, - X86_TUNE_VECTORIZE_DOUBLE, - X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, - X86_TUNE_AVX128_OPTIMAL, - 
X86_TUNE_REASSOC_INT_TO_PARALLEL, - X86_TUNE_REASSOC_FP_TO_PARALLEL, - X86_TUNE_GENERAL_REGS_SSE_SPILL, - X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, - X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, - - X86_TUNE_LAST +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) tune, +#include "x86-tune.def" +#undef DEF_TUNE +X86_TUNE_LAST }; extern unsigned char ix86_tune_features[X86_TUNE_LAST]; @@ -802,7 +736,8 @@ enum target_cpu_default Pentium+ prefers DFmode values to be aligned to 64 bit boundary and Pentium Pro XFmode values at 128 bit boundaries. */ -#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128) +#define BIGGEST_ALIGNMENT \ + (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128)) /* Maximum stack alignment. */ #define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT @@ -958,7 +893,7 @@ enum target_cpu_default eliminated during reloading in favor of either the stack or frame pointer. */ -#define FIRST_PSEUDO_REGISTER 53 +#define FIRST_PSEUDO_REGISTER 69 /* Number of hardware registers that go into the DWARF-2 unwind info. If not defined, equals FIRST_PSEUDO_REGISTER. */ @@ -984,6 +919,10 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 0, 0, 0, 0, 0, 0, 0, 0, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ 0, 0, 0, 0, 0, 0, 0, 0 } /* 1 for registers not available across function calls. @@ -1012,7 +951,11 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 1, 1, 1, 1, 2, 2, 2, 2, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ - 6, 6, 6, 6, 6, 6, 6, 6 } + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ + 6, 6, 6, 6, 6, 6, 6, 6 } /* Order in which to allocate registers. Each register must be listed once, even those in FIXED_REGISTERS. 
List frame pointer @@ -1027,7 +970,8 @@ enum target_cpu_default { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ - 48, 49, 50, 51, 52 } + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 63, 64, 65, 66, 67, 68 } /* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order to be rearranged based on a particular function. When using sse math, @@ -1073,6 +1017,14 @@ enum target_cpu_default #define VALID_AVX256_REG_OR_OI_MODE(MODE) \ (VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode) +#define VALID_AVX512F_SCALAR_MODE(MODE) \ + ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \ + || (MODE) == SFmode) + +#define VALID_AVX512F_REG_MODE(MODE) \ + ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode) + #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V2DImode || (MODE) == DFmode) @@ -1112,7 +1064,9 @@ enum target_cpu_default || (MODE) == V2DImode || (MODE) == V4SFmode || (MODE) == V4SImode \ || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ - || (MODE) == V2TImode) + || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ + || (MODE) == V16SFmode) /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. 
*/ @@ -1175,15 +1129,18 @@ enum target_cpu_default #define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1) #define LAST_SSE_REG (FIRST_SSE_REG + 7) -#define FIRST_MMX_REG (LAST_SSE_REG + 1) +#define FIRST_MMX_REG (LAST_SSE_REG + 1) /*29*/ #define LAST_MMX_REG (FIRST_MMX_REG + 7) -#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) +#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) /*37*/ #define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7) -#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) +#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) /*45*/ #define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7) +#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/ +#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/ + /* Override this in other tm.h files to cope with various OS lossage requiring a frame pointer. */ #ifndef SUBTARGET_FRAME_POINTER_REQUIRED @@ -1263,6 +1220,8 @@ enum reg_class FLOAT_REGS, SSE_FIRST_REG, SSE_REGS, + EVEX_SSE_REGS, + ALL_SSE_REGS, MMX_REGS, FP_TOP_SSE_REGS, FP_SECOND_SSE_REGS, @@ -1280,7 +1239,7 @@ enum reg_class #define FLOAT_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), FLOAT_REGS) #define SSE_CLASS_P(CLASS) \ - reg_class_subset_p ((CLASS), SSE_REGS) + reg_class_subset_p ((CLASS), ALL_SSE_REGS) #define MMX_CLASS_P(CLASS) \ ((CLASS) == MMX_REGS) #define MAYBE_INTEGER_CLASS_P(CLASS) \ @@ -1288,13 +1247,16 @@ enum reg_class #define MAYBE_FLOAT_CLASS_P(CLASS) \ reg_classes_intersect_p ((CLASS), FLOAT_REGS) #define MAYBE_SSE_CLASS_P(CLASS) \ - reg_classes_intersect_p (SSE_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), ALL_SSE_REGS) #define MAYBE_MMX_CLASS_P(CLASS) \ - reg_classes_intersect_p (MMX_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), MMX_REGS) #define Q_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), Q_REGS) +#define MAYBE_NON_Q_CLASS_P(CLASS) \ + reg_classes_intersect_p ((CLASS), NON_Q_REGS) + /* Give names of register classes as strings for dump file. 
*/ #define REG_CLASS_NAMES \ @@ -1311,6 +1273,8 @@ enum reg_class "FLOAT_REGS", \ "SSE_FIRST_REG", \ "SSE_REGS", \ + "EVEX_SSE_REGS", \ + "ALL_SSE_REGS", \ "MMX_REGS", \ "FP_TOP_SSE_REGS", \ "FP_SECOND_SSE_REGS", \ @@ -1326,30 +1290,36 @@ enum reg_class Note that CLOBBERED_REGS are calculated by TARGET_CONDITIONAL_REGISTER_USAGE. */ -#define REG_CLASS_CONTENTS \ -{ { 0x00, 0x0 }, \ - { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \ - { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \ - { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \ - { 0x03, 0x0 }, /* AD_REGS */ \ - { 0x0f, 0x0 }, /* Q_REGS */ \ - { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \ - { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \ - { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \ - { 0x00, 0x0 }, /* CLOBBERED_REGS */ \ - { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \ - { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\ - { 0xff00, 0x0 }, /* FLOAT_REGS */ \ - { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \ -{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \ -{ 0xe0000000, 0x1f }, /* MMX_REGS */ \ -{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \ -{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \ -{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \ - { 0x11ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \ -{ 0x1ff100ff,0x1fffe0 }, /* INT_SSE_REGS */ \ -{ 0x1ff1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \ -{ 0xffffffff,0x1fffff } \ +#define REG_CLASS_CONTENTS \ +{ { 0x00, 0x0, 0x0 }, \ + { 0x01, 0x0, 0x0 }, /* AREG */ \ + { 0x02, 0x0, 0x0 }, /* DREG */ \ + { 0x04, 0x0, 0x0 }, /* CREG */ \ + { 0x08, 0x0, 0x0 }, /* BREG */ \ + { 0x10, 0x0, 0x0 }, /* SIREG */ \ + { 0x20, 0x0, 0x0 }, /* DIREG */ \ + { 0x03, 0x0, 0x0 }, /* AD_REGS */ \ + { 0x0f, 0x0, 0x0 }, /* Q_REGS */ \ + { 0x1100f0, 0x1fe0, 0x0 }, /* NON_Q_REGS */ \ + { 0x7f, 0x1fe0, 0x0 }, /* INDEX_REGS */ \ + { 0x1100ff, 0x0, 0x0 }, /* LEGACY_REGS */ \ + { 0x07, 0x0, 0x0 }, /* CLOBBERED_REGS */ \ + { 0x1100ff, 0x1fe0, 0x0 }, /* GENERAL_REGS */ \ + { 0x100, 0x0, 0x0 }, /* FP_TOP_REG */ \ + { 0x0200, 
0x0, 0x0 }, /* FP_SECOND_REG */ \ + { 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \ + { 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \ +{ 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \ + { 0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \ +{ 0x1fe00000,0xffffe000, 0x1f }, /* ALL_SSE_REGS */ \ +{ 0xe0000000, 0x1f, 0x0 }, /* MMX_REGS */ \ +{ 0x1fe00100,0xffffe000, 0x1f }, /* FP_TOP_SSE_REG */ \ +{ 0x1fe00200,0xffffe000, 0x1f }, /* FP_SECOND_SSE_REG */ \ +{ 0x1fe0ff00,0xffffe000, 0x1f }, /* FLOAT_SSE_REGS */ \ +{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \ +{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \ +{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \ +{ 0xffffffff,0xffffffff, 0x1f } \ } /* The same information, inverted: @@ -1393,13 +1363,20 @@ enum reg_class #define SSE_REG_P(X) (REG_P (X) && SSE_REGNO_P (REGNO (X))) #define SSE_REGNO_P(N) \ (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \ - || REX_SSE_REGNO_P (N)) + || REX_SSE_REGNO_P (N) \ + || EXT_REX_SSE_REGNO_P (N)) #define REX_SSE_REGNO_P(N) \ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG) +#define EXT_REX_SSE_REGNO_P(N) \ + IN_RANGE ((N), FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG) + #define SSE_REGNO(N) \ - ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8) + ((N) < 8 ? FIRST_SSE_REG + (N) \ + : (N) <= LAST_REX_SSE_REG ? 
(FIRST_REX_SSE_REG + (N) - 8) \ + : (FIRST_EXT_REX_SSE_REG + (N) - 16)) + #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) @@ -1952,7 +1929,11 @@ do { \ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"} + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", \ + "xmm16", "xmm17", "xmm18", "xmm19", \ + "xmm20", "xmm21", "xmm22", "xmm23", \ + "xmm24", "xmm25", "xmm26", "xmm27", \ + "xmm28", "xmm29", "xmm30", "xmm31" } #define REGISTER_NAMES HI_REGISTER_NAMES @@ -2273,9 +2254,13 @@ enum avx_u128_state scheduling just increases amount of live registers at time and in the turn amount of fxch instructions needed. - ??? Maybe Pentium chips benefits from renaming, someone can try.... */ + ??? Maybe Pentium chips benefits from renaming, someone can try.... + + Don't rename evex to non-evex sse registers. 
*/ -#define HARD_REGNO_RENAME_OK(SRC, TARGET) !STACK_REGNO_P (SRC) +#define HARD_REGNO_RENAME_OK(SRC, TARGET) (!STACK_REGNO_P (SRC) && \ + (EXT_REX_SSE_REGNO_P (SRC) == \ + EXT_REX_SSE_REGNO_P (TARGET))) #define FASTCALL_PREFIX '@' diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c67ed31923e..3307b081aaa 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -312,6 +312,22 @@ (XMM13_REG 50) (XMM14_REG 51) (XMM15_REG 52) + (XMM16_REG 53) + (XMM17_REG 54) + (XMM18_REG 55) + (XMM19_REG 56) + (XMM20_REG 57) + (XMM21_REG 58) + (XMM22_REG 59) + (XMM23_REG 60) + (XMM24_REG 61) + (XMM25_REG 62) + (XMM26_REG 63) + (XMM27_REG 64) + (XMM28_REG 65) + (XMM29_REG 66) + (XMM30_REG 67) + (XMM31_REG 68) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -350,7 +366,8 @@ ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" + "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, + V2DF,V2SF,V1DF,V8DF" (const_string "unknown")) ;; The CPU unit operations uses. @@ -471,10 +488,13 @@ (const_int 0))) ;; Prefix used: original, VEX or maybe VEX. -(define_attr "prefix" "orig,vex,maybe_vex" - (if_then_else (eq_attr "mode" "OI,V8SF,V4DF") - (const_string "vex") - (const_string "orig"))) +(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex" + (cond [(eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (eq_attr "mode" "XI,V16SF,V8DF") + (const_string "evex") + ] + (const_string "orig"))) ;; VEX W bit is used. (define_attr "prefix_vex_w" "" (const_int 0)) @@ -493,6 +513,9 @@ (symbol_ref "ix86_attr_length_vex_default (insn, false, true)") (symbol_ref "ix86_attr_length_vex_default (insn, false, false)")))) +;; 4-bytes evex prefix and 1 byte opcode. +(define_attr "length_evex" "" (const_int 5)) + ;; Set when modrm byte is used. 
(define_attr "modrm" "" (cond [(eq_attr "type" "str,leave") @@ -544,8 +567,17 @@ (plus (const_int 2) (plus (attr "prefix_data16") (attr "length_address"))) + (ior (eq_attr "prefix" "evex") + (and (ior (eq_attr "prefix" "maybe_evex") + (eq_attr "prefix" "maybe_vex")) + (match_test "TARGET_AVX512F"))) + (plus (attr "length_evex") + (plus (attr "length_immediate") + (plus (attr "modrm") + (attr "length_address")))) (ior (eq_attr "prefix" "vex") - (and (eq_attr "prefix" "maybe_vex") + (and (ior (eq_attr "prefix" "maybe_vex") + (eq_attr "prefix" "maybe_evex")) (match_test "TARGET_AVX"))) (plus (attr "length_vex") (plus (attr "length_immediate") @@ -663,7 +695,7 @@ ;; Used to control the "enabled" attribute on a per-instruction basis. (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, - avx2,noavx2,bmi2,fma4,fma" + avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" (const_string "base")) (define_attr "enabled" "" @@ -689,6 +721,10 @@ (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4") (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") + (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") + (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") + (eq_attr "isa" "fma_avx512f") + (symbol_ref "TARGET_FMA || TARGET_AVX512F") ] (const_int 1))) @@ -924,10 +960,12 @@ ;; SSE instruction suffix for various modes (define_mode_attr ssemodesuffix [(SF "ss") (DF "sd") + (V16SF "ps") (V8DF "pd") (V8SF "ps") (V4DF "pd") (V4SF "ps") (V2DF "pd") (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q") - (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")]) + (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q") + (V64QI "b") (V16SI "d") (V8DI "q")]) ;; SSE vector suffix for floating point modes (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")]) @@ -1649,6 +1687,12 @@ ;; Move instructions. 
+(define_expand "movxi" + [(set (match_operand:XI 0 "nonimmediate_operand") + (match_operand:XI 1 "general_operand"))] + "TARGET_AVX512F" + "ix86_expand_move (XImode, operands); DONE;") + ;; Reload patterns to support multi-word load/store ;; with non-offsetable address. (define_expand "reload_noff_store" @@ -1746,6 +1790,30 @@ (set_attr "mode" "") (set_attr "length_immediate" "1")]) +(define_insn "*movxi_internal_avx512f" + [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))] + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + if (misaligned_operand (operands[0], XImode) + || misaligned_operand (operands[1], XImode)) + return "vmovdqu32\t{%1, %0|%0, %1}"; + else + return "vmovdqa32\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + (define_insn "*movoi_internal_avx" [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m") (match_operand:OI 1 "vector_move_operand" "C ,xm,x"))] @@ -1857,9 +1925,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,*x,r ,*Yj ,*Yn"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -1896,6 +1964,8 @@ return "%vmovq\t{%1, %0|%0, %1}"; case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; case MODE_V2SF: gcc_assert (!TARGET_AVX); @@ -1989,7 +2059,10 @@ (cond [(eq_attr 
"alternative" "2") (const_string "SI") (eq_attr "alternative" "12,13") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -2018,9 +2091,9 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi") + "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2038,6 +2111,8 @@ return "%vmovd\t{%1, %0|%0, %1}"; case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa32\t{%g1, %g0|%g0, %g1}"; case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; @@ -2116,7 +2191,10 @@ (cond [(eq_attr "alternative" "2,3") (const_string "DI") (eq_attr "alternative" "6,7") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -2255,7 +2333,7 @@ "TARGET_LP64 && ix86_check_movabs (insn, 0)" "@ movabs{}\t{%1, %P0|[%P0], %1} - mov{}\t{%1, %a0|%a0, %1}" + mov{}\t{%1, %a0| PTR %a0, %1}" [(set_attr "type" "imov") (set_attr "modrm" "0,*") (set_attr "length_address" "8,0") @@ -2269,7 +2347,7 @@ "TARGET_LP64 && ix86_check_movabs (insn, 1)" "@ movabs{}\t{%P1, %0|%0, [%P1]} - mov{}\t{%a1, %0|%0, %a1}" + mov{}\t{%a1, %0|%0, PTR %a1}" [(set_attr "type" "imov") (set_attr "modrm" "0,*") (set_attr "length_address" "8,0") @@ -2703,9 +2781,9 @@ ;; Possible store forwarding (partial memory) stall in 
alternative 4. (define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi") + "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi") (match_operand:DF 1 "general_operand" - "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))] + "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2750,6 +2828,8 @@ case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V8DF: + return "vmovapd\t{%g1, %g0|%g0, %g1}"; case MODE_V2DF: return "%vmovapd\t{%1, %0|%0, %1}"; @@ -2824,6 +2904,8 @@ (eq_attr "alternative" "9,13") (cond [(not (match_test "TARGET_SSE2")) (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "XI") (match_test "TARGET_AVX") (const_string "V2DF") (match_test "optimize_function_for_size_p (cfun)") @@ -2839,7 +2921,10 @@ /* movaps is one byte shorter for non-AVX targets. 
*/ (eq_attr "alternative" "10,14") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V8DF") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") @@ -2872,9 +2957,9 @@ (define_insn "*movsf_internal" [(set (match_operand:SF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") + "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") (match_operand:SF 1 "general_operand" - "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yj,r ,*y ,m ,*y,*Yn,r"))] + "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2907,6 +2992,8 @@ return "vmovss\t{%1, %0, %0|%0, %0, %1}"; return "%vmovss\t{%1, %0|%0, %1}"; + case MODE_V16SF: + return "vmovaps\t{%g1, %g0|%g0, %g1}"; case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; @@ -2960,6 +3047,8 @@ (eq_attr "alternative" "5") (cond [(not (match_test "TARGET_SSE2")) (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "V16SF") (match_test "TARGET_AVX") (const_string "V4SF") (match_test "optimize_function_for_size_p (cfun)") @@ -2979,10 +3068,15 @@ of instructions to load just part of the register. It is better to maintain the whole registers in single format to avoid problems on using packed logical operations. 
*/ - (and (eq_attr "alternative" "6") - (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (match_test "TARGET_SSE_SPLIT_REGS"))) - (const_string "V4SF") + (eq_attr "alternative" "6") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V16SF") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS")) + (const_string "V4SF") + ] + (const_string "SF")) ] (const_string "SF")))]) @@ -4596,10 +4690,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && TARGET_INTER_UNIT_CONVERSIONS - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_split @@ -4608,10 +4699,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4697,10 +4785,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4740,10 +4825,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || 
(GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (mode, operands[0], @@ -4764,10 +4846,7 @@ (float:MODEF (match_operand:SI 1 "register_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4810,10 +4889,7 @@ (float:MODEF (match_operand:SI 1 "memory_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (mode, operands[0], @@ -4872,10 +4948,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float2_sse_nointerunit" @@ -4905,10 +4978,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4917,10 +4987,7 
@@ (float:MODEF (match_operand:SWI48 1 "memory_operand"))) (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float2_i387_with_temp" @@ -4968,6 +5035,46 @@ && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +;; Avoid partial SSE register dependency stalls + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge: + (vec_duplicate: + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (mode, operands[0], + mode, 0); + emit_move_insn (operands[0], CONST0_RTX (mode)); +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge: + (vec_duplicate: + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (mode, operands[0], + mode, 0); + emit_move_insn (operands[0], CONST0_RTX (mode)); +}) + ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. 
*/ @@ -5024,6 +5131,18 @@ && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +(define_expand "floatuns2" + [(set (match_operand:MODEF 0 "register_operand") + (unsigned_float:MODEF + (match_operand:SWI12 1 "nonimmediate_operand")))] + "!TARGET_64BIT + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" +{ + operands[1] = convert_to_mode (SImode, operands[1], 1); + emit_insn (gen_floatsi2 (operands[0], operands[1])); + DONE; +}) + ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two ;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC @@ -12278,11 +12397,33 @@ (set (attr "length") (symbol_ref "TARGET_X32 ? 15 : 16"))]) +(define_insn "*tls_global_dynamic_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b") + (match_operand:DI 3 "immediate_operand" "i"))) + (match_operand 4))) + (unspec:DI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[3]) == CONST + && GET_CODE (XEXP (operands[3], 0)) == UNSPEC + && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); + output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + (define_expand "tls_global_dynamic_64_" [(parallel [(set (match_operand:P 0 "register_operand") (call:P - (mem:QI (match_operand 2 "constant_call_address_operand")) + (mem:QI (match_operand 2)) (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand")] UNSPEC_TLS_GD)])] @@ -12340,11 +12481,32 @@ [(set_attr "type" "multi") (set_attr "length" "12")]) +(define_insn 
"*tls_local_dynamic_base_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "immediate_operand" "i"))) + (match_operand 3))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[2]) == CONST + && GET_CODE (XEXP (operands[2], 0)) == UNSPEC + && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); + output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + (define_expand "tls_local_dynamic_base_64_" [(parallel [(set (match_operand:P 0 "register_operand") (call:P - (mem:QI (match_operand 1 "constant_call_address_operand")) + (mem:QI (match_operand 1)) (const_int 0))) (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] "TARGET_64BIT") @@ -12629,10 +12791,10 @@ (set_attr "mode" "")]) (define_insn "*fop__comm_sse" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,v") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" @@ -14884,7 +15046,7 @@ [(use (match_operand:SI 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 - && TARGET_C99_FUNCTIONS" + && ix86_libc_has_function (function_c99_misc)" { rtx mask = GEN_INT (0x45); rtx val = GEN_INT (0x05); @@ -14910,7 +15072,7 @@ [(use 
(match_operand:SI 0 "register_operand")) (use (match_operand:MODEF 1 "nonimmediate_operand"))] "TARGET_USE_FANCY_MATH_387 - && TARGET_C99_FUNCTIONS + && ix86_libc_has_function (function_c99_misc) && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { rtx mask = GEN_INT (0x45); @@ -15977,10 +16139,10 @@ ;; are undefined in this condition, we're certain this is correct. (define_insn "3" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,v") (smaxmin:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "%0,x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "@ \t{%2, %0|%0, %2} diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9fbf5451e9c..5495c295f57 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -316,6 +316,14 @@ mstack-arg-probe Target Report Mask(STACK_PROBE) Save Enable stack probing +mmemcpy-strategy= +Target RejectNegative Joined Var(ix86_tune_memcpy_strategy) +Specify memcpy expansion strategy when expected size is known + +mmemset-strategy= +Target RejectNegative Joined Var(ix86_tune_memset_strategy) +Specify memset expansion strategy when expected size is known + mstringop-strategy= Target RejectNegative Joined Enum(stringop_alg) Var(ix86_stringop_alg) Init(no_stringop) Chose strategy to generate stringop using @@ -370,6 +378,17 @@ mtune= Target RejectNegative Joined Var(ix86_tune_string) Schedule code for given CPU +mtune-ctrl= +Target RejectNegative Joined Var(ix86_tune_ctrl_string) +Fine grain control of tune features + +mno-default +Target RejectNegative Var(ix86_tune_no_default) Init(0) +Clear all tune features + +mdump-tune-features +Target RejectNegative Var(ix86_dump_tunes) Init(0) + mabi= Target RejectNegative Joined Var(ix86_abi) Enum(calling_abi) Init(SYSV_ABI) Generate code 
that conforms to the given ABI @@ -498,6 +517,22 @@ mavx2 Target Report Mask(ISA_AVX2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation +mavx512f +Target Report Mask(ISA_AVX512F) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation + +mavx512pf +Target Report Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512PF built-in functions and code generation + +mavx512er +Target Report Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512ER built-in functions and code generation + +mavx512cd +Target Report Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation + mfma Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation diff --git a/gcc/config/i386/linux-common.h b/gcc/config/i386/linux-common.h index 1e8bf6b2dc0..52f0baf202e 100644 --- a/gcc/config/i386/linux-common.h +++ b/gcc/config/i386/linux-common.h @@ -40,7 +40,7 @@ along with GCC; see the file COPYING3. 
If not see #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 12c062687c0..17e24999258 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -78,9 +78,9 @@ (define_insn "*mov_internal" [(set (match_operand:MMXMODE 0 "nonimmediate_operand" - "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,x,x,x,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") + "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,v,v,v,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") (match_operand:MMXMODE 1 "vector_move_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,x,m,x,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] + "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,v,m,v,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -128,6 +128,9 @@ case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + case MODE_V2SF: if (TARGET_AVX && REG_P (operands[0])) return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; @@ -182,7 +185,10 @@ (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "11,12,15,16") - (cond [(match_test "mode == V2SFmode") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (match_test "mode == V2SFmode") (const_string "V4SF") (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b64ef6999ee..3959c3892e4 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -47,6 +47,12 @@ (and (match_code "reg") (match_test "SSE_REGNO_P (REGNO (op))"))) +;; True if the operand is an AVX-512 new register. 
+(define_predicate "ext_sse_reg_operand" + (and (match_code "reg") + (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))"))) + + ;; True if the operand is a Q_REGS class register. (define_predicate "q_regs_operand" (match_operand 0 "register_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 439749877f2..9d9469e2c62 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -96,7 +96,7 @@ ]) ;; All vector modes including V?TImode, used in move patterns. -(define_mode_iterator V16 +(define_mode_iterator VMOVE [(V32QI "TARGET_AVX") V16QI (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI @@ -244,6 +244,13 @@ (V4SI "vec") (V8SI "avx2") (V2DI "vec") (V4DI "avx2")]) +(define_mode_attr shuffletype + [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i") + (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i") + (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i") + (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i") + (V64QI "i") (V1TI "i") (V2TI "i")]) + (define_mode_attr ssedoublemode [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V32QI "V32HI") (V16QI "V16HI")]) @@ -301,8 +308,10 @@ ;; SSE instruction mode (define_mode_attr sseinsnmode - [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") + [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") + (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") + (V16SF "V16SF") (V8DF "V8DF") (V8SF "V8SF") (V4DF "V4DF") (V4SF "V4SF") (V2DF "V2DF") (TI "TI")]) @@ -435,8 +444,8 @@ ;; This is essential for maintaining stable calling conventions. 
(define_expand "mov" - [(set (match_operand:V16 0 "nonimmediate_operand") - (match_operand:V16 1 "nonimmediate_operand"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand") + (match_operand:VMOVE 1 "nonimmediate_operand"))] "TARGET_SSE" { ix86_expand_vector_move (mode, operands); @@ -444,20 +453,64 @@ }) (define_insn "*mov_internal" - [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m") + (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))] "TARGET_SSE && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { + int mode = get_attr_mode (insn); switch (which_alternative) { case 0: return standard_sse_constant_opcode (insn, operands[1]); case 1: case 2: - switch (get_attr_mode (insn)) + /* There is no evex-encoded vmov* for sizes smaller than 64-bytes + in avx512f, so we need to use workarounds, to access sse registers + 16-31, which are evex-only. */ + if (TARGET_AVX512F && GET_MODE_SIZE (mode) < 64 + && (EXT_REX_SSE_REGNO_P (REGNO (operands[0])) + || EXT_REX_SSE_REGNO_P (REGNO (operands[1])))) { + if (memory_operand (operands[0], mode)) + { + if (GET_MODE_SIZE (mode) == 32) + return "vextract64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else if (GET_MODE_SIZE (mode) == 16) + return "vextract32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else + gcc_unreachable (); + } + else if (memory_operand (operands[1], mode)) + { + if (GET_MODE_SIZE (mode) == 32) + return "vbroadcast64x4\t{%1, %g0|%g0, %1}"; + else if (GET_MODE_SIZE (mode) == 16) + return "vbroadcast32x4\t{%1, %g0|%g0, %1}"; + else + gcc_unreachable (); + } + else + /* Reg -> reg move is always aligned. Just use wider move. 
*/ + switch (mode) + { + case MODE_V8SF: + case MODE_V4SF: + return "vmovaps\t{%g1, %g0|%g0, %g1}"; + case MODE_V4DF: + case MODE_V2DF: + return "vmovapd\t{%g1, %g0|%g0, %g1}"; + case MODE_OI: + case MODE_TI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + default: + gcc_unreachable (); + } + } + switch (mode) + { + case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: if (TARGET_AVX @@ -467,6 +520,7 @@ else return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V8DF: case MODE_V4DF: case MODE_V2DF: if (TARGET_AVX @@ -484,6 +538,12 @@ return "vmovdqu\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) + return "vmovdqu64\t{%1, %0|%0, %1}"; + else + return "vmovdqa64\t{%1, %0|%0, %1}"; default: gcc_unreachable (); @@ -586,7 +646,7 @@ }) (define_expand "push1" - [(match_operand:V16 0 "register_operand")] + [(match_operand:VMOVE 0 "register_operand")] "TARGET_SSE" { ix86_expand_push (mode, operands[0]); @@ -594,8 +654,8 @@ }) (define_expand "movmisalign" - [(set (match_operand:V16 0 "nonimmediate_operand") - (match_operand:V16 1 "nonimmediate_operand"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand") + (match_operand:VMOVE 1 "nonimmediate_operand"))] "TARGET_SSE" { ix86_expand_vector_move_misalign (mode, operands); @@ -603,7 +663,7 @@ }) (define_insn "_loadu" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "memory_operand" "m")] UNSPEC_LOADU))] @@ -662,7 +722,7 @@ (const_string "")))]) (define_insn "_loaddqu" - [(set (match_operand:VI1 0 "register_operand" "=x") + [(set (match_operand:VI1 0 "register_operand" "=v") (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] UNSPEC_LOADU))] "TARGET_SSE2" @@ -696,7 +756,7 @@ (define_insn "_storedqu" [(set (match_operand:VI1 0 "memory_operand" "=m") - (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")] + (unspec:VI1 
[(match_operand:VI1 1 "register_operand" "v")] UNSPEC_STOREU))] "TARGET_SSE2" { @@ -863,10 +923,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (plusminus:VF - (match_operand:VF 1 "nonimmediate_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" "@ \t{%2, %0|%0, %2} @@ -877,11 +937,11 @@ (set_attr "mode" "")]) (define_insn "_vm3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (plusminus:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -917,11 +977,11 @@ (set_attr "mode" "")]) (define_insn "_vmmul3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (mult:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -960,10 +1020,10 @@ }) (define_insn "_div3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF - (match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE" "@ div\t{%2, %0|%0, %2} @@ -974,11 +1034,11 
@@ (set_attr "mode" "")]) (define_insn "_vmdiv3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (div:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -1043,8 +1103,8 @@ }) (define_insn "_sqrt2" - [(set (match_operand:VF 0 "register_operand" "=x") - (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:VF 0 "register_operand" "=v") + (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))] "TARGET_SSE" "%vsqrt\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -1054,11 +1114,11 @@ (set_attr "mode" "")]) (define_insn "_vmsqrt2" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (sqrt:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm")) - (match_operand:VF_128 2 "register_operand" "0,x") + (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm")) + (match_operand:VF_128 2 "register_operand" "0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -1124,10 +1184,10 @@ }) (define_insn "*3_finite" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF - (match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "%0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && flag_finite_math_only && ix86_binary_operator_ok (, mode, operands)" "@ @@ -1140,10 +1200,10 @@ (set_attr "mode" "")]) (define_insn "*3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF - (match_operand:VF 1 "register_operand" "0,x") - 
(match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && !flag_finite_math_only" "@ \t{%2, %0|%0, %2} @@ -1155,11 +1215,11 @@ (set_attr "mode" "")]) (define_insn "_vm3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (smaxmin:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -1790,10 +1850,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (any_logic:VF - (match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "%0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" { static char buf[32]; @@ -2101,11 +2161,11 @@ "TARGET_FMA || TARGET_FMA4") (define_insn "*fma_fmadd_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] "TARGET_FMA || TARGET_FMA4" "@ vfmadd132\t{%2, %3, %0|%0, %3, %2} @@ -2113,17 +2173,17 @@ vfmadd231\t{%2, %1, %0|%0, %1, %2} vfmadd\t{%3, 
%2, %1, %0|%0, %1, %2, %3} vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] "TARGET_FMA || TARGET_FMA4" "@ vfmsub132\t{%2, %3, %0|%0, %3, %2} @@ -2131,17 +2191,17 @@ vfmsub231\t{%2, %1, %0|%0, %1, %2} vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fnmadd_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] "TARGET_FMA || TARGET_FMA4" "@ vfnmadd132\t{%2, %3, %0|%0, %3, %2} @@ -2149,18 +2209,18 @@ vfnmadd231\t{%2, %1, %0|%0, %1, %2} vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 
- [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fnmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] "TARGET_FMA || TARGET_FMA4" "@ vfnmsub132\t{%2, %3, %0|%0, %3, %2} @@ -2168,7 +2228,7 @@ vfnmsub231\t{%2, %1, %0|%0, %1, %2} vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2193,11 +2253,11 @@ "TARGET_FMA || TARGET_FMA4") (define_insn "*fma_fmaddsub_" - [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")] + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")] UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4" "@ @@ -2206,17 +2266,17 @@ vfmaddsub231\t{%2, %1, %0|%0, %1, %2} vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" 
"fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fmsubadd_" - [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))] + (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))] UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4" "@ @@ -2225,7 +2285,7 @@ vfmsubadd231\t{%2, %1, %0|%0, %1, %2} vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2244,12 +2304,12 @@ "TARGET_FMA") (define_insn "*fmai_fmadd_" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v") + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2260,13 +2320,13 @@ (set_attr "mode" "")]) (define_insn "*fmai_fmsub_" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v") (neg:VF_128 - (match_operand:VF_128 3 
"nonimmediate_operand" " x,xm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2277,13 +2337,13 @@ (set_attr "mode" "")]) (define_insn "*fmai_fnmadd_" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2294,14 +2354,14 @@ (set_attr "mode" "")]) (define_insn "*fmai_fnmsub_" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2429,11 +2489,11 @@ (set_attr "mode" "SF")]) (define_insn "sse_cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -2450,11 +2510,11 @@ (set_attr "mode" "SF")]) (define_insn "sse_cvtsi2ssq" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float:SF (match_operand:DI 2 
"nonimmediate_operand" "r,m,rm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE && TARGET_64BIT" "@ @@ -2476,7 +2536,7 @@ [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE" @@ -2490,7 +2550,7 @@ (define_insn "sse_cvtss2si_2" [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE" "%vcvtss2si\t{%1, %0|%0, %k1}" @@ -2506,7 +2566,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (unspec:DI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE && TARGET_64BIT" @@ -2536,7 +2596,7 @@ [(set (match_operand:SI 0 "register_operand" "=r,r") (fix:SI (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))))] "TARGET_SSE" "%vcvttss2si\t{%1, %0|%0, %k1}" @@ -2552,7 +2612,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (fix:DI (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))))] "TARGET_SSE && TARGET_64BIT" "%vcvttss2si{q}\t{%1, %0|%0, %k1}" @@ -2565,9 +2625,9 @@ (set_attr "mode" "DI")]) (define_insn "float2" - [(set (match_operand:VF1 0 "register_operand" "=x") + [(set (match_operand:VF1 0 "register_operand" "=v") (float:VF1 - (match_operand: 1 "nonimmediate_operand" "xm")))] + (match_operand: 1 "nonimmediate_operand" "vm")))] "TARGET_SSE2" "%vcvtdq2ps\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") 
@@ -2752,7 +2812,7 @@ (define_insn "sse2_cvtsd2si_2" [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] + (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2" "%vcvtsd2si\t{%1, %0|%0, %q1}" @@ -2782,7 +2842,7 @@ (define_insn "sse2_cvtsd2siq_2" [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] + (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2 && TARGET_64BIT" "%vcvtsd2si{q}\t{%1, %0|%0, %q1}" @@ -2981,12 +3041,12 @@ (set_attr "mode" "TI")]) (define_insn "sse2_cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE2" "@ @@ -3003,13 +3063,13 @@ (set_attr "mode" "SF")]) (define_insn "sse2_cvtss2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x,x,x") + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm") (parallel [(const_int 0) (const_int 1)]))) - (match_operand:V2DF 1 "register_operand" "0,0,x") + (match_operand:V2DF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE2" "@ @@ -5243,10 +5303,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" - [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") (plusminus:VI_AVX2 - (match_operand:VI_AVX2 1 "nonimmediate_operand" "0,x") - (match_operand:VI_AVX2 
2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI_AVX2 1 "nonimmediate_operand" "0,v") + (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands)" "@ p\t{%2, %0|%0, %2} @@ -5266,10 +5326,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*_3" - [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") (sat_plusminus:VI12_AVX2 - (match_operand:VI12_AVX2 1 "nonimmediate_operand" "0,x") - (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI12_AVX2 1 "nonimmediate_operand" "0,v") + (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands)" "@ p\t{%2, %0|%0, %2} @@ -5641,10 +5701,10 @@ }) (define_insn "*_mul3" - [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") (mult:VI4_AVX2 - (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") - (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,v") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, mode, operands)" "@ pmulld\t{%2, %0|%0, %2} @@ -5765,9 +5825,9 @@ (set_attr "mode" "")]) (define_insn "3" - [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v") (any_lshift:VI248_AVX2 - (match_operand:VI248_AVX2 1 "register_operand" "0,x") + (match_operand:VI248_AVX2 1 "register_operand" "0,v") (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ @@ -5868,10 +5928,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*avx2_3" - [(set (match_operand:VI124_256 0 "register_operand" "=x") + [(set (match_operand:VI124_256 0 "register_operand" "=v") (maxmin:VI124_256 - 
(match_operand:VI124_256 1 "nonimmediate_operand" "%x") - (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] + (match_operand:VI124_256 1 "nonimmediate_operand" "%v") + (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2 && ix86_binary_operator_ok (, mode, operands)" "vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") @@ -6345,10 +6405,10 @@ "TARGET_SSE2") (define_insn "*andnot3" - [(set (match_operand:VI 0 "register_operand" "=x,x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (and:VI - (not:VI (match_operand:VI 1 "register_operand" "0,x")) - (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] + (not:VI (match_operand:VI 1 "register_operand" "0,v")) + (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE" { static char buf[32]; @@ -6429,10 +6489,10 @@ }) (define_insn "*3" - [(set (match_operand:VI 0 "register_operand" "=x,x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (any_logic:VI - (match_operand:VI 1 "nonimmediate_operand" "%0,x") - (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI 1 "nonimmediate_operand" "%0,v") + (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" { @@ -7731,9 +7791,17 @@ (mem:V16QI (match_dup 0))] UNSPEC_MASKMOV))] "TARGET_SSE2" - "%vmaskmovdqu\t{%2, %1|%1, %2}" +{ + /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing + that requires %v to be at the beginning of the opcode name. */ + if (Pmode != word_mode) + fputs ("\taddr32", asm_out_file); + return "%vmaskmovdqu\t{%2, %1|%1, %2}"; +} [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") + (set (attr "length_address") + (symbol_ref ("Pmode != word_mode"))) ;; The implicit %rdi operand confuses default length_vex computation. 
(set (attr "length_vex") (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) @@ -7781,26 +7849,18 @@ "mwait" [(set_attr "length" "3")]) -(define_insn "sse3_monitor" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c") - (match_operand:SI 2 "register_operand" "d")] - UNSPECV_MONITOR)] - "TARGET_SSE3 && !TARGET_64BIT" - "monitor\t%0, %1, %2" - [(set_attr "length" "3")]) - -(define_insn "sse3_monitor64_" +(define_insn "sse3_monitor_" [(unspec_volatile [(match_operand:P 0 "register_operand" "a") (match_operand:SI 1 "register_operand" "c") (match_operand:SI 2 "register_operand" "d")] UNSPECV_MONITOR)] - "TARGET_SSE3 && TARGET_64BIT" + "TARGET_SSE3" ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in ;; RCX and RDX are used. Since 32bit register operands are implicitly ;; zero extended to 64bit, we only need to set up 32bit registers. - "monitor" - [(set_attr "length" "3")]) + "%^monitor" + [(set (attr "length") + (symbol_ref ("(Pmode != word_mode) + 3")))]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -8368,9 +8428,9 @@ (set_attr "mode" "DI")]) (define_insn "abs2" - [(set (match_operand:VI124_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI124_AVX2 0 "register_operand" "=v") (abs:VI124_AVX2 - (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))] + (match_operand:VI124_AVX2 1 "nonimmediate_operand" "vm")))] "TARGET_SSSE3" "%vpabs\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -10481,10 +10541,10 @@ (set_attr "mode" "")]) (define_insn "avx2_permvar" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (unspec:VI4F_256 - [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm") - (match_operand:V8SI 2 "register_operand" "x")] + [(match_operand:VI4F_256 1 "nonimmediate_operand" "vm") + (match_operand:V8SI 2 "register_operand" "v")] UNSPEC_VPERMVAR))] "TARGET_AVX2" "vperm\t{%1, %2, 
%0|%0, %2, %1}" @@ -10508,9 +10568,9 @@ }) (define_insn "avx2_perm_1" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_select:VI8F_256 - (match_operand:VI8F_256 1 "nonimmediate_operand" "xm") + (match_operand:VI8F_256 1 "nonimmediate_operand" "vm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -10735,9 +10795,9 @@ }) (define_insn "*avx_vpermilp" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (vec_select:VF - (match_operand:VF 1 "nonimmediate_operand" "xm") + (match_operand:VF 1 "nonimmediate_operand" "vm") (match_parallel 2 "" [(match_operand 3 "const_int_operand")])))] "TARGET_AVX @@ -10754,10 +10814,10 @@ (set_attr "mode" "")]) (define_insn "avx_vpermilvar3" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF - [(match_operand:VF 1 "register_operand" "x") - (match_operand: 2 "nonimmediate_operand" "xm")] + [(match_operand:VF 1 "register_operand" "v") + (match_operand: 2 "nonimmediate_operand" "vm")] UNSPEC_VPERMIL))] "TARGET_AVX" "vpermil\t{%2, %1, %0|%0, %1, %2}" @@ -11149,10 +11209,10 @@ }) (define_insn "avx2_ashrv" - [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI4_AVX2 0 "register_operand" "=v") (ashiftrt:VI4_AVX2 - (match_operand:VI4_AVX2 1 "register_operand" "x") - (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))] + (match_operand:VI4_AVX2 1 "register_operand" "v") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2" "vpsravd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") @@ -11160,10 +11220,10 @@ (set_attr "mode" "")]) (define_insn "avx2_v" - [(set (match_operand:VI48_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI48_AVX2 0 "register_operand" "=v") (any_lshift:VI48_AVX2 - 
(match_operand:VI48_AVX2 1 "register_operand" "x") - (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))] + (match_operand:VI48_AVX2 1 "register_operand" "v") + (match_operand:VI48_AVX2 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2" "vpv\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") diff --git a/gcc/config/i386/stringop.def b/gcc/config/i386/stringop.def new file mode 100644 index 00000000000..1a7d1e88f65 --- /dev/null +++ b/gcc/config/i386/stringop.def @@ -0,0 +1,37 @@ +/* Definitions for stringop strategy for IA-32. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the files COPYING3. If not, +see . */ + +DEF_ENUM +DEF_ALG (no_stringop, no_stringop) +DEF_ENUM +DEF_ALG (libcall, libcall) +DEF_ENUM +DEF_ALG (rep_prefix_1_byte, rep_byte) +DEF_ENUM +DEF_ALG (rep_prefix_4_byte, rep_4byte) +DEF_ENUM +DEF_ALG (rep_prefix_8_byte, rep_8byte) +DEF_ENUM +DEF_ALG (loop_1_byte, byte_loop) +DEF_ENUM +DEF_ALG (loop, loop) +DEF_ENUM +DEF_ALG (unrolled_loop, unrolled_loop) +DEF_ENUM +DEF_ALG (vector_loop, vector_loop) diff --git a/gcc/config/i386/stringop.opt b/gcc/config/i386/stringop.opt new file mode 100644 index 00000000000..5c5fc906a33 --- /dev/null +++ b/gcc/config/i386/stringop.opt @@ -0,0 +1,31 @@ +/* Definitions for stringop option handling for IA-32. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the files COPYING3. If not, +see . */ + +Enum(stringop_alg) String(rep_byte) Value(rep_prefix_1_byte) + +#undef DEF_ENUM +#define DEF_ENUM EnumValue + +#undef DEF_ALG +#define DEF_ALG(alg, name) Enum(stringop_alg) String(name) Value(alg) + +#include "stringop.def" + +#undef DEF_ENUM +#undef DEF_ALG diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index 3a77e14f5ca..07624cc575e 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -24,7 +24,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h $(TM_H) \ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \ i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H) \ - $(OPTS_H) $(DIAGNOSTIC_H) $(COMMON_TARGET_H) + $(OPTS_H) $(DIAGNOSTIC_H) $(COMMON_TARGET_H) $(CONTEXT_H) $(PASS_MANAGER_H) i386-c.o: $(srcdir)/config/i386/i386-c.c \ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ diff --git a/gcc/config/i386/x86-64.h b/gcc/config/i386/x86-64.h index 336343927c8..0c62723ae22 100644 --- a/gcc/config/i386/x86-64.h +++ b/gcc/config/i386/x86-64.h @@ -103,3 +103,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #undef TARGET_ASM_UNIQUE_SECTION #define TARGET_ASM_UNIQUE_SECTION x86_64_elf_unique_section + +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS x86_64_elf_section_type_flags diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def new file mode 100644 index 00000000000..e3a34ee7b2e --- /dev/null +++ b/gcc/config/i386/x86-tune.def @@ -0,0 +1,232 @@ +/* Definitions of x86 tunable features. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results + negatively, so enabling for Generic64 seems like good code size + tradeoff. We can't enable it for 32bit generic because it does not + work well with PPro base chips. */ +DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64) +DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", + m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE + | m_GENERIC) +DEF_TUNE (X86_TUNE_ZERO_EXTEND_WITH_AND, "zero_extend_with_and", m_486 | m_PENT) +DEF_TUNE (X86_TUNE_UNROLL_STRLEN, "unroll_strlen", + m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 + | m_AMD_MULTIPLE | m_GENERIC) +/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based + on simulation result. 
But after P4 was made, no performance benefit + was observed with branch hints. It also increases the code size. + As a result, icc never generates branch hints. */ +DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", 0) +DEF_TUNE (X86_TUNE_DOUBLE_WITH_ADD, "double_with_add", ~m_386) +DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC) +/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid + partial dependencies. */ +DEF_TUNE (X86_TUNE_MOVX, "movx", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE + | m_AMD_MULTIPLE | m_GENERIC) +/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial + register stalls on Generic32 compilation setting as well. However + in current implementation the partial register stalls are not eliminated + very well - they can be introduced via subregs synthesized by combine + and can happen in caller/callee saving sequences. Because this option + pays back little on PPro based chips and is in conflict with partial reg + dependencies used by Athlon/P4 based chips, it is better to leave it off + for generic32 for now. */ +DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO) +DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", + m_CORE_ALL | m_GENERIC) +/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + * on 16-bit immediate moves into memory on Core2 and Corei7. 
*/ +DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop", + m_386 | m_486 | m_K6_GEODE) +DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", + ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM + | m_SLM | m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_USE_MOV0, "use_mov0", m_K6) +DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", ~(m_PENT | m_ATOM | m_SLM | m_K6)) +/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ +DEF_TUNE (X86_TUNE_USE_XCHGB, "use_xchgb", m_PENT4) +DEF_TUNE (X86_TUNE_SPLIT_LONG_MOVES, "split_long_moves", m_PPRO) +DEF_TUNE (X86_TUNE_READ_MODIFY_WRITE, "read_modify_write", ~m_PENT) +DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_PPRO)) +DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode", + m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM + | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_FAST_PREFIX, "fast_prefix", ~(m_386 | m_486 | m_PENT)) +DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) +DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0) +/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial + register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option + might be considered for Generic32 if our scheme for avoiding partial + stalls was more effective. */ +DEF_TUNE (X86_TUNE_HIMODE_MATH, "himode_math", ~m_PPRO) +DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0) +DEF_TUNE (X86_TUNE_PROMOTE_HI_REGS, "promote_hi_regs", m_PPRO) +/* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred + over esp addition. */ +DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop", m_386 | m_486 | m_PENT | m_PPRO) +/* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred + over esp addition. */ +DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop", m_PENT) +/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred + over esp subtraction. 
*/ +DEF_TUNE (X86_TUNE_SINGLE_PUSH, "single_push", m_386 | m_486 | m_PENT + | m_K6_GEODE) +/* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred + over esp subtraction. */ +DEF_TUNE (X86_TUNE_DOUBLE_PUSH, "double_push", m_PENT | m_K6_GEODE) +/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred + for DFmode copies */ +DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM + | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE + | m_GENERIC) +/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a + conflict here in between PPro/Pentium4 based chips that thread 128bit + SSE registers as single units versus K8 based chips that divide SSE + registers to two 64bit halves. This knob promotes all store destinations + to be 128bit to allow register renaming on 128bit SSE units, but usually + results in one extra microop on 64bit SSE units. Experimental results + shows that disabling this option on P4 brings over 20% SPECfp regression, + while enabling it on K8 brings roughly 2.4% regression that can be partly + masked by careful scheduling of moves. 
*/ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 + | m_BDVER | m_GENERIC) +DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", + m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM) +DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", + m_COREI7 | m_BDVER | m_SLM) +DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", + m_BDVER) +/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies + are resolved on SSE register parts instead of whole registers, so we may + maintain just lower part of scalar values in proper format leaving the + upper part undefined. */ +DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8) +DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE) +DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA) +DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move", + m_PPRO | m_ATHLON_K8) +DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", + m_PPRO | m_ATHLON_K8) +DEF_TUNE (X86_TUNE_SHIFT1, "shift1", ~m_486) +DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) +DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_TO_VEC, "inter_unit_moves_to_vec", + ~(m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", + ~m_ATHLON_K8) +DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", + ~(m_AMDFAM10 | m_BDVER )) +/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more + than 4 branch instructions in the 16 byte window. 
*/ +DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM| m_AMD_MULTIPLE + | m_GENERIC) +DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", + m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_BT, "use_bt", + m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", + ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC)) +DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", + m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_ATOM) +DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_ATHLON_K8 | m_GENERIC) +DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", + m_CORE_ALL | m_K8 | m_GENERIC64) +/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode + and SImode multiply, but 386 and 486 do HImode multiply faster. */ +DEF_TUNE (X86_TUNE_PROMOTE_HIMODE_IMUL, "promote_himode_imul", + ~(m_386 | m_486)) +/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is + vector path on AMD machines. */ +DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM32_MEM, "slow_imul_imm32_mem", + m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64) +/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD + machines. */ +DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8", + m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64) +/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR + than a MOV. */ +DEF_TUNE (X86_TUNE_MOVE_M1_VIA_OR, "move_m1_via_or", m_PENT) +/* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is, + but one byte longer. 
*/ +DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT) +/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory + operand that cannot be represented using a modRM byte. The XOR + replacement is long decoded, so this split helps here as well. */ +DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6) +/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion + from FP to FP. */ +DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts", + m_CORE_ALL | m_AMDFAM10 | m_GENERIC) +/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion + from integer to FP. */ +DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) +/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction + with a subsequent conditional jump instruction into a single + compare-and-branch uop. */ +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER) +/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag + will impact LEA instruction selection. */ +DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM) +/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector + instructions. */ +DEF_TUNE (X86_TUNE_VECTORIZE_DOUBLE, "vectorize_double", ~m_ATOM) +/* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching + at -O3. For the moment, the prefetching seems badly tuned for Intel + chips. */ +DEF_TUNE (X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, "software_prefetching_beneficial", + m_K6_GEODE | m_AMD_MULTIPLE) +/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for + the auto-vectorizer. */ +DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2) +/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations + during reassociation of integer computation. 
*/ +DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel", + m_ATOM) +/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations + during reassociation of fp computation. */ +DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel", + m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2) +/* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE + regs instead of memory. */ +DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", + m_CORE_ALL) +/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for + a conditional move. */ +DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", m_ATOM) +/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for + fp converts to destination register. */ +DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", + m_SLM) diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h index 22cfe9f6677..ca592e4bc26 100644 --- a/gcc/config/ia64/hpux.h +++ b/gcc/config/ia64/hpux.h @@ -179,9 +179,10 @@ do { \ #undef TARGET_ASM_RELOC_RW_MASK #define TARGET_ASM_RELOC_RW_MASK ia64_hpux_reloc_rw_mask -/* ia64 HPUX has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 +/* ia64 HPUX has the float and long double forms of math functions. + We redefine this hook so the version from elfos.h header won't be used. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_c99_libc_has_function #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS ia64_hpux_init_libfuncs diff --git a/gcc/config/linux-android.c b/gcc/config/linux-android.c index d6e47a70e7a..4a4b48d9882 100644 --- a/gcc/config/linux-android.c +++ b/gcc/config/linux-android.c @@ -31,3 +31,17 @@ linux_android_has_ifunc_p (void) { return TARGET_ANDROID ? 
false : HAVE_GNU_INDIRECT_FUNCTION; } + +bool +linux_android_libc_has_function (enum function_class fn_class) +{ + if (OPTION_GLIBC) + return true; + if (OPTION_BIONIC) + if (fn_class == function_c94 + || fn_class == function_c99_misc + || fn_class == function_sincos) + return true; + + return false; +} diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h index 3f926e5dffd..d1f0f926367 100644 --- a/gcc/config/linux-protos.h +++ b/gcc/config/linux-protos.h @@ -19,3 +19,5 @@ along with GCC; see the file COPYING3. If not see . */ extern bool linux_android_has_ifunc_p (void); + +extern bool linux_android_libc_has_function (enum function_class fn_class); diff --git a/gcc/config/linux.h b/gcc/config/linux.h index 2be1079b92f..8116e698d94 100644 --- a/gcc/config/linux.h +++ b/gcc/config/linux.h @@ -95,15 +95,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ BIONIC_DYNAMIC_LINKERX32) -/* Determine whether the entire c99 runtime - is present in the runtime library. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC || OPTION_BIONIC) - /* Whether we have Bionic libc runtime */ #undef TARGET_HAS_BIONIC #define TARGET_HAS_BIONIC (OPTION_BIONIC) + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function diff --git a/gcc/config/lm32/uclinux-elf.h b/gcc/config/lm32/uclinux-elf.h index 3a556d7258d..a5e8163cf6f 100644 --- a/gcc/config/lm32/uclinux-elf.h +++ b/gcc/config/lm32/uclinux-elf.h @@ -77,3 +77,5 @@ #undef CC1_SPEC #define CC1_SPEC "%{G*} %{!fno-PIC:-fPIC}" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/m68k/uclinux.h b/gcc/config/m68k/uclinux.h index 8d743126547..b1af7d2c585 100644 --- a/gcc/config/m68k/uclinux.h +++ b/gcc/config/m68k/uclinux.h @@ -67,3 +67,6 @@ along with GCC; see the file COPYING3. If not see sections. */ #undef M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P #define M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h index bc4d9a128d1..eb8e45ce17b 100644 --- a/gcc/config/microblaze/microblaze.h +++ b/gcc/config/microblaze/microblaze.h @@ -892,6 +892,10 @@ do { \ %{pg:-start-group -lxilprofile -lgloss -lxil -lc -lm -end-group } \ %{!pg:-start-group -lgloss -lxil -lc -lm -end-group }} " +/* microblaze-unknown-elf target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + #undef ENDFILE_SPEC #define ENDFILE_SPEC "crtend.o%s crtn.o%s" diff --git a/gcc/config/mips/linux-common.h b/gcc/config/mips/linux-common.h index ca4ea0705a6..ebc67a28d90 100644 --- a/gcc/config/mips/linux-common.h +++ b/gcc/config/mips/linux-common.h @@ -44,7 +44,7 @@ along with GCC; see the file COPYING3. 
If not see #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h index 9b4c68db6ee..6736295eb36 100644 --- a/gcc/config/mips/linux.h +++ b/gcc/config/mips/linux.h @@ -17,4 +17,9 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" +#define GLIBC_DYNAMIC_LINKER \ + "%{mnan=2008:/lib/ld-linux-mipsn8.so.1;:/lib/ld.so.1}" + +#undef UCLIBC_DYNAMIC_LINKER +#define UCLIBC_DYNAMIC_LINKER \ + "%{mnan=2008:/lib/ld-uClibc-mipsn8.so.0;:/lib/ld-uClibc.so.0}" diff --git a/gcc/config/mips/linux64.h b/gcc/config/mips/linux64.h index dbba47a1a13..421a53a1085 100644 --- a/gcc/config/mips/linux64.h +++ b/gcc/config/mips/linux64.h @@ -22,10 +22,22 @@ along with GCC; see the file COPYING3. 
If not see #define GNU_USER_LINK_EMULATION64 "elf64%{EB:b}%{EL:l}tsmip" #define GNU_USER_LINK_EMULATIONN32 "elf32%{EB:b}%{EL:l}tsmipn32" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1" -#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld.so.1" -#define GLIBC_DYNAMIC_LINKERN32 "/lib32/ld.so.1" -#define UCLIBC_DYNAMIC_LINKERN32 "/lib32/ld-uClibc.so.0" +#define GLIBC_DYNAMIC_LINKER32 \ + "%{mnan=2008:/lib/ld-linux-mipsn8.so.1;:/lib/ld.so.1}" +#define GLIBC_DYNAMIC_LINKER64 \ + "%{mnan=2008:/lib64/ld-linux-mipsn8.so.1;:/lib64/ld.so.1}" +#define GLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-linux-mipsn8.so.1;:/lib32/ld.so.1}" + +#undef UCLIBC_DYNAMIC_LINKER32 +#define UCLIBC_DYNAMIC_LINKER32 \ + "%{mnan=2008:/lib/ld-uClibc-mipsn8.so.0;:/lib/ld-uClibc.so.0}" +#undef UCLIBC_DYNAMIC_LINKER64 +#define UCLIBC_DYNAMIC_LINKER64 \ + "%{mnan=2008:/lib/ld64-uClibc-mipsn8.so.0;:/lib/ld64-uClibc.so.0}" +#define UCLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" + #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" #define GNU_USER_DYNAMIC_LINKERN32 \ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index ecb7f181d8f..383d2cb6d43 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -17,12 +17,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -/* MIPS has a quirky almost-IEEE format for all its - floating point. */ -RESET_FLOAT_FORMAT (SF, mips_single_format); -RESET_FLOAT_FORMAT (DF, mips_double_format); - -FLOAT_MODE (TF, 16, mips_quad_format); +FLOAT_MODE (TF, 16, ieee_quad_format); /* Vector modes. 
*/ VECTOR_MODES (INT, 4); /* V4QI V2HI */ diff --git a/gcc/config/mips/mips-opts.h b/gcc/config/mips/mips-opts.h index dbfcfad0b04..56249d94c4e 100644 --- a/gcc/config/mips/mips-opts.h +++ b/gcc/config/mips/mips-opts.h @@ -27,6 +27,13 @@ enum mips_code_readable_setting { CODE_READABLE_YES }; +/* Enumerates the setting of the -mabs and -mnan options. */ +enum mips_ieee_754_setting { + MIPS_IEEE_754_DEFAULT, + MIPS_IEEE_754_LEGACY, + MIPS_IEEE_754_2008 +}; + /* Enumerates the setting of the -mr10k-cache-barrier option. */ enum mips_r10k_cache_barrier_setting { R10K_CACHE_BARRIER_NONE, diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index a56757c877f..5993aabe578 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "target-globals.h" #include "opts.h" #include "tree-pass.h" +#include "context.h" /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ #define UNSPEC_ADDRESS_P(X) \ @@ -8843,6 +8844,11 @@ mips_file_start (void) fprintf (asm_out_file, "\t.section .gcc_compiled_long%d\n" "\t.previous\n", TARGET_LONG64 ? 64 : 32); + /* Record the NaN encoding. */ + if (HAVE_AS_NAN || mips_nan != MIPS_IEEE_754_DEFAULT) + fprintf (asm_out_file, "\t.nan\t%s\n", + mips_nan == MIPS_IEEE_754_2008 ? "2008" : "legacy"); + #ifdef HAVE_AS_GNU_ATTRIBUTE { int attr; @@ -12291,6 +12297,7 @@ mips_adjust_insn_length (rtx insn, int length) /* mips.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length of a PIC long-branch sequence. Substitute the correct value. */ if (length == MAX_PIC_BRANCH_LENGTH + && JUMP_P (insn) && INSN_CODE (insn) >= 0 && get_attr_type (insn) == TYPE_BRANCH) { @@ -12312,7 +12319,9 @@ mips_adjust_insn_length (rtx insn, int length) length += TARGET_MIPS16 ? 2 : 4; /* See how many nops might be needed to avoid hardware hazards. 
*/ - if (!cfun->machine->ignore_hazard_length_p && INSN_CODE (insn) >= 0) + if (!cfun->machine->ignore_hazard_length_p + && INSN_P (insn) + && INSN_CODE (insn) >= 0) switch (get_attr_hazard (insn)) { case HAZARD_NONE: @@ -16332,33 +16341,43 @@ mips_machine_reorg2 (void) return 0; } -struct rtl_opt_pass pass_mips_machine_reorg2 = -{ - { - RTL_PASS, - "mach2", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - mips_machine_reorg2, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } -}; +namespace { -struct register_pass_info insert_pass_mips_machine_reorg2 = +const pass_data pass_data_mips_machine_reorg2 = { - &pass_mips_machine_reorg2.pass, /* pass */ - "dbr", /* reference_pass_name */ - 1, /* ref_pass_instance_number */ - PASS_POS_INSERT_AFTER /* po_op */ + RTL_PASS, /* type */ + "mach2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; + +class pass_mips_machine_reorg2 : public rtl_opt_pass +{ +public: + pass_mips_machine_reorg2(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mips_machine_reorg2, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return mips_machine_reorg2 (); } + +}; // class pass_mips_machine_reorg2 + +} // anon namespace + +rtl_opt_pass * +make_pass_mips_machine_reorg2 (gcc::context *ctxt) +{ + return new pass_mips_machine_reorg2 (ctxt); +} + /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text in order to avoid duplicating too much logic from elsewhere. 
*/ @@ -16980,6 +16999,15 @@ mips_option_override (void) } } + /* Pre-IEEE 754-2008 MIPS hardware has a quirky almost-IEEE format + for all its floating point. */ + if (mips_nan != MIPS_IEEE_754_2008) + { + REAL_MODE_FORMAT (SFmode) = &mips_single_format; + REAL_MODE_FORMAT (DFmode) = &mips_double_format; + REAL_MODE_FORMAT (TFmode) = &mips_quad_format; + } + /* Make sure that the user didn't turn off paired single support when MIPS-3D support is requested. */ if (TARGET_MIPS3D @@ -17143,6 +17171,14 @@ mips_option_override (void) /* We register a second machine specific reorg pass after delay slot filling. Registering the pass must be done at start up. It's convenient to do it here. */ + opt_pass *new_pass = make_pass_mips_machine_reorg2 (g); + struct register_pass_info insert_pass_mips_machine_reorg2 = + { + new_pass, /* pass */ + "dbr", /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + PASS_POS_INSERT_AFTER /* po_op */ + }; register_pass (&insert_pass_mips_machine_reorg2); if (TARGET_HARD_FLOAT_ABI && TARGET_MIPS5900) diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 2dcccd48b8f..af7eeee6682 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -507,6 +507,12 @@ struct mips_cpu_info { if (TARGET_PAIRED_SINGLE_FLOAT) \ builtin_define ("__mips_paired_single_float"); \ \ + if (mips_abs == MIPS_IEEE_754_2008) \ + builtin_define ("__mips_abs2008"); \ + \ + if (mips_nan == MIPS_IEEE_754_2008) \ + builtin_define ("__mips_nan2008"); \ + \ if (TARGET_BIG_ENDIAN) \ { \ builtin_define_std ("MIPSEB"); \ @@ -743,6 +749,7 @@ struct mips_cpu_info { --with-abi is ignored if -mabi is specified. --with-float is ignored if -mhard-float or -msoft-float are specified. + --with-nan is ignored if -mnan is specified. --with-divide is ignored if -mdivide-traps or -mdivide-breaks are specified. 
*/ #define OPTION_DEFAULT_SPECS \ @@ -755,6 +762,7 @@ struct mips_cpu_info { {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ {"fpu", "%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}" }, \ + {"nan", "%{!mnan=*:-mnan=%(VALUE)}" }, \ {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \ {"llsc", "%{!mllsc:%{!mno-llsc:-m%(VALUE)}}" }, \ {"mips-plt", "%{!mplt:%{!mno-plt:-m%(VALUE)}}" }, \ @@ -1160,7 +1168,7 @@ struct mips_cpu_info { %(subtarget_asm_debugging_spec) \ %{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ %{mgp32} %{mgp64} %{march=*} %{mxgot:-xgot} \ -%{mfp32} %{mfp64} \ +%{mfp32} %{mfp64} %{mnan=*} \ %{mshared} %{mno-shared} \ %{msym32} %{mno-sym32} \ %{mtune=*} \ @@ -2897,6 +2905,10 @@ while (0) #define HAVE_AS_TLS 0 #endif +#ifndef HAVE_AS_NAN +#define HAVE_AS_NAN 0 +#endif + #ifndef USED_FOR_TARGET /* Information about ".set noFOO; ...; .set FOO" blocks. */ struct mips_asm_switch { diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index ca79a31e29a..0cda169224f 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -2711,14 +2711,15 @@ ;; Do not use the integer abs macro instruction, since that signals an ;; exception on -2147483648 (sigh). -;; abs.fmt is an arithmetic instruction and treats all NaN inputs as -;; invalid; it does not clear their sign bits. We therefore can't use -;; abs.fmt if the signs of NaNs matter. +;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not clear +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. 
(define_insn "abs2" [(set (match_operand:ANYF 0 "register_operand" "=f") (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] - "!HONOR_NANS (mode)" + "mips_abs == MIPS_IEEE_754_2008 || !HONOR_NANS (mode)" "abs.\t%0,%1" [(set_attr "type" "fabs") (set_attr "mode" "")]) @@ -2793,14 +2794,15 @@ [(set_attr "alu_type" "sub") (set_attr "mode" "DI")]) -;; neg.fmt is an arithmetic instruction and treats all NaN inputs as -;; invalid; it does not flip their sign bit. We therefore can't use -;; neg.fmt if the signs of NaNs matter. +;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not flip +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. (define_insn "neg2" [(set (match_operand:ANYF 0 "register_operand" "=f") (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")))] - "!HONOR_NANS (mode)" + "mips_abs == MIPS_IEEE_754_2008 || !HONOR_NANS (mode)" "neg.\t%0,%1" [(set_attr "type" "fneg") (set_attr "mode" "")]) @@ -6671,8 +6673,13 @@ "ISA_HAS_PREFETCH && TARGET_EXPLICIT_RELOCS" { if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A) - /* Loongson 2[ef] and Loongson 3a use load to $0 to perform prefetching. */ - return "ld\t$0,%a0"; + { + /* Loongson 2[ef] and Loongson 3a use load to $0 for prefetching. 
*/ + if (TARGET_64BIT) + return "ld\t$0,%a0"; + else + return "lw\t$0,%a0"; + } operands[1] = mips_prefetch_cookie (operands[1], operands[2]); return "pref\t%1,%a0"; } diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 08ab29b1810..0324041dbea 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -205,6 +205,24 @@ mfused-madd Target Report Mask(FUSED_MADD) Generate floating-point multiply-add instructions +mabs= +Target RejectNegative Joined Enum(mips_ieee_754_value) Var(mips_abs) Init(MIPS_IEEE_754_DEFAULT) +-mabs=MODE Select the IEEE 754 ABS/NEG instruction execution mode + +mnan= +Target RejectNegative Joined Enum(mips_ieee_754_value) Var(mips_nan) Init(MIPS_IEEE_754_DEFAULT) +-mnan=ENCODING Select the IEEE 754 NaN data encoding + +Enum +Name(mips_ieee_754_value) Type(int) +Known MIPS IEEE 754 settings (for use with the -mabs= and -mnan= options): + +EnumValue +Enum(mips_ieee_754_value) String(2008) Value(MIPS_IEEE_754_2008) + +EnumValue +Enum(mips_ieee_754_value) String(legacy) Value(MIPS_IEEE_754_LEGACY) + mgp32 Target Report RejectNegative InverseMask(64BIT) Use 32-bit general registers diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h index 45bc0b88107..96dcac4dfb3 100644 --- a/gcc/config/mips/mti-linux.h +++ b/gcc/config/mips/mti-linux.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see /* This target is a multilib target, specify the sysroot paths. 
*/ #undef SYSROOT_SUFFIX_SPEC #define SYSROOT_SUFFIX_SPEC \ - "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" + "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}%{mnan=2008:/nan2008}" #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ diff --git a/gcc/config/mips/t-mti-elf b/gcc/config/mips/t-mti-elf index bce8f063452..4aec70cb807 100644 --- a/gcc/config/mips/t-mti-elf +++ b/gcc/config/mips/t-mti-elf @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16/mmicromips mabi=64 EL msoft-float/mnan=2008 +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof nan2008 MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. @@ -36,9 +36,7 @@ MULTILIB_EXCEPTIONS += mabi=64* MULTILIB_EXCEPTIONS += *mips64*/*mips16* MULTILIB_EXCEPTIONS += *mips16/mabi=64* -# We only want micromips for mips32r2 architecture and we do not want -# it used in conjunction with -mips16. -MULTILIB_EXCEPTIONS += *mips16/mmicromips* +# We only want micromips for mips32r2 architecture. MULTILIB_EXCEPTIONS += *mips64*/mmicromips* MULTILIB_EXCEPTIONS += *mips32/mmicromips* MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mips/t-mti-linux b/gcc/config/mips/t-mti-linux index bce8f063452..4aec70cb807 100644 --- a/gcc/config/mips/t-mti-linux +++ b/gcc/config/mips/t-mti-linux @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. 
-MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16/mmicromips mabi=64 EL msoft-float/mnan=2008 +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof nan2008 MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. @@ -36,9 +36,7 @@ MULTILIB_EXCEPTIONS += mabi=64* MULTILIB_EXCEPTIONS += *mips64*/*mips16* MULTILIB_EXCEPTIONS += *mips16/mabi=64* -# We only want micromips for mips32r2 architecture and we do not want -# it used in conjunction with -mips16. -MULTILIB_EXCEPTIONS += *mips16/mmicromips* +# We only want micromips for mips32r2 architecture. MULTILIB_EXCEPTIONS += *mips64*/mmicromips* MULTILIB_EXCEPTIONS += *mips32/mmicromips* MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h index c5edc5777a9..2d5e1a8a392 100644 --- a/gcc/config/mmix/mmix.h +++ b/gcc/config/mmix/mmix.h @@ -813,6 +813,10 @@ typedef struct { int regs; int lib; } CUMULATIVE_ARGS; #define NO_IMPLICIT_EXTERN_C +/* mmix-knuth-mmixware target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* These are checked. */ #define DOLLARS_IN_IDENTIFIERS 0 #define NO_DOLLAR_IN_LABEL diff --git a/gcc/config/moxie/uclinux.h b/gcc/config/moxie/uclinux.h index 498037e8072..85c65f257ce 100644 --- a/gcc/config/moxie/uclinux.h +++ b/gcc/config/moxie/uclinux.h @@ -37,3 +37,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see --wrap=mmap --wrap=munmap --wrap=alloca\ %{fmudflapth: --wrap=pthread_create\ }} %{fmudflap|fmudflapth: --wrap=main}" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/netbsd.h b/gcc/config/netbsd.h index 71c9183be0d..dd50dcc0ec4 100644 --- a/gcc/config/netbsd.h +++ b/gcc/config/netbsd.h @@ -139,6 +139,9 @@ along with GCC; see the file COPYING3. If not see #undef LIBGCC_SPEC #define LIBGCC_SPEC NETBSD_LIBGCC_SPEC +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* When building shared libraries, the initialization and finalization functions for the library are .init and .fini respectively. */ diff --git a/gcc/config/openbsd.h b/gcc/config/openbsd.h index 6537451f5f5..0d118b46328 100644 --- a/gcc/config/openbsd.h +++ b/gcc/config/openbsd.h @@ -145,8 +145,10 @@ while (0) #define TARGET_POSIX_IO -/* All new versions of OpenBSD have C99 functions. */ -#define TARGET_C99_FUNCTIONS 1 +/* All new versions of OpenBSD have C99 functions. We redefine this hook + so the version from elfos.h header won't be used. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function /* Runtime target specification. */ diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h index c384824fbf6..9685bb25a57 100644 --- a/gcc/config/pa/pa-hpux.h +++ b/gcc/config/pa/pa-hpux.h @@ -114,3 +114,6 @@ along with GCC; see the file COPYING3. If not see compatibility with the HP-UX unwind library. 
*/ #undef TARGET_HPUX_UNWIND_LIBRARY #define TARGET_HPUX_UNWIND_LIBRARY 1 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index be07d2a229a..80c4d43401d 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -833,7 +833,7 @@ (define_insn "scc" [(set (match_operand:SI 0 "register_operand" "=r") (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]))] "" "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi 1,%0" @@ -843,7 +843,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]))] "TARGET_64BIT" "cmp%I2clr,*%B3 %2,%1,%0\;ldi 1,%0" @@ -853,10 +853,10 @@ (define_insn "iorscc" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]) (match_operator:SI 6 "comparison_operator" - [(match_operand:SI 4 "register_operand" "r") + [(match_operand:SI 4 "reg_or_0_operand" "rM") (match_operand:SI 5 "arith11_operand" "rI")])))] "" "{com%I2clr|cmp%I2clr},%S3 %2,%1,%%r0\;{com%I5clr|cmp%I5clr},%B6 %5,%4,%0\;ldi 1,%0" @@ -866,10 +866,10 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]) (match_operator:DI 6 "comparison_operator" - [(match_operand:DI 4 "register_operand" "r") + [(match_operand:DI 4 "reg_or_0_operand" "rM") (match_operand:DI 5 "arith11_operand" "rI")])))] 
"TARGET_64BIT" "cmp%I2clr,*%S3 %2,%1,%%r0\;cmp%I5clr,*%B6 %5,%4,%0\;ldi 1,%0" @@ -881,7 +881,7 @@ (define_insn "negscc" [(set (match_operand:SI 0 "register_operand" "=r") (neg:SI (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")])))] "" "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi -1,%0" @@ -891,7 +891,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (neg:DI (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")])))] "TARGET_64BIT" "cmp%I2clr,*%B3 %2,%1,%0\;ldi -1,%0" diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h index d61db4c3bd0..d4bc19a00f1 100644 --- a/gcc/config/pdp11/pdp11.h +++ b/gcc/config/pdp11/pdp11.h @@ -666,3 +666,7 @@ extern rtx cc0_reg_rtx; #define COMPARE_FLAG_MODE HImode #define TARGET_HAVE_NAMED_SECTIONS false + +/* pdp11-unknown-aout target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h index d43ec20e440..13414c6cc9c 100644 --- a/gcc/config/picochip/picochip.h +++ b/gcc/config/picochip/picochip.h @@ -656,4 +656,8 @@ enum picochip_builtins not detecting this. 
*/ #define HAVE_AS_LEB128 1 +/* picochip-unknown-none target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* The End */ diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index c2ed7389bc4..d7cacc16352 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -49,6 +49,7 @@ #include "rl78-protos.h" #include "dumpfile.h" #include "tree-pass.h" +#include "context.h" static inline bool is_interrupt_func (const_tree decl); static inline bool is_brk_interrupt_func (const_tree decl); @@ -129,30 +130,45 @@ devirt_pass (void) /* This pass converts virtual instructions using virtual registers, to real instructions using real registers. Rather than run it as reorg, we reschedule it before vartrack to help with debugging. */ -static struct opt_pass rl78_devirt_pass = -{ - RTL_PASS, - "devirt", - OPTGROUP_NONE, /* optinfo_flags */ - devirt_gate, - devirt_pass, - NULL, - NULL, - 212, - TV_MACH_DEP, - 0, 0, 0, - 0, - 0 +namespace { + +const pass_data pass_data_rl78_devirt = +{ + RTL_PASS, /* type */ + "devirt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; -static struct register_pass_info rl78_devirt_info = +class pass_rl78_devirt : public rtl_opt_pass { - & rl78_devirt_pass, - "vartrack", - 1, - PASS_POS_INSERT_BEFORE +public: + pass_rl78_devirt(gcc::context *ctxt) + : rtl_opt_pass(pass_data_rl78_devirt, ctxt) + { + } + + /* opt_pass methods: */ + bool gate () { return devirt_gate (); } + unsigned int execute () { return devirt_pass (); } }; +} // anon namespace + +rtl_opt_pass * +make_pass_rl78_devirt (gcc::context *ctxt) +{ + return new pass_rl78_devirt (ctxt); +} + + #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START rl78_asm_file_start 
@@ -167,6 +183,15 @@ rl78_asm_file_start (void) fprintf (asm_out_file, "r%d\t=\t0x%x\n", 16 + i, 0xffee8 + i); } + opt_pass *rl78_devirt_pass = make_pass_rl78_devirt (g); + struct register_pass_info rl78_devirt_info = + { + rl78_devirt_pass, + "vartrack", + 1, + PASS_POS_INSERT_BEFORE + }; + register_pass (& rl78_devirt_info); } diff --git a/gcc/config/rs6000/aix43.h b/gcc/config/rs6000/aix43.h index 70db7f7482f..b27c046021a 100644 --- a/gcc/config/rs6000/aix43.h +++ b/gcc/config/rs6000/aix43.h @@ -159,3 +159,6 @@ do { \ #define TARGET_USES_AIX64_OPT 1 #define TARGET_AIX_VERSION 43 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/rs6000/aix51.h b/gcc/config/rs6000/aix51.h index 669dbbe03f3..3837bfdc0bb 100644 --- a/gcc/config/rs6000/aix51.h +++ b/gcc/config/rs6000/aix51.h @@ -163,3 +163,6 @@ do { \ #define TARGET_USE_JCR_SECTION 0 #define TARGET_AIX_VERSION 51 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/rs6000/aix52.h b/gcc/config/rs6000/aix52.h index c57271a5a58..51954718b2e 100644 --- a/gcc/config/rs6000/aix52.h +++ b/gcc/config/rs6000/aix52.h @@ -166,10 +166,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/aix53.h b/gcc/config/rs6000/aix53.h index b1b0759e7ff..b3bd73a6988 100644 --- a/gcc/config/rs6000/aix53.h +++ b/gcc/config/rs6000/aix53.h @@ -166,10 +166,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. 
*/ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/aix61.h b/gcc/config/rs6000/aix61.h index cd341b97eea..b0778143773 100644 --- a/gcc/config/rs6000/aix61.h +++ b/gcc/config/rs6000/aix61.h @@ -190,10 +190,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 0cf2f4c346d..d5919c4c71d 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -386,10 +386,8 @@ extern int darwin_emit_branch_islands; #define OFFS_ASSIGNIVAR_FAST 0xFFFEFEC0 /* Old versions of Mac OS/Darwin don't have C99 functions available. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS \ - (TARGET_64BIT \ - || strverscmp (darwin_macosx_version_min, "10.3") >= 0) +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION darwin_libc_has_function /* When generating kernel code or kexts, we don't use Altivec by default, as kernel code doesn't save/restore those registers. 
*/ diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 052ac482e0f..9a846239b04 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -132,11 +132,14 @@ "") (define_insn "*negtd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (neg:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fneg %0,%1" - [(set_attr "type" "fp")]) + "@ + fneg %0,%1 + fneg %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) (define_expand "abstd2" [(set (match_operand:TD 0 "gpc_reg_operand" "") @@ -145,18 +148,24 @@ "") (define_insn "*abstd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fabs %0,%1" - [(set_attr "type" "fp")]) + "@ + fabs %0,%1 + fabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) (define_insn "*nabstd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fnabs %0,%1" - [(set_attr "type" "fp")]) + "@ + fnabs %0,%1 + fnabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) ;; Hardware support for decimal floating point operations. 
diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h index f7f2d80c4f2..2e5a56b3929 100644 --- a/gcc/config/rs6000/linux.h +++ b/gcc/config/rs6000/linux.h @@ -28,17 +28,18 @@ #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* glibc has float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #undef TARGET_OS_CPP_BUILTINS #define TARGET_OS_CPP_BUILTINS() \ diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 79f0f0b5f00..439f53f2d23 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -288,17 +288,18 @@ extern int dot_symbols; #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* glibc has float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #undef TARGET_OS_CPP_BUILTINS #define TARGET_OS_CPP_BUILTINS() \ diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 18912f15a4a..7338e764c5c 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1702,3 +1702,99 @@ return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; }) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. +(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. */ + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem,sign_extend,zero_extend") +{ + rtx addr; + + /* Handle sign/zero extend. 
*/ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + return (base_reg_operand (base, GET_MODE (base)) + && satisfies_constraint_I (offset)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 54548be7038..5124e1665d4 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -42,5 +42,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ -/* Replacement for TImode that only is allowed in GPRs. */ +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. 
*/ PARTIAL_INT_MODE (TI); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3a7b37a8270..3ddabb81c39 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -73,6 +73,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx *, bool); +extern void expand_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx *); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8b939d8e826..c1acbd825ea 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -284,9 +284,6 @@ static struct { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, }; -/* 2 argument gen function typedef. */ -typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); - /* Pointer to function (in rs6000-c.c) that can define or undefine target macros that have changed. Languages that don't support the preprocessor don't link in rs6000-c.c, so we can't call it directly. */ @@ -3074,6 +3071,21 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; } + /* Enable power8 fusion if we are tuning for power8, even if we aren't + generating power8 instructions. */ + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) + rs6000_isa_flags |= (processor_target_table[tune_index].target_enable + & OPTION_MASK_P8_FUSION); + + /* Power8 does not fuse sign extended loads with the addis. If we are + optimizing at high levels for speed, convert a sign extended load into a + zero extending load, and an explicit sign extension. 
*/ + if (TARGET_P8_FUSION + && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) + && optimize_function_for_speed_p (cfun) + && optimize >= 3) + rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -6918,9 +6930,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode, && GET_CODE (XEXP (x, 1)) == CONST_INT && reg_offset_p && !SPE_VECTOR_MODE (mode) - && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode - || mode == DDmode || mode == TDmode - || mode == DImode)) + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) { HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); @@ -8329,8 +8339,8 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type) || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) >= 16)) return 128; - else if (TARGET_MACHO - && rs6000_darwin64_abi + else if (((TARGET_MACHO && rs6000_darwin64_abi) + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && mode == BLKmode && type && TYPE_ALIGN (type) > 64) return 128; @@ -9878,8 +9888,9 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, We don't need to check for pass-by-reference because of the test above. We can return a simplifed answer, since we know there's no offset to add. 
*/ - if (TARGET_MACHO - && rs6000_darwin64_abi + if (((TARGET_MACHO + && rs6000_darwin64_abi) + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && integer_zerop (TYPE_SIZE (type))) { unsigned HOST_WIDE_INT align, boundary; @@ -11133,9 +11144,6 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) switch (nopnds) { - case 0: - pat = GEN_FCN (icode) (NULL_RTX); - break; case 1: pat = GEN_FCN (icode) (op[0]); break; @@ -21401,8 +21409,7 @@ rs6000_emit_prologue (void) HOST_WIDE_INT offset; if (!(strategy & SAVE_INLINE_GPRS)) - ool_adjust = 8 * (info->first_gp_reg_save - - (FIRST_SAVRES_REGISTER + 1)); + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); offset = info->spe_gp_save_offset + frame_off - ool_adjust; spe_save_area_ptr = gen_rtx_REG (Pmode, 11); save_off = frame_off - offset; @@ -22644,8 +22651,7 @@ rs6000_emit_epilogue (int sibcall) anew to every function. */ if (!restoring_GPRs_inline) - ool_adjust = 8 * (info->first_gp_reg_save - - (FIRST_SAVRES_REGISTER + 1)); + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); frame_reg_rtx = gen_rtx_REG (Pmode, 11); emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx, GEN_INT (info->spe_gp_save_offset @@ -28127,7 +28133,7 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) passes++; enum insn_code code = optab_handler (smul_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code); + insn_gen_fn gen_mul = GEN_FCN (code); gcc_assert (code != CODE_FOR_nothing); @@ -28205,7 +28211,7 @@ rs6000_emit_swrsqrt (rtx dst, rtx src) int i; rtx halfthree; enum insn_code code = optab_handler (smul_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code); + insn_gen_fn gen_mul = GEN_FCN (code); gcc_assert (code != CODE_FOR_nothing); @@ -30419,6 +30425,382 @@ rs6000_split_logical (rtx operands[3], } +/* Return true if the peephole2 can combine a load involving a combination of + an addis instruction and a load with an offset that can be fused 
together on + a power8. + + The operands are: + operands[0] register set with addis + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. + + In addition, we are passed a boolean that is true if this is a peephole2, + and we can use see if the addis_reg is dead after the insn and can be + replaced by the target register. */ + +bool +fusion_gpr_load_p (rtx *operands, bool peep2_p) +{ + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; + rtx addr; + rtx base_reg; + + /* Validate arguments. */ + if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) + return false; + + if (!base_reg_operand (target, GET_MODE (target))) + return false; + + if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) + return false; + + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND + || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) + mem = XEXP (mem, 0); + + if (!MEM_P (mem)) + return false; + + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return false; + + /* Validate that the register used to load the high value is either the + register being loaded, or we can safely replace its use in a peephole2. + + If this is a peephole2, we assume that there are 2 instructions in the + peephole (addis and load), so we want to check if the target register was + not used in the memory address and the register to hold the addis result + is dead after the peephole. 
*/ + if (REGNO (addis_reg) != REGNO (target)) + { + if (!peep2_p) + return false; + + if (reg_mentioned_p (target, mem)) + return false; + + if (!peep2_reg_dead_p (2, addis_reg)) + return false; + } + + base_reg = XEXP (addr, 0); + return REGNO (addis_reg) == REGNO (base_reg); +} + +/* During the peephole2 pass, adjust and expand the insns for a load fusion + sequence. We adjust the addis register to use the target register. If the + load sign extends, we adjust the code to do the zero extending load, and an + explicit sign extension later since the fusion only covers zero extending + loads. + + The operands are: + operands[0] register set with addis (to be replaced with target) + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. */ + +void +expand_fusion_gpr_load (rtx *operands) +{ + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx orig_mem = operands[3]; + rtx new_addr, new_mem, orig_addr, offset; + enum rtx_code plus_or_lo_sum; + enum machine_mode target_mode = GET_MODE (target); + enum machine_mode extend_mode = target_mode; + enum machine_mode ptr_mode = Pmode; + enum rtx_code extend = UNKNOWN; + rtx addis_reg = ((ptr_mode == target_mode) + ? 
target + : simplify_subreg (ptr_mode, target, target_mode, 0)); + + if (GET_CODE (orig_mem) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) + { + extend = GET_CODE (orig_mem); + orig_mem = XEXP (orig_mem, 0); + target_mode = GET_MODE (orig_mem); + } + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset); + new_mem = change_address (orig_mem, target_mode, new_addr); + + if (extend != UNKNOWN) + new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); + + emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value)); + emit_insn (gen_rtx_SET (VOIDmode, target, new_mem)); + + if (extend == SIGN_EXTEND) + { + int sub_off = ((BYTES_BIG_ENDIAN) + ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) + : 0); + rtx sign_reg + = simplify_subreg (target_mode, target, extend_mode, sub_off); + + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); + } + + return; +} + +/* Return a string to fuse an addis instruction with a gpr load to the same + register that we loaded up the addis instruction. The code is complicated, + so we call output_asm_insn directly, and just return "". + + The operands are: + operands[0] register set with addis (must be same reg as target). + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. 
*/ + +const char * +emit_fusion_gpr_load (rtx *operands) +{ + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; + rtx fuse_ops[10]; + rtx addr; + rtx load_offset; + const char *addis_str = NULL; + const char *load_str = NULL; + const char *extend_insn = NULL; + const char *mode_name = NULL; + char insn_template[80]; + enum machine_mode mode; + const char *comment_str = ASM_COMMENT_START; + bool sign_p = false; + + gcc_assert (REG_P (addis_reg) && REG_P (target)); + gcc_assert (REGNO (addis_reg) == REGNO (target)); + + if (*comment_str == ' ') + comment_str++; + + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN) + { + sign_p = true; + mem = XEXP (mem, 0); + } + + gcc_assert (MEM_P (mem)); + addr = XEXP (mem, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + gcc_unreachable (); + + load_offset = XEXP (addr, 1); + + /* Now emit the load instruction to the same register. */ + mode = GET_MODE (mem); + switch (mode) + { + case QImode: + mode_name = "char"; + load_str = "lbz"; + extend_insn = "extsb %0,%0"; + break; + + case HImode: + mode_name = "short"; + load_str = "lhz"; + extend_insn = "extsh %0,%0"; + break; + + case SImode: + mode_name = "int"; + load_str = "lwz"; + extend_insn = "extsw %0,%0"; + break; + + case DImode: + if (TARGET_POWERPC64) + { + mode_name = "long"; + load_str = "ld"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + /* Emit the addis instruction. 
*/ + fuse_ops[0] = target; + if (satisfies_constraint_L (addis_value)) + { + fuse_ops[1] = addis_value; + addis_str = "lis %0,%v1"; + } + + else if (GET_CODE (addis_value) == PLUS) + { + rtx op0 = XEXP (addis_value, 0); + rtx op1 = XEXP (addis_value, 1); + + if (REG_P (op0) && CONST_INT_P (op1) + && satisfies_constraint_L (op1)) + { + fuse_ops[1] = op0; + fuse_ops[2] = op1; + addis_str = "addis %0,%1,%v2"; + } + } + + else if (GET_CODE (addis_value) == HIGH) + { + rtx value = XEXP (addis_value, 0); + if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) + { + fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ + if (TARGET_ELF) + addis_str = "addis %0,%2,%1@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1@u(%2)"; + + else + gcc_unreachable (); + } + + else if (GET_CODE (value) == PLUS) + { + rtx op0 = XEXP (value, 0); + rtx op1 = XEXP (value, 1); + + if (GET_CODE (op0) == UNSPEC + && XINT (op0, 1) == UNSPEC_TOCREL + && CONST_INT_P (op1)) + { + fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */ + fuse_ops[3] = op1; + if (TARGET_ELF) + addis_str = "addis %0,%2,%1+%3@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1+%3@u(%2)"; + + else + gcc_unreachable (); + } + } + + else if (satisfies_constraint_L (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%v1"; + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%1@ha"; + } + } + + if (!addis_str) + fatal_insn ("Could not generate addis value for fusion", addis_value); + + sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str, + comment_str, mode_name); + output_asm_insn (insn_template, fuse_ops); + + /* Emit the D-form load instruction. 
*/ + if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1(%%0)", load_str); + fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == UNSPEC + && XINT (load_offset, 1) == UNSPEC_TOCREL) + { + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (load_offset, 0, 0); + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == PLUS + && GET_CODE (XEXP (load_offset, 0)) == UNSPEC + && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL + && CONST_INT_P (XEXP (load_offset, 1))) + { + rtx tocrel_unspec = XEXP (load_offset, 0); + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0); + fuse_ops[2] = XEXP (load_offset, 1); + output_asm_insn (insn_template, fuse_ops); + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else + fatal_insn ("Unable to generate load offset for fusion", load_offset); + + /* Handle sign extension. The peephole2 pass generates this as a separate + insn, but we handle it just in case it got reattached. 
*/ + if (sign_p) + { + gcc_assert (extend_insn != NULL); + output_asm_insn (extend_insn, fuse_ops); + } + + return ""; +} + + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index e5a6abd6d0d..a5a7a859426 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1498,7 +1498,8 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; On the RS/6000, we grow upwards, from the area after the outgoing arguments. */ -#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 || flag_asan != 0) +#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ + || (flag_sanitize & SANITIZE_ADDRESS) != 0) /* Size of the outgoing register save area */ #define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \ @@ -2138,9 +2139,15 @@ extern int toc_initialized; } \ else if (TARGET_XCOFF) \ { \ - fputs ("\t.lglobl\t.", FILE); \ - RS6000_OUTPUT_BASENAME (FILE, alias); \ - putc ('\n', FILE); \ + if (!RS6000_WEAK || !DECL_WEAK (DECL)) \ + { \ + fputs ("\t.lglobl\t.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + fputs ("\t.lglobl\t", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + } \ } \ fputs ("\t.set\t.", FILE); \ RS6000_OUTPUT_BASENAME (FILE, alias); \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 064a51da608..3880f9175a2 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -136,7 +136,6 @@ UNSPEC_P8V_MTVSRD UNSPEC_P8V_XXPERMDI UNSPEC_P8V_RELOAD_FROM_VSX - UNSPEC_FUSION_GPR ]) ;; @@ -15757,7 +15756,8 @@ "cmpw %2,%L0,%1\;" "bne- %2,$-16"; } -}) +} + [(set_attr "length" "20")]) (define_insn "rs6000_mftb_" [(set (match_operand:P 0 "gpc_reg_operand" "=r") @@ -15771,6 +15771,43 @@ }) +;; Power8 fusion support for fusing an addis instruction with a D-form load of +;; a GPR. 
The addis instruction must be adjacent to the load, and use the same +;; register that is being loaded. The fused ops must be physically adjacent. + +;; We use define_peephole for the actual addis/load, and the register used to +;; hold the addis value must be the same as the register being loaded. We use +;; define_peephole2 to change the register used for addis to be the register +;; being loaded, since we can look at whether it is dead after the load insn. + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)" +{ + return emit_fusion_gpr_load (operands); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION + && (REGNO (operands[0]) != REGNO (operands[2]) + || GET_CODE (operands[3]) == SIGN_EXTEND) + && fusion_gpr_load_p (operands, true)" + [(const_int 0)] +{ + expand_fusion_gpr_load (operands); + DONE; +}) + + (include "sync.md") (include "vector.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index f36e4758031..cd83cb2d206 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -546,3 +546,7 @@ Use ISA 2.07 transactional memory (HTM) instructions mquad-memory Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) Generate the quad word memory instructions (lq/stq/lqarx/stqcx). + +mcompat-align-parm +Target Report Var(rs6000_compat_align_parm) Init(0) Save +Generate aggregate parameter passing code with at most 64-bit alignment. 
diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h index b910b5ec5a2..fb22be1e8bb 100644 --- a/gcc/config/rs6000/rtems.h +++ b/gcc/config/rs6000/rtems.h @@ -34,6 +34,9 @@ } \ while (0) +#undef TARGET_LIBGCC_SDATA_SECTION +#define TARGET_LIBGCC_SDATA_SECTION ".sdata" + #undef CPP_OS_DEFAULT_SPEC #define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)" diff --git a/gcc/config/rs6000/t-linux64 b/gcc/config/rs6000/t-linux64 index 9175de2ffe3..70e928dd7cd 100644 --- a/gcc/config/rs6000/t-linux64 +++ b/gcc/config/rs6000/t-linux64 @@ -25,8 +25,8 @@ # it doesn't tell anything about the 32bit libraries on those systems. Set # MULTILIB_OSDIRNAMES according to what is found on the target. -MULTILIB_OPTIONS = m64/m32 -MULTILIB_DIRNAMES = 64 32 -MULTILIB_EXTRA_OPTS = fPIC -MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu) -MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) +MULTILIB_OPTIONS := m64/m32 +MULTILIB_DIRNAMES := 64 32 +MULTILIB_EXTRA_OPTS := +MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu) +MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) diff --git a/gcc/config/rs6000/t-linux64bele b/gcc/config/rs6000/t-linux64bele new file mode 100644 index 00000000000..97c1ee6fb4d --- /dev/null +++ b/gcc/config/rs6000/t-linux64bele @@ -0,0 +1,7 @@ +#rs6000/t-linux64end + +MULTILIB_OPTIONS += mlittle +MULTILIB_DIRNAMES += le +MULTILIB_OSDIRNAMES += $(subst =,.mlittle=,$(subst lible32,lib32le,$(subst lible64,lib64le,$(subst lib,lible,$(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mlittle%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/rs6000/t-linux64le 
b/gcc/config/rs6000/t-linux64le new file mode 100644 index 00000000000..0cf38e1523a --- /dev/null +++ b/gcc/config/rs6000/t-linux64le @@ -0,0 +1,3 @@ +#rs6000/t-linux64le + +MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)) diff --git a/gcc/config/rs6000/t-linux64lebe b/gcc/config/rs6000/t-linux64lebe new file mode 100644 index 00000000000..2e63bdb9fc9 --- /dev/null +++ b/gcc/config/rs6000/t-linux64lebe @@ -0,0 +1,7 @@ +#rs6000/t-linux64leend + +MULTILIB_OPTIONS += mbig +MULTILIB_DIRNAMES += be +MULTILIB_OSDIRNAMES += $(subst =,.mbig=,$(subst libbe32,lib32be,$(subst libbe64,lib64be,$(subst lib,libbe,$(subst le-linux,-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mbig%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 5e6f397031c..11d6b8bb4d0 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -40,6 +40,14 @@ ;; it to use gprs as well as vsx registers. (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VSX_M2 [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + (TI "TARGET_VSX_TIMODE")]) + ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") @@ -1446,3 +1454,27 @@ }" [(set_attr "length" "20") (set_attr "type" "veccomplex")]) + + +;; Power8 Vector fusion. The fused ops must be physically adjacent. 
+(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index b163a593721..603e49de3ab 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -1021,9 +1021,9 @@ rx_gen_move_template (rtx * operands, bool is_movu) gcc_assert (! is_movu); if (REG_P (src) && REG_P (dest) && (REGNO (dest) == REGNO (src) + 1)) - sprintf (out_template, "mov.L\t%H1, %H0 | mov.L\t%1, %0"); + sprintf (out_template, "mov.L\t%%H1, %%H0 ! mov.L\t%%1, %%0"); else - sprintf (out_template, "mov.L\t%1, %0 | mov.L\t%H1, %H0"); + sprintf (out_template, "mov.L\t%%1, %%0 ! mov.L\t%%H1, %%H0"); } else sprintf (out_template, "%s%s\t%s, %s", is_movu ? 
"movu" : "mov", @@ -3270,7 +3270,7 @@ rx_ok_to_inline (tree caller, tree callee) static bool rx_enable_lra (void) { - return TARGET_ENABLE_LRA || 1; + return TARGET_ENABLE_LRA; } diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 72aee2fe214..ec2770be161 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -99,6 +99,7 @@ %{mpid} \ %{mint-register=*} \ %{mgcc-abi:-mgcc-abi} %{!mgcc-abi:-mrx-abi} \ +%{mcpu=*} \ " #undef LIB_SPEC diff --git a/gcc/config/s390/2827.md b/gcc/config/s390/2827.md index f21d8f8ce25..5be7cfaabfb 100644 --- a/gcc/config/s390/2827.md +++ b/gcc/config/s390/2827.md @@ -32,12 +32,12 @@ (const_int 0))) (define_attr "ooo_groupalone" "" - (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr") (const_int 1)] + (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush") (const_int 1)] (const_int 0))) (define_insn_reservation "zEC12_simple" 1 (and (eq_attr "cpu" "zEC12") - (eq_attr "mnemonic" 
"ltg,ogrk,lr,lnebr,lghrl,sdbr,x,asi,lhr,sebr,madb,ar,lhrl,clfxtr,llgfr,clghrl,cgr,cli,agrk,ic,adbr,aebr,lrv,clg,cy,cghi,sy,celfbr,seb,clgfr,al,tm,lang,clfebr,lghr,cdb,lpebr,laa,ark,lh,or,icy,xi,msebr,n,llihl,afi,cs,nrk,sth,lgr,l,lcr,stey,xg,crt,slgfr,ny,ld,j,llihh,slgr,clfhsi,slg,lb,lgrl,lrl,llihf,lndbr,llcr,laxg,mvghi,rllg,sdb,xrk,laag,alhsik,algfi,algr,aly,agfi,lrvr,d,crl,llgc,tmhl,algsi,lgh,icmh,clhrl,xgrk,icm,iilf,ork,lbr,cg,ldgr,lgf,iihf,llghr,sg,clfdbr,llgtr,stam,cebr,tmhh,tceb,slgf,basr,lgbr,maebr,lgb,cgfi,aeb,ltebr,lax,clfit,lrvgr,nihl,ni,clfdtr,srdl,mdb,srk,xihf,stgrl,sthrl,algf,ltr,cdlgbr,cgit,ng,lat,llghrl,ltgr,nihh,clgfrl,srlk,maeb,agr,cxlftr,ler,bcr,stcy,cds,clfi,nihf,ly,clt,lgat,alg,lhy,lgfrl,clghsi,clrt,tmll,srlg,tcdb,ay,sty,clr,lgfi,lan,lpdbr,clgt,adb,ahik,sra,algrk,cdfbr,lcebr,clfxbr,msdbr,ceb,clgr,tmy,tmlh,alghsik,lcgr,mvi,cdbr,ltgf,xr,larl,ldr,llgcr,clgrt,clrl,cghsi,cliy,madbr,oy,ogr,llgt,meebr,slr,clgxbr,chi,s,icmy,llc,ngr,clhhsi,ltgfr,llill,lhi,o,meeb,clgdtr,sll,clgrl,clgf,ledbr,cegbr,mviy,algfr,rll,cdlftr,sldl,cdlgtr,lg,niy,st,sgr,ag,le,xgr,cr,stg,llilh,sr,lzer,cdsg,sllk,mdbr,stoc,csg,clgit,chhsi,strl,llilf,lndfr,ngrk,clgebr,clgfi,llgh,mseb,ltdbr,oill,la,llhrl,stc,lghi,oihl,xiy,sllg,llgf,cgrt,ldeb,cl,sl,cdlfbr,oi,oilh,nr,srak,oihh,ear,slgrk,og,c,slgfi,sthy,oilf,oiy,msdb,oihf,a,cfi,lzxr,lzdr,srag,cdgbr,brasl,alr,cgrl,llgfrl,cit,clgxtr,ley,exrl,lcdfr,lay,xilf,lcdbr,alsi,mvhhi,srl,chsi,lgfr,lrvg,cly,sgrk,ahi,celgbr,nill,clgdbr,jg,slrk,lxr,sar,slfi,cpsdr,lcgfr,aghik,nilh,mvhi,lpdfr,xy,alrk,lao,agsi,ldy,nilf,llhr,alfi,laog,sly,aghi,ldebr,bras,srda,cefbr,lt")) "nothing") + (eq_attr "mnemonic" 
"ltg,ogrk,lr,lnebr,lghrl,sdbr,x,asi,lhr,sebr,madb,ar,lhrl,clfxtr,llgfr,clghrl,cgr,cli,agrk,ic,adbr,aebr,lrv,clg,cy,cghi,sy,celfbr,seb,clgfr,al,tm,lang,clfebr,lghr,cdb,lpebr,laa,ark,lh,or,icy,xi,msebr,n,llihl,afi,cs,nrk,sth,lgr,l,lcr,stey,xg,crt,slgfr,ny,ld,j,llihh,slgr,clfhsi,slg,lb,lgrl,lrl,llihf,lndbr,llcr,laxg,mvghi,rllg,sdb,xrk,laag,alhsik,algfi,algr,aly,agfi,lrvr,d,crl,llgc,tmhl,algsi,lgh,icmh,clhrl,xgrk,icm,iilf,ork,lbr,cg,ldgr,lgf,iihf,llghr,sg,clfdbr,llgtr,stam,cebr,tmhh,tceb,slgf,basr,lgbr,maebr,lgb,cgfi,aeb,ltebr,lax,clfit,lrvgr,nihl,ni,clfdtr,srdl,mdb,srk,xihf,stgrl,sthrl,algf,ltr,cdlgbr,cgit,ng,lat,llghrl,ltgr,nihh,clgfrl,srlk,maeb,agr,cxlftr,ler,bcr_flush,stcy,cds,clfi,nihf,ly,clt,lgat,alg,lhy,lgfrl,clghsi,clrt,tmll,srlg,tcdb,ay,sty,clr,lgfi,lan,lpdbr,clgt,adb,ahik,sra,algrk,cdfbr,lcebr,clfxbr,msdbr,ceb,clgr,tmy,tmlh,alghsik,lcgr,mvi,cdbr,ltgf,xr,larl,ldr,llgcr,clgrt,clrl,cghsi,cliy,madbr,oy,ogr,llgt,meebr,slr,clgxbr,chi,s,icmy,llc,ngr,clhhsi,ltgfr,llill,lhi,o,meeb,clgdtr,sll,clgrl,clgf,ledbr,cegbr,mviy,algfr,rll,cdlftr,sldl,cdlgtr,lg,niy,st,sgr,ag,le,xgr,cr,stg,llilh,sr,lzer,cdsg,sllk,mdbr,stoc,csg,clgit,chhsi,strl,llilf,lndfr,ngrk,clgebr,clgfi,llgh,mseb,ltdbr,oill,la,llhrl,stc,lghi,oihl,xiy,sllg,llgf,cgrt,ldeb,cl,sl,cdlfbr,oi,oilh,nr,srak,oihh,ear,slgrk,og,c,slgfi,sthy,oilf,oiy,msdb,oihf,a,cfi,lzxr,lzdr,srag,cdgbr,brasl,alr,cgrl,llgfrl,cit,clgxtr,ley,exrl,lcdfr,lay,xilf,lcdbr,alsi,mvhhi,srl,chsi,lgfr,lrvg,cly,sgrk,ahi,celgbr,nill,clgdbr,jg,slrk,lxr,sar,slfi,cpsdr,lcgfr,aghik,nilh,mvhi,lpdfr,xy,alrk,lao,agsi,ldy,nilf,llhr,alfi,laog,sly,aghi,ldebr,bras,srda,cefbr,lt,fiebra,fidbra,fixbra,fidtr,fixtr")) "nothing") (define_insn_reservation "zEC12_cgdbr" 2 (and (eq_attr "cpu" "zEC12") @@ -603,3 +603,22 @@ (and (eq_attr "cpu" "zEC12") (eq_attr "mnemonic" "mh")) "nothing") +(define_insn_reservation "zEC12_fiebra" 6 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fiebra")) "nothing") + +(define_insn_reservation "zEC12_fidbra" 6 + (and (eq_attr "cpu" 
"zEC12") + (eq_attr "mnemonic" "fidbra")) "nothing") + +(define_insn_reservation "zEC12_fixbra" 10 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fixbra")) "nothing") + +(define_insn_reservation "zEC12_fidtr" 6 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fidtr")) "nothing") + +(define_insn_reservation "zEC12_fixtr" 10 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fixtr")) "nothing") diff --git a/gcc/config/s390/linux.h b/gcc/config/s390/linux.h index 3b4966a91ff..699b5dfb7e2 100644 --- a/gcc/config/s390/linux.h +++ b/gcc/config/s390/linux.h @@ -87,4 +87,7 @@ along with GCC; see the file COPYING3. If not see /* Define if long doubles should be mangled as 'g'. */ #define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + #endif diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 95ded7c78e6..cf9ef774675 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -117,6 +117,14 @@ ; Population Count UNSPEC_POPCNT UNSPEC_COPYSIGN + + ; Load FP Integer + UNSPEC_FPINT_FLOOR + UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_ROUND + UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT + UNSPEC_FPINT_RINT ]) ;; @@ -291,7 +299,7 @@ z196_cracked" (const_string "none")) -(define_attr "mnemonic" "unknown" (const_string "unknown")) +(define_attr "mnemonic" "bcr_flush,unknown" (const_string "unknown")) ;; Length in bytes. @@ -429,9 +437,25 @@ ;; the same template. (define_code_iterator SHIFT [ashift lshiftrt]) -;; This iterator allow r[ox]sbg to be defined with the same template +;; This iterator allows r[ox]sbg to be defined with the same template (define_code_iterator IXOR [ior xor]) +;; This iterator is used to expand the patterns for the nearest +;; integer functions. 
+(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_ROUND UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT]) +(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") + (UNSPEC_FPINT_BTRUNC "btrunc") + (UNSPEC_FPINT_ROUND "round") + (UNSPEC_FPINT_CEIL "ceil") + (UNSPEC_FPINT_NEARBYINT "nearbyint")]) +(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR "7") + (UNSPEC_FPINT_BTRUNC "5") + (UNSPEC_FPINT_ROUND "1") + (UNSPEC_FPINT_CEIL "6") + (UNSPEC_FPINT_NEARBYINT "0")]) + ;; This iterator and attribute allow to combine most atomic operations. (define_code_iterator ATOMIC [and ior xor plus minus mult]) (define_code_iterator ATOMIC_Z196 [and ior xor plus]) @@ -2289,13 +2313,13 @@ lr\t%0,%1 tmh\t%1,12288 ipm\t%0 - st\t%0,%1 - sty\t%0,%1 - l\t%1,%0 - ly\t%1,%0" + l\t%0,%1 + ly\t%0,%1 + st\t%1,%0 + sty\t%1,%0" [(set_attr "op_type" "RR,RI,RRE,RX,RXY,RX,RXY") - (set_attr "type" "lr,*,*,store,store,load,load") - (set_attr "z10prop" "z10_fr_E1,z10_super,*,z10_rec,z10_rec,z10_fwd_A3,z10_fwd_A3") + (set_attr "type" "lr,*,*,load,load,store,store") + (set_attr "z10prop" "z10_fr_E1,z10_super,*,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec") (set_attr "z196prop" "*,*,z196_ends,*,*,*,*")]) ; @@ -4414,6 +4438,58 @@ [(set_attr "op_type" "RRF") (set_attr "type" "fsimptf")]) +; Binary Floating Point - load fp integer + +; Expanders for: floor, btrunc, round, ceil, and nearbyint +; For all of them the inexact exceptions are suppressed. + +; fiebra, fidbra, fixbra +(define_insn "2" + [(set (match_operand:BFP 0 "register_operand" "=f") + (unspec:BFP [(match_operand:BFP 1 "register_operand" "f")] + FPINT))] + "TARGET_Z196" + "fibra\t%0,,%1,4" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp")]) + +; rint is supposed to raise an inexact exception so we can use the +; older instructions. 
+ +; fiebr, fidbr, fixbr +(define_insn "rint2" + [(set (match_operand:BFP 0 "register_operand" "=f") + (unspec:BFP [(match_operand:BFP 1 "register_operand" "f")] + UNSPEC_FPINT_RINT))] + "" + "fibr\t%0,0,%1" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp")]) + + +; Decimal Floating Point - load fp integer + +; fidtr, fixtr +(define_insn "2" + [(set (match_operand:DFP 0 "register_operand" "=f") + (unspec:DFP [(match_operand:DFP 1 "register_operand" "f")] + FPINT))] + "TARGET_HARD_DFP" + "fitr\t%0,,%1,4" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp")]) + +; fidtr, fixtr +(define_insn "rint2" + [(set (match_operand:DFP 0 "register_operand" "=f") + (unspec:DFP [(match_operand:DFP 1 "register_operand" "f")] + UNSPEC_FPINT_RINT))] + "TARGET_HARD_DFP" + "fitr\t%0,0,%1,0" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp")]) + +; ; Binary <-> Decimal floating point trunc patterns ; @@ -9007,12 +9083,22 @@ ; Although bcr is superscalar on Z10, this variant will never ; become part of an execution group. +; With z196 we can make use of the fast-BCR-serialization facility. +; This allows for a slightly faster sync which is sufficient for our +; purposes. (define_insn "mem_thread_fence_1" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] UNSPEC_MB))] "" - "bcr\t15,0" - [(set_attr "op_type" "RR")]) +{ + if (TARGET_Z196) + return "bcr\t14,0"; + else + return "bcr\t15,0"; +} + [(set_attr "op_type" "RR") + (set_attr "mnemonic" "bcr_flush") + (set_attr "z196prop" "z196_alone")]) ; ; atomic load/store operations diff --git a/gcc/config/s390/tpf.h b/gcc/config/s390/tpf.h index a2bde82ca79..a1af01b07b6 100644 --- a/gcc/config/s390/tpf.h +++ b/gcc/config/s390/tpf.h @@ -94,9 +94,6 @@ along with GCC; see the file COPYING3. 
If not see #define ASM_SPEC "%{m31&m64}%{mesa&mzarch}%{march=*} \ -alshd=%b.lst" -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #define ENTRY_SPEC "%{mmain:-entry=_start} \ %{!mmain:-entry=0}" @@ -114,4 +111,8 @@ along with GCC; see the file COPYING3. If not see /* IBM copies these libraries over with these names. */ #define MATH_LIBRARY "CLBM" #define LIBSTDCXX "CPP2" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + #endif /* ! _TPF_H */ diff --git a/gcc/config/sol2-10.h b/gcc/config/sol2-10.h index 81d0f51e144..9df5548e4c1 100644 --- a/gcc/config/sol2-10.h +++ b/gcc/config/sol2-10.h @@ -18,5 +18,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -/* Solaris 10 has the float and long double forms of math functions. */ -#define TARGET_C99_FUNCTIONS 1 +/* Solaris 10 has the float and long double forms of math functions. + We redefine this hook so the version from elfos.h header won't be used. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h index 4c9b334e7a7..b606595dfe9 100644 --- a/gcc/config/sol2.h +++ b/gcc/config/sol2.h @@ -285,6 +285,9 @@ along with GCC; see the file COPYING3. If not see #define TARGET_POSIX_IO +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + extern GTY(()) tree solaris_pending_aligns; extern GTY(()) tree solaris_pending_inits; extern GTY(()) tree solaris_pending_finis; diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 48c25dcd5cd..e5b4662512d 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. 
If not see #include "df.h" #include "opts.h" #include "tree-pass.h" +#include "context.h" /* Processor costs */ @@ -1000,34 +1001,44 @@ sparc_do_work_around_errata (void) return 0; } -struct rtl_opt_pass pass_work_around_errata = -{ - { - RTL_PASS, - "errata", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - sparc_gate_work_around_errata, /* gate */ - sparc_do_work_around_errata, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } -}; +namespace { -struct register_pass_info insert_pass_work_around_errata = +const pass_data pass_data_work_around_errata = { - &pass_work_around_errata.pass, /* pass */ - "dbr", /* reference_pass_name */ - 1, /* ref_pass_instance_number */ - PASS_POS_INSERT_AFTER /* po_op */ + RTL_PASS, /* type */ + "errata", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; +class pass_work_around_errata : public rtl_opt_pass +{ +public: + pass_work_around_errata(gcc::context *ctxt) + : rtl_opt_pass(pass_data_work_around_errata, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return sparc_gate_work_around_errata (); } + unsigned int execute () { return sparc_do_work_around_errata (); } + +}; // class pass_work_around_errata + +} // anon namespace + +rtl_opt_pass * +make_pass_work_around_errata (gcc::context *ctxt) +{ + return new pass_work_around_errata (ctxt); +} + /* Helpers for TARGET_DEBUG_OPTIONS. 
*/ static void dump_target_flag_bits (const int flags) @@ -1140,9 +1151,8 @@ sparc_option_override (void) /* TI TMS390Z55 supersparc */ { "supersparc", MASK_ISA, MASK_V8 }, { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU }, - /* LEON */ - { "leon", MASK_ISA, MASK_V8|MASK_FPU }, - { "leon3", MASK_ISA, MASK_V8|MASK_FPU }, + { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU }, + { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU }, { "sparclite", MASK_ISA, MASK_SPARCLITE }, /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, @@ -1301,6 +1311,9 @@ sparc_option_override (void) #endif #ifndef HAVE_AS_SPARC4 & ~MASK_CBCOND +#endif +#ifndef HAVE_AS_LEON + & ~(MASK_LEON | MASK_LEON3) #endif ); @@ -1430,6 +1443,10 @@ sparc_option_override (void) /* Choose the most relaxed model for the processor. */ else if (TARGET_V9) sparc_memory_model = SMM_RMO; + else if (TARGET_LEON3) + sparc_memory_model = SMM_TSO; + else if (TARGET_LEON) + sparc_memory_model = SMM_SC; else if (TARGET_V8) sparc_memory_model = SMM_PSO; else @@ -1477,6 +1494,14 @@ sparc_option_override (void) (essentially) final form of the insn stream to work on. Registering the pass must be done at start up. It's convenient to do it here. */ + opt_pass *errata_pass = make_pass_work_around_errata (g); + struct register_pass_info insert_pass_work_around_errata = + { + errata_pass, /* pass */ + "dbr", /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + PASS_POS_INSERT_AFTER /* po_op */ + }; register_pass (&insert_pass_work_around_errata); } @@ -11318,6 +11343,11 @@ sparc_emit_membar_for_model (enum memmodel model, /* Total Store Ordering: all memory transactions with store semantics are followed by an implied StoreStore. */ implied |= StoreStore; + + /* If we're not looking for a raw barrer (before+after), then atomic + operations get the benefit of being both load and store. 
*/ + if (load_store == 3 && before_after == 1) + implied |= StoreLoad; /* FALLTHRU */ case SMM_PSO: diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 202d23c0162..d96c1b6b422 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -236,7 +236,7 @@ extern enum cmodel sparc_cmodel; #if TARGET_CPU_DEFAULT == TARGET_CPU_leon \ || TARGET_CPU_DEFAULT == TARGET_CPU_leon3 #define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__" -#define ASM_CPU32_DEFAULT_SPEC "" +#define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG #endif #endif @@ -332,8 +332,8 @@ extern enum cmodel sparc_cmodel; %{mcpu=v8:-Av8} \ %{mcpu=supersparc:-Av8} \ %{mcpu=hypersparc:-Av8} \ -%{mcpu=leon:-Av8} \ -%{mcpu=leon3:-Av8} \ +%{mcpu=leon:" AS_LEON_FLAG "} \ +%{mcpu=leon3:" AS_LEON_FLAG "} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ @@ -1758,6 +1758,12 @@ extern int sparc_indent_opcode; #define AS_NIAGARA4_FLAG "-Av9" AS_NIAGARA3_FLAG #endif +#ifdef HAVE_AS_LEON +#define AS_LEON_FLAG "-Aleon" +#else +#define AS_LEON_FLAG "-Av8" +#endif + /* We use gcc _mcount for profiling. 
*/ #define NO_PROFILE_COUNTERS 0 diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index 016e6997422..3ccd54fa463 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -211,6 +211,12 @@ Enable workarounds for the errata of the UT699 processor Mask(LONG_DOUBLE_128) ;; Use 128-bit long double +Mask(LEON) +;; Generate code for LEON + +Mask(LEON3) +;; Generate code for LEON3 + Mask(SPARCLITE) ;; Generate code for SPARClite diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md index 2f21f812dc9..130f5219194 100644 --- a/gcc/config/sparc/sync.md +++ b/gcc/config/sparc/sync.md @@ -161,7 +161,8 @@ (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; mod_s (match_operand:SI 7 "const_int_operand" "")] ;; mod_f - "TARGET_V9 && (mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)" + "(TARGET_V9 || TARGET_LEON3) + && (mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)" { sparc_expand_compare_and_swap (operands); DONE; @@ -176,7 +177,7 @@ [(match_operand:I48MODE 2 "register_operand" "") (match_operand:I48MODE 3 "register_operand" "")] UNSPECV_CAS))])] - "TARGET_V9" + "TARGET_V9 || TARGET_LEON3" "") (define_insn "*atomic_compare_and_swap_1" @@ -187,7 +188,7 @@ [(match_operand:I48MODE 2 "register_operand" "r") (match_operand:I48MODE 3 "register_operand" "0")] UNSPECV_CAS))] - "TARGET_V9 && (mode == SImode || TARGET_ARCH64)" + "(TARGET_V9 || TARGET_LEON3) && (mode != DImode || TARGET_ARCH64)" "cas\t%1, %2, %0" [(set_attr "type" "multi")]) diff --git a/gcc/config/sparc/t-sparc b/gcc/config/sparc/t-sparc index 664f4a42418..62ad3f77934 100644 --- a/gcc/config/sparc/t-sparc +++ b/gcc/config/sparc/t-sparc @@ -24,7 +24,7 @@ sparc.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(FUNCTION_H) $(EXCEPT_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) \ $(DIAGNOSTIC_CORE_H) $(GGC_H) $(TM_P_H) debug.h $(TARGET_H) \ $(TARGET_DEF_H) $(COMMON_TARGET_H) $(GIMPLE_H) $(TREE_PASS_H) \ - langhooks.h reload.h 
$(PARAMS_H) $(DF_H) $(OPTS_H) \ + langhooks.h reload.h $(PARAMS_H) $(DF_H) $(OPTS_H) $(CONTEXT_H) \ gt-sparc.h sparc-c.o: $(srcdir)/config/sparc/sparc-c.c \ diff --git a/gcc/config/vms/vms.h b/gcc/config/vms/vms.h index b7689bfa674..5d0a5c6515c 100644 --- a/gcc/config/vms/vms.h +++ b/gcc/config/vms/vms.h @@ -87,3 +87,6 @@ extern void vms_c_register_includes (const char *, const char *, int); /* Special VMS debugger symbol to record the entry point. */ #define VMS_DEBUG_MAIN_POINTER "TRANSFER$BREAK$GO" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index d91a8b103ac..72f344b6f01 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -114,6 +114,9 @@ extern void vxworks_asm_out_destructor (rtx symbol, int priority); #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Both kernels and RTPs have the facilities required by this macro. */ #define TARGET_POSIX_IO diff --git a/gcc/configure b/gcc/configure index e36a6086c44..c6bc3a69d84 100755 --- a/gcc/configure +++ b/gcc/configure @@ -24331,6 +24331,43 @@ if test $gcc_cv_as_sparc_sparc4 = yes; then $as_echo "#define HAVE_AS_SPARC4 1" >>confdefs.h +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for LEON instructions" >&5 +$as_echo_n "checking assembler for LEON instructions... " >&6; } +if test "${gcc_cv_as_sparc_leon+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_sparc_leon=no + if test x$gcc_cv_as != x; then + $as_echo '.text + .register %g2, #scratch + .register %g3, #scratch + .align 4 + smac %g2, %g3, %g1 + umac %g2, %g3, %g1 + cas [%g2], %g3, %g1' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -Aleon -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_sparc_leon=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sparc_leon" >&5 +$as_echo "$gcc_cv_as_sparc_leon" >&6; } +if test $gcc_cv_as_sparc_leon = yes; then + +$as_echo "#define HAVE_AS_LEON 1" >>confdefs.h + fi ;; @@ -26019,6 +26056,41 @@ $as_echo "$gcc_cv_ld_mips_personality_relaxation" >&6; } $as_echo "#define HAVE_LD_PERSONALITY_RELAXATION 1" >>confdefs.h fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mnan= support" >&5 +$as_echo_n "checking assembler for -mnan= support... " >&6; } +if test "${gcc_cv_as_mips_nan+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_mips_nan=no + if test x$gcc_cv_as != x; then + $as_echo '' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mnan=2008 -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_mips_nan=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mips_nan" >&5 +$as_echo "$gcc_cv_as_mips_nan" >&6; } +if test $gcc_cv_as_mips_nan = yes; then + +$as_echo "#define HAVE_AS_NAN 1" >>confdefs.h + +fi + if test x$gcc_cv_as_mips_nan = xno \ + && test x$with_nan != x; then + as_fn_error "Requesting --with-nan= requires assembler support for -mnan=" "$LINENO" 5 + fi ;; esac @@ -26046,8 +26118,9 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. 
case "$cpu_type" in - alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze | mips \ - | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 | xtensa) + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ + | mips | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 \ + | xtensa) insn="nop" ;; ia64 | s390) @@ -27324,6 +27397,7 @@ if test "x$subdirs" != x; then done fi echo "source ${srcdir}/gdbinit.in" >> .gdbinit +echo "python import sys; sys.path.append('${srcdir}'); import gdbhooks" >> .gdbinit gcc_tooldir='$(libsubdir)/$(libsubdir_to_prefix)$(target_noncanonical)' diff --git a/gcc/configure.ac b/gcc/configure.ac index 9dfd389bd98..5d3e5ad5823 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3613,6 +3613,19 @@ foo: kasumi_fi_xor %f46, %f48, %f50, %f52],, [AC_DEFINE(HAVE_AS_SPARC4, 1, [Define if your assembler supports SPARC4 instructions.])]) + + gcc_GAS_CHECK_FEATURE([LEON instructions], + gcc_cv_as_sparc_leon,, + [-Aleon], + [.text + .register %g2, #scratch + .register %g3, #scratch + .align 4 + smac %g2, %g3, %g1 + umac %g2, %g3, %g1 + cas [[%g2]], %g3, %g1],, + [AC_DEFINE(HAVE_AS_LEON, 1, + [Define if your assembler supports LEON instructions.])]) ;; changequote(,)dnl @@ -4168,6 +4181,17 @@ EOF [Define if your linker can relax absolute .eh_frame personality pointers into PC-relative form.]) fi + + gcc_GAS_CHECK_FEATURE([-mnan= support], + gcc_cv_as_mips_nan,, + [-mnan=2008],,, + [AC_DEFINE(HAVE_AS_NAN, 1, + [Define if the assembler understands -mnan=.])]) + if test x$gcc_cv_as_mips_nan = xno \ + && test x$with_nan != x; then + AC_MSG_ERROR( + [Requesting --with-nan= requires assembler support for -mnan=]) + fi ;; esac @@ -4195,8 +4219,9 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. 
case "$cpu_type" in - alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze | mips \ - | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 | xtensa) + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ + | mips | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 \ + | xtensa) insn="nop" ;; ia64 | s390) @@ -5156,6 +5181,7 @@ if test "x$subdirs" != x; then done fi echo "source ${srcdir}/gdbinit.in" >> .gdbinit +echo "python import sys; sys.path.append('${srcdir}'); import gdbhooks" >> .gdbinit gcc_tooldir='$(libsubdir)/$(libsubdir_to_prefix)$(target_noncanonical)' AC_SUBST(gcc_tooldir) diff --git a/gcc/context.c b/gcc/context.c index 76e0dde9c0f..b5152419a97 100644 --- a/gcc/context.c +++ b/gcc/context.c @@ -22,6 +22,12 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "ggc.h" #include "context.h" +#include "pass_manager.h" /* The singleton holder of global state: */ gcc::context *g; + +gcc::context::context() +{ + passes_ = new gcc::pass_manager (this); +} diff --git a/gcc/context.h b/gcc/context.h index 3caf02fed31..66260cd279a 100644 --- a/gcc/context.h +++ b/gcc/context.h @@ -22,14 +22,23 @@ along with GCC; see the file COPYING3. If not see namespace gcc { +class pass_manager; + /* GCC's internal state can be divided into zero or more "parallel universe" of state; an instance of this class is one such context of state. */ class context { public: + context(); + + /* Pass-management. */ + + pass_manager *get_passes () { gcc_assert (passes_); return passes_; } - /* Currently empty. */ +private: + /* Pass-management. */ + pass_manager *passes_; }; // class context diff --git a/gcc/convert.c b/gcc/convert.c index 9ecef4247ba..b07f0efe820 100644 --- a/gcc/convert.c +++ b/gcc/convert.c @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. 
If not see #include "flags.h" #include "convert.h" #include "diagnostic-core.h" +#include "target.h" #include "langhooks.h" /* Convert EXPR to some pointer or reference type TYPE. @@ -386,7 +387,7 @@ convert_to_integer (tree type, tree expr) { CASE_FLT_FN (BUILT_IN_CEIL): /* Only convert in ISO C99 mode. */ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_misc)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) @@ -402,7 +403,7 @@ convert_to_integer (tree type, tree expr) CASE_FLT_FN (BUILT_IN_FLOOR): /* Only convert in ISO C99 mode. */ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_misc)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) @@ -418,7 +419,7 @@ convert_to_integer (tree type, tree expr) CASE_FLT_FN (BUILT_IN_ROUND): /* Only convert in ISO C99 mode. */ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_misc)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) @@ -439,7 +440,7 @@ convert_to_integer (tree type, tree expr) /* ... Fall through ... */ CASE_FLT_FN (BUILT_IN_RINT): /* Only convert in ISO C99 mode. */ - if (!TARGET_C99_FUNCTIONS) + if (!targetm.libc_has_function (function_c99_misc)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) diff --git a/gcc/coretypes.h b/gcc/coretypes.h index edb9c8c8477..bff8f5c62d5 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -73,9 +73,7 @@ struct cl_option; struct cl_decoded_option; struct cl_option_handlers; struct diagnostic_context; -typedef struct diagnostic_context diagnostic_context; -struct pretty_print_info; -typedef struct pretty_print_info pretty_printer; +struct pretty_printer; /* Address space number for named address space support. 
*/ typedef unsigned char addr_space_t; @@ -169,6 +167,12 @@ typedef const struct basic_block_def *const_basic_block; in target.h. */ typedef int reg_class_t; +class rtl_opt_pass; + +namespace gcc { + class context; +} + #else struct _dont_use_rtx_here_; @@ -183,6 +187,15 @@ union _dont_use_tree_here_; #endif +/* Classes of functions that compiler needs to check + whether they are present at the runtime or not. */ +enum function_class { + function_c94, + function_c99_misc, + function_c99_math_complex, + function_sincos +}; + /* Memory model types for the __atomic* builtins. This must match the order in libstdc++-v3/include/bits/atomic_base.h. */ enum memmodel diff --git a/gcc/coverage.c b/gcc/coverage.c index 7c395f4750b..d662e8d0946 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -43,6 +43,9 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "hash-table.h" #include "tree-iterator.h" +#include "context.h" +#include "pass_manager.h" +#include "tree-pass.h" #include "cgraph.h" #include "dumpfile.h" #include "diagnostic-core.h" @@ -341,11 +344,13 @@ get_coverage_counts (unsigned counter, unsigned expected, { static int warned = 0; - if (!warned++) - inform (input_location, (flag_guess_branch_prob - ? "file %s not found, execution counts estimated" - : "file %s not found, execution counts assumed to be zero"), - da_file_name); + if (!warned++ && dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + (flag_guess_branch_prob + ? 
"file %s not found, execution counts estimated" + : "file %s not found, execution counts assumed to " + "be zero"), + da_file_name); return NULL; } @@ -369,21 +374,25 @@ get_coverage_counts (unsigned counter, unsigned expected, warning_at (input_location, OPT_Wcoverage_mismatch, "the control flow of function %qE does not match " "its profile data (counter %qs)", id, ctr_names[counter]); - if (warning_printed) + if (warning_printed && dump_enabled_p ()) { - inform (input_location, "use -Wno-error=coverage-mismatch to tolerate " - "the mismatch but performance may drop if the function is hot"); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + "use -Wno-error=coverage-mismatch to tolerate " + "the mismatch but performance may drop if the " + "function is hot"); if (!seen_error () && !warned++) { - inform (input_location, "coverage mismatch ignored"); - inform (input_location, flag_guess_branch_prob - ? G_("execution counts estimated") - : G_("execution counts assumed to be zero")); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + "coverage mismatch ignored"); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + flag_guess_branch_prob + ? G_("execution counts estimated") + : G_("execution counts assumed to be zero")); if (!flag_guess_branch_prob) - inform (input_location, - "this can result in poorly optimized code"); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + "this can result in poorly optimized code"); } } @@ -539,6 +548,28 @@ coverage_compute_lineno_checksum (void) return chksum; } +/* Compute profile ID. This is better to be unique in whole program. 
*/ + +unsigned +coverage_compute_profile_id (struct cgraph_node *n) +{ + expanded_location xloc + = expand_location (DECL_SOURCE_LOCATION (n->symbol.decl)); + unsigned chksum = xloc.line; + + chksum = coverage_checksum_string (chksum, xloc.file); + chksum = coverage_checksum_string + (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->symbol.decl))); + if (first_global_object_name) + chksum = coverage_checksum_string + (chksum, first_global_object_name); + chksum = coverage_checksum_string + (chksum, aux_base_name); + + /* Non-negative integers are hopefully small enough to fit in all targets. */ + return chksum & 0x7fffffff; +} + /* Compute cfg checksum for the current function. The checksum is calculated carefully so that source code changes that doesn't affect the control flow graph @@ -1103,6 +1134,11 @@ coverage_init (const char *filename) int len = strlen (filename); int prefix_len = 0; + /* Since coverage_init is invoked very early, before the pass + manager, we need to set up the dumping explicitly. This is + similar to the handling in finish_optimization_passes. */ + dump_start (g->get_passes ()->get_pass_profile ()->static_pass_number, NULL); + if (!profile_data_prefix && !IS_ABSOLUTE_PATH (filename)) profile_data_prefix = getpwd (); @@ -1145,6 +1181,8 @@ coverage_init (const char *filename) gcov_write_unsigned (bbg_file_stamp); } } + + dump_finish (g->get_passes ()->get_pass_profile ()->static_pass_number); } /* Performs file-level cleanup. Close notes file, generate coverage diff --git a/gcc/coverage.h b/gcc/coverage.h index 21afe7298ff..342d73e1653 100644 --- a/gcc/coverage.h +++ b/gcc/coverage.h @@ -35,6 +35,9 @@ extern void coverage_end_function (unsigned, unsigned); /* Compute the control flow checksum for the current function. */ extern unsigned coverage_compute_cfg_checksum (void); +/* Compute the profile id of function N. 
*/ +extern unsigned coverage_compute_profile_id (struct cgraph_node *n); + /* Compute the line number checksum for the current function. */ extern unsigned coverage_compute_lineno_checksum (void); diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 2669375b7f5..9ba17c8bc31 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,616 @@ +2013-09-08 Caroline Tice + + PR c++/58300 + * vtable-class-hierarchy.c (vtv_generate_init_routine): In + preinit case, move call to assemble_vtv_preinit_initializer to + after call to cgraph_process_new_functions. + +2013-09-08 Tom de Vries + + PR c++/58282 + * except.c (build_must_not_throw_expr): Handle + flag_exceptions. + +2013-09-08 Joern Rennecke + + * typeck.c (cp_build_binary_op): Use vector_types_compatible_elements_p. + +2013-09-04 Paolo Carlini + + PR c++/24926 + * class.c (finish_struct_anon_r): New. + (finish_struct_anon): Use it. + +2013-09-04 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::simple_type_specifier): + Declare as overrider. + * cxx-pretty-print.c (cxx_pretty_printer::simple_type_specifier): + Rename from pp_cxx_simple_type_specifier. + (cxx_pretty_printer::cxx_pretty_printer): Do not assign to + simple_type_specifier. + +2013-09-03 Paolo Carlini + + PR c++/58305 + * typeck2.c (build_functional_cast): Maybe warn_deprecated_use. + +2013-09-03 Mike Stump + + * Make-lang.in (cp/lambda.o): Add dependencies. + +2013-09-03 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::type_id): Declare as + overrider. + * cxx-pretty-print.c (pp_cxx_storage_class_specifier): Remove. + (pp_cxx_userdef_literal): Tidy. + (pp_cxx_template_argument_list): Likewise. + (pp_cxx_typeid_expression): Likewise. + (pp_cxx_offsetof_expression_1): Likewise. + (cxx_pretty_printer::postfix_expression): Likewise. + (cxx_pretty_printer::unary_expression): Likewise. + (cxx_pretty_printer::statement): Likewise. + (cxx_pretty_printer::type_id): Rename from pp_cxx_type_id. 
+ (c_pretty_printer::cxx_pretty_printer): Do not assign to type_id. + * error.c (dump_decl): Tidy. + (dump_expr): Likewise. + +2013-09-02 Paolo Carlini + + PR c++/21682, implement DR 565 + * name-lookup.c (compparms_for_decl_and_using_decl): New. + (push_overloaded_decl_1, do_nonmember_using_decl): Use it. + +2013-08-30 Marek Polacek + + * typeck.c (cp_build_binary_op): Add division by zero and shift + instrumentation. + * error.c (dump_expr): Special-case ubsan builtins. + +2013-08-30 Paolo Carlini + + PR c++/51424 + * cp-tree.h (LOOKUP_DELEGATING_CONS): Add. + * init.c (perform_target_ctor): Use it. + * call.c (build_special_member_call): Diagnose self-delegating + constructors. + +2013-08-30 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::declaration): Declare as + overrider. + (cxx_pretty_printer::declaration_specifiers): Likewise. + (cxx_pretty_printer::function_specifier): Likewise. + (cxx_pretty_printer::declarator): Likewise. + (cxx_pretty_printer::direct_declarator): Likewise. + (cxx_pretty_printer::abstract_declarator): Likewise. + (cxx_pretty_printer::direct_abstract_declarator): Likewise. + (pp_cxx_declaration): Remove. + * cxx-pretty-print.c (cxx_pretty_printer::function_specifier): + Rename from pp_cxx_function_specifier. Adjust. + (cxx_pretty_printer::declaration_specifiers): Rename from + pp_cxx_decl_specifier_seq. Adjust. + (cxx_pretty_printer::direct_declarator): Rename from + pp_cxx_direct_declarator. Adjust. + (cxx_pretty_printer::declarator): Rename from pp_cxx_declarator. + Adjust. + (cxx_pretty_printer::abstract_declarator): Rename from + pp_cxx_abstract_declarator. Adjust. + (cxx_pretty_printer::direct_abstract_declarator): Rename from + pp_cxx_direct_abstract_declarator. Adjust. + (cxx_pretty_printer::declaration): Rename from + pp_cxx_declaration. Adjust. 
+ (cxx_pretty_printer::cxx_pretty_printer): Do not assign to + declaration, declaration_specifiers, function_specifier, + declarator, direct_declarator, abstract_declarator, + direct_abstract_declarator. + * error.c (dump_decl): Adjust. + +2013-08-29 Jan Hubicka + + Correct previous patch to not mark terminate as LEAF. + * class.c (build_vtbl_initializer): Drop LEAF + * decl.c (cxx_init_decl_processing): Likewise. + (push_throw_library_fn): Likewise. + * except.c (init_exception_processing): Likewise. + (do_begin_catch): Likewise. + (do_end_catch): Likewise. + (do_allocate_exception): Likewise. + +2013-08-29 Jan Hubicka + + * class.c (build_vtbl_initializer): Make __cxa_deleted_virtual + ECF_NORETURN | ECF_LEAF + * cp-tree.h (build_library_fn_ptr, build_cp_library_fn_ptr, + push_library_fn, push_void_library_fn): Update prototype. + * decl.c (build_library_fn_1): Remove. + (push_cp_library_fn, build_cp_library_fn): Update to take ECF flags. + (cxx_init_decl_processing): Update; global_delete_fndecl is ECF_NOTROW; + __cxa_pure_virtual is ECF_NORETURN | ECF_NORETURN | ECF_LEAF. + (build_library_fn_1): Add ecf_flags argument; rename to ... + (build_library_fn): ... this one. + (build_cp_library_fn): Take ecf_flags; do not copy NOTHROW flag. + (build_library_fn_ptr): Take ecf_flags. + (build_cp_library_fn_ptr): Likewise. + (push_library_fn): Likewise. + (push_cp_library_fn): Likewise. + (push_void_library_fn): Likewise. + (push_throw_library_fn): All throws are ECF_NORETURN. + (__cxa_atexit, __cxa_thread_atexit): Add ECF_LEAF | ECF_NOTHROW attributes. + (expand_static_init): __cxa_guard_acquire, __cxa_guard_release, + __cxa_guard_abort are ECF_NOTHROW | ECF_LEAF. + * except.c (init_exception_processing): terminate is + ECF_NOTHROW | ECF_NORETURN | ECF_LEAF. + (declare_nothrow_library_fn): Add ecf_flags parameter. + (__cxa_get_exception_ptr): Is ECF_NOTHROW | ECF_PURE | ECF_LEAF | + ECF_TM_PURE. 
+ (do_begin_catch): cxa_begin_catch and _ITM_cxa_begin_catch + are ECF_NOTHROW | ECF_LEAF. + (do_end_catch): __cxa_end_catch and _ITM_cxa_end_catch is + ECF_LEAF. + (do_allocate_exception): _cxa_allocate_exception + and _ITM_cxa_allocate_exception are ECF_NOTHROW | ECF_MALLOC + | ECF_LEAF + (do_free_exception): __cxa_free_exception is + ECF_NOTHROW | ECF_LEAF. + * rtti.c (build_dynamic_cast_1): __dynamic_cast + is ECF_LEAF | ECF_PURE | ECF_NOTHROW. + +2013-08-29 Adam Butcher + + * error.c (dump_lambda_function): New function, dependent on ... + (dump_substitution): ... this new function, factored out of ... + (subst_to_string): ... here and ... + (dump_function_decl): ... here. Updated to early-out with call to + dump_lambda_function after determining template bindings. + +2013-08-28 Paolo Carlini + + PR c++/58255 + * init.c (build_aggr_init): When init == void_type_node do not + set LOOKUP_ONLYCONVERTING. + +2013-08-27 Caroline Tice + + * vtable-class-hierarchy.c: Remove unnecessary include statements. + (MAX_SET_SIZE): Remove unnecessary constant. + (register_construction_vtables): Make vtable_ptr_array parameter + into a vector; remove num_args parameter. Change array accesses to + vector accesses. + (register_other_binfo_vtables): Ditto. + (insert_call_to_register_set): Ditto. + (insert_call_to_register_pair): Ditto. + (output_set_info): Ditto. Also change warning calls to warning_at + calls, and fix format of warning messages. + (register_all_pairs): Change vtbl_ptr_array from an array into a + vector. Remove num_vtable_args (replace with calls to vector length). + Change array stores & accesses to vector functions. Change calls to + register_construction_vtables, register_other_binfo_vtables, + insert_call_to_register_set, insert_call_to_register_pair and + output_set_info to match their new signatures. Change warning to + warning_at and fix the format of the warning message. 
+ +2013-08-27 Jakub Jelinek + Aldy Hernandez + + * cp-tree.h (CP_OMP_CLAUSE_INFO): Adjust range for new clauses. + +2013-08-27 Paolo Carlini + + * decl.c (grokfndecl): Remove old bison hack. + +2013-08-26 Jan Hubicka + + * cp-tree.h (DECL_CONSTRUCTOR_P, DECL_DESTRUCTOR_P): Use + middle-end flag. + +2013-08-26 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::unary_expression): + Declare as overrider. + (cxx_pretty_printer::multiplicative_expression): Likewise. + (cxx_pretty_printer::conditional_expression): Likewise. + (cxx_pretty_printer::assignment_expression): Likewise. + (cxx_pretty_printer::expression): Likewise. + * cxx-pretty-print.c (cxx_pretty_printer::unary_expression): + Rename from pp_cxx_unary_expression. Adjust. + (cxx_pretty_printer::multiplicative_expression): Rename from + pp_cxx_multiplicative_expression. Adjust. + (cxx_pretty_printer::conditional_expression): Rename from + pp_cxx_conditional_expression. Adjust. + (cxx_pretty_printer::assignment_expression): Rename from + pp_cxx_assignment_expression. Adjust. + (cxx_pretty_printer::expression): Rename from pp_cxx_expression. + Adjust. + (cxx_pretty_printer::cxx_pretty_printer): Dot not assign to + unary_expression, multiplicative_expression, + conditional_expression, assignment_expression, expression. + +2013-08-25 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::postfix_expression): + Declare as overrider. + * cxx-pretty-print.c (cxx_pretty_printer::postfix_expression): + Rename from pp_cxx_postfix_expression. Adjust. + (pp_cxx_expression): Use pp_postfix_expression. + (cxx_pretty_printer::cxx_pretty_printer): Do not assign to + postfix_expression. + +2013-08-25 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::primary_expression): Now + an overrider of c_pretty_printer::primary_expression. + * cxx-pretty-print.c (cxx_pretty_printer::primary_expression): + Rename from pp_cxx_primary_expression. Adjust. 
+ (pp_cxx_postfix_expression): Use pp_primary_expression. + (pp_cxx_ctor_initializer): Likewise. + (cxx_pretty_printer::cxx_pretty_printer): Do not assign to + primary_expression. + +2013-08-23 Jan Hubicka + + * cp-tree.h (struct lang_type_class): Free is_final bit. + (CLASSTYPE_FINAL): Define using TYPE_FINAL_P. + (DECL_FINAL_P): Remove. + * pt.c (instantiate_class_template_1): Guard that CLASSTYPE_FINAL + is called on CLASS_TYPE_P. + +2013-08-25 Gabriel Dos Reis + + * cxx-pretty-print.c (M_): Remove. + (pp_cxx_unqualified_id): Use translate_string instead of M_. + (pp_cxx_canonical_template_parameter): Likewise. + +2013-08-24 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::id_expression): Declare. + * cxx-pretty-print.c (cxx_pretty_printer::id_expression): Rename + from pp_cxx_id_expression. Adjust. + (pp_cxx_userdef_literal): Use pp_id_expression. + (pp_cxx_primary_expression): Likewise. + (pp_cxx_direct_declarator): Likewise. + (cxx_pretty_printer::cxx_pretty_printer): Do not assign to + id_expression. + +2013-08-24 Gabriel Dos Reis + + * cxx-pretty-print.h (cxx_pretty_printer::constant): Now a member + function, overriding c_pretty_printer::constant. + * cxx-pretty-print.c (cxx_pretty_printer::constant): Rename from + pp_cxx_constant. Adjust. + (cxx_pretty_printer::cxx_pretty_printer): Do not assign to constant. + +2013-08-23 Gabriel Dos Reis + + * cp-objcp-common.c (cxx_initialize_diagnostics): Call a + destructor for the early printer. + * error.c (type_to_string): Use pp_buffer. + +2013-08-22 Paolo Carlini + + PR c++/56380 + * class.c (check_field_decls): Check for const mutable and const + reference data members. + +2013-08-22 Gabriel Dos Reis + + * error.c (init_error): Remove calls to pp_construct and + pp_cxx_pretty_printer_init. Initialize cxx_pp with placement-new. + * cxx-pretty-print.h (cxx_pretty_printer::cxx_pretty_printer): Declare. + (cxx_pretty_printer_init): Remove. 
+ * cxx-pretty-print.c (cxx_pretty_printer::cxx_pretty_printer): + Rename from cxx_pretty_printer_init. Adjust. + * cp-objcp-common.c (cxx_initialize_diagnostics): Simplify + initialization of C++ diagnostics pretty printer. + +2013-08-21 Paolo Carlini + + * call.c (build_new_method_call_1): Use INDIRECT_REF_P. + * cp-tree.h (REFERENCE_REF_P): Likewise. + * semantics.c (finish_offsetof): Likewise. + +2013-08-21 Paolo Carlini + + PR c++/56130 + * semantics.c (finish_id_expression): Handle deprecated references. + +2013-08-20 Jason Merrill + + PR c++/58119 + * cvt.c (build_expr_type_conversion): Don't complain about a + template that can't match the desired type category. + +2013-08-20 Gabriel Dos Reis + + * error.c (pp_ggc_formatted_text): New. + (type_as_string): Use it in lieu of pp_formatted_text. + (type_as_string_translate): Likewise. + (expr_as_string): Likewise. + (decl_as_string): Likewise. + (decl_as_string_translate): Likewise. + (lang_decl_name): Likewise. + (decl_to_string): Likewise. + (expr_to_string): Likewise. + (fndecl_to_string): Likewise. + (parm_to_string): Likewise. + (type_to_string): Likewise. + (args_to_string): Likewise. + (subst_to_string): Likewise. + +2013-08-19 Balaji V. Iyer + + PR c/57490 + * cp-array-notation.c (cp_expand_cond_array_notations): Added a + check for truth values. + (expand_array_notation_exprs): Added truth values case. Removed an + unwanted else. Added for-loop to walk through subtrees in default + case. + * call.c (build_cxx_call): Inherited the type of the array notation for + certain built-in array notation functions. + +2013-08-19 Paolo Carlini + + * parser.c (cp_parser_lambda_introducer, cp_parser_decltype_expr): + Use cp_parser_lookup_name_simple. + +2013-08-19 Paolo Carlini + + * name-lookup.h (pop_bindings_and_leave_scope): Declare. + * name-lookup.c (pop_bindings_and_leave_scope): Define. + * parser.c (cp_parser_lambda_declarator_opt, + cp_parser_direct_declarator, cp_parser_cache_defarg): Use it. 
+ +2013-08-17 Jason Merrill + + PR c++/58083 + * name-lookup.c (push_class_level_binding_1): It's OK to push a + lambda type after the enclosing type is complete. + +2013-08-17 Gabriel Dos Reis + + * error.c (dump_scope): Add a cxx_pretty_printer parameter. + Adjust callers. + (dump_template_argument): Likewise. + (dump_template_argument_list): Likewise. + (dump_template_parameter): Likewise. + (dump_template_bindings): Likewise. + (dump_alias_template_specialization): Likewise. + (dump_type): Likewise. + (dump_typename): Likewise. + (dump_aggr_type): Likewise. + (dump_type_prefix): Likewise. + (dump_type_suffix): Likewise. + (dump_global_iord): Likewise. + (dump_simple_decl): Likewise. + (dump_decl): Likewise. + (dump_template_decl): Likewise. + (dump_function_decl): Likewise. + (dump_parameters): Likewise. + (dump_ref_qualifier): Likewise. + (dump_exception_spec): Likewise. + (dump_function_name): Likewise. + (dump_template_parms): Likewise. + (dump_call_expr_args): Likewise. + (dump_aggr_init_expr_args): Likewise. + (dump_expr_list): Likewise. + (dump_expr_init_vec): Likewise. + (dump_expr): Likewise. + (dump_binary_op): Likewise. + (dump_unary_op): Likewise. + +2013-08-14 Paolo Carlini + + PR c++/51912 + * cp-tree.h (LOOKUP_NO_NON_INTEGRAL): Add. + * decl.c (case_conversion): Use it. + * call.c (standard_conversion): Likewise. + (implicit_conversion): Adjust. + +2013-08-13 Adam Butcher + + * pt.c: Grammar fix in comments ("it's" to "its"). + +2013-08-12 Paolo Carlini + + * decl.c (warn_extern_redeclared_static, duplicate_decls, + check_elaborated_type_specifier): Use error + inform. + * friend.c (make_friend_class): Likewise. + * semantics.c (finish_id_expression): Likewise. + +2013-08-09 Paolo Carlini + + Revert: + 2013-08-07 Paolo Carlini + + PR c++/46206 + * name-lookup.c (lookup_name_real_1): Handle iter->type before + iter->value. + +2013-08-07 Paolo Carlini + + PR c++/46206 + * name-lookup.c (lookup_name_real_1): Handle iter->type before + iter->value. 
+ +2013-08-06 Caroline Tice + + * Make-lang.in (*CXX_AND_OBJCXX_OBJS): Add vtable-class-hierarchy.o to + list. + (vtable-class-hierarchy.o): Add build rule. + * cp-tree.h (vtv_start_verification_constructor_init_function): New + extern function decl. + (vtv_finish_verification_constructor_init_function): New extern + function decl. + (build_vtbl_address): New extern function decl. + (get_mangled_vtable_map_var_name): New extern function decl. + (vtv_compute_class_hierarchy_transitive_closure): New extern function + decl. + (vtv_generate_init_routine): New extern function decl. + (vtv_save_class_info): New extern function decl. + (vtv_recover_class_info): New extern function decl. + (vtv_build_vtable_verify_fndecl): New extern function decl. + * class.c (finish_struct_1): Add call to vtv_save_class_info if + flag_vtable_verify is true. + * config-lang.in: Add vtable-class-hierarchy.c to gtfiles list. + * vtable-class-hierarchy.c: New file. + * mangle.c (get_mangled_vtable_map_var_name): New function. + * decl2.c (start_objects): Update function comment. + (cp_write_global_declarations): Call vtv_recover_class_info, + vtv_compute_class_hierarchy_transitive_closure and + vtv_build_vtable_verify_fndecl, before calling + finalize_compilation_unit, and call vtv_generate_init_rount after, IFF + flag_vtable_verify is true. + (vtv_start_verification_constructor_init_function): New function. + (vtv_finish_verification_constructor_init_function): New function. + * init.c (build_vtbl_address): Remove static qualifier from function. + +2013-08-06 Jason Merrill + + PR c++/57825 + * tree.c (strip_typedefs) [METHOD_TYPE]: Preserve ref-qualifier. + +2013-08-05 Paolo Carlini + + PR c++/58080 + * typeck.c (cp_pointer_int_sum): Add tsubst_flags_t parameter. + (cp_build_binary_op): Adjust. + +2013-08-04 Gabriel Dos Reis + + * cxx-pretty-print.h (pp_c_base): Remove. + (cxx_pretty_printer): Derive from c_pretty_printer. + Adjust macros using pp_c_base. 
+ * cp-objcp-common.c (cxx_initialize_diagnostics): Do not call pp_base. + * cxx-pretty-print.c (pp_cxx_nonconsecutive_character): Likewise. + (pp_cxx_colon_colon): Likewise. + (pp_cxx_separate_with): Likewise. + (pp_cxx_storage_class_specifier): Do not call pp_c_base. + (pp_cxx_expression_list): Likewise. + (pp_cxx_space_for_pointer_operator): Likewise. + (pp_cxx_init_declarator): Likewise. + (pp_cxx_call_argument_list): Likewise. + (pp_cxx_constant): Likewise. + (pp_cxx_postfix_expression): Likewise. + (pp_cxx_new_expression): Likewise. + (pp_cxx_unary_expression): Likewise. + (pp_cxx_cast_expression): Likewise. + (pp_cxx_conditional_expression): Likewise. + (pp_cxx_assignment_expression): Likewise. + (pp_cxx_expression): Likewise. + (pp_cxx_function_specifier): Likewise. + (pp_cxx_decl_specifier_seq): Likewise. + (pp_cxx_simple_type_specifier): Likewise. + (pp_cxx_type_specifier_seq): Likewise. + (pp_cxx_ptr_operator): Likewise. + (pp_cxx_parameter_declaration_clause): Likewise. + (pp_cxx_direct_declarator): Likewise. + (pp_cxx_direct_abstract_declarator): Likewise. + (pp_cxx_type_id): Likewise. + (pp_cxx_statement): Likewise. + (pp_cxx_pretty_printer_init): Tidy. + * error.c (init_error): Do not use pp_base. + (dump_aggr_type): Likewise. + (dump_type_prefix): Likewise. + (dump_type_suffix): Likewise. + (dump_global_iord): Likewise. + (dump_decl): Likewise. + (dump_function_decl): Likewise. + (dump_ref_qualifier): Likewise. + (reinit_cxx_pp): Likewise. + (decl_as_dwarf_string): Likewise. + (lang_decl_dwarf_name): Likewise. + (type_to_string): Likewise. + (cv_to_string): Likewise. + (cxx_print_error_function): Likewise. + (cp_diagnostic_starter): Likewise. + (cp_diagnostic_finalizer): Likewise. + (cp_print_error_function): Likewise. + (print_instantiation_context): Likewise. + (cp_printer): Likewise. + +2013-08-03 Gabriel Dos Reis + + * error.c (dump_type_prefix): Use specialized pretty printer + functions instead of pp_string or operators and punctuators. 
+ (dump_decl): Likewise. + (dump_expr): Likewise. + +2013-08-03 Jason Merrill + + DR 1286 + * pt.c (get_underlying_template): New. + (convert_template_argument, lookup_template_class_1): Use it. + + DR 1430 + PR c++/51239 + * pt.c (pack_expansion_args_count): Rename from + any_pack_expanson_args_p. + (coerce_template_parms): Reject pack expansion to + non-pack template parameter of alias template. + +2013-08-03 Gabriel Dos Reis + + * error.c (dump_aggr_type): Use specialized pretty printer + functions instead of pp_character. + (dump_type_prefix): Likewise. + (dump_simple_decl): Likewise. + (type_to_string): Likewise. + +2013-08-02 Paolo Carlini + + * cp-tree.h (finish_stmt): Do not declare. + * decl.c (finish_stmt): Do not define. + * parser.c (cp_parser_expression_statement, + cp_parser_declaration_statement, + cp_parser_transaction_cancel): Don't call finish_stmt. + * semantics.c (finish_expr_stmt, finish_if_stmt, + finish_while_stmt, finish_do_stmt, finish_return_stmt, + finish_for_stmt, finish_switch_stmt, finish_compound_stmt, + finish_transaction_stmt): Likewise. + +2013-08-01 Fabien Chêne + + PR c++/54537 + * cp-tree.h: Check OVL_USED with OVERLOAD_CHECK. + * name-lookup.c (do_nonmember_using_decl): Make sure we have an + OVERLOAD before calling OVL_USED. Call diagnose_name_conflict + instead of issuing an error without mentioning the conflicting + declaration. + +2013-07-31 Paolo Carlini + + * parser.c (cp_parser_sizeof_pack): Check cp_parser_identifier + return value for error_mark_node. + +2013-07-30 Paolo Carlini + + PR c++/57673 + * parser.c (cp_parser_cache_defarg): In an NSDMI don't stop when + token->type == CPP_ELLIPSIS. + +2013-07-30 Paolo Carlini + + PR c++/57947 + * call.c (is_std_init_list): Return false if cxx_dialect == cxx98. + +2013-07-29 Jason Merrill + + PR c++/57901 + * semantics.c (build_data_member_initialization, constexpr_fn_retval): + Use break_out_target_exprs instead of unshare_expr. 
+ +2013-07-29 Paolo Carlini + + PR c++/57948 + * call.c (initialize_reference): Don't crash when reference_binding + returns a conv with conv->kind == ck_ambig. + +2013-07-29 Jason Merrill + + * mangle.c (write_name): Check for null context. + (write_unscoped_name): Allow PARM_DECL context. + 2013-07-25 Paolo Carlini PR c++/57981 diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in index 2cb919a2172..65dfe081e0b 100644 --- a/gcc/cp/Make-lang.in +++ b/gcc/cp/Make-lang.in @@ -80,7 +80,8 @@ CXX_AND_OBJCXX_OBJS = cp/call.o cp/decl.o cp/expr.o cp/pt.o cp/typeck2.o \ cp/typeck.o cp/cvt.o cp/except.o cp/friend.o cp/init.o cp/method.o \ cp/search.o cp/semantics.o cp/tree.o cp/repo.o cp/dump.o cp/optimize.o \ cp/mangle.o cp/cp-objcp-common.o cp/name-lookup.o cp/cxx-pretty-print.o \ - cp/cp-gimplify.o cp/cp-array-notation.o cp/lambda.o $(CXX_C_OBJS) + cp/cp-gimplify.o cp/cp-array-notation.o cp/lambda.o \ + cp/vtable-class-hierarchy.o $(CXX_C_OBJS) # Language-specific object files for C++. CXX_OBJS = cp/cp-lang.o c-family/stub-objc.o $(CXX_AND_OBJCXX_OBJS) @@ -341,10 +342,16 @@ cp/parser.o: cp/parser.c $(CXX_TREE_H) $(TM_H) $(DIAGNOSTIC_CORE_H) \ c-family/c-objc.h tree-pretty-print.h $(CXX_PARSER_H) $(TIMEVAR_H) cp/cp-gimplify.o: cp/cp-gimplify.c $(CXX_TREE_H) $(C_COMMON_H) \ $(TM_H) coretypes.h pointer-set.h tree-iterator.h $(SPLAY_TREE_H) - +cp/vtable-class-hierarchy.o: cp/vtable-class-hierarchy.c \ + $(TM_H) $(TIMEVAR_H) $(CXX_TREE_H) intl.h $(CXX_PARSER_H) cp/decl.h \ + $(FLAGS_H) $(DIAGNOSTIC_CORE_H) output.h $(CGRAPH_H) c-family/c-common.h \ + c-family/c-objc.h $(PLUGIN_H) \ + tree-iterator.h vtable-verify.h $(GIMPLE_H) \ + gt-cp-vtable-class-hierarchy.h cp/name-lookup.o: cp/name-lookup.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(CXX_TREE_H) $(TIMEVAR_H) gt-cp-name-lookup.h $(PARAMS_H) \ $(DIAGNOSTIC_CORE_H) $(FLAGS_H) debug.h pointer-set.h +cp/lambda.o: cp/lambda.c $(CXX_TREE_H) $(CGRAPH_H) $(VEC_H) $(SYSTEM_H) coretypes.h cp/cxx-pretty-print.o: 
cp/cxx-pretty-print.c $(CXX_PRETTY_PRINT_H) \ $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h $(CXX_TREE_H) tree-pretty-print.h diff --git a/gcc/cp/call.c b/gcc/cp/call.c index e8d526075a7..3ed73b80374 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -1314,7 +1314,8 @@ standard_conversion (tree to, tree from, tree expr, bool c_cast_p, /* As an extension, allow conversion to complex type. */ else if (ARITHMETIC_TYPE_P (to)) { - if (! (INTEGRAL_CODE_P (fcode) || fcode == REAL_TYPE) + if (! (INTEGRAL_CODE_P (fcode) + || (fcode == REAL_TYPE && !(flags & LOOKUP_NO_NON_INTEGRAL))) || SCOPED_ENUM_P (from)) return NULL; conv = build_conv (ck_std, to, conv); @@ -1681,7 +1682,7 @@ implicit_conversion (tree to, tree from, tree expr, bool c_cast_p, resolution, or after we've chosen one. */ flags &= (LOOKUP_ONLYCONVERTING|LOOKUP_NO_CONVERSION|LOOKUP_COPY_PARM |LOOKUP_NO_TEMP_BIND|LOOKUP_NO_RVAL_BIND|LOOKUP_PREFER_RVALUE - |LOOKUP_NO_NARROWING|LOOKUP_PROTECT); + |LOOKUP_NO_NARROWING|LOOKUP_PROTECT|LOOKUP_NO_NON_INTEGRAL); /* FIXME: actually we don't want warnings either, but we can't just have 'complain &= ~(tf_warning|tf_error)' because it would cause @@ -7176,6 +7177,33 @@ build_cxx_call (tree fn, int nargs, tree *argarray, && !check_builtin_function_arguments (fndecl, nargs, argarray)) return error_mark_node; + /* If it is a built-in array notation function, then the return type of + the function is the element type of the array passed in as array + notation (i.e. the first parameter of the function). 
*/ + if (flag_enable_cilkplus && TREE_CODE (fn) == CALL_EXPR) + { + enum built_in_function bif = + is_cilkplus_reduce_builtin (CALL_EXPR_FN (fn)); + if (bif == BUILT_IN_CILKPLUS_SEC_REDUCE_ADD + || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MUL + || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MAX + || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MIN + || bif == BUILT_IN_CILKPLUS_SEC_REDUCE + || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MUTATING) + { + /* for bif == BUILT_IN_CILKPLUS_SEC_REDUCE_ALL_ZERO or + BUILT_IN_CILKPLUS_SEC_REDUCE_ANY_ZERO or + BUILT_IN_CILKPLUS_SEC_REDUCE_ANY_NONZERO or + BUILT_IN_CILKPLUS_SEC_REDUCE_ALL_NONZERO or + BUILT_IN_CILKPLUS_SEC_REDUCE_MIN_IND or + BUILT_IN_CILKPLUS_SEC_REDUCE_MAX_IND + The pre-defined return-type is the correct one. */ + tree array_ntn = CALL_EXPR_ARG (fn, 0); + TREE_TYPE (fn) = TREE_TYPE (array_ntn); + return fn; + } + } + /* Some built-in function calls will be evaluated at compile-time in fold (). Set optimize to 1 when folding __builtin_constant_p inside a constexpr function so that fold_builtin_1 doesn't fold it to 0. */ @@ -7414,6 +7442,14 @@ build_special_member_call (tree instance, tree name, vec **args, if (allocated != NULL) release_tree_vector (allocated); + if ((complain & tf_error) + && (flags & LOOKUP_DELEGATING_CONS) + && name == complete_ctor_identifier + && TREE_CODE (ret) == CALL_EXPR + && (DECL_ABSTRACT_ORIGIN (TREE_OPERAND (CALL_EXPR_FN (ret), 0)) + == current_function_decl)) + error ("constructor delegates to itself"); + return ret; } @@ -7640,7 +7676,7 @@ build_new_method_call_1 (tree instance, tree fns, vec **args, if (init) { - if (TREE_CODE (instance) == INDIRECT_REF + if (INDIRECT_REF_P (instance) && integer_zerop (TREE_OPERAND (instance, 0))) return get_target_expr_sfinae (init, complain); init = build2 (INIT_EXPR, TREE_TYPE (instance), instance, init); @@ -9282,10 +9318,14 @@ initialize_reference (tree type, tree expr, return error_mark_node; } - gcc_assert (conv->kind == ck_ref_bind); - - /* Perform the conversion. 
*/ - expr = convert_like (conv, expr, complain); + if (conv->kind == ck_ref_bind) + /* Perform the conversion. */ + expr = convert_like (conv, expr, complain); + else if (conv->kind == ck_ambig) + /* We gave an error in build_user_type_conversion_1. */ + expr = error_mark_node; + else + gcc_unreachable (); /* Free all the conversions we allocated. */ obstack_free (&conversion_obstack, p); @@ -9392,6 +9432,8 @@ is_std_init_list (tree type) /* Look through typedefs. */ if (!TYPE_P (type)) return false; + if (cxx_dialect == cxx98) + return false; type = TYPE_MAIN_VARIANT (type); return (CLASS_TYPE_P (type) && CP_TYPE_CONTEXT (type) == std_node diff --git a/gcc/cp/class.c b/gcc/cp/class.c index f0c515269e2..3d34b92cfb1 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -2773,15 +2773,93 @@ warn_hidden (tree t) } } +/* Recursive helper for finish_struct_anon. */ + +static void +finish_struct_anon_r (tree field, bool complain) +{ + bool is_union = TREE_CODE (TREE_TYPE (field)) == UNION_TYPE; + tree elt = TYPE_FIELDS (TREE_TYPE (field)); + for (; elt; elt = DECL_CHAIN (elt)) + { + /* We're generally only interested in entities the user + declared, but we also find nested classes by noticing + the TYPE_DECL that we create implicitly. You're + allowed to put one anonymous union inside another, + though, so we explicitly tolerate that. We use + TYPE_ANONYMOUS_P rather than ANON_AGGR_TYPE_P so that + we also allow unnamed types used for defining fields. 
*/ + if (DECL_ARTIFICIAL (elt) + && (!DECL_IMPLICIT_TYPEDEF_P (elt) + || TYPE_ANONYMOUS_P (TREE_TYPE (elt)))) + continue; + + if (TREE_CODE (elt) != FIELD_DECL) + { + if (complain) + { + if (is_union) + permerror (input_location, + "%q+#D invalid; an anonymous union can " + "only have non-static data members", elt); + else + permerror (input_location, + "%q+#D invalid; an anonymous struct can " + "only have non-static data members", elt); + } + continue; + } + + if (complain) + { + if (TREE_PRIVATE (elt)) + { + if (is_union) + permerror (input_location, + "private member %q+#D in anonymous union", elt); + else + permerror (input_location, + "private member %q+#D in anonymous struct", elt); + } + else if (TREE_PROTECTED (elt)) + { + if (is_union) + permerror (input_location, + "protected member %q+#D in anonymous union", elt); + else + permerror (input_location, + "protected member %q+#D in anonymous struct", elt); + } + } + + TREE_PRIVATE (elt) = TREE_PRIVATE (field); + TREE_PROTECTED (elt) = TREE_PROTECTED (field); + + /* Recurse into the anonymous aggregates to handle correctly + access control (c++/24926): + + class A { + union { + union { + int i; + }; + }; + }; + + int j=A().i; */ + if (DECL_NAME (elt) == NULL_TREE + && ANON_AGGR_TYPE_P (TREE_TYPE (elt))) + finish_struct_anon_r (elt, /*complain=*/false); + } +} + /* Check for things that are invalid. There are probably plenty of other things we should check for also. 
*/ static void finish_struct_anon (tree t) { - tree field; - - for (field = TYPE_FIELDS (t); field; field = DECL_CHAIN (field)) + for (tree field = TYPE_FIELDS (t); field; field = DECL_CHAIN (field)) { if (TREE_STATIC (field)) continue; @@ -2790,53 +2868,7 @@ finish_struct_anon (tree t) if (DECL_NAME (field) == NULL_TREE && ANON_AGGR_TYPE_P (TREE_TYPE (field))) - { - bool is_union = TREE_CODE (TREE_TYPE (field)) == UNION_TYPE; - tree elt = TYPE_FIELDS (TREE_TYPE (field)); - for (; elt; elt = DECL_CHAIN (elt)) - { - /* We're generally only interested in entities the user - declared, but we also find nested classes by noticing - the TYPE_DECL that we create implicitly. You're - allowed to put one anonymous union inside another, - though, so we explicitly tolerate that. We use - TYPE_ANONYMOUS_P rather than ANON_AGGR_TYPE_P so that - we also allow unnamed types used for defining fields. */ - if (DECL_ARTIFICIAL (elt) - && (!DECL_IMPLICIT_TYPEDEF_P (elt) - || TYPE_ANONYMOUS_P (TREE_TYPE (elt)))) - continue; - - if (TREE_CODE (elt) != FIELD_DECL) - { - if (is_union) - permerror (input_location, "%q+#D invalid; an anonymous union can " - "only have non-static data members", elt); - else - permerror (input_location, "%q+#D invalid; an anonymous struct can " - "only have non-static data members", elt); - continue; - } - - if (TREE_PRIVATE (elt)) - { - if (is_union) - permerror (input_location, "private member %q+#D in anonymous union", elt); - else - permerror (input_location, "private member %q+#D in anonymous struct", elt); - } - else if (TREE_PROTECTED (elt)) - { - if (is_union) - permerror (input_location, "protected member %q+#D in anonymous union", elt); - else - permerror (input_location, "protected member %q+#D in anonymous struct", elt); - } - - TREE_PRIVATE (elt) = TREE_PRIVATE (field); - TREE_PROTECTED (elt) = TREE_PROTECTED (field); - } - } + finish_struct_anon_r (field, /*complain=*/true); } } @@ -3500,6 +3532,22 @@ check_field_decls (tree t, tree 
*access_decls, if (DECL_MUTABLE_P (x) || TYPE_HAS_MUTABLE_P (type)) CLASSTYPE_HAS_MUTABLE (t) = 1; + if (DECL_MUTABLE_P (x)) + { + if (CP_TYPE_CONST_P (type)) + { + error ("member %q+D cannot be declared both % " + "and %", x); + continue; + } + if (TREE_CODE (type) == REFERENCE_TYPE) + { + error ("member %q+D cannot be declared as a % " + "reference", x); + continue; + } + } + if (! layout_pod_type_p (type)) /* DR 148 now allows pointers to members (which are POD themselves), to be allowed in POD structs. */ @@ -6485,6 +6533,9 @@ finish_struct_1 (tree t) maybe_suppress_debug_info (t); + if (flag_vtable_verify) + vtv_save_class_info (t); + dump_class_hierarchy (t); /* Finish debugging output for this type. */ @@ -8854,7 +8905,7 @@ build_vtbl_initializer (tree binfo, if (!get_global_value_if_present (fn, &fn)) fn = push_library_fn (fn, (build_function_type_list (void_type_node, NULL_TREE)), - NULL_TREE); + NULL_TREE, ECF_NORETURN); if (!TARGET_VTABLE_USES_DESCRIPTORS) init = fold_convert (vfunc_ptr_type_node, build_fold_addr_expr (fn)); diff --git a/gcc/cp/config-lang.in b/gcc/cp/config-lang.in index 1597bf97017..4ea9b4d9a2b 100644 --- a/gcc/cp/config-lang.in +++ b/gcc/cp/config-lang.in @@ -29,4 +29,4 @@ compilers="cc1plus\$(exeext)" target_libs="target-libstdc++-v3" -gtfiles="\$(srcdir)/cp/rtti.c \$(srcdir)/cp/mangle.c \$(srcdir)/cp/name-lookup.h \$(srcdir)/cp/name-lookup.c \$(srcdir)/cp/cp-tree.h \$(srcdir)/cp/decl.h \$(srcdir)/cp/call.c \$(srcdir)/cp/decl.c \$(srcdir)/cp/decl2.c \$(srcdir)/cp/pt.c \$(srcdir)/cp/repo.c \$(srcdir)/cp/semantics.c \$(srcdir)/cp/tree.c \$(srcdir)/cp/parser.h \$(srcdir)/cp/parser.c \$(srcdir)/cp/method.c \$(srcdir)/cp/typeck2.c \$(srcdir)/c-family/c-common.c \$(srcdir)/c-family/c-common.h \$(srcdir)/c-family/c-objc.h \$(srcdir)/c-family/c-lex.c \$(srcdir)/c-family/c-pragma.h \$(srcdir)/c-family/c-pragma.c \$(srcdir)/cp/class.c \$(srcdir)/cp/cp-objcp-common.c \$(srcdir)/cp/cp-lang.c \$(srcdir)/cp/except.c" +gtfiles="\$(srcdir)/cp/rtti.c 
\$(srcdir)/cp/mangle.c \$(srcdir)/cp/name-lookup.h \$(srcdir)/cp/name-lookup.c \$(srcdir)/cp/cp-tree.h \$(srcdir)/cp/decl.h \$(srcdir)/cp/call.c \$(srcdir)/cp/decl.c \$(srcdir)/cp/decl2.c \$(srcdir)/cp/pt.c \$(srcdir)/cp/repo.c \$(srcdir)/cp/semantics.c \$(srcdir)/cp/tree.c \$(srcdir)/cp/parser.h \$(srcdir)/cp/parser.c \$(srcdir)/cp/method.c \$(srcdir)/cp/typeck2.c \$(srcdir)/c-family/c-common.c \$(srcdir)/c-family/c-common.h \$(srcdir)/c-family/c-objc.h \$(srcdir)/c-family/c-lex.c \$(srcdir)/c-family/c-pragma.h \$(srcdir)/c-family/c-pragma.c \$(srcdir)/cp/class.c \$(srcdir)/cp/cp-objcp-common.c \$(srcdir)/cp/cp-lang.c \$(srcdir)/cp/except.c \$(srcdir)/cp/vtable-class-hierarchy.c" diff --git a/gcc/cp/cp-array-notation.c b/gcc/cp/cp-array-notation.c index eb6a70d835c..f4581f01e57 100644 --- a/gcc/cp/cp-array-notation.c +++ b/gcc/cp/cp-array-notation.c @@ -857,6 +857,19 @@ cp_expand_cond_array_notations (tree orig_stmt) return error_mark_node; } } + else if (truth_value_p (TREE_CODE (orig_stmt))) + { + size_t left_rank = 0, right_rank = 0; + tree left_expr = TREE_OPERAND (orig_stmt, 0); + tree right_expr = TREE_OPERAND (orig_stmt, 1); + if (!find_rank (EXPR_LOCATION (left_expr), left_expr, left_expr, true, + &left_rank) + || !find_rank (EXPR_LOCATION (right_expr), right_expr, right_expr, + true, &right_rank)) + return error_mark_node; + if (right_rank == 0 && left_rank == 0) + return orig_stmt; + } if (!find_rank (EXPR_LOCATION (orig_stmt), orig_stmt, orig_stmt, true, &rank)) @@ -1213,6 +1226,12 @@ expand_array_notation_exprs (tree t) if (TREE_OPERAND (t, 0) == error_mark_node) return TREE_OPERAND (t, 0); return t; + case TRUTH_ANDIF_EXPR: + case TRUTH_ORIF_EXPR: + case TRUTH_AND_EXPR: + case TRUTH_OR_EXPR: + case TRUTH_XOR_EXPR: + case TRUTH_NOT_EXPR: case COND_EXPR: t = cp_expand_cond_array_notations (t); if (TREE_CODE (t) == COND_EXPR) @@ -1222,8 +1241,6 @@ expand_array_notation_exprs (tree t) COND_EXPR_ELSE (t) = expand_array_notation_exprs (COND_EXPR_ELSE (t)); 
} - else - t = expand_array_notation_exprs (t); return t; case FOR_STMT: if (contains_array_notation_expr (FOR_COND (t))) diff --git a/gcc/cp/cp-objcp-common.c b/gcc/cp/cp-objcp-common.c index 82f684a1c7b..d70766f3a06 100644 --- a/gcc/cp/cp-objcp-common.c +++ b/gcc/cp/cp-objcp-common.c @@ -32,6 +32,8 @@ along with GCC; see the file COPYING3. If not see #include "cxx-pretty-print.h" #include "cp-objcp-common.h" +#include // For placement new. + /* Special routine to get the alias set for C++. */ alias_set_type @@ -131,19 +133,15 @@ cp_var_mod_type_p (tree type, tree fn) void cxx_initialize_diagnostics (diagnostic_context *context) { - pretty_printer *base; - cxx_pretty_printer *pp; - c_common_initialize_diagnostics (context); - base = context->printer; - pp = XNEW (cxx_pretty_printer); - memcpy (pp_base (pp), base, sizeof (pretty_printer)); - pp_cxx_pretty_printer_init (pp); - context->printer = (pretty_printer *) pp; + pretty_printer *base = context->printer; + cxx_pretty_printer *pp = XNEW (cxx_pretty_printer); + context->printer = new (pp) cxx_pretty_printer (); - /* It is safe to free this object because it was previously malloc()'d. */ - free (base); + /* It is safe to free this object because it was previously XNEW()'d. */ + base->~pretty_printer (); + XDELETE (base); } /* This compares two types for equivalence ("compatible" in C-based languages). diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 200e78ad715..3e4f188b93f 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -346,7 +346,7 @@ typedef struct ptrmem_cst * ptrmem_cst_t; /* If set, this was imported in a using declaration. This is not to confuse with being used somewhere, which is not important for this node. */ -#define OVL_USED(NODE) TREE_USED (NODE) +#define OVL_USED(NODE) TREE_USED (OVERLOAD_CHECK (NODE)) /* If set, this OVERLOAD was created for argument-dependent lookup and can be freed afterward. 
*/ #define OVL_ARG_DEPENDENT(NODE) TREE_LANG_FLAG_0 (OVERLOAD_CHECK (NODE)) @@ -1416,7 +1416,6 @@ struct GTY(()) lang_type_class { unsigned has_complex_move_ctor : 1; unsigned has_complex_move_assign : 1; unsigned has_constexpr_ctor : 1; - unsigned is_final : 1; /* When adding a flag here, consider whether or not it ought to apply to a template instance if it applies to the template. If @@ -1425,7 +1424,7 @@ struct GTY(()) lang_type_class { /* There are some bits left to fill out a 32-bit word. Keep track of this by updating the size of this bitfield whenever you add or remove a flag. */ - unsigned dummy : 2; + unsigned dummy : 3; tree primary_base; vec *vcall_indices; @@ -1535,7 +1534,7 @@ struct GTY((variable_size)) lang_type { /* Nonzero means that NODE (a class type) is final */ #define CLASSTYPE_FINAL(NODE) \ - (LANG_TYPE_CLASS_CHECK (NODE)->is_final) + TYPE_FINAL_P (NODE) /* Nonzero means that this _CLASSTYPE node overloads operator=(X&). */ @@ -2122,9 +2121,10 @@ struct GTY((variable_size)) lang_decl { #define SET_DECL_LANGUAGE(NODE, LANGUAGE) \ (DECL_LANG_SPECIFIC (NODE)->u.base.language = (LANGUAGE)) -/* For FUNCTION_DECLs: nonzero means that this function is a constructor. */ +/* For FUNCTION_DECLs and TEMPLATE_DECLs: nonzero means that this function + is a constructor. */ #define DECL_CONSTRUCTOR_P(NODE) \ - (LANG_DECL_FN_CHECK (NODE)->constructor_attr) + DECL_CXX_CONSTRUCTOR_P (STRIP_TEMPLATE (NODE)) /* Nonzero if NODE (a FUNCTION_DECL) is a constructor for a complete object. */ @@ -2153,9 +2153,10 @@ struct GTY((variable_size)) lang_decl { #define DECL_MOVE_CONSTRUCTOR_P(NODE) \ (DECL_CONSTRUCTOR_P (NODE) && move_fn_p (NODE)) -/* Nonzero if NODE is a destructor. */ +/* Nonzero if NODE (a FUNCTION_DECL or TEMPLATE_DECL) + is a destructor. 
*/ #define DECL_DESTRUCTOR_P(NODE) \ - (LANG_DECL_FN_CHECK (NODE)->destructor_attr) + DECL_CXX_DESTRUCTOR_P (STRIP_TEMPLATE (NODE)) /* Nonzero if NODE (a FUNCTION_DECL) is a destructor, but not the specialized in-charge constructor, in-charge deleting constructor, @@ -2400,10 +2401,6 @@ struct GTY((variable_size)) lang_decl { an override virt-specifier */ #define DECL_OVERRIDE_P(NODE) (TREE_LANG_FLAG_0 (NODE)) -/* True (in a FUNCTION_DECL) if NODE is a function declared with - a final virt-specifier */ -#define DECL_FINAL_P(NODE) (TREE_LANG_FLAG_1 (NODE)) - /* The thunks associated with NODE, a FUNCTION_DECL. */ #define DECL_THUNKS(NODE) \ (DECL_VIRTUAL_P (NODE) ? LANG_DECL_FN_CHECK (NODE)->context : NULL_TREE) @@ -2975,7 +2972,7 @@ extern void decl_shadowed_for_var_insert (tree, tree); /* True if NODE is an implicit INDIRECT_EXPR from convert_from_reference. */ #define REFERENCE_REF_P(NODE) \ - (TREE_CODE (NODE) == INDIRECT_REF \ + (INDIRECT_REF_P (NODE) \ && TREE_TYPE (TREE_OPERAND (NODE, 0)) \ && (TREE_CODE (TREE_TYPE (TREE_OPERAND ((NODE), 0))) \ == REFERENCE_TYPE)) @@ -4023,7 +4020,7 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) See semantics.c for details. */ #define CP_OMP_CLAUSE_INFO(NODE) \ TREE_TYPE (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_PRIVATE, \ - OMP_CLAUSE_COPYPRIVATE)) + OMP_CLAUSE_LINEAR)) /* Nonzero if this transaction expression's body contains statements. */ #define TRANSACTION_EXPR_IS_STMT(NODE) \ @@ -4510,6 +4507,10 @@ enum overload_flags { NO_SPECIAL = 0, DTOR_FLAG, TYPENAME_FLAG }; #define LOOKUP_EXPLICIT_TMPL_ARGS (LOOKUP_ALREADY_DIGESTED << 1) /* Like LOOKUP_NO_TEMP_BIND, but also prevent binding to xvalues. */ #define LOOKUP_NO_RVAL_BIND (LOOKUP_EXPLICIT_TMPL_ARGS << 1) +/* Used by case_conversion to disregard non-integral conversions. */ +#define LOOKUP_NO_NON_INTEGRAL (LOOKUP_NO_RVAL_BIND << 1) +/* Used for delegating constructors in order to diagnose self-delegation. 
*/ +#define LOOKUP_DELEGATING_CONS (LOOKUP_NO_NON_INTEGRAL << 1) #define LOOKUP_NAMESPACES_ONLY(F) \ (((F) & LOOKUP_PREFER_NAMESPACES) && !((F) & LOOKUP_PREFER_TYPES)) @@ -5171,10 +5172,10 @@ extern void check_goto (tree); extern bool check_omp_return (void); extern tree make_typename_type (tree, tree, enum tag_types, tsubst_flags_t); extern tree make_unbound_class_template (tree, tree, tree, tsubst_flags_t); -extern tree build_library_fn_ptr (const char *, tree); -extern tree build_cp_library_fn_ptr (const char *, tree); -extern tree push_library_fn (tree, tree, tree); -extern tree push_void_library_fn (tree, tree); +extern tree build_library_fn_ptr (const char *, tree, int); +extern tree build_cp_library_fn_ptr (const char *, tree, int); +extern tree push_library_fn (tree, tree, tree, int); +extern tree push_void_library_fn (tree, tree, int); extern tree push_throw_library_fn (tree, tree); extern void warn_misplaced_attr_for_class_type (source_location location, tree class_type); @@ -5217,7 +5218,6 @@ extern tree grokmethod (cp_decl_specifier_seq *, const cp_declarator *, tree) extern void maybe_register_incomplete_var (tree); extern void maybe_commonize_var (tree); extern void complete_vars (tree); -extern void finish_stmt (void); extern tree static_fn_type (tree); extern void revert_static_member_fn (tree); extern void fixup_anonymous_aggr (tree); @@ -5299,6 +5299,8 @@ extern void note_vague_linkage_fn (tree); extern tree build_artificial_parm (tree, tree); extern bool possibly_inlined_p (tree); extern int parm_index (tree); +extern tree vtv_start_verification_constructor_init_function (void); +extern tree vtv_finish_verification_constructor_init_function (tree); /* in error.c */ extern void init_error (void); @@ -5389,6 +5391,7 @@ extern tree build_java_class_ref (tree); extern tree integral_constant_value (tree); extern tree decl_constant_value_safe (tree); extern int diagnose_uninitialized_cst_or_ref_member (tree, bool, bool); +extern tree build_vtbl_address 
(tree); /* in lex.c */ extern void cxx_dup_lang_specific_decl (tree); @@ -5618,7 +5621,6 @@ extern tree copied_binfo (tree, tree); extern tree original_binfo (tree, tree); extern int shared_member_p (tree); - /* The representation of a deferred access check. */ typedef struct GTY(()) deferred_access_check { @@ -6113,6 +6115,7 @@ extern tree mangle_tls_init_fn (tree); extern tree mangle_tls_wrapper_fn (tree); extern bool decl_tls_wrapper_p (tree); extern tree mangle_ref_init_variable (tree); +extern char * get_mangled_vtable_map_var_name (tree); /* in dump.c */ extern bool cp_dump_tree (void *, tree); @@ -6145,6 +6148,13 @@ extern bool cxx_omp_privatize_by_reference (const_tree); extern void suggest_alternatives_for (location_t, tree); extern tree strip_using_decl (tree); +/* in vtable-class-hierarchy.c */ +extern void vtv_compute_class_hierarchy_transitive_closure (void); +extern void vtv_generate_init_routine (void); +extern void vtv_save_class_info (tree); +extern void vtv_recover_class_info (void); +extern void vtv_build_vtable_verify_fndecl (void); + /* In cp/cp-array-notations.c */ extern tree expand_array_notation_exprs (tree); bool cilkplus_an_triplet_types_ok_p (location_t, tree, tree, tree, diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c index 532e8fd9d6b..08c026da178 100644 --- a/gcc/cp/cvt.c +++ b/gcc/cp/cvt.c @@ -1590,17 +1590,6 @@ build_expr_type_conversion (int desires, tree expr, bool complain) if (DECL_NONCONVERTING_P (cand)) continue; - if (TREE_CODE (cand) == TEMPLATE_DECL) - { - if (complain) - { - error ("ambiguous default type conversion from %qT", - basetype); - error (" candidate conversions include %qD", cand); - } - return error_mark_node; - } - candidate = non_reference (TREE_TYPE (TREE_TYPE (cand))); switch (TREE_CODE (candidate)) @@ -1634,11 +1623,23 @@ build_expr_type_conversion (int desires, tree expr, bool complain) break; default: + /* A wildcard could be instantiated to match any desired + type, but we can't deduce the template argument. 
*/ + if (WILDCARD_TYPE_P (candidate)) + win = true; break; } if (win) { + if (TREE_CODE (cand) == TEMPLATE_DECL) + { + if (complain) + error ("default type conversion can't deduce template" + " argument for %qD", cand); + return error_mark_node; + } + if (winner) { tree winner_type diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c index ef8df706758..4578a5b0b05 100644 --- a/gcc/cp/cxx-pretty-print.c +++ b/gcc/cp/cxx-pretty-print.c @@ -27,24 +27,13 @@ along with GCC; see the file COPYING3. If not see #include "cxx-pretty-print.h" #include "tree-pretty-print.h" -/* Translate if being used for diagnostics, but not for dump files or - __PRETTY_FUNCTION. */ -#define M_(msgid) (pp_translate_identifiers (pp) ? _(msgid) : (msgid)) - static void pp_cxx_unqualified_id (cxx_pretty_printer *, tree); static void pp_cxx_nested_name_specifier (cxx_pretty_printer *, tree); static void pp_cxx_qualified_id (cxx_pretty_printer *, tree); -static void pp_cxx_assignment_expression (cxx_pretty_printer *, tree); -static void pp_cxx_expression (cxx_pretty_printer *, tree); static void pp_cxx_template_argument_list (cxx_pretty_printer *, tree); static void pp_cxx_type_specifier_seq (cxx_pretty_printer *, tree); static void pp_cxx_ptr_operator (cxx_pretty_printer *, tree); -static void pp_cxx_type_id (cxx_pretty_printer *, tree); -static void pp_cxx_direct_abstract_declarator (cxx_pretty_printer *, tree); -static void pp_cxx_declarator (cxx_pretty_printer *, tree); static void pp_cxx_parameter_declaration_clause (cxx_pretty_printer *, tree); -static void pp_cxx_abstract_declarator (cxx_pretty_printer *, tree); -static void pp_cxx_statement (cxx_pretty_printer *, tree); static void pp_cxx_template_parameter (cxx_pretty_printer *, tree); static void pp_cxx_cast_expression (cxx_pretty_printer *, tree); static void pp_cxx_typeid_expression (cxx_pretty_printer *, tree); @@ -58,25 +47,23 @@ pp_cxx_nonconsecutive_character (cxx_pretty_printer *pp, int c) if (p != NULL && *p == c) 
pp_cxx_whitespace (pp); pp_character (pp, c); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } -#define pp_cxx_storage_class_specifier(PP, T) \ - pp_c_storage_class_specifier (pp_c_base (PP), T) #define pp_cxx_expression_list(PP, T) \ - pp_c_expression_list (pp_c_base (PP), T) + pp_c_expression_list (PP, T) #define pp_cxx_space_for_pointer_operator(PP, T) \ - pp_c_space_for_pointer_operator (pp_c_base (PP), T) + pp_c_space_for_pointer_operator (PP, T) #define pp_cxx_init_declarator(PP, T) \ - pp_c_init_declarator (pp_c_base (PP), T) + pp_c_init_declarator (PP, T) #define pp_cxx_call_argument_list(PP, T) \ - pp_c_call_argument_list (pp_c_base (PP), T) + pp_c_call_argument_list (PP, T) void pp_cxx_colon_colon (cxx_pretty_printer *pp) { pp_colon_colon (pp); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } void @@ -95,7 +82,7 @@ void pp_cxx_separate_with (cxx_pretty_printer *pp, int c) { pp_separate_with (pp, c); - pp_base (pp)->padding = pp_none; + pp->padding = pp_none; } /* Expressions. 
*/ @@ -149,7 +136,7 @@ pp_cxx_unqualified_id (cxx_pretty_printer *pp, tree t) switch (code) { case RESULT_DECL: - pp_cxx_ws_string (pp, M_("")); + pp->translate_string (""); break; case OVERLOAD: @@ -168,7 +155,7 @@ pp_cxx_unqualified_id (cxx_pretty_printer *pp, tree t) case IDENTIFIER_NODE: if (t == NULL) - pp_cxx_ws_string (pp, M_("")); + pp->translate_string (""); else if (IDENTIFIER_TYPENAME_P (t)) pp_cxx_conversion_function_id (pp, t); else @@ -321,8 +308,8 @@ pp_cxx_qualified_id (cxx_pretty_printer *pp, tree t) } -static void -pp_cxx_constant (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::constant (tree t) { switch (TREE_CODE (t)) { @@ -330,23 +317,23 @@ pp_cxx_constant (cxx_pretty_printer *pp, tree t) { const bool in_parens = PAREN_STRING_LITERAL_P (t); if (in_parens) - pp_cxx_left_paren (pp); - pp_c_constant (pp_c_base (pp), t); + pp_cxx_left_paren (this); + c_pretty_printer::constant (t); if (in_parens) - pp_cxx_right_paren (pp); + pp_cxx_right_paren (this); } break; case INTEGER_CST: if (NULLPTR_TYPE_P (TREE_TYPE (t))) { - pp_string (pp, "nullptr"); + pp_string (this, "nullptr"); break; } /* else fall through. 
*/ default: - pp_c_constant (pp_c_base (pp), t); + c_pretty_printer::constant (t); break; } } @@ -355,15 +342,15 @@ pp_cxx_constant (cxx_pretty_printer *pp, tree t) unqualified-id qualified-id */ -static inline void -pp_cxx_id_expression (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::id_expression (tree t) { if (TREE_CODE (t) == OVERLOAD) t = OVL_CURRENT (t); if (DECL_P (t) && DECL_CONTEXT (t)) - pp_cxx_qualified_id (pp, t); + pp_cxx_qualified_id (this, t); else - pp_cxx_unqualified_id (pp, t); + pp_cxx_unqualified_id (this, t); } /* user-defined literal: @@ -372,8 +359,8 @@ pp_cxx_id_expression (cxx_pretty_printer *pp, tree t) void pp_cxx_userdef_literal (cxx_pretty_printer *pp, tree t) { - pp_cxx_constant (pp, USERDEF_LITERAL_VALUE (t)); - pp_cxx_id_expression (pp, USERDEF_LITERAL_SUFFIX_ID (t)); + pp->constant (USERDEF_LITERAL_VALUE (t)); + pp->id_expression (USERDEF_LITERAL_SUFFIX_ID (t)); } @@ -411,8 +398,8 @@ pp_cxx_userdef_literal (cxx_pretty_printer *pp, tree t) __is_trivial ( type-id ) __is_union ( type-id ) */ -static void -pp_cxx_primary_expression (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::primary_expression (tree t) { switch (TREE_CODE (t)) { @@ -420,11 +407,11 @@ pp_cxx_primary_expression (cxx_pretty_printer *pp, tree t) case REAL_CST: case COMPLEX_CST: case STRING_CST: - pp_cxx_constant (pp, t); + constant (t); break; case USERDEF_LITERAL: - pp_cxx_userdef_literal (pp, t); + pp_cxx_userdef_literal (this, t); break; case BASELINK: @@ -436,36 +423,36 @@ pp_cxx_primary_expression (cxx_pretty_printer *pp, tree t) case OVERLOAD: case CONST_DECL: case TEMPLATE_DECL: - pp_cxx_id_expression (pp, t); + id_expression (t); break; case RESULT_DECL: case TEMPLATE_TYPE_PARM: case TEMPLATE_TEMPLATE_PARM: case TEMPLATE_PARM_INDEX: - pp_cxx_unqualified_id (pp, t); + pp_cxx_unqualified_id (this, t); break; case STMT_EXPR: - pp_cxx_left_paren (pp); - pp_cxx_statement (pp, STMT_EXPR_STMT (t)); - pp_cxx_right_paren (pp); + pp_cxx_left_paren 
(this); + statement (STMT_EXPR_STMT (t)); + pp_cxx_right_paren (this); break; case TRAIT_EXPR: - pp_cxx_trait_expression (pp, t); + pp_cxx_trait_expression (this, t); break; case VA_ARG_EXPR: - pp_cxx_va_arg_expression (pp, t); + pp_cxx_va_arg_expression (this, t); break; case OFFSETOF_EXPR: - pp_cxx_offsetof_expression (pp, t); + pp_cxx_offsetof_expression (this, t); break; default: - pp_c_primary_expression (pp_c_base (pp), t); + c_pretty_printer::primary_expression (t); break; } } @@ -491,8 +478,8 @@ pp_cxx_primary_expression (cxx_pretty_printer *pp, tree t) typeid ( expression ) typeid ( type-id ) */ -static void -pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::postfix_expression (tree t) { enum tree_code code = TREE_CODE (t); @@ -503,7 +490,7 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) { tree fun = (code == AGGR_INIT_EXPR ? AGGR_INIT_EXPR_FN (t) : CALL_EXPR_FN (t)); - tree saved_scope = pp->enclosing_scope; + tree saved_scope = enclosing_scope; bool skipfirst = false; tree arg; @@ -532,21 +519,21 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) if (!TYPE_PTR_P (TREE_TYPE (object))) { - pp_cxx_postfix_expression (pp, object); - pp_cxx_dot (pp); + postfix_expression (object); + pp_cxx_dot (this); } else { - pp_cxx_postfix_expression (pp, object); - pp_cxx_arrow (pp); + postfix_expression (object); + pp_cxx_arrow (this); } skipfirst = true; - pp->enclosing_scope = strip_pointer_operator (TREE_TYPE (object)); + enclosing_scope = strip_pointer_operator (TREE_TYPE (object)); } - pp_cxx_postfix_expression (pp, fun); - pp->enclosing_scope = saved_scope; - pp_cxx_left_paren (pp); + postfix_expression (fun); + enclosing_scope = saved_scope; + pp_cxx_left_paren (this); if (code == AGGR_INIT_EXPR) { aggr_init_expr_arg_iterator iter; @@ -556,9 +543,9 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) skipfirst = false; else { - pp_cxx_expression (pp, arg); + expression (arg); if 
(more_aggr_init_expr_args_p (&iter)) - pp_cxx_separate_with (pp, ','); + pp_cxx_separate_with (this, ','); } } } @@ -571,18 +558,18 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) skipfirst = false; else { - pp_cxx_expression (pp, arg); + expression (arg); if (more_call_expr_args_p (&iter)) - pp_cxx_separate_with (pp, ','); + pp_cxx_separate_with (this, ','); } } } - pp_cxx_right_paren (pp); + pp_cxx_right_paren (this); } if (code == AGGR_INIT_EXPR && AGGR_INIT_VIA_CTOR_P (t)) { - pp_cxx_separate_with (pp, ','); - pp_cxx_postfix_expression (pp, AGGR_INIT_EXPR_SLOT (t)); + pp_cxx_separate_with (this, ','); + postfix_expression (AGGR_INIT_EXPR_SLOT (t)); } break; @@ -595,7 +582,7 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) case CONST_DECL: case TEMPLATE_DECL: case RESULT_DECL: - pp_cxx_primary_expression (pp, t); + primary_expression (t); break; case DYNAMIC_CAST_EXPR: @@ -603,47 +590,47 @@ pp_cxx_postfix_expression (cxx_pretty_printer *pp, tree t) case REINTERPRET_CAST_EXPR: case CONST_CAST_EXPR: if (code == DYNAMIC_CAST_EXPR) - pp_cxx_ws_string (pp, "dynamic_cast"); + pp_cxx_ws_string (this, "dynamic_cast"); else if (code == STATIC_CAST_EXPR) - pp_cxx_ws_string (pp, "static_cast"); + pp_cxx_ws_string (this, "static_cast"); else if (code == REINTERPRET_CAST_EXPR) - pp_cxx_ws_string (pp, "reinterpret_cast"); + pp_cxx_ws_string (this, "reinterpret_cast"); else - pp_cxx_ws_string (pp, "const_cast"); - pp_cxx_begin_template_argument_list (pp); - pp_cxx_type_id (pp, TREE_TYPE (t)); - pp_cxx_end_template_argument_list (pp); - pp_left_paren (pp); - pp_cxx_expression (pp, TREE_OPERAND (t, 0)); - pp_right_paren (pp); + pp_cxx_ws_string (this, "const_cast"); + pp_cxx_begin_template_argument_list (this); + type_id (TREE_TYPE (t)); + pp_cxx_end_template_argument_list (this); + pp_left_paren (this); + expression (TREE_OPERAND (t, 0)); + pp_right_paren (this); break; case EMPTY_CLASS_EXPR: - pp_cxx_type_id (pp, TREE_TYPE (t)); - pp_left_paren (pp); 
- pp_right_paren (pp); + type_id (TREE_TYPE (t)); + pp_left_paren (this); + pp_right_paren (this); break; case TYPEID_EXPR: - pp_cxx_typeid_expression (pp, t); + pp_cxx_typeid_expression (this, t); break; case PSEUDO_DTOR_EXPR: - pp_cxx_postfix_expression (pp, TREE_OPERAND (t, 0)); - pp_cxx_dot (pp); - pp_cxx_qualified_id (pp, TREE_OPERAND (t, 1)); - pp_cxx_colon_colon (pp); - pp_complement (pp); - pp_cxx_unqualified_id (pp, TREE_OPERAND (t, 2)); + postfix_expression (TREE_OPERAND (t, 0)); + pp_cxx_dot (this); + pp_cxx_qualified_id (this, TREE_OPERAND (t, 1)); + pp_cxx_colon_colon (this); + pp_complement (this); + pp_cxx_unqualified_id (this, TREE_OPERAND (t, 2)); break; case ARROW_EXPR: - pp_cxx_postfix_expression (pp, TREE_OPERAND (t, 0)); - pp_cxx_arrow (pp); + postfix_expression (TREE_OPERAND (t, 0)); + pp_cxx_arrow (this); break; default: - pp_c_postfix_expression (pp_c_base (pp), t); + c_pretty_printer::postfix_expression (t); break; } } @@ -694,16 +681,16 @@ pp_cxx_new_expression (cxx_pretty_printer *pp, tree t) MINUS_EXPR, integer_type_node, TREE_OPERAND (type, 1), integer_one_node))); - pp_cxx_type_id (pp, type); + pp->type_id (type); if (init) { pp_left_paren (pp); if (TREE_CODE (init) == TREE_LIST) - pp_c_expression_list (pp_c_base (pp), init); + pp_c_expression_list (pp, init); else if (init == void_zero_node) ; /* OK, empty initializer list. 
*/ else - pp_cxx_expression (pp, init); + pp->expression (init); pp_right_paren (pp); } break; @@ -736,7 +723,7 @@ pp_cxx_delete_expression (cxx_pretty_printer *pp, tree t) pp_right_bracket (pp); pp_space (pp); } - pp_c_cast_expression (pp_c_base (pp), TREE_OPERAND (t, 0)); + pp_c_cast_expression (pp, TREE_OPERAND (t, 0)); break; default: @@ -762,80 +749,80 @@ pp_cxx_delete_expression (cxx_pretty_printer *pp, tree t) __alignof__ unary-expression __alignof__ ( type-id ) */ -static void -pp_cxx_unary_expression (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::unary_expression (tree t) { enum tree_code code = TREE_CODE (t); switch (code) { case NEW_EXPR: case VEC_NEW_EXPR: - pp_cxx_new_expression (pp, t); + pp_cxx_new_expression (this, t); break; case DELETE_EXPR: case VEC_DELETE_EXPR: - pp_cxx_delete_expression (pp, t); + pp_cxx_delete_expression (this, t); break; case SIZEOF_EXPR: if (PACK_EXPANSION_P (TREE_OPERAND (t, 0))) { - pp_cxx_ws_string (pp, "sizeof"); - pp_cxx_ws_string (pp, "..."); - pp_cxx_whitespace (pp); - pp_cxx_left_paren (pp); + pp_cxx_ws_string (this, "sizeof"); + pp_cxx_ws_string (this, "..."); + pp_cxx_whitespace (this); + pp_cxx_left_paren (this); if (TYPE_P (TREE_OPERAND (t, 0))) - pp_cxx_type_id (pp, TREE_OPERAND (t, 0)); + type_id (TREE_OPERAND (t, 0)); else - pp_unary_expression (pp, TREE_OPERAND (t, 0)); - pp_cxx_right_paren (pp); + unary_expression (TREE_OPERAND (t, 0)); + pp_cxx_right_paren (this); break; } /* Fall through */ case ALIGNOF_EXPR: - pp_cxx_ws_string (pp, code == SIZEOF_EXPR ? "sizeof" : "__alignof__"); - pp_cxx_whitespace (pp); + pp_cxx_ws_string (this, code == SIZEOF_EXPR ? 
"sizeof" : "__alignof__"); + pp_cxx_whitespace (this); if (TREE_CODE (t) == SIZEOF_EXPR && SIZEOF_EXPR_TYPE_P (t)) { - pp_cxx_left_paren (pp); - pp_cxx_type_id (pp, TREE_TYPE (TREE_OPERAND (t, 0))); - pp_cxx_right_paren (pp); + pp_cxx_left_paren (this); + type_id (TREE_TYPE (TREE_OPERAND (t, 0))); + pp_cxx_right_paren (this); } else if (TYPE_P (TREE_OPERAND (t, 0))) { - pp_cxx_left_paren (pp); - pp_cxx_type_id (pp, TREE_OPERAND (t, 0)); - pp_cxx_right_paren (pp); + pp_cxx_left_paren (this); + type_id (TREE_OPERAND (t, 0)); + pp_cxx_right_paren (this); } else - pp_unary_expression (pp, TREE_OPERAND (t, 0)); + unary_expression (TREE_OPERAND (t, 0)); break; case AT_ENCODE_EXPR: - pp_cxx_ws_string (pp, "@encode"); - pp_cxx_whitespace (pp); - pp_cxx_left_paren (pp); - pp_cxx_type_id (pp, TREE_OPERAND (t, 0)); - pp_cxx_right_paren (pp); + pp_cxx_ws_string (this, "@encode"); + pp_cxx_whitespace (this); + pp_cxx_left_paren (this); + type_id (TREE_OPERAND (t, 0)); + pp_cxx_right_paren (this); break; case NOEXCEPT_EXPR: - pp_cxx_ws_string (pp, "noexcept"); - pp_cxx_whitespace (pp); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, TREE_OPERAND (t, 0)); - pp_cxx_right_paren (pp); + pp_cxx_ws_string (this, "noexcept"); + pp_cxx_whitespace (this); + pp_cxx_left_paren (this); + expression (TREE_OPERAND (t, 0)); + pp_cxx_right_paren (this); break; case UNARY_PLUS_EXPR: - pp_plus (pp); - pp_cxx_cast_expression (pp, TREE_OPERAND (t, 0)); + pp_plus (this); + pp_cxx_cast_expression (this, TREE_OPERAND (t, 0)); break; default: - pp_c_unary_expression (pp_c_base (pp), t); + c_pretty_printer::unary_expression (t); break; } } @@ -851,12 +838,12 @@ pp_cxx_cast_expression (cxx_pretty_printer *pp, tree t) { case CAST_EXPR: case IMPLICIT_CONV_EXPR: - pp_cxx_type_id (pp, TREE_TYPE (t)); + pp->type_id (TREE_TYPE (t)); pp_cxx_call_argument_list (pp, TREE_OPERAND (t, 0)); break; default: - pp_c_cast_expression (pp_c_base (pp), t); + pp_c_cast_expression (pp, t); break; } } @@ -903,8 +890,8 @@ 
pp_cxx_pm_expression (cxx_pretty_printer *pp, tree t) multiplicative-expression / pm-expression multiplicative-expression % pm-expression */ -static void -pp_cxx_multiplicative_expression (cxx_pretty_printer *pp, tree e) +void +cxx_pretty_printer::multiplicative_expression (tree e) { enum tree_code code = TREE_CODE (e); switch (code) @@ -912,20 +899,20 @@ pp_cxx_multiplicative_expression (cxx_pretty_printer *pp, tree e) case MULT_EXPR: case TRUNC_DIV_EXPR: case TRUNC_MOD_EXPR: - pp_cxx_multiplicative_expression (pp, TREE_OPERAND (e, 0)); - pp_space (pp); + multiplicative_expression (TREE_OPERAND (e, 0)); + pp_space (this); if (code == MULT_EXPR) - pp_star (pp); + pp_star (this); else if (code == TRUNC_DIV_EXPR) - pp_slash (pp); + pp_slash (this); else - pp_modulo (pp); - pp_space (pp); - pp_cxx_pm_expression (pp, TREE_OPERAND (e, 1)); + pp_modulo (this); + pp_space (this); + pp_cxx_pm_expression (this, TREE_OPERAND (e, 1)); break; default: - pp_cxx_pm_expression (pp, e); + pp_cxx_pm_expression (this, e); break; } } @@ -934,21 +921,21 @@ pp_cxx_multiplicative_expression (cxx_pretty_printer *pp, tree e) logical-or-expression logical-or-expression ? 
expression : assignment-expression */ -static void -pp_cxx_conditional_expression (cxx_pretty_printer *pp, tree e) +void +cxx_pretty_printer::conditional_expression (tree e) { if (TREE_CODE (e) == COND_EXPR) { - pp_c_logical_or_expression (pp_c_base (pp), TREE_OPERAND (e, 0)); - pp_space (pp); - pp_question (pp); - pp_space (pp); - pp_cxx_expression (pp, TREE_OPERAND (e, 1)); - pp_space (pp); - pp_cxx_assignment_expression (pp, TREE_OPERAND (e, 2)); + pp_c_logical_or_expression (this, TREE_OPERAND (e, 0)); + pp_space (this); + pp_question (this); + pp_space (this); + expression (TREE_OPERAND (e, 1)); + pp_space (this); + assignment_expression (TREE_OPERAND (e, 2)); } else - pp_c_logical_or_expression (pp_c_base (pp), e); + pp_c_logical_or_expression (this, e); } /* Pretty-print a compound assignment operator token as indicated by T. */ @@ -1000,40 +987,40 @@ pp_cxx_assignment_operator (cxx_pretty_printer *pp, tree t) assignment-operator: one of = *= /= %= += -= >>= <<= &= ^= |= */ -static void -pp_cxx_assignment_expression (cxx_pretty_printer *pp, tree e) +void +cxx_pretty_printer::assignment_expression (tree e) { switch (TREE_CODE (e)) { case MODIFY_EXPR: case INIT_EXPR: - pp_c_logical_or_expression (pp_c_base (pp), TREE_OPERAND (e, 0)); - pp_space (pp); - pp_equal (pp); - pp_space (pp); - pp_cxx_assignment_expression (pp, TREE_OPERAND (e, 1)); + pp_c_logical_or_expression (this, TREE_OPERAND (e, 0)); + pp_space (this); + pp_equal (this); + pp_space (this); + assignment_expression (TREE_OPERAND (e, 1)); break; case THROW_EXPR: - pp_cxx_ws_string (pp, "throw"); + pp_cxx_ws_string (this, "throw"); if (TREE_OPERAND (e, 0)) - pp_cxx_assignment_expression (pp, TREE_OPERAND (e, 0)); + assignment_expression (TREE_OPERAND (e, 0)); break; case MODOP_EXPR: - pp_c_logical_or_expression (pp_c_base (pp), TREE_OPERAND (e, 0)); - pp_cxx_assignment_operator (pp, TREE_OPERAND (e, 1)); - pp_cxx_assignment_expression (pp, TREE_OPERAND (e, 2)); + pp_c_logical_or_expression (this, 
TREE_OPERAND (e, 0)); + pp_cxx_assignment_operator (this, TREE_OPERAND (e, 1)); + assignment_expression (TREE_OPERAND (e, 2)); break; default: - pp_cxx_conditional_expression (pp, e); + conditional_expression (e); break; } } -static void -pp_cxx_expression (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::expression (tree t) { switch (TREE_CODE (t)) { @@ -1041,15 +1028,15 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) case INTEGER_CST: case REAL_CST: case COMPLEX_CST: - pp_cxx_constant (pp, t); + constant (t); break; case USERDEF_LITERAL: - pp_cxx_userdef_literal (pp, t); + pp_cxx_userdef_literal (this, t); break; case RESULT_DECL: - pp_cxx_unqualified_id (pp, t); + pp_cxx_unqualified_id (this, t); break; #if 0 @@ -1057,7 +1044,7 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) #endif case SCOPE_REF: case PTRMEM_CST: - pp_cxx_qualified_id (pp, t); + pp_cxx_qualified_id (this, t); break; case OVERLOAD: @@ -1073,7 +1060,7 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) case TEMPLATE_PARM_INDEX: case TEMPLATE_TEMPLATE_PARM: case STMT_EXPR: - pp_cxx_primary_expression (pp, t); + primary_expression (t); break; case CALL_EXPR: @@ -1089,65 +1076,65 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) case PSEUDO_DTOR_EXPR: case AGGR_INIT_EXPR: case ARROW_EXPR: - pp_cxx_postfix_expression (pp, t); + postfix_expression (t); break; case NEW_EXPR: case VEC_NEW_EXPR: - pp_cxx_new_expression (pp, t); + pp_cxx_new_expression (this, t); break; case DELETE_EXPR: case VEC_DELETE_EXPR: - pp_cxx_delete_expression (pp, t); + pp_cxx_delete_expression (this, t); break; case SIZEOF_EXPR: case ALIGNOF_EXPR: case NOEXCEPT_EXPR: - pp_cxx_unary_expression (pp, t); + unary_expression (t); break; case CAST_EXPR: case IMPLICIT_CONV_EXPR: - pp_cxx_cast_expression (pp, t); + pp_cxx_cast_expression (this, t); break; case OFFSET_REF: case MEMBER_REF: case DOTSTAR_EXPR: - pp_cxx_pm_expression (pp, t); + pp_cxx_pm_expression (this, t); break; case MULT_EXPR: case 
TRUNC_DIV_EXPR: case TRUNC_MOD_EXPR: - pp_cxx_multiplicative_expression (pp, t); + multiplicative_expression (t); break; case COND_EXPR: - pp_cxx_conditional_expression (pp, t); + conditional_expression (t); break; case MODIFY_EXPR: case INIT_EXPR: case THROW_EXPR: case MODOP_EXPR: - pp_cxx_assignment_expression (pp, t); + assignment_expression (t); break; case NON_DEPENDENT_EXPR: case MUST_NOT_THROW_EXPR: - pp_cxx_expression (pp, TREE_OPERAND (t, 0)); + expression (TREE_OPERAND (t, 0)); break; case EXPR_PACK_EXPANSION: - pp_cxx_expression (pp, PACK_EXPANSION_PATTERN (t)); - pp_cxx_ws_string (pp, "..."); + expression (PACK_EXPANSION_PATTERN (t)); + pp_cxx_ws_string (this, "..."); break; case TEMPLATE_ID_EXPR: - pp_cxx_template_id (pp, t); + pp_cxx_template_id (this, t); break; case NONTYPE_ARGUMENT_PACK: @@ -1157,24 +1144,24 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) for (i = 0; i < len; ++i) { if (i > 0) - pp_cxx_separate_with (pp, ','); - pp_cxx_expression (pp, TREE_VEC_ELT (args, i)); + pp_cxx_separate_with (this, ','); + expression (TREE_VEC_ELT (args, i)); } } break; case LAMBDA_EXPR: - pp_cxx_ws_string (pp, ""); + pp_cxx_ws_string (this, ""); break; case PAREN_EXPR: - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, TREE_OPERAND (t, 0)); - pp_cxx_right_paren (pp); + pp_cxx_left_paren (this); + expression (TREE_OPERAND (t, 0)); + pp_cxx_right_paren (this); break; default: - pp_c_expression (pp_c_base (pp), t); + c_pretty_printer::expression (t); break; } } @@ -1187,18 +1174,18 @@ pp_cxx_expression (cxx_pretty_printer *pp, tree t) virtual explicit */ -static void -pp_cxx_function_specifier (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::function_specifier (tree t) { switch (TREE_CODE (t)) { case FUNCTION_DECL: if (DECL_VIRTUAL_P (t)) - pp_cxx_ws_string (pp, "virtual"); + pp_cxx_ws_string (this, "virtual"); else if (DECL_CONSTRUCTOR_P (t) && DECL_NONCONVERTING_P (t)) - pp_cxx_ws_string (pp, "explicit"); + pp_cxx_ws_string (this, 
"explicit"); else - pp_c_function_specifier (pp_c_base (pp), t); + c_pretty_printer::function_specifier (t); default: break; @@ -1215,8 +1202,8 @@ pp_cxx_function_specifier (cxx_pretty_printer *pp, tree t) friend typedef */ -static void -pp_cxx_decl_specifier_seq (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::declaration_specifiers (tree t) { switch (TREE_CODE (t)) { @@ -1224,25 +1211,25 @@ pp_cxx_decl_specifier_seq (cxx_pretty_printer *pp, tree t) case PARM_DECL: case CONST_DECL: case FIELD_DECL: - pp_cxx_storage_class_specifier (pp, t); - pp_cxx_decl_specifier_seq (pp, TREE_TYPE (t)); + storage_class_specifier (t); + declaration_specifiers (TREE_TYPE (t)); break; case TYPE_DECL: - pp_cxx_ws_string (pp, "typedef"); - pp_cxx_decl_specifier_seq (pp, TREE_TYPE (t)); + pp_cxx_ws_string (this, "typedef"); + declaration_specifiers (TREE_TYPE (t)); break; case FUNCTION_DECL: /* Constructors don't have return types. And conversion functions do not have a type-specifier in their return types. 
*/ if (DECL_CONSTRUCTOR_P (t) || DECL_CONV_FN_P (t)) - pp_cxx_function_specifier (pp, t); + function_specifier (t); else if (DECL_NONSTATIC_MEMBER_FUNCTION_P (t)) - pp_cxx_decl_specifier_seq (pp, TREE_TYPE (TREE_TYPE (t))); + declaration_specifiers (TREE_TYPE (TREE_TYPE (t))); else default: - pp_c_declaration_specifiers (pp_c_base (pp), t); + c_pretty_printer::declaration_specifiers (t); break; } } @@ -1262,32 +1249,32 @@ pp_cxx_decl_specifier_seq (cxx_pretty_printer *pp, tree t) double void */ -static void -pp_cxx_simple_type_specifier (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::simple_type_specifier (tree t) { switch (TREE_CODE (t)) { case RECORD_TYPE: case UNION_TYPE: case ENUMERAL_TYPE: - pp_cxx_qualified_id (pp, t); + pp_cxx_qualified_id (this, t); break; case TEMPLATE_TYPE_PARM: case TEMPLATE_TEMPLATE_PARM: case TEMPLATE_PARM_INDEX: case BOUND_TEMPLATE_TEMPLATE_PARM: - pp_cxx_unqualified_id (pp, t); + pp_cxx_unqualified_id (this, t); break; case TYPENAME_TYPE: - pp_cxx_ws_string (pp, "typename"); - pp_cxx_nested_name_specifier (pp, TYPE_CONTEXT (t)); - pp_cxx_unqualified_id (pp, TYPE_NAME (t)); + pp_cxx_ws_string (this, "typename"); + pp_cxx_nested_name_specifier (this, TYPE_CONTEXT (t)); + pp_cxx_unqualified_id (this, TYPE_NAME (t)); break; default: - pp_c_type_specifier (pp_c_base (pp), t); + c_pretty_printer::simple_type_specifier (t); break; } } @@ -1313,7 +1300,7 @@ pp_cxx_type_specifier_seq (cxx_pretty_printer *pp, tree t) case TYPE_DECL: case BOUND_TEMPLATE_TEMPLATE_PARM: pp_cxx_cv_qualifier_seq (pp, t); - pp_cxx_simple_type_specifier (pp, t); + pp->simple_type_specifier (t); break; case METHOD_TYPE: @@ -1325,7 +1312,7 @@ pp_cxx_type_specifier_seq (cxx_pretty_printer *pp, tree t) case DECLTYPE_TYPE: pp_cxx_ws_string (pp, "decltype"); pp_cxx_left_paren (pp); - pp_cxx_expression (pp, DECLTYPE_TYPE_EXPR (t)); + pp->expression (DECLTYPE_TYPE_EXPR (t)); pp_cxx_right_paren (pp); break; @@ -1333,7 +1320,7 @@ pp_cxx_type_specifier_seq 
(cxx_pretty_printer *pp, tree t) if (TYPE_PTRMEMFUNC_P (t)) { tree pfm = TYPE_PTRMEMFUNC_FN_TYPE (t); - pp_cxx_decl_specifier_seq (pp, TREE_TYPE (TREE_TYPE (pfm))); + pp->declaration_specifiers (TREE_TYPE (TREE_TYPE (pfm))); pp_cxx_whitespace (pp); pp_cxx_ptr_operator (pp, t); break; @@ -1342,7 +1329,7 @@ pp_cxx_type_specifier_seq (cxx_pretty_printer *pp, tree t) default: if (!(TREE_CODE (t) == FUNCTION_DECL && DECL_CONSTRUCTOR_P (t))) - pp_c_specifier_qualifier_list (pp_c_base (pp), t); + pp_c_specifier_qualifier_list (pp, t); } } @@ -1362,8 +1349,7 @@ pp_cxx_ptr_operator (cxx_pretty_printer *pp, tree t) case POINTER_TYPE: if (TYPE_PTR_OR_PTRMEM_P (TREE_TYPE (t))) pp_cxx_ptr_operator (pp, TREE_TYPE (t)); - pp_c_attributes_display (pp_c_base (pp), - TYPE_ATTRIBUTES (TREE_TYPE (t))); + pp_c_attributes_display (pp, TYPE_ATTRIBUTES (TREE_TYPE (t))); if (TYPE_PTR_P (t)) { pp_star (pp); @@ -1415,11 +1401,11 @@ pp_cxx_implicit_parameter_type (tree mf) static inline void pp_cxx_parameter_declaration (cxx_pretty_printer *pp, tree t) { - pp_cxx_decl_specifier_seq (pp, t); + pp->declaration_specifiers (t); if (TYPE_P (t)) - pp_cxx_abstract_declarator (pp, t); + pp->abstract_declarator (t); else - pp_cxx_declarator (pp, t); + pp->declarator (t); } /* parameter-declaration-clause: @@ -1436,8 +1422,7 @@ pp_cxx_parameter_declaration_clause (cxx_pretty_printer *pp, tree t) tree args = TYPE_P (t) ? NULL : FUNCTION_FIRST_USER_PARM (t); tree types = TYPE_P (t) ? TYPE_ARG_TYPES (t) : FUNCTION_FIRST_USER_PARMTYPE (t); - const bool abstract = args == NULL - || pp_c_base (pp)->flags & pp_c_flag_abstract; + const bool abstract = args == NULL || pp->flags & pp_c_flag_abstract; bool first = true; /* Skip artificial parameter for nonstatic member functions. */ @@ -1451,12 +1436,12 @@ pp_cxx_parameter_declaration_clause (cxx_pretty_printer *pp, tree t) pp_cxx_separate_with (pp, ','); first = false; pp_cxx_parameter_declaration (pp, abstract ? 
TREE_VALUE (types) : args); - if (!abstract && pp_c_base (pp)->flags & pp_cxx_flag_default_argument) + if (!abstract && pp->flags & pp_cxx_flag_default_argument) { pp_cxx_whitespace (pp); pp_equal (pp); pp_cxx_whitespace (pp); - pp_cxx_assignment_expression (pp, TREE_PURPOSE (types)); + pp->assignment_expression (TREE_PURPOSE (types)); } } pp_cxx_right_paren (pp); @@ -1485,7 +1470,7 @@ pp_cxx_exception_specification (cxx_pretty_printer *pp, tree t) if (DEFERRED_NOEXCEPT_SPEC_P (ex_spec)) pp_cxx_ws_string (pp, ""); else - pp_cxx_expression (pp, TREE_PURPOSE (ex_spec)); + pp->expression (TREE_PURPOSE (ex_spec)); pp_cxx_right_paren (pp); return; } @@ -1513,7 +1498,7 @@ pp_cxx_exception_specification (cxx_pretty_printer *pp, tree t) else need_comma = true; - pp_cxx_type_id (pp, type); + pp->type_id (type); } } pp_cxx_right_paren (pp); @@ -1526,8 +1511,8 @@ pp_cxx_exception_specification (cxx_pretty_printer *pp, tree t) direct-declaration [ constant-expression(opt) ] ( declarator ) */ -static void -pp_cxx_direct_declarator (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::direct_declarator (tree t) { switch (TREE_CODE (t)) { @@ -1537,31 +1522,31 @@ pp_cxx_direct_declarator (cxx_pretty_printer *pp, tree t) case FIELD_DECL: if (DECL_NAME (t)) { - pp_cxx_space_for_pointer_operator (pp, TREE_TYPE (t)); + pp_cxx_space_for_pointer_operator (this, TREE_TYPE (t)); if ((TREE_CODE (t) == PARM_DECL && FUNCTION_PARAMETER_PACK_P (t)) || template_parameter_pack_p (t)) /* A function parameter pack or non-type template parameter pack. 
*/ - pp_cxx_ws_string (pp, "..."); + pp_cxx_ws_string (this, "..."); - pp_cxx_id_expression (pp, DECL_NAME (t)); + id_expression (DECL_NAME (t)); } - pp_cxx_abstract_declarator (pp, TREE_TYPE (t)); + abstract_declarator (TREE_TYPE (t)); break; case FUNCTION_DECL: - pp_cxx_space_for_pointer_operator (pp, TREE_TYPE (TREE_TYPE (t))); - pp_cxx_id_expression (pp, t); - pp_cxx_parameter_declaration_clause (pp, t); + pp_cxx_space_for_pointer_operator (this, TREE_TYPE (TREE_TYPE (t))); + expression (t); + pp_cxx_parameter_declaration_clause (this, t); if (DECL_NONSTATIC_MEMBER_FUNCTION_P (t)) { - pp_base (pp)->padding = pp_before; - pp_cxx_cv_qualifier_seq (pp, pp_cxx_implicit_parameter_type (t)); + padding = pp_before; + pp_cxx_cv_qualifier_seq (this, pp_cxx_implicit_parameter_type (t)); } - pp_cxx_exception_specification (pp, TREE_TYPE (t)); + pp_cxx_exception_specification (this, TREE_TYPE (t)); break; case TYPENAME_TYPE: @@ -1572,7 +1557,7 @@ pp_cxx_direct_declarator (cxx_pretty_printer *pp, tree t) break; default: - pp_c_direct_declarator (pp_c_base (pp), t); + c_pretty_printer::direct_declarator (t); break; } } @@ -1581,10 +1566,10 @@ pp_cxx_direct_declarator (cxx_pretty_printer *pp, tree t) direct-declarator ptr-operator declarator */ -static void -pp_cxx_declarator (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::declarator (tree t) { - pp_cxx_direct_declarator (pp, t); + direct_declarator (t); } /* ctor-initializer: @@ -1614,9 +1599,9 @@ pp_cxx_ctor_initializer (cxx_pretty_printer *pp, tree t) bool is_pack = PACK_EXPANSION_P (purpose); if (is_pack) - pp_cxx_primary_expression (pp, PACK_EXPANSION_PATTERN (purpose)); + pp->primary_expression (PACK_EXPANSION_PATTERN (purpose)); else - pp_cxx_primary_expression (pp, purpose); + pp->primary_expression (purpose); pp_cxx_call_argument_list (pp, TREE_VALUE (t)); if (is_pack) pp_cxx_ws_string (pp, "..."); @@ -1633,12 +1618,12 @@ static void pp_cxx_function_definition (cxx_pretty_printer *pp, tree t) { tree 
saved_scope = pp->enclosing_scope; - pp_cxx_decl_specifier_seq (pp, t); - pp_cxx_declarator (pp, t); + pp->declaration_specifiers (t); + pp->declarator (t); pp_needs_newline (pp) = true; pp->enclosing_scope = DECL_CONTEXT (t); if (DECL_SAVED_TREE (t)) - pp_cxx_statement (pp, DECL_SAVED_TREE (t)); + pp->statement (DECL_SAVED_TREE (t)); else pp_cxx_semicolon (pp); pp_newline_and_flush (pp); @@ -1649,19 +1634,19 @@ pp_cxx_function_definition (cxx_pretty_printer *pp, tree t) ptr-operator abstract-declarator(opt) direct-abstract-declarator */ -static void -pp_cxx_abstract_declarator (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::abstract_declarator (tree t) { if (TYPE_PTRMEM_P (t)) - pp_cxx_right_paren (pp); + pp_cxx_right_paren (this); else if (POINTER_TYPE_P (t)) { if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE) - pp_cxx_right_paren (pp); + pp_cxx_right_paren (this); t = TREE_TYPE (t); } - pp_cxx_direct_abstract_declarator (pp, t); + direct_abstract_declarator (t); } /* direct-abstract-declarator: @@ -1670,30 +1655,30 @@ pp_cxx_abstract_declarator (cxx_pretty_printer *pp, tree t) direct-abstract-declarator(opt) [ constant-expression(opt) ] ( abstract-declarator ) */ -static void -pp_cxx_direct_abstract_declarator (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::direct_abstract_declarator (tree t) { switch (TREE_CODE (t)) { case REFERENCE_TYPE: - pp_cxx_abstract_declarator (pp, t); + abstract_declarator (t); break; case RECORD_TYPE: if (TYPE_PTRMEMFUNC_P (t)) - pp_cxx_direct_abstract_declarator (pp, TYPE_PTRMEMFUNC_FN_TYPE (t)); + direct_abstract_declarator (TYPE_PTRMEMFUNC_FN_TYPE (t)); break; case METHOD_TYPE: case FUNCTION_TYPE: - pp_cxx_parameter_declaration_clause (pp, t); - pp_cxx_direct_abstract_declarator (pp, TREE_TYPE (t)); + pp_cxx_parameter_declaration_clause (this, t); + direct_abstract_declarator (TREE_TYPE (t)); if (TREE_CODE (t) == METHOD_TYPE) { - pp_base (pp)->padding = pp_before; - 
pp_cxx_cv_qualifier_seq (pp, class_of_this_parm (t)); + padding = pp_before; + pp_cxx_cv_qualifier_seq (this, class_of_this_parm (t)); } - pp_cxx_exception_specification (pp, t); + pp_cxx_exception_specification (this, t); break; case TYPENAME_TYPE: @@ -1704,7 +1689,7 @@ pp_cxx_direct_abstract_declarator (cxx_pretty_printer *pp, tree t) break; default: - pp_c_direct_abstract_declarator (pp_c_base (pp), t); + c_pretty_printer::direct_abstract_declarator (t); break; } } @@ -1712,11 +1697,11 @@ pp_cxx_direct_abstract_declarator (cxx_pretty_printer *pp, tree t) /* type-id: type-specifier-seq abstract-declarator(opt) */ -static void -pp_cxx_type_id (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::type_id (tree t) { - pp_flags saved_flags = pp_c_base (pp)->flags; - pp_c_base (pp)->flags |= pp_c_flag_abstract; + pp_flags saved_flags = flags; + flags |= pp_c_flag_abstract; switch (TREE_CODE (t)) { @@ -1735,20 +1720,20 @@ pp_cxx_type_id (cxx_pretty_printer *pp, tree t) case UNDERLYING_TYPE: case DECLTYPE_TYPE: case TEMPLATE_ID_EXPR: - pp_cxx_type_specifier_seq (pp, t); + pp_cxx_type_specifier_seq (this, t); break; case TYPE_PACK_EXPANSION: - pp_cxx_type_id (pp, PACK_EXPANSION_PATTERN (t)); - pp_cxx_ws_string (pp, "..."); + type_id (PACK_EXPANSION_PATTERN (t)); + pp_cxx_ws_string (this, "..."); break; default: - pp_c_type_id (pp_c_base (pp), t); + c_pretty_printer::type_id (t); break; } - pp_c_base (pp)->flags = saved_flags; + flags = saved_flags; } /* template-argument-list: @@ -1792,9 +1777,9 @@ pp_cxx_template_argument_list (cxx_pretty_printer *pp, tree t) if (TYPE_P (arg) || (TREE_CODE (arg) == TEMPLATE_DECL && TYPE_P (DECL_TEMPLATE_RESULT (arg)))) - pp_cxx_type_id (pp, arg); + pp->type_id (arg); else - pp_cxx_expression (pp, arg); + pp->expression (arg); } } } @@ -1806,34 +1791,34 @@ pp_cxx_exception_declaration (cxx_pretty_printer *pp, tree t) t = DECL_EXPR_DECL (t); pp_cxx_type_specifier_seq (pp, t); if (TYPE_P (t)) - pp_cxx_abstract_declarator (pp, t); + 
pp->abstract_declarator (t); else - pp_cxx_declarator (pp, t); + pp->declarator (t); } /* Statements. */ -static void -pp_cxx_statement (cxx_pretty_printer *pp, tree t) +void +cxx_pretty_printer::statement (tree t) { switch (TREE_CODE (t)) { case CTOR_INITIALIZER: - pp_cxx_ctor_initializer (pp, t); + pp_cxx_ctor_initializer (this, t); break; case USING_STMT: - pp_cxx_ws_string (pp, "using"); - pp_cxx_ws_string (pp, "namespace"); + pp_cxx_ws_string (this, "using"); + pp_cxx_ws_string (this, "namespace"); if (DECL_CONTEXT (t)) - pp_cxx_nested_name_specifier (pp, DECL_CONTEXT (t)); - pp_cxx_qualified_id (pp, USING_STMT_NAMESPACE (t)); + pp_cxx_nested_name_specifier (this, DECL_CONTEXT (t)); + pp_cxx_qualified_id (this, USING_STMT_NAMESPACE (t)); break; case USING_DECL: - pp_cxx_ws_string (pp, "using"); - pp_cxx_nested_name_specifier (pp, USING_DECL_SCOPE (t)); - pp_cxx_unqualified_id (pp, DECL_NAME (t)); + pp_cxx_ws_string (this, "using"); + pp_cxx_nested_name_specifier (this, USING_DECL_SCOPE (t)); + pp_cxx_unqualified_id (this, DECL_NAME (t)); break; case EH_SPEC_BLOCK: @@ -1842,15 +1827,15 @@ pp_cxx_statement (cxx_pretty_printer *pp, tree t) /* try-block: try compound-statement handler-seq */ case TRY_BLOCK: - pp_maybe_newline_and_indent (pp, 0); - pp_cxx_ws_string (pp, "try"); - pp_newline_and_indent (pp, 3); - pp_cxx_statement (pp, TRY_STMTS (t)); - pp_newline_and_indent (pp, -3); + pp_maybe_newline_and_indent (this, 0); + pp_cxx_ws_string (this, "try"); + pp_newline_and_indent (this, 3); + statement (TRY_STMTS (t)); + pp_newline_and_indent (this, -3); if (CLEANUP_P (t)) ; else - pp_cxx_statement (pp, TRY_HANDLERS (t)); + statement (TRY_HANDLERS (t)); break; /* @@ -1865,53 +1850,53 @@ pp_cxx_statement (cxx_pretty_printer *pp, tree t) type-specifier-seq abstract-declarator ... 
*/ case HANDLER: - pp_cxx_ws_string (pp, "catch"); - pp_cxx_left_paren (pp); - pp_cxx_exception_declaration (pp, HANDLER_PARMS (t)); - pp_cxx_right_paren (pp); - pp_indentation (pp) += 3; - pp_needs_newline (pp) = true; - pp_cxx_statement (pp, HANDLER_BODY (t)); - pp_indentation (pp) -= 3; - pp_needs_newline (pp) = true; + pp_cxx_ws_string (this, "catch"); + pp_cxx_left_paren (this); + pp_cxx_exception_declaration (this, HANDLER_PARMS (t)); + pp_cxx_right_paren (this); + pp_indentation (this) += 3; + pp_needs_newline (this) = true; + statement (HANDLER_BODY (t)); + pp_indentation (this) -= 3; + pp_needs_newline (this) = true; break; /* selection-statement: if ( expression ) statement if ( expression ) statement else statement */ case IF_STMT: - pp_cxx_ws_string (pp, "if"); - pp_cxx_whitespace (pp); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, IF_COND (t)); - pp_cxx_right_paren (pp); - pp_newline_and_indent (pp, 2); - pp_cxx_statement (pp, THEN_CLAUSE (t)); - pp_newline_and_indent (pp, -2); + pp_cxx_ws_string (this, "if"); + pp_cxx_whitespace (this); + pp_cxx_left_paren (this); + expression (IF_COND (t)); + pp_cxx_right_paren (this); + pp_newline_and_indent (this, 2); + statement (THEN_CLAUSE (t)); + pp_newline_and_indent (this, -2); if (ELSE_CLAUSE (t)) { tree else_clause = ELSE_CLAUSE (t); - pp_cxx_ws_string (pp, "else"); + pp_cxx_ws_string (this, "else"); if (TREE_CODE (else_clause) == IF_STMT) - pp_cxx_whitespace (pp); + pp_cxx_whitespace (this); else - pp_newline_and_indent (pp, 2); - pp_cxx_statement (pp, else_clause); + pp_newline_and_indent (this, 2); + statement (else_clause); if (TREE_CODE (else_clause) != IF_STMT) - pp_newline_and_indent (pp, -2); + pp_newline_and_indent (this, -2); } break; case SWITCH_STMT: - pp_cxx_ws_string (pp, "switch"); - pp_space (pp); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, SWITCH_STMT_COND (t)); - pp_cxx_right_paren (pp); - pp_indentation (pp) += 3; - pp_needs_newline (pp) = true; - pp_cxx_statement (pp, 
SWITCH_STMT_BODY (t)); - pp_newline_and_indent (pp, -3); + pp_cxx_ws_string (this, "switch"); + pp_space (this); + pp_cxx_left_paren (this); + expression (SWITCH_STMT_COND (t)); + pp_cxx_right_paren (this); + pp_indentation (this) += 3; + pp_needs_newline (this) = true; + statement (SWITCH_STMT_BODY (t)); + pp_newline_and_indent (this, -3); break; /* iteration-statement: @@ -1920,70 +1905,70 @@ pp_cxx_statement (cxx_pretty_printer *pp, tree t) for ( expression(opt) ; expression(opt) ; expression(opt) ) statement for ( declaration expression(opt) ; expression(opt) ) statement */ case WHILE_STMT: - pp_cxx_ws_string (pp, "while"); - pp_space (pp); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, WHILE_COND (t)); - pp_cxx_right_paren (pp); - pp_newline_and_indent (pp, 3); - pp_cxx_statement (pp, WHILE_BODY (t)); - pp_indentation (pp) -= 3; - pp_needs_newline (pp) = true; + pp_cxx_ws_string (this, "while"); + pp_space (this); + pp_cxx_left_paren (this); + expression (WHILE_COND (t)); + pp_cxx_right_paren (this); + pp_newline_and_indent (this, 3); + statement (WHILE_BODY (t)); + pp_indentation (this) -= 3; + pp_needs_newline (this) = true; break; case DO_STMT: - pp_cxx_ws_string (pp, "do"); - pp_newline_and_indent (pp, 3); - pp_cxx_statement (pp, DO_BODY (t)); - pp_newline_and_indent (pp, -3); - pp_cxx_ws_string (pp, "while"); - pp_space (pp); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, DO_COND (t)); - pp_cxx_right_paren (pp); - pp_cxx_semicolon (pp); - pp_needs_newline (pp) = true; + pp_cxx_ws_string (this, "do"); + pp_newline_and_indent (this, 3); + statement (DO_BODY (t)); + pp_newline_and_indent (this, -3); + pp_cxx_ws_string (this, "while"); + pp_space (this); + pp_cxx_left_paren (this); + expression (DO_COND (t)); + pp_cxx_right_paren (this); + pp_cxx_semicolon (this); + pp_needs_newline (this) = true; break; case FOR_STMT: - pp_cxx_ws_string (pp, "for"); - pp_space (pp); - pp_cxx_left_paren (pp); + pp_cxx_ws_string (this, "for"); + pp_space (this); + 
pp_cxx_left_paren (this); if (FOR_INIT_STMT (t)) - pp_cxx_statement (pp, FOR_INIT_STMT (t)); + statement (FOR_INIT_STMT (t)); else - pp_cxx_semicolon (pp); - pp_needs_newline (pp) = false; - pp_cxx_whitespace (pp); + pp_cxx_semicolon (this); + pp_needs_newline (this) = false; + pp_cxx_whitespace (this); if (FOR_COND (t)) - pp_cxx_expression (pp, FOR_COND (t)); - pp_cxx_semicolon (pp); - pp_needs_newline (pp) = false; - pp_cxx_whitespace (pp); + expression (FOR_COND (t)); + pp_cxx_semicolon (this); + pp_needs_newline (this) = false; + pp_cxx_whitespace (this); if (FOR_EXPR (t)) - pp_cxx_expression (pp, FOR_EXPR (t)); - pp_cxx_right_paren (pp); - pp_newline_and_indent (pp, 3); - pp_cxx_statement (pp, FOR_BODY (t)); - pp_indentation (pp) -= 3; - pp_needs_newline (pp) = true; + expression (FOR_EXPR (t)); + pp_cxx_right_paren (this); + pp_newline_and_indent (this, 3); + statement (FOR_BODY (t)); + pp_indentation (this) -= 3; + pp_needs_newline (this) = true; break; case RANGE_FOR_STMT: - pp_cxx_ws_string (pp, "for"); - pp_space (pp); - pp_cxx_left_paren (pp); - pp_cxx_statement (pp, RANGE_FOR_DECL (t)); - pp_space (pp); - pp_needs_newline (pp) = false; - pp_colon (pp); - pp_space (pp); - pp_cxx_statement (pp, RANGE_FOR_EXPR (t)); - pp_cxx_right_paren (pp); - pp_newline_and_indent (pp, 3); - pp_cxx_statement (pp, FOR_BODY (t)); - pp_indentation (pp) -= 3; - pp_needs_newline (pp) = true; + pp_cxx_ws_string (this, "for"); + pp_space (this); + pp_cxx_left_paren (this); + statement (RANGE_FOR_DECL (t)); + pp_space (this); + pp_needs_newline (this) = false; + pp_colon (this); + pp_space (this); + statement (RANGE_FOR_EXPR (t)); + pp_cxx_right_paren (this); + pp_newline_and_indent (this, 3); + statement (FOR_BODY (t)); + pp_indentation (this) -= 3; + pp_needs_newline (this) = true; break; /* jump-statement: @@ -1992,36 +1977,36 @@ pp_cxx_statement (cxx_pretty_printer *pp, tree t) return expression(opt) ; */ case BREAK_STMT: case CONTINUE_STMT: - pp_string (pp, TREE_CODE (t) == 
BREAK_STMT ? "break" : "continue"); - pp_cxx_semicolon (pp); - pp_needs_newline (pp) = true; + pp_string (this, TREE_CODE (t) == BREAK_STMT ? "break" : "continue"); + pp_cxx_semicolon (this); + pp_needs_newline (this) = true; break; /* expression-statement: expression(opt) ; */ case EXPR_STMT: - pp_cxx_expression (pp, EXPR_STMT_EXPR (t)); - pp_cxx_semicolon (pp); - pp_needs_newline (pp) = true; + expression (EXPR_STMT_EXPR (t)); + pp_cxx_semicolon (this); + pp_needs_newline (this) = true; break; case CLEANUP_STMT: - pp_cxx_ws_string (pp, "try"); - pp_newline_and_indent (pp, 2); - pp_cxx_statement (pp, CLEANUP_BODY (t)); - pp_newline_and_indent (pp, -2); - pp_cxx_ws_string (pp, CLEANUP_EH_ONLY (t) ? "catch" : "finally"); - pp_newline_and_indent (pp, 2); - pp_cxx_statement (pp, CLEANUP_EXPR (t)); - pp_newline_and_indent (pp, -2); + pp_cxx_ws_string (this, "try"); + pp_newline_and_indent (this, 2); + statement (CLEANUP_BODY (t)); + pp_newline_and_indent (this, -2); + pp_cxx_ws_string (this, CLEANUP_EH_ONLY (t) ? 
"catch" : "finally"); + pp_newline_and_indent (this, 2); + statement (CLEANUP_EXPR (t)); + pp_newline_and_indent (this, -2); break; case STATIC_ASSERT: - pp_cxx_declaration (pp, t); + declaration (t); break; default: - pp_c_statement (pp_c_base (pp), t); + c_pretty_printer::statement (t); break; } } @@ -2078,7 +2063,7 @@ pp_cxx_namespace_alias_definition (cxx_pretty_printer *pp, tree t) static void pp_cxx_simple_declaration (cxx_pretty_printer *pp, tree t) { - pp_cxx_decl_specifier_seq (pp, t); + pp->declaration_specifiers (t); pp_cxx_init_declarator (pp, t); pp_cxx_semicolon (pp); pp_needs_newline (pp) = true; @@ -2156,7 +2141,7 @@ pp_cxx_canonical_template_parameter (cxx_pretty_printer *pp, tree parm) parm = TEMPLATE_TYPE_PARM_INDEX (parm); pp_cxx_begin_template_argument_list (pp); - pp_cxx_ws_string (pp, M_("template-parameter-")); + pp->translate_string ("template-parameter-"); pp_wide_integer (pp, TEMPLATE_PARM_LEVEL (parm)); pp_minus (pp); pp_wide_integer (pp, TEMPLATE_PARM_IDX (parm) + 1); @@ -2218,32 +2203,32 @@ pp_cxx_explicit_instantiation (cxx_pretty_printer *pp, tree t) using-directive static_assert-declaration */ void -pp_cxx_declaration (cxx_pretty_printer *pp, tree t) +cxx_pretty_printer::declaration (tree t) { if (TREE_CODE (t) == STATIC_ASSERT) { - pp_cxx_ws_string (pp, "static_assert"); - pp_cxx_left_paren (pp); - pp_cxx_expression (pp, STATIC_ASSERT_CONDITION (t)); - pp_cxx_separate_with (pp, ','); - pp_cxx_expression (pp, STATIC_ASSERT_MESSAGE (t)); - pp_cxx_right_paren (pp); + pp_cxx_ws_string (this, "static_assert"); + pp_cxx_left_paren (this); + expression (STATIC_ASSERT_CONDITION (t)); + pp_cxx_separate_with (this, ','); + expression (STATIC_ASSERT_MESSAGE (t)); + pp_cxx_right_paren (this); } else if (!DECL_LANG_SPECIFIC (t)) - pp_cxx_simple_declaration (pp, t); + pp_cxx_simple_declaration (this, t); else if (DECL_USE_TEMPLATE (t)) switch (DECL_USE_TEMPLATE (t)) { case 1: - pp_cxx_template_declaration (pp, t); + pp_cxx_template_declaration 
(this, t); break; case 2: - pp_cxx_explicit_specialization (pp, t); + pp_cxx_explicit_specialization (this, t); break; case 3: - pp_cxx_explicit_instantiation (pp, t); + pp_cxx_explicit_instantiation (this, t); break; default: @@ -2253,25 +2238,25 @@ pp_cxx_declaration (cxx_pretty_printer *pp, tree t) { case VAR_DECL: case TYPE_DECL: - pp_cxx_simple_declaration (pp, t); + pp_cxx_simple_declaration (this, t); break; case FUNCTION_DECL: if (DECL_SAVED_TREE (t)) - pp_cxx_function_definition (pp, t); + pp_cxx_function_definition (this, t); else - pp_cxx_simple_declaration (pp, t); + pp_cxx_simple_declaration (this, t); break; case NAMESPACE_DECL: if (DECL_NAMESPACE_ALIAS (t)) - pp_cxx_namespace_alias_definition (pp, t); + pp_cxx_namespace_alias_definition (this, t); else - pp_cxx_original_namespace_definition (pp, t); + pp_cxx_original_namespace_definition (this, t); break; default: - pp_unsupported_tree (pp, t); + pp_unsupported_tree (this, t); break; } } @@ -2283,9 +2268,9 @@ pp_cxx_typeid_expression (cxx_pretty_printer *pp, tree t) pp_cxx_ws_string (pp, "typeid"); pp_cxx_left_paren (pp); if (TYPE_P (t)) - pp_cxx_type_id (pp, t); + pp->type_id (t); else - pp_cxx_expression (pp, t); + pp->expression (t); pp_cxx_right_paren (pp); } @@ -2294,9 +2279,9 @@ pp_cxx_va_arg_expression (cxx_pretty_printer *pp, tree t) { pp_cxx_ws_string (pp, "va_arg"); pp_cxx_left_paren (pp); - pp_cxx_assignment_expression (pp, TREE_OPERAND (t, 0)); + pp->assignment_expression (TREE_OPERAND (t, 0)); pp_cxx_separate_with (pp, ','); - pp_cxx_type_id (pp, TREE_TYPE (t)); + pp->type_id (TREE_TYPE (t)); pp_cxx_right_paren (pp); } @@ -2309,7 +2294,7 @@ pp_cxx_offsetof_expression_1 (cxx_pretty_printer *pp, tree t) if (TREE_CODE (TREE_OPERAND (t, 0)) == STATIC_CAST_EXPR && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (t, 0)))) { - pp_cxx_type_id (pp, TREE_TYPE (TREE_TYPE (TREE_OPERAND (t, 0)))); + pp->type_id (TREE_TYPE (TREE_TYPE (TREE_OPERAND (t, 0)))); pp_cxx_separate_with (pp, ','); return true; } @@ 
-2319,13 +2304,13 @@ pp_cxx_offsetof_expression_1 (cxx_pretty_printer *pp, tree t) return false; if (TREE_CODE (TREE_OPERAND (t, 0)) != ARROW_EXPR) pp_cxx_dot (pp); - pp_cxx_expression (pp, TREE_OPERAND (t, 1)); + pp->expression (TREE_OPERAND (t, 1)); return true; case ARRAY_REF: if (!pp_cxx_offsetof_expression_1 (pp, TREE_OPERAND (t, 0))) return false; pp_left_bracket (pp); - pp_cxx_expression (pp, TREE_OPERAND (t, 1)); + pp->expression (TREE_OPERAND (t, 1)); pp_right_bracket (pp); return true; default: @@ -2339,7 +2324,7 @@ pp_cxx_offsetof_expression (cxx_pretty_printer *pp, tree t) pp_cxx_ws_string (pp, "offsetof"); pp_cxx_left_paren (pp); if (!pp_cxx_offsetof_expression_1 (pp, TREE_OPERAND (t, 0))) - pp_cxx_expression (pp, TREE_OPERAND (t, 0)); + pp->expression (TREE_OPERAND (t, 0)); pp_cxx_right_paren (pp); } @@ -2419,12 +2404,12 @@ pp_cxx_trait_expression (cxx_pretty_printer *pp, tree t) } pp_cxx_left_paren (pp); - pp_cxx_type_id (pp, TRAIT_EXPR_TYPE1 (t)); + pp->type_id (TRAIT_EXPR_TYPE1 (t)); if (kind == CPTK_IS_BASE_OF || kind == CPTK_IS_CONVERTIBLE_TO) { pp_cxx_separate_with (pp, ','); - pp_cxx_type_id (pp, TRAIT_EXPR_TYPE2 (t)); + pp->type_id (TRAIT_EXPR_TYPE2 (t)); } pp_cxx_right_paren (pp); @@ -2434,35 +2419,12 @@ typedef c_pretty_print_fn pp_fun; /* Initialization of a C++ pretty-printer object. 
*/ -void -pp_cxx_pretty_printer_init (cxx_pretty_printer *pp) +cxx_pretty_printer::cxx_pretty_printer () + : c_pretty_printer (), + enclosing_scope (global_namespace) { - pp_c_pretty_printer_init (pp_c_base (pp)); - pp_set_line_maximum_length (pp, 0); - - pp->c_base.declaration = (pp_fun) pp_cxx_declaration; - pp->c_base.declaration_specifiers = (pp_fun) pp_cxx_decl_specifier_seq; - pp->c_base.function_specifier = (pp_fun) pp_cxx_function_specifier; - pp->c_base.type_specifier_seq = (pp_fun) pp_cxx_type_specifier_seq; - pp->c_base.declarator = (pp_fun) pp_cxx_declarator; - pp->c_base.direct_declarator = (pp_fun) pp_cxx_direct_declarator; - pp->c_base.parameter_list = (pp_fun) pp_cxx_parameter_declaration_clause; - pp->c_base.type_id = (pp_fun) pp_cxx_type_id; - pp->c_base.abstract_declarator = (pp_fun) pp_cxx_abstract_declarator; - pp->c_base.direct_abstract_declarator = - (pp_fun) pp_cxx_direct_abstract_declarator; - pp->c_base.simple_type_specifier = (pp_fun)pp_cxx_simple_type_specifier; - - /* pp->c_base.statement = (pp_fun) pp_cxx_statement; */ - - pp->c_base.constant = (pp_fun) pp_cxx_constant; - pp->c_base.id_expression = (pp_fun) pp_cxx_id_expression; - pp->c_base.primary_expression = (pp_fun) pp_cxx_primary_expression; - pp->c_base.postfix_expression = (pp_fun) pp_cxx_postfix_expression; - pp->c_base.unary_expression = (pp_fun) pp_cxx_unary_expression; - pp->c_base.multiplicative_expression = (pp_fun) pp_cxx_multiplicative_expression; - pp->c_base.conditional_expression = (pp_fun) pp_cxx_conditional_expression; - pp->c_base.assignment_expression = (pp_fun) pp_cxx_assignment_expression; - pp->c_base.expression = (pp_fun) pp_cxx_expression; - pp->enclosing_scope = global_namespace; + pp_set_line_maximum_length (this, 0); + + type_specifier_seq = (pp_fun) pp_cxx_type_specifier_seq; + parameter_list = (pp_fun) pp_cxx_parameter_declaration_clause; } diff --git a/gcc/cp/cxx-pretty-print.h b/gcc/cp/cxx-pretty-print.h index 0f7dc4a8174..819bbacae75 100644 --- 
a/gcc/cp/cxx-pretty-print.h +++ b/gcc/cp/cxx-pretty-print.h @@ -23,54 +23,69 @@ along with GCC; see the file COPYING3. If not see #include "c-family/c-pretty-print.h" -#undef pp_c_base -#define pp_c_base(PP) (&(PP)->c_base) - -typedef enum +enum cxx_pretty_printer_flags { /* Ask for a qualified-id. */ pp_cxx_flag_default_argument = 1 << pp_c_flag_last_bit +}; -} cxx_pretty_printer_flags; - -typedef struct +struct cxx_pretty_printer : c_pretty_printer { - c_pretty_printer c_base; + cxx_pretty_printer (); + + void constant (tree); + void id_expression (tree); + void primary_expression (tree); + void postfix_expression (tree); + void unary_expression (tree); + void multiplicative_expression (tree); + void conditional_expression (tree); + void assignment_expression (tree); + void expression (tree); + void type_id (tree); + void statement (tree); + void declaration (tree); + void declaration_specifiers (tree); + void simple_type_specifier (tree); + void function_specifier (tree); + void declarator (tree); + void direct_declarator (tree); + void abstract_declarator (tree); + void direct_abstract_declarator (tree); + /* This is the enclosing scope of the entity being pretty-printed. 
*/ tree enclosing_scope; -} cxx_pretty_printer; +}; #define pp_cxx_cv_qualifier_seq(PP, T) \ - pp_c_type_qualifier_list (pp_c_base (PP), T) + pp_c_type_qualifier_list (PP, T) #define pp_cxx_cv_qualifiers(PP, CV) \ - pp_c_cv_qualifiers (pp_c_base (PP), CV, false) - -#define pp_cxx_whitespace(PP) pp_c_whitespace (pp_c_base (PP)) -#define pp_cxx_left_paren(PP) pp_c_left_paren (pp_c_base (PP)) -#define pp_cxx_right_paren(PP) pp_c_right_paren (pp_c_base (PP)) -#define pp_cxx_left_brace(PP) pp_c_left_brace (pp_c_base (PP)) -#define pp_cxx_right_brace(PP) pp_c_right_brace (pp_c_base (PP)) -#define pp_cxx_left_bracket(PP) pp_c_left_bracket (pp_c_base (PP)) -#define pp_cxx_right_bracket(PP) pp_c_right_bracket (pp_c_base (PP)) -#define pp_cxx_dot(PP) pp_c_dot (pp_c_base (PP)) -#define pp_cxx_ampersand(PP) pp_c_ampersand (pp_c_base (PP)) -#define pp_cxx_star(PP) pp_c_star (pp_c_base (PP)) -#define pp_cxx_arrow(PP) pp_c_arrow (pp_c_base (PP)) -#define pp_cxx_semicolon(PP) pp_c_semicolon (pp_c_base (PP)) -#define pp_cxx_complement(PP) pp_c_complement (pp_c_base (PP)) - -#define pp_cxx_ws_string(PP, I) pp_c_ws_string (pp_c_base (PP), I) -#define pp_cxx_identifier(PP, I) pp_c_identifier (pp_c_base (PP), I) + pp_c_cv_qualifiers (PP, CV, false) + +#define pp_cxx_whitespace(PP) pp_c_whitespace (PP) +#define pp_cxx_left_paren(PP) pp_c_left_paren (PP) +#define pp_cxx_right_paren(PP) pp_c_right_paren (PP) +#define pp_cxx_left_brace(PP) pp_c_left_brace (PP) +#define pp_cxx_right_brace(PP) pp_c_right_brace (PP) +#define pp_cxx_left_bracket(PP) pp_c_left_bracket (PP) +#define pp_cxx_right_bracket(PP) pp_c_right_bracket (PP) +#define pp_cxx_dot(PP) pp_c_dot (PP) +#define pp_cxx_ampersand(PP) pp_c_ampersand (PP) +#define pp_cxx_star(PP) pp_c_star (PP) +#define pp_cxx_arrow(PP) pp_c_arrow (PP) +#define pp_cxx_semicolon(PP) pp_c_semicolon (PP) +#define pp_cxx_complement(PP) pp_c_complement (PP) + +#define pp_cxx_ws_string(PP, I) pp_c_ws_string (PP, I) +#define pp_cxx_identifier(PP, I) 
pp_c_identifier (PP, I) #define pp_cxx_tree_identifier(PP, T) \ - pp_c_tree_identifier (pp_c_base (PP), T) + pp_c_tree_identifier (PP, T) -void pp_cxx_pretty_printer_init (cxx_pretty_printer *); void pp_cxx_begin_template_argument_list (cxx_pretty_printer *); void pp_cxx_end_template_argument_list (cxx_pretty_printer *); void pp_cxx_colon_colon (cxx_pretty_printer *); void pp_cxx_separate_with (cxx_pretty_printer *, int); -void pp_cxx_declaration (cxx_pretty_printer *, tree); void pp_cxx_canonical_template_parameter (cxx_pretty_printer *, tree); void pp_cxx_trait_expression (cxx_pretty_printer *, tree); void pp_cxx_va_arg_expression (cxx_pretty_printer *, tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 7d6fe0de8c1..b4223aa5706 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -75,7 +75,6 @@ static tree grokvardecl (tree, tree, const cp_decl_specifier_seq *, static int check_static_variable_definition (tree, tree); static void record_unknown_type (tree, const char *); static tree builtin_function_1 (tree, tree, bool); -static tree build_library_fn_1 (tree, enum tree_code, tree); static int member_function_or_else (tree, tree, enum overload_flags); static void bad_specifiers (tree, enum bad_spec_place, int, int, int, int, int); @@ -107,8 +106,8 @@ static tree cp_make_fname_decl (location_t, tree, int); static void initialize_predefined_identifiers (void); static tree check_special_function_return_type (special_function_kind, tree, tree); -static tree push_cp_library_fn (enum tree_code, tree); -static tree build_cp_library_fn (tree, enum tree_code, tree); +static tree push_cp_library_fn (enum tree_code, tree, int); +static tree build_cp_library_fn (tree, enum tree_code, tree, int); static void store_parm_decls (tree); static void initialize_local_var (tree, tree); static void expand_static_init (tree, tree); @@ -1146,8 +1145,9 @@ warn_extern_redeclared_static (tree newdecl, tree olddecl) && DECL_ARTIFICIAL (olddecl)) return; - permerror (input_location, "%qD 
was declared % and later %", newdecl); - permerror (input_location, "previous declaration of %q+D", olddecl); + if (permerror (input_location, + "%qD was declared % and later %", newdecl)) + inform (input_location, "previous declaration of %q+D", olddecl); } /* NEW_DECL is a redeclaration of OLD_DECL; both are functions or @@ -1287,19 +1287,19 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) && DECL_UNINLINABLE (olddecl) && lookup_attribute ("noinline", DECL_ATTRIBUTES (olddecl))) { - warning (OPT_Wattributes, "function %q+D redeclared as inline", - newdecl); - warning (OPT_Wattributes, "previous declaration of %q+D " - "with attribute noinline", olddecl); + if (warning (OPT_Wattributes, "function %q+D redeclared as inline", + newdecl)) + inform (input_location, "previous declaration of %q+D " + "with attribute noinline", olddecl); } else if (DECL_DECLARED_INLINE_P (olddecl) && DECL_UNINLINABLE (newdecl) && lookup_attribute ("noinline", DECL_ATTRIBUTES (newdecl))) { - warning (OPT_Wattributes, "function %q+D redeclared with " - "attribute noinline", newdecl); - warning (OPT_Wattributes, "previous declaration of %q+D was inline", - olddecl); + if (warning (OPT_Wattributes, "function %q+D redeclared with " + "attribute noinline", newdecl)) + inform (input_location, "previous declaration of %q+D was inline", + olddecl); } } @@ -1485,7 +1485,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) error ("%q#D redeclared as different kind of symbol", newdecl); if (TREE_CODE (olddecl) == TREE_LIST) olddecl = TREE_VALUE (olddecl); - error ("previous declaration of %q+#D", olddecl); + inform (input_location, "previous declaration of %q+#D", olddecl); return error_mark_node; } @@ -1550,7 +1550,8 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) else { error ("conflicting declaration %q#D", newdecl); - error ("%q+D has a previous declaration as %q#D", olddecl, olddecl); + inform (input_location, + "%q+D has a 
previous declaration as %q#D", olddecl, olddecl); return error_mark_node; } } @@ -1613,9 +1614,10 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) { error_at (DECL_SOURCE_LOCATION (newdecl), errmsg, newdecl); if (DECL_NAME (olddecl) != NULL_TREE) - error ((DECL_INITIAL (olddecl) && namespace_bindings_p ()) - ? G_("%q+#D previously defined here") - : G_("%q+#D previously declared here"), olddecl); + inform (input_location, + (DECL_INITIAL (olddecl) && namespace_bindings_p ()) + ? G_("%q+#D previously defined here") + : G_("%q+#D previously declared here"), olddecl); return error_mark_node; } else if (TREE_CODE (olddecl) == FUNCTION_DECL @@ -1759,8 +1761,10 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) && (! DECL_TEMPLATE_SPECIALIZATION (newdecl) || DECL_TEMPLATE_SPECIALIZATION (olddecl))) { - warning (OPT_Wredundant_decls, "redundant redeclaration of %qD in same scope", newdecl); - warning (OPT_Wredundant_decls, "previous declaration of %q+D", olddecl); + if (warning (OPT_Wredundant_decls, + "redundant redeclaration of %qD in same scope", + newdecl)) + inform (input_location, "previous declaration of %q+D", olddecl); } if (!(DECL_TEMPLATE_INSTANTIATION (olddecl) @@ -3098,7 +3102,9 @@ case_conversion (tree type, tree value) { if (INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (type)) type = type_promotes_to (type); - value = perform_implicit_conversion (type, value, tf_warning_or_error); + value = (perform_implicit_conversion_flags + (type, value, tf_warning_or_error, + LOOKUP_IMPLICIT | LOOKUP_NO_NON_INTEGRAL)); } return cxx_constant_value (value); } @@ -3793,10 +3799,10 @@ cxx_init_decl_processing (void) newtype = build_exception_variant (newtype, new_eh_spec); deltype = cp_build_type_attribute_variant (void_ftype_ptr, extvisattr); deltype = build_exception_variant (deltype, empty_except_spec); - push_cp_library_fn (NEW_EXPR, newtype); - push_cp_library_fn (VEC_NEW_EXPR, newtype); - global_delete_fndecl = push_cp_library_fn 
(DELETE_EXPR, deltype); - push_cp_library_fn (VEC_DELETE_EXPR, deltype); + push_cp_library_fn (NEW_EXPR, newtype, 0); + push_cp_library_fn (VEC_NEW_EXPR, newtype, 0); + global_delete_fndecl = push_cp_library_fn (DELETE_EXPR, deltype, ECF_NOTHROW); + push_cp_library_fn (VEC_DELETE_EXPR, deltype, ECF_NOTHROW); nullptr_type_node = make_node (NULLPTR_TYPE); TYPE_SIZE (nullptr_type_node) = bitsize_int (GET_MODE_BITSIZE (ptr_mode)); @@ -3809,7 +3815,8 @@ cxx_init_decl_processing (void) } abort_fndecl - = build_library_fn_ptr ("__cxa_pure_virtual", void_ftype); + = build_library_fn_ptr ("__cxa_pure_virtual", void_ftype, + ECF_NORETURN | ECF_NOTHROW); /* Perform other language dependent initializations. */ init_class_processing (); @@ -4000,7 +4007,8 @@ cxx_builtin_function_ext_scope (tree decl) function. Not called directly. */ static tree -build_library_fn_1 (tree name, enum tree_code operator_code, tree type) +build_library_fn (tree name, enum tree_code operator_code, tree type, + int ecf_flags) { tree fn = build_lang_decl (FUNCTION_DECL, name, type); DECL_EXTERNAL (fn) = 1; @@ -4012,28 +4020,17 @@ build_library_fn_1 (tree name, enum tree_code operator_code, tree type) external shared object. */ DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT; DECL_VISIBILITY_SPECIFIED (fn) = 1; - return fn; -} - -/* Returns the _DECL for a library function with C linkage. - We assume that such functions never throw; if this is incorrect, - callers should unset TREE_NOTHROW. */ - -static tree -build_library_fn (tree name, tree type) -{ - tree fn = build_library_fn_1 (name, ERROR_MARK, type); - TREE_NOTHROW (fn) = 1; + set_call_expr_flags (fn, ecf_flags); return fn; } /* Returns the _DECL for a library function with C++ linkage. 
*/ static tree -build_cp_library_fn (tree name, enum tree_code operator_code, tree type) +build_cp_library_fn (tree name, enum tree_code operator_code, tree type, + int ecf_flags) { - tree fn = build_library_fn_1 (name, operator_code, type); - TREE_NOTHROW (fn) = TYPE_NOTHROW_P (type); + tree fn = build_library_fn (name, operator_code, type, ecf_flags); DECL_CONTEXT (fn) = FROB_CONTEXT (current_namespace); SET_DECL_LANGUAGE (fn, lang_cplusplus); return fn; @@ -4043,18 +4040,19 @@ build_cp_library_fn (tree name, enum tree_code operator_code, tree type) IDENTIFIER_NODE. */ tree -build_library_fn_ptr (const char* name, tree type) +build_library_fn_ptr (const char* name, tree type, int ecf_flags) { - return build_library_fn (get_identifier (name), type); + return build_library_fn (get_identifier (name), ERROR_MARK, type, ecf_flags); } /* Like build_cp_library_fn, but takes a C string instead of an IDENTIFIER_NODE. */ tree -build_cp_library_fn_ptr (const char* name, tree type) +build_cp_library_fn_ptr (const char* name, tree type, int ecf_flags) { - return build_cp_library_fn (get_identifier (name), ERROR_MARK, type); + return build_cp_library_fn (get_identifier (name), ERROR_MARK, type, + ecf_flags); } /* Like build_library_fn, but also pushes the function so that we will @@ -4062,14 +4060,14 @@ build_cp_library_fn_ptr (const char* name, tree type) may throw exceptions listed in RAISES. */ tree -push_library_fn (tree name, tree type, tree raises) +push_library_fn (tree name, tree type, tree raises, int ecf_flags) { tree fn; if (raises) type = build_exception_variant (type, raises); - fn = build_library_fn (name, type); + fn = build_library_fn (name, ERROR_MARK, type, ecf_flags); pushdecl_top_level (fn); return fn; } @@ -4078,11 +4076,12 @@ push_library_fn (tree name, tree type, tree raises) will be found by normal lookup. 
*/ static tree -push_cp_library_fn (enum tree_code operator_code, tree type) +push_cp_library_fn (enum tree_code operator_code, tree type, + int ecf_flags) { tree fn = build_cp_library_fn (ansi_opname (operator_code), operator_code, - type); + type, ecf_flags); pushdecl (fn); if (flag_tm) apply_tm_attr (fn, get_identifier ("transaction_safe")); @@ -4093,10 +4092,10 @@ push_cp_library_fn (enum tree_code operator_code, tree type) a FUNCTION_TYPE. */ tree -push_void_library_fn (tree name, tree parmtypes) +push_void_library_fn (tree name, tree parmtypes, int ecf_flags) { tree type = build_function_type (void_type_node, parmtypes); - return push_library_fn (name, type, NULL_TREE); + return push_library_fn (name, type, NULL_TREE, ecf_flags); } /* Like push_library_fn, but also note that this function throws @@ -4105,9 +4104,7 @@ push_void_library_fn (tree name, tree parmtypes) tree push_throw_library_fn (tree name, tree type) { - tree fn = push_library_fn (name, type, NULL_TREE); - TREE_THIS_VOLATILE (fn) = 1; - TREE_NOTHROW (fn) = 0; + tree fn = push_library_fn (name, type, NULL_TREE, ECF_NORETURN); return fn; } @@ -6637,7 +6634,7 @@ get_atexit_node (void) /* Now, build the function declaration. */ push_lang_context (lang_name_c); - atexit_fndecl = build_library_fn_ptr (name, fn_type); + atexit_fndecl = build_library_fn_ptr (name, fn_type, ECF_LEAF | ECF_NOTHROW); mark_used (atexit_fndecl); pop_lang_context (); atexit_node = decay_conversion (atexit_fndecl, tf_warning_or_error); @@ -6659,7 +6656,8 @@ get_thread_atexit_node (void) NULL_TREE); /* Now, build the function declaration. 
*/ - tree atexit_fndecl = build_library_fn_ptr ("__cxa_thread_atexit", fn_type); + tree atexit_fndecl = build_library_fn_ptr ("__cxa_thread_atexit", fn_type, + ECF_LEAF | ECF_NOTHROW); return decay_conversion (atexit_fndecl, tf_warning_or_error); } @@ -6985,15 +6983,17 @@ expand_static_init (tree decl, tree init) (acquire_name, build_function_type_list (integer_type_node, TREE_TYPE (guard_addr), NULL_TREE), - NULL_TREE); + NULL_TREE, ECF_NOTHROW | ECF_LEAF); if (!release_fn || !abort_fn) vfntype = build_function_type_list (void_type_node, TREE_TYPE (guard_addr), NULL_TREE); if (!release_fn) - release_fn = push_library_fn (release_name, vfntype, NULL_TREE); + release_fn = push_library_fn (release_name, vfntype, NULL_TREE, + ECF_NOTHROW | ECF_LEAF); if (!abort_fn) - abort_fn = push_library_fn (abort_name, vfntype, NULL_TREE); + abort_fn = push_library_fn (abort_name, vfntype, NULL_TREE, + ECF_NOTHROW | ECF_LEAF); inner_if_stmt = begin_if_stmt (); finish_if_stmt_cond (build_call_n (acquire_fn, 1, guard_addr), @@ -7420,17 +7420,6 @@ grokfndecl (tree ctype, the information in the TEMPLATE_ID_EXPR. */ SET_DECL_IMPLICIT_INSTANTIATION (decl); - if (TREE_CODE (fns) == COMPONENT_REF) - { - /* Due to bison parser ickiness, we will have already looked - up an operator_name or PFUNCNAME within the current class - (see template_id in parse.y). If the current class contains - such a name, we'll get a COMPONENT_REF here. Undo that. 
*/ - - gcc_assert (TREE_TYPE (TREE_OPERAND (fns, 0)) - == current_class_type); - fns = TREE_OPERAND (fns, 1); - } gcc_assert (identifier_p (fns) || TREE_CODE (fns) == OVERLOAD); DECL_TEMPLATE_INFO (decl) = build_template_info (fns, args); @@ -11836,14 +11825,14 @@ check_elaborated_type_specifier (enum tag_types tag_code, && tag_code != typename_type) { error ("%qT referred to as %qs", type, tag_name (tag_code)); - error ("%q+T has a previous declaration here", type); + inform (input_location, "%q+T has a previous declaration here", type); return error_mark_node; } else if (TREE_CODE (type) != ENUMERAL_TYPE && tag_code == enum_type) { error ("%qT referred to as enum", type); - error ("%q+T has a previous declaration here", type); + inform (input_location, "%q+T has a previous declaration here", type); return error_mark_node; } else if (!allow_template_p @@ -14266,13 +14255,6 @@ cxx_maybe_build_cleanup (tree decl, tsubst_flags_t complain) } -/* When a stmt has been parsed, this function is called. */ - -void -finish_stmt (void) -{ -} - /* Return the FUNCTION_TYPE that corresponds to MEMFNTYPE, which can be a FUNCTION_DECL, METHOD_TYPE, FUNCTION_TYPE, pointer or reference to METHOD_TYPE or FUNCTION_TYPE, or pointer to member function. */ diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index 1573cede899..d5d29127cfd 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -3039,7 +3039,8 @@ generate_tls_wrapper (tree fn) } /* Start the process of running a particular set of global constructors - or destructors. Subroutine of do_[cd]tors. */ + or destructors. Subroutine of do_[cd]tors. Also called from + vtv_start_verification_constructor_init_function. 
*/ static tree start_objects (int method_type, int initp) @@ -4353,8 +4354,25 @@ cp_write_global_declarations (void) timevar_stop (TV_PHASE_DEFERRED); timevar_start (TV_PHASE_OPT_GEN); + if (flag_vtable_verify) + { + vtv_recover_class_info (); + vtv_compute_class_hierarchy_transitive_closure (); + vtv_build_vtable_verify_fndecl (); + } + finalize_compilation_unit (); + if (flag_vtable_verify) + { + /* Generate the special constructor initialization function that + calls __VLTRegisterPairs, and give it a very high + initialization priority. This must be done after + finalize_compilation_unit so that we have accurate + information about which vtable will actually be emitted. */ + vtv_generate_init_routine (); + } + timevar_stop (TV_PHASE_OPT_GEN); timevar_start (TV_PHASE_CHECK_DBGINFO); @@ -4731,4 +4749,23 @@ mark_used (tree decl) return mark_used (decl, tf_warning_or_error); } +tree +vtv_start_verification_constructor_init_function (void) +{ + return start_objects ('I', MAX_RESERVED_INIT_PRIORITY - 1); +} + +tree +vtv_finish_verification_constructor_init_function (tree function_body) +{ + tree fn; + + finish_compound_stmt (function_body); + fn = finish_function (0); + DECL_STATIC_CONSTRUCTOR (fn) = 1; + decl_init_priority_insert (fn, MAX_RESERVED_INIT_PRIORITY - 1); + + return fn; +} + #include "gt-cp-decl2.h" diff --git a/gcc/cp/error.c b/gcc/cp/error.c index a8f52cda0ae..78c74b65e67 100644 --- a/gcc/cp/error.c +++ b/gcc/cp/error.c @@ -32,6 +32,9 @@ along with GCC; see the file COPYING3. If not see #include "tree-pretty-print.h" #include "pointer-set.h" #include "c-family/c-objc.h" +#include "ubsan.h" + +#include // For placement-new. 
#define pp_separate_with_comma(PP) pp_cxx_separate_with (PP, ',') #define pp_separate_with_semicolon(PP) pp_cxx_separate_with (PP, ';') @@ -59,33 +62,34 @@ static const char *op_to_string (enum tree_code); static const char *parm_to_string (int); static const char *type_to_string (tree, int); -static void dump_alias_template_specialization (tree, int); -static void dump_type (tree, int); -static void dump_typename (tree, int); -static void dump_simple_decl (tree, tree, int); -static void dump_decl (tree, int); -static void dump_template_decl (tree, int); -static void dump_function_decl (tree, int); -static void dump_expr (tree, int); -static void dump_unary_op (const char *, tree, int); -static void dump_binary_op (const char *, tree, int); -static void dump_aggr_type (tree, int); -static void dump_type_prefix (tree, int); -static void dump_type_suffix (tree, int); -static void dump_function_name (tree, int); -static void dump_call_expr_args (tree, int, bool); -static void dump_aggr_init_expr_args (tree, int, bool); -static void dump_expr_list (tree, int); -static void dump_global_iord (tree); -static void dump_parameters (tree, int); -static void dump_ref_qualifier (tree, int); -static void dump_exception_spec (tree, int); -static void dump_template_argument (tree, int); -static void dump_template_argument_list (tree, int); -static void dump_template_parameter (tree, int); -static void dump_template_bindings (tree, tree, vec *); -static void dump_scope (tree, int); -static void dump_template_parms (tree, int, int); +static void dump_alias_template_specialization (cxx_pretty_printer *, tree, int); +static void dump_type (cxx_pretty_printer *, tree, int); +static void dump_typename (cxx_pretty_printer *, tree, int); +static void dump_simple_decl (cxx_pretty_printer *, tree, tree, int); +static void dump_decl (cxx_pretty_printer *, tree, int); +static void dump_template_decl (cxx_pretty_printer *, tree, int); +static void dump_function_decl (cxx_pretty_printer *, 
tree, int); +static void dump_expr (cxx_pretty_printer *, tree, int); +static void dump_unary_op (cxx_pretty_printer *, const char *, tree, int); +static void dump_binary_op (cxx_pretty_printer *, const char *, tree, int); +static void dump_aggr_type (cxx_pretty_printer *, tree, int); +static void dump_type_prefix (cxx_pretty_printer *, tree, int); +static void dump_type_suffix (cxx_pretty_printer *, tree, int); +static void dump_function_name (cxx_pretty_printer *, tree, int); +static void dump_call_expr_args (cxx_pretty_printer *, tree, int, bool); +static void dump_aggr_init_expr_args (cxx_pretty_printer *, tree, int, bool); +static void dump_expr_list (cxx_pretty_printer *, tree, int); +static void dump_global_iord (cxx_pretty_printer *, tree); +static void dump_parameters (cxx_pretty_printer *, tree, int); +static void dump_ref_qualifier (cxx_pretty_printer *, tree, int); +static void dump_exception_spec (cxx_pretty_printer *, tree, int); +static void dump_template_argument (cxx_pretty_printer *, tree, int); +static void dump_template_argument_list (cxx_pretty_printer *, tree, int); +static void dump_template_parameter (cxx_pretty_printer *, tree, int); +static void dump_template_bindings (cxx_pretty_printer *, tree, tree, + vec *); +static void dump_scope (cxx_pretty_printer *, tree, int); +static void dump_template_parms (cxx_pretty_printer *, tree, int, int); static int get_non_default_template_args_count (tree, int); static const char *function_category (tree); static void maybe_print_constexpr_context (diagnostic_context *); @@ -108,14 +112,13 @@ init_error (void) diagnostic_finalizer (global_dc) = cp_diagnostic_finalizer; diagnostic_format_decoder (global_dc) = cp_printer; - pp_construct (pp_base (cxx_pp), NULL, 0); - pp_cxx_pretty_printer_init (cxx_pp); + new (cxx_pp) cxx_pretty_printer (); } /* Dump a scope, if deemed necessary. 
*/ static void -dump_scope (tree scope, int flags) +dump_scope (cxx_pretty_printer *pp, tree scope, int flags) { int f = flags & (TFF_SCOPE | TFF_CHASE_TYPEDEF); @@ -126,39 +129,39 @@ dump_scope (tree scope, int flags) { if (scope != global_namespace) { - dump_decl (scope, f); - pp_cxx_colon_colon (cxx_pp); + dump_decl (pp, scope, f); + pp_cxx_colon_colon (pp); } } else if (AGGREGATE_TYPE_P (scope)) { - dump_type (scope, f); - pp_cxx_colon_colon (cxx_pp); + dump_type (pp, scope, f); + pp_cxx_colon_colon (pp); } else if ((flags & TFF_SCOPE) && TREE_CODE (scope) == FUNCTION_DECL) { - dump_function_decl (scope, f); - pp_cxx_colon_colon (cxx_pp); + dump_function_decl (pp, scope, f); + pp_cxx_colon_colon (pp); } } /* Dump the template ARGument under control of FLAGS. */ static void -dump_template_argument (tree arg, int flags) +dump_template_argument (cxx_pretty_printer *pp, tree arg, int flags) { if (ARGUMENT_PACK_P (arg)) - dump_template_argument_list (ARGUMENT_PACK_ARGS (arg), + dump_template_argument_list (pp, ARGUMENT_PACK_ARGS (arg), /* No default args in argument packs. */ flags|TFF_NO_OMIT_DEFAULT_TEMPLATE_ARGUMENTS); else if (TYPE_P (arg) || TREE_CODE (arg) == TEMPLATE_DECL) - dump_type (arg, flags & ~TFF_CLASS_KEY_OR_ENUM); + dump_type (pp, arg, flags & ~TFF_CLASS_KEY_OR_ENUM); else { if (TREE_CODE (arg) == TREE_LIST) arg = TREE_VALUE (arg); - dump_expr (arg, (flags | TFF_EXPR_IN_PARENS) & ~TFF_CLASS_KEY_OR_ENUM); + dump_expr (pp, arg, (flags | TFF_EXPR_IN_PARENS) & ~TFF_CLASS_KEY_OR_ENUM); } } @@ -186,7 +189,7 @@ get_non_default_template_args_count (tree args, int flags) of FLAGS. 
*/ static void -dump_template_argument_list (tree args, int flags) +dump_template_argument_list (cxx_pretty_printer *pp, tree args, int flags) { int n = get_non_default_template_args_count (args, flags); int need_comma = 0; @@ -202,9 +205,9 @@ dump_template_argument_list (tree args, int flags) if (need_comma && (!ARGUMENT_PACK_P (arg) || TREE_VEC_LENGTH (ARGUMENT_PACK_ARGS (arg)) > 0)) - pp_separate_with_comma (cxx_pp); + pp_separate_with_comma (pp); - dump_template_argument (arg, flags); + dump_template_argument (pp, arg, flags); need_comma = 1; } } @@ -212,7 +215,7 @@ dump_template_argument_list (tree args, int flags) /* Dump a template parameter PARM (a TREE_LIST) under control of FLAGS. */ static void -dump_template_parameter (tree parm, int flags) +dump_template_parameter (cxx_pretty_printer *pp, tree parm, int flags) { tree p; tree a; @@ -227,29 +230,29 @@ dump_template_parameter (tree parm, int flags) { if (flags & TFF_DECL_SPECIFIERS) { - pp_cxx_ws_string (cxx_pp, "class"); + pp_cxx_ws_string (pp, "class"); if (TEMPLATE_TYPE_PARAMETER_PACK (TREE_TYPE (p))) - pp_cxx_ws_string (cxx_pp, "..."); + pp_cxx_ws_string (pp, "..."); if (DECL_NAME (p)) - pp_cxx_tree_identifier (cxx_pp, DECL_NAME (p)); + pp_cxx_tree_identifier (pp, DECL_NAME (p)); } else if (DECL_NAME (p)) - pp_cxx_tree_identifier (cxx_pp, DECL_NAME (p)); + pp_cxx_tree_identifier (pp, DECL_NAME (p)); else - pp_cxx_canonical_template_parameter (cxx_pp, TREE_TYPE (p)); + pp_cxx_canonical_template_parameter (pp, TREE_TYPE (p)); } else - dump_decl (p, flags | TFF_DECL_SPECIFIERS); + dump_decl (pp, p, flags | TFF_DECL_SPECIFIERS); if ((flags & TFF_FUNCTION_DEFAULT_ARGUMENTS) && a != NULL_TREE) { - pp_cxx_whitespace (cxx_pp); - pp_equal (cxx_pp); - pp_cxx_whitespace (cxx_pp); + pp_cxx_whitespace (pp); + pp_equal (pp); + pp_cxx_whitespace (pp); if (TREE_CODE (p) == TYPE_DECL || TREE_CODE (p) == TEMPLATE_DECL) - dump_type (a, flags & ~TFF_CHASE_TYPEDEF); + dump_type (pp, a, flags & ~TFF_CHASE_TYPEDEF); else - 
dump_expr (a, flags | TFF_EXPR_IN_PARENS); + dump_expr (pp, a, flags | TFF_EXPR_IN_PARENS); } } @@ -258,7 +261,8 @@ dump_template_parameter (tree parm, int flags) TREE_VEC. */ static void -dump_template_bindings (tree parms, tree args, vec *typenames) +dump_template_bindings (cxx_pretty_printer *pp, tree parms, tree args, + vec *typenames) { bool need_semicolon = false; int i; @@ -285,21 +289,22 @@ dump_template_bindings (tree parms, tree args, vec *typenames) arg = TREE_VEC_ELT (lvl_args, arg_idx); if (need_semicolon) - pp_separate_with_semicolon (cxx_pp); - dump_template_parameter (TREE_VEC_ELT (p, i), TFF_PLAIN_IDENTIFIER); - pp_cxx_whitespace (cxx_pp); - pp_equal (cxx_pp); - pp_cxx_whitespace (cxx_pp); + pp_separate_with_semicolon (pp); + dump_template_parameter (pp, TREE_VEC_ELT (p, i), + TFF_PLAIN_IDENTIFIER); + pp_cxx_whitespace (pp); + pp_equal (pp); + pp_cxx_whitespace (pp); if (arg) { if (ARGUMENT_PACK_P (arg)) - pp_cxx_left_brace (cxx_pp); - dump_template_argument (arg, TFF_PLAIN_IDENTIFIER); + pp_cxx_left_brace (pp); + dump_template_argument (pp, arg, TFF_PLAIN_IDENTIFIER); if (ARGUMENT_PACK_P (arg)) - pp_cxx_right_brace (cxx_pp); + pp_cxx_right_brace (pp); } else - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); ++arg_idx; need_semicolon = true; @@ -315,18 +320,18 @@ dump_template_bindings (tree parms, tree args, vec *typenames) FOR_EACH_VEC_SAFE_ELT (typenames, i, t) { if (need_semicolon) - pp_separate_with_semicolon (cxx_pp); - dump_type (t, TFF_PLAIN_IDENTIFIER); - pp_cxx_whitespace (cxx_pp); - pp_equal (cxx_pp); - pp_cxx_whitespace (cxx_pp); + pp_separate_with_semicolon (pp); + dump_type (pp, t, TFF_PLAIN_IDENTIFIER); + pp_cxx_whitespace (pp); + pp_equal (pp); + pp_cxx_whitespace (pp); push_deferring_access_checks (dk_no_check); t = tsubst (t, args, tf_none, NULL_TREE); pop_deferring_access_checks (); /* Strip typedefs. We can't just use TFF_CHASE_TYPEDEF because pp_simple_type_specifier doesn't know about it. 
*/ t = strip_typedefs (t); - dump_type (t, TFF_PLAIN_IDENTIFIER); + dump_type (pp, t, TFF_PLAIN_IDENTIFIER); } } @@ -334,17 +339,17 @@ dump_template_bindings (tree parms, tree args, vec *typenames) specialization of T. */ static void -dump_alias_template_specialization (tree t, int flags) +dump_alias_template_specialization (cxx_pretty_printer *pp, tree t, int flags) { tree name; gcc_assert (alias_template_specialization_p (t)); if (!(flags & TFF_UNQUALIFIED_NAME)) - dump_scope (CP_DECL_CONTEXT (TYPE_NAME (t)), flags); + dump_scope (pp, CP_DECL_CONTEXT (TYPE_NAME (t)), flags); name = TYPE_IDENTIFIER (t); - pp_cxx_tree_identifier (cxx_pp, name); - dump_template_parms (TYPE_TEMPLATE_INFO (t), + pp_cxx_tree_identifier (pp, name); + dump_template_parms (pp, TYPE_TEMPLATE_INFO (t), /*primary=*/false, flags & ~TFF_TEMPLATE_HEADER); } @@ -353,7 +358,7 @@ dump_alias_template_specialization (tree t, int flags) format. */ static void -dump_type (tree t, int flags) +dump_type (cxx_pretty_printer *pp, tree t, int flags) { if (t == NULL_TREE) return; @@ -369,15 +374,15 @@ dump_type (tree t, int flags) t = strip_typedefs (t); else if (alias_template_specialization_p (t)) { - dump_alias_template_specialization (t, flags); + dump_alias_template_specialization (pp, t, flags); return; } else if (same_type_p (t, TREE_TYPE (decl))) t = decl; else { - pp_cxx_cv_qualifier_seq (cxx_pp, t); - pp_cxx_tree_identifier (cxx_pp, TYPE_IDENTIFIER (t)); + pp_cxx_cv_qualifier_seq (pp, t); + pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t)); return; } } @@ -389,39 +394,39 @@ dump_type (tree t, int flags) { case LANG_TYPE: if (t == init_list_type_node) - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); else if (t == unknown_type_node) - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); else { - pp_cxx_cv_qualifier_seq (cxx_pp, t); - pp_cxx_tree_identifier (cxx_pp, TYPE_IDENTIFIER (t)); + pp_cxx_cv_qualifier_seq (pp, t); + pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t)); } break; case 
TREE_LIST: /* A list of function parms. */ - dump_parameters (t, flags); + dump_parameters (pp, t, flags); break; case IDENTIFIER_NODE: - pp_cxx_tree_identifier (cxx_pp, t); + pp_cxx_tree_identifier (pp, t); break; case TREE_BINFO: - dump_type (BINFO_TYPE (t), flags); + dump_type (pp, BINFO_TYPE (t), flags); break; case RECORD_TYPE: case UNION_TYPE: case ENUMERAL_TYPE: - dump_aggr_type (t, flags); + dump_aggr_type (pp, t, flags); break; case TYPE_DECL: if (flags & TFF_CHASE_TYPEDEF) { - dump_type (DECL_ORIGINAL_TYPE (t) + dump_type (pp, DECL_ORIGINAL_TYPE (t) ? DECL_ORIGINAL_TYPE (t) : TREE_TYPE (t), flags); break; } @@ -429,7 +434,7 @@ dump_type (tree t, int flags) case TEMPLATE_DECL: case NAMESPACE_DECL: - dump_decl (t, flags & ~TFF_DECL_SPECIFIERS); + dump_decl (pp, t, flags & ~TFF_DECL_SPECIFIERS); break; case INTEGER_TYPE: @@ -439,35 +444,35 @@ dump_type (tree t, int flags) case COMPLEX_TYPE: case VECTOR_TYPE: case FIXED_POINT_TYPE: - pp_type_specifier_seq (cxx_pp, t); + pp_type_specifier_seq (pp, t); break; case TEMPLATE_TEMPLATE_PARM: /* For parameters inside template signature. 
*/ if (TYPE_IDENTIFIER (t)) - pp_cxx_tree_identifier (cxx_pp, TYPE_IDENTIFIER (t)); + pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t)); else - pp_cxx_canonical_template_parameter (cxx_pp, t); + pp_cxx_canonical_template_parameter (pp, t); break; case BOUND_TEMPLATE_TEMPLATE_PARM: { tree args = TYPE_TI_ARGS (t); - pp_cxx_cv_qualifier_seq (cxx_pp, t); - pp_cxx_tree_identifier (cxx_pp, TYPE_IDENTIFIER (t)); - pp_cxx_begin_template_argument_list (cxx_pp); - dump_template_argument_list (args, flags); - pp_cxx_end_template_argument_list (cxx_pp); + pp_cxx_cv_qualifier_seq (pp, t); + pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t)); + pp_cxx_begin_template_argument_list (pp); + dump_template_argument_list (pp, args, flags); + pp_cxx_end_template_argument_list (pp); } break; case TEMPLATE_TYPE_PARM: - pp_cxx_cv_qualifier_seq (cxx_pp, t); + pp_cxx_cv_qualifier_seq (pp, t); if (TYPE_IDENTIFIER (t)) - pp_cxx_tree_identifier (cxx_pp, TYPE_IDENTIFIER (t)); + pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t)); else pp_cxx_canonical_template_parameter - (cxx_pp, TEMPLATE_TYPE_PARM_INDEX (t)); + (pp, TEMPLATE_TYPE_PARM_INDEX (t)); break; /* This is not always necessary for pointers and such, but doing this @@ -480,78 +485,78 @@ dump_type (tree t, int flags) case FUNCTION_TYPE: case METHOD_TYPE: { - dump_type_prefix (t, flags); - dump_type_suffix (t, flags); + dump_type_prefix (pp, t, flags); + dump_type_suffix (pp, t, flags); break; } case TYPENAME_TYPE: if (! (flags & TFF_CHASE_TYPEDEF) && DECL_ORIGINAL_TYPE (TYPE_NAME (t))) { - dump_decl (TYPE_NAME (t), TFF_PLAIN_IDENTIFIER); + dump_decl (pp, TYPE_NAME (t), TFF_PLAIN_IDENTIFIER); break; } - pp_cxx_cv_qualifier_seq (cxx_pp, t); - pp_cxx_ws_string (cxx_pp, + pp_cxx_cv_qualifier_seq (pp, t); + pp_cxx_ws_string (pp, TYPENAME_IS_ENUM_P (t) ? "enum" : TYPENAME_IS_CLASS_P (t) ? "class" : "typename"); - dump_typename (t, flags); + dump_typename (pp, t, flags); break; case UNBOUND_CLASS_TEMPLATE: if (! 
(flags & TFF_UNQUALIFIED_NAME)) { - dump_type (TYPE_CONTEXT (t), flags); - pp_cxx_colon_colon (cxx_pp); + dump_type (pp, TYPE_CONTEXT (t), flags); + pp_cxx_colon_colon (pp); } - pp_cxx_ws_string (cxx_pp, "template"); - dump_type (DECL_NAME (TYPE_NAME (t)), flags); + pp_cxx_ws_string (pp, "template"); + dump_type (pp, DECL_NAME (TYPE_NAME (t)), flags); break; case TYPEOF_TYPE: - pp_cxx_ws_string (cxx_pp, "__typeof__"); - pp_cxx_whitespace (cxx_pp); - pp_cxx_left_paren (cxx_pp); - dump_expr (TYPEOF_TYPE_EXPR (t), flags & ~TFF_EXPR_IN_PARENS); - pp_cxx_right_paren (cxx_pp); + pp_cxx_ws_string (pp, "__typeof__"); + pp_cxx_whitespace (pp); + pp_cxx_left_paren (pp); + dump_expr (pp, TYPEOF_TYPE_EXPR (t), flags & ~TFF_EXPR_IN_PARENS); + pp_cxx_right_paren (pp); break; case UNDERLYING_TYPE: - pp_cxx_ws_string (cxx_pp, "__underlying_type"); - pp_cxx_whitespace (cxx_pp); - pp_cxx_left_paren (cxx_pp); - dump_expr (UNDERLYING_TYPE_TYPE (t), flags & ~TFF_EXPR_IN_PARENS); - pp_cxx_right_paren (cxx_pp); + pp_cxx_ws_string (pp, "__underlying_type"); + pp_cxx_whitespace (pp); + pp_cxx_left_paren (pp); + dump_expr (pp, UNDERLYING_TYPE_TYPE (t), flags & ~TFF_EXPR_IN_PARENS); + pp_cxx_right_paren (pp); break; case TYPE_PACK_EXPANSION: - dump_type (PACK_EXPANSION_PATTERN (t), flags); - pp_cxx_ws_string (cxx_pp, "..."); + dump_type (pp, PACK_EXPANSION_PATTERN (t), flags); + pp_cxx_ws_string (pp, "..."); break; case TYPE_ARGUMENT_PACK: - dump_template_argument (t, flags); + dump_template_argument (pp, t, flags); break; case DECLTYPE_TYPE: - pp_cxx_ws_string (cxx_pp, "decltype"); - pp_cxx_whitespace (cxx_pp); - pp_cxx_left_paren (cxx_pp); - dump_expr (DECLTYPE_TYPE_EXPR (t), flags & ~TFF_EXPR_IN_PARENS); - pp_cxx_right_paren (cxx_pp); + pp_cxx_ws_string (pp, "decltype"); + pp_cxx_whitespace (pp); + pp_cxx_left_paren (pp); + dump_expr (pp, DECLTYPE_TYPE_EXPR (t), flags & ~TFF_EXPR_IN_PARENS); + pp_cxx_right_paren (pp); break; case NULLPTR_TYPE: - pp_string (cxx_pp, "std::nullptr_t"); + 
pp_string (pp, "std::nullptr_t"); break; default: - pp_unsupported_tree (cxx_pp, t); + pp_unsupported_tree (pp, t); /* Fall through to error. */ case ERROR_MARK: - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); break; } } @@ -560,16 +565,16 @@ dump_type (tree t, int flags) a TYPENAME_TYPE. */ static void -dump_typename (tree t, int flags) +dump_typename (cxx_pretty_printer *pp, tree t, int flags) { tree ctx = TYPE_CONTEXT (t); if (TREE_CODE (ctx) == TYPENAME_TYPE) - dump_typename (ctx, flags); + dump_typename (pp, ctx, flags); else - dump_type (ctx, flags & ~TFF_CLASS_KEY_OR_ENUM); - pp_cxx_colon_colon (cxx_pp); - dump_decl (TYPENAME_TYPE_FULLNAME (t), flags); + dump_type (pp, ctx, flags & ~TFF_CLASS_KEY_OR_ENUM); + pp_cxx_colon_colon (pp); + dump_decl (pp, TYPENAME_TYPE_FULLNAME (t), flags); } /* Return the name of the supplied aggregate, or enumeral type. */ @@ -596,17 +601,17 @@ class_key_or_enum_as_string (tree t) in the form `class foo'. */ static void -dump_aggr_type (tree t, int flags) +dump_aggr_type (cxx_pretty_printer *pp, tree t, int flags) { tree name; const char *variety = class_key_or_enum_as_string (t); int typdef = 0; int tmplate = 0; - pp_cxx_cv_qualifier_seq (cxx_pp, t); + pp_cxx_cv_qualifier_seq (pp, t); if (flags & TFF_CLASS_KEY_OR_ENUM) - pp_cxx_ws_string (cxx_pp, variety); + pp_cxx_ws_string (pp, variety); name = TYPE_NAME (t); @@ -634,7 +639,7 @@ dump_aggr_type (tree t, int flags) || PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (t))); if (! 
(flags & TFF_UNQUALIFIED_NAME)) - dump_scope (CP_DECL_CONTEXT (name), flags | TFF_SCOPE); + dump_scope (pp, CP_DECL_CONTEXT (name), flags | TFF_SCOPE); flags &= ~TFF_UNQUALIFIED_NAME; if (tmplate) { @@ -652,23 +657,24 @@ dump_aggr_type (tree t, int flags) if (name == 0 || ANON_AGGRNAME_P (name)) { if (flags & TFF_CLASS_KEY_OR_ENUM) - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); else - pp_printf (pp_base (cxx_pp), M_(""), variety); + pp_printf (pp, M_(""), variety); } else if (LAMBDA_TYPE_P (t)) { /* A lambda's "type" is essentially its signature. */ - pp_string (cxx_pp, M_("'); + pp_greater (pp); } else - pp_cxx_tree_identifier (cxx_pp, name); + pp_cxx_tree_identifier (pp, name); if (tmplate) - dump_template_parms (TYPE_TEMPLATE_INFO (t), + dump_template_parms (pp, TYPE_TEMPLATE_INFO (t), !CLASSTYPE_USE_TEMPLATE (t), flags & ~TFF_TEMPLATE_HEADER); } @@ -685,7 +691,7 @@ dump_aggr_type (tree t, int flags) int *[]&. */ static void -dump_type_prefix (tree t, int flags) +dump_type_prefix (cxx_pretty_printer *pp, tree t, int flags) { if (TYPE_PTRMEMFUNC_P (t)) { @@ -700,61 +706,60 @@ dump_type_prefix (tree t, int flags) { tree sub = TREE_TYPE (t); - dump_type_prefix (sub, flags); + dump_type_prefix (pp, sub, flags); if (TREE_CODE (sub) == ARRAY_TYPE || TREE_CODE (sub) == FUNCTION_TYPE) { - pp_cxx_whitespace (cxx_pp); - pp_cxx_left_paren (cxx_pp); - pp_c_attributes_display (pp_c_base (cxx_pp), - TYPE_ATTRIBUTES (sub)); + pp_cxx_whitespace (pp); + pp_cxx_left_paren (pp); + pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); } if (TYPE_PTR_P (t)) - pp_character(cxx_pp, '*'); + pp_star (pp); else if (TREE_CODE (t) == REFERENCE_TYPE) { if (TYPE_REF_IS_RVALUE (t)) - pp_string (cxx_pp, "&&"); + pp_ampersand_ampersand (pp); else - pp_character (cxx_pp, '&'); + pp_ampersand (pp); } - pp_base (cxx_pp)->padding = pp_before; - pp_cxx_cv_qualifier_seq (cxx_pp, t); + pp->padding = pp_before; + pp_cxx_cv_qualifier_seq (pp, t); } break; case OFFSET_TYPE: offset_type: - 
dump_type_prefix (TREE_TYPE (t), flags); + dump_type_prefix (pp, TREE_TYPE (t), flags); if (TREE_CODE (t) == OFFSET_TYPE) /* pmfs deal with this in d_t_p */ { - pp_maybe_space (cxx_pp); + pp_maybe_space (pp); if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) - pp_cxx_left_paren (cxx_pp); - dump_type (TYPE_OFFSET_BASETYPE (t), flags); - pp_cxx_colon_colon (cxx_pp); + pp_cxx_left_paren (pp); + dump_type (pp, TYPE_OFFSET_BASETYPE (t), flags); + pp_cxx_colon_colon (pp); } - pp_cxx_star (cxx_pp); - pp_cxx_cv_qualifier_seq (cxx_pp, t); - pp_base (cxx_pp)->padding = pp_before; + pp_cxx_star (pp); + pp_cxx_cv_qualifier_seq (pp, t); + pp->padding = pp_before; break; /* This can be reached without a pointer when dealing with templates, e.g. std::is_function. */ case FUNCTION_TYPE: - dump_type_prefix (TREE_TYPE (t), flags); + dump_type_prefix (pp, TREE_TYPE (t), flags); break; case METHOD_TYPE: - dump_type_prefix (TREE_TYPE (t), flags); - pp_maybe_space (cxx_pp); - pp_cxx_left_paren (cxx_pp); - dump_aggr_type (TYPE_METHOD_BASETYPE (t), flags); - pp_cxx_colon_colon (cxx_pp); + dump_type_prefix (pp, TREE_TYPE (t), flags); + pp_maybe_space (pp); + pp_cxx_left_paren (pp); + dump_aggr_type (pp, TYPE_METHOD_BASETYPE (t), flags); + pp_cxx_colon_colon (pp); break; case ARRAY_TYPE: - dump_type_prefix (TREE_TYPE (t), flags); + dump_type_prefix (pp, TREE_TYPE (t), flags); break; case ENUMERAL_TYPE: @@ -781,15 +786,15 @@ dump_type_prefix (tree t, int flags) case TYPE_PACK_EXPANSION: case FIXED_POINT_TYPE: case NULLPTR_TYPE: - dump_type (t, flags); - pp_base (cxx_pp)->padding = pp_before; + dump_type (pp, t, flags); + pp->padding = pp_before; break; default: - pp_unsupported_tree (cxx_pp, t); + pp_unsupported_tree (pp, t); /* fall through. */ case ERROR_MARK: - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); break; } } @@ -798,7 +803,7 @@ dump_type_prefix (tree t, int flags) which appears after the identifier (or function parms). 
*/ static void -dump_type_suffix (tree t, int flags) +dump_type_suffix (cxx_pretty_printer *pp, tree t, int flags) { if (TYPE_PTRMEMFUNC_P (t)) t = TYPE_PTRMEMFUNC_FN_TYPE (t); @@ -810,8 +815,8 @@ dump_type_suffix (tree t, int flags) case OFFSET_TYPE: if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE) - pp_cxx_right_paren (cxx_pp); - dump_type_suffix (TREE_TYPE (t), flags); + pp_cxx_right_paren (pp); + dump_type_suffix (pp, TREE_TYPE (t), flags); break; case FUNCTION_TYPE: @@ -820,34 +825,34 @@ dump_type_suffix (tree t, int flags) tree arg; if (TREE_CODE (t) == METHOD_TYPE) /* Can only be reached through a pointer. */ - pp_cxx_right_paren (cxx_pp); + pp_cxx_right_paren (pp); arg = TYPE_ARG_TYPES (t); if (TREE_CODE (t) == METHOD_TYPE) arg = TREE_CHAIN (arg); /* Function pointers don't have default args. Not in standard C++, anyway; they may in g++, but we'll just pretend otherwise. */ - dump_parameters (arg, flags & ~TFF_FUNCTION_DEFAULT_ARGUMENTS); + dump_parameters (pp, arg, flags & ~TFF_FUNCTION_DEFAULT_ARGUMENTS); - pp_base (cxx_pp)->padding = pp_before; - pp_cxx_cv_qualifiers (cxx_pp, type_memfn_quals (t)); - dump_ref_qualifier (t, flags); - dump_exception_spec (TYPE_RAISES_EXCEPTIONS (t), flags); - dump_type_suffix (TREE_TYPE (t), flags); + pp->padding = pp_before; + pp_cxx_cv_qualifiers (pp, type_memfn_quals (t)); + dump_ref_qualifier (pp, t, flags); + dump_exception_spec (pp, TYPE_RAISES_EXCEPTIONS (t), flags); + dump_type_suffix (pp, TREE_TYPE (t), flags); break; } case ARRAY_TYPE: - pp_maybe_space (cxx_pp); - pp_cxx_left_bracket (cxx_pp); + pp_maybe_space (pp); + pp_cxx_left_bracket (pp); if (TYPE_DOMAIN (t)) { tree dtype = TYPE_DOMAIN (t); tree max = TYPE_MAX_VALUE (dtype); if (integer_all_onesp (max)) - pp_character (cxx_pp, '0'); + pp_character (pp, '0'); else if (host_integerp (max, 0)) - pp_wide_integer (cxx_pp, tree_low_cst (max, 0) + 1); + pp_wide_integer (pp, tree_low_cst (max, 0) + 1); else { STRIP_NOPS 
(max); @@ -864,11 +869,11 @@ dump_type_suffix (tree t, int flags) max = fold_build2_loc (input_location, PLUS_EXPR, dtype, max, build_int_cst (dtype, 1)); - dump_expr (max, flags & ~TFF_EXPR_IN_PARENS); + dump_expr (pp, max, flags & ~TFF_EXPR_IN_PARENS); } } - pp_cxx_right_bracket (cxx_pp); - dump_type_suffix (TREE_TYPE (t), flags); + pp_cxx_right_bracket (pp); + dump_type_suffix (pp, TREE_TYPE (t), flags); break; case ENUMERAL_TYPE: @@ -898,7 +903,7 @@ dump_type_suffix (tree t, int flags) break; default: - pp_unsupported_tree (cxx_pp, t); + pp_unsupported_tree (pp, t); case ERROR_MARK: /* Don't mark it here, we should have already done in dump_type_prefix. */ @@ -907,7 +912,7 @@ dump_type_suffix (tree t, int flags) } static void -dump_global_iord (tree t) +dump_global_iord (cxx_pretty_printer *pp, tree t) { const char *p = NULL; @@ -918,51 +923,51 @@ dump_global_iord (tree t) else gcc_unreachable (); - pp_printf (pp_base (cxx_pp), p, input_filename); + pp_printf (pp, p, input_filename); } static void -dump_simple_decl (tree t, tree type, int flags) +dump_simple_decl (cxx_pretty_printer *pp, tree t, tree type, int flags) { if (flags & TFF_DECL_SPECIFIERS) { if (VAR_P (t) && DECL_DECLARED_CONSTEXPR_P (t)) - pp_cxx_ws_string (cxx_pp, "constexpr"); - dump_type_prefix (type, flags & ~TFF_UNQUALIFIED_NAME); - pp_maybe_space (cxx_pp); + pp_cxx_ws_string (pp, "constexpr"); + dump_type_prefix (pp, type, flags & ~TFF_UNQUALIFIED_NAME); + pp_maybe_space (pp); } if (! 
(flags & TFF_UNQUALIFIED_NAME) && TREE_CODE (t) != PARM_DECL && (!DECL_INITIAL (t) || TREE_CODE (DECL_INITIAL (t)) != TEMPLATE_PARM_INDEX)) - dump_scope (CP_DECL_CONTEXT (t), flags); + dump_scope (pp, CP_DECL_CONTEXT (t), flags); flags &= ~TFF_UNQUALIFIED_NAME; if ((flags & TFF_DECL_SPECIFIERS) && DECL_TEMPLATE_PARM_P (t) && TEMPLATE_PARM_PARAMETER_PACK (DECL_INITIAL (t))) - pp_string (cxx_pp, "..."); + pp_string (pp, "..."); if (DECL_NAME (t)) { if (TREE_CODE (t) == FIELD_DECL && DECL_NORMAL_CAPTURE_P (t)) { - pp_character (cxx_pp, '<'); - pp_string (cxx_pp, IDENTIFIER_POINTER (DECL_NAME (t)) + 2); - pp_string (cxx_pp, " capture>"); + pp_less (pp); + pp_string (pp, IDENTIFIER_POINTER (DECL_NAME (t)) + 2); + pp_string (pp, " capture>"); } else - dump_decl (DECL_NAME (t), flags); + dump_decl (pp, DECL_NAME (t), flags); } else - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); if (flags & TFF_DECL_SPECIFIERS) - dump_type_suffix (type, flags); + dump_type_suffix (pp, type, flags); } /* Dump a human readable string for the decl T under control of FLAGS. */ static void -dump_decl (tree t, int flags) +dump_decl (cxx_pretty_printer *pp, tree t, int flags) { if (t == NULL_TREE) return; @@ -974,7 +979,7 @@ dump_decl (tree t, int flags) const char *demangled = objc_maybe_printable_name (t, flags); if (demangled) { - pp_string (cxx_pp, demangled); + pp_string (pp, demangled); return; } } @@ -989,32 +994,32 @@ dump_decl (tree t, int flags) && TREE_CODE (TREE_TYPE (t)) == TEMPLATE_TYPE_PARM) { /* Say `class T' not just `T'. */ - pp_cxx_ws_string (cxx_pp, "class"); + pp_cxx_ws_string (pp, "class"); /* Emit the `...' for a parameter pack. 
*/ if (TEMPLATE_TYPE_PARAMETER_PACK (TREE_TYPE (t))) - pp_cxx_ws_string (cxx_pp, "..."); + pp_cxx_ws_string (pp, "..."); } - dump_type (TREE_TYPE (t), flags); + dump_type (pp, TREE_TYPE (t), flags); break; } if (TYPE_DECL_ALIAS_P (t) && (flags & TFF_DECL_SPECIFIERS || flags & TFF_CLASS_KEY_OR_ENUM)) { - pp_cxx_ws_string (cxx_pp, "using"); - dump_decl (DECL_NAME (t), flags); - pp_cxx_whitespace (cxx_pp); - pp_cxx_ws_string (cxx_pp, "="); - pp_cxx_whitespace (cxx_pp); - dump_type (DECL_ORIGINAL_TYPE (t), flags); + pp_cxx_ws_string (pp, "using"); + dump_decl (pp, DECL_NAME (t), flags); + pp_cxx_whitespace (pp); + pp_cxx_ws_string (pp, "="); + pp_cxx_whitespace (pp); + dump_type (pp, DECL_ORIGINAL_TYPE (t), flags); break; } if ((flags & TFF_DECL_SPECIFIERS) && !DECL_SELF_REFERENCE_P (t)) - pp_cxx_ws_string (cxx_pp, "typedef"); - dump_simple_decl (t, DECL_ORIGINAL_TYPE (t) + pp_cxx_ws_string (pp, "typedef"); + dump_simple_decl (pp, t, DECL_ORIGINAL_TYPE (t) ? DECL_ORIGINAL_TYPE (t) : TREE_TYPE (t), flags); break; @@ -1022,78 +1027,78 @@ dump_decl (tree t, int flags) case VAR_DECL: if (DECL_NAME (t) && VTABLE_NAME_P (DECL_NAME (t))) { - pp_string (cxx_pp, M_("vtable for ")); + pp_string (pp, M_("vtable for ")); gcc_assert (TYPE_P (DECL_CONTEXT (t))); - dump_type (DECL_CONTEXT (t), flags); + dump_type (pp, DECL_CONTEXT (t), flags); break; } /* Else fall through. */ case FIELD_DECL: case PARM_DECL: - dump_simple_decl (t, TREE_TYPE (t), flags); + dump_simple_decl (pp, t, TREE_TYPE (t), flags); break; case RESULT_DECL: - pp_string (cxx_pp, M_(" ")); - dump_simple_decl (t, TREE_TYPE (t), flags); + pp_string (pp, M_(" ")); + dump_simple_decl (pp, t, TREE_TYPE (t), flags); break; case NAMESPACE_DECL: if (flags & TFF_DECL_SPECIFIERS) - pp_cxx_declaration (cxx_pp, t); + pp->declaration (t); else { if (! 
(flags & TFF_UNQUALIFIED_NAME)) - dump_scope (CP_DECL_CONTEXT (t), flags); + dump_scope (pp, CP_DECL_CONTEXT (t), flags); flags &= ~TFF_UNQUALIFIED_NAME; if (DECL_NAME (t) == NULL_TREE) { - if (!(pp_c_base (cxx_pp)->flags & pp_c_flag_gnu_v3)) - pp_cxx_ws_string (cxx_pp, M_("{anonymous}")); + if (!(pp->flags & pp_c_flag_gnu_v3)) + pp_cxx_ws_string (pp, M_("{anonymous}")); else - pp_cxx_ws_string (cxx_pp, M_("(anonymous namespace)")); + pp_cxx_ws_string (pp, M_("(anonymous namespace)")); } else - pp_cxx_tree_identifier (cxx_pp, DECL_NAME (t)); + pp_cxx_tree_identifier (pp, DECL_NAME (t)); } break; case SCOPE_REF: - dump_type (TREE_OPERAND (t, 0), flags); - pp_string (cxx_pp, "::"); - dump_decl (TREE_OPERAND (t, 1), TFF_UNQUALIFIED_NAME); + dump_type (pp, TREE_OPERAND (t, 0), flags); + pp_colon_colon (pp); + dump_decl (pp, TREE_OPERAND (t, 1), TFF_UNQUALIFIED_NAME); break; case ARRAY_REF: - dump_decl (TREE_OPERAND (t, 0), flags); - pp_cxx_left_bracket (cxx_pp); - dump_decl (TREE_OPERAND (t, 1), flags); - pp_cxx_right_bracket (cxx_pp); + dump_decl (pp, TREE_OPERAND (t, 0), flags); + pp_cxx_left_bracket (pp); + dump_decl (pp, TREE_OPERAND (t, 1), flags); + pp_cxx_right_bracket (pp); break; case ARRAY_NOTATION_REF: - dump_decl (ARRAY_NOTATION_ARRAY (t), flags | TFF_EXPR_IN_PARENS); - pp_cxx_left_bracket (cxx_pp); - dump_decl (ARRAY_NOTATION_START (t), flags | TFF_EXPR_IN_PARENS); - pp_string (cxx_pp, ":"); - dump_decl (ARRAY_NOTATION_LENGTH (t), flags | TFF_EXPR_IN_PARENS); - pp_string (cxx_pp, ":"); - dump_decl (ARRAY_NOTATION_STRIDE (t), flags | TFF_EXPR_IN_PARENS); - pp_cxx_right_bracket (cxx_pp); + dump_decl (pp, ARRAY_NOTATION_ARRAY (t), flags | TFF_EXPR_IN_PARENS); + pp_cxx_left_bracket (pp); + dump_decl (pp, ARRAY_NOTATION_START (t), flags | TFF_EXPR_IN_PARENS); + pp_colon (pp); + dump_decl (pp, ARRAY_NOTATION_LENGTH (t), flags | TFF_EXPR_IN_PARENS); + pp_colon (pp); + dump_decl (pp, ARRAY_NOTATION_STRIDE (t), flags | TFF_EXPR_IN_PARENS); + pp_cxx_right_bracket 
(pp); break; /* So that we can do dump_decl on an aggr type. */ case RECORD_TYPE: case UNION_TYPE: case ENUMERAL_TYPE: - dump_type (t, flags); + dump_type (pp, t, flags); break; case BIT_NOT_EXPR: /* This is a pseudo destructor call which has not been folded into a PSEUDO_DTOR_EXPR yet. */ - pp_cxx_complement (cxx_pp); - dump_type (TREE_OPERAND (t, 0), flags); + pp_cxx_complement (pp); + dump_type (pp, TREE_OPERAND (t, 0), flags); break; case TYPE_EXPR: @@ -1105,13 +1110,13 @@ dump_decl (tree t, int flags) case IDENTIFIER_NODE: if (IDENTIFIER_TYPENAME_P (t)) { - pp_cxx_ws_string (cxx_pp, "operator"); + pp_cxx_ws_string (pp, "operator"); /* Not exactly IDENTIFIER_TYPE_VALUE. */ - dump_type (TREE_TYPE (t), flags); + dump_type (pp, TREE_TYPE (t), flags); break; } else - pp_cxx_tree_identifier (cxx_pp, t); + pp_cxx_tree_identifier (pp, t); break; case OVERLOAD: @@ -1120,15 +1125,15 @@ dump_decl (tree t, int flags) t = OVL_CURRENT (t); if (DECL_CLASS_SCOPE_P (t)) { - dump_type (DECL_CONTEXT (t), flags); - pp_cxx_colon_colon (cxx_pp); + dump_type (pp, DECL_CONTEXT (t), flags); + pp_cxx_colon_colon (pp); } else if (!DECL_FILE_SCOPE_P (t)) { - dump_decl (DECL_CONTEXT (t), flags); - pp_cxx_colon_colon (cxx_pp); + dump_decl (pp, DECL_CONTEXT (t), flags); + pp_cxx_colon_colon (pp); } - dump_decl (DECL_NAME (t), flags); + dump_decl (pp, DECL_NAME (t), flags); break; } @@ -1139,15 +1144,15 @@ dump_decl (tree t, int flags) case FUNCTION_DECL: if (! 
DECL_LANG_SPECIFIC (t)) - pp_string (cxx_pp, M_("")); + pp_string (pp, M_("")); else if (DECL_GLOBAL_CTOR_P (t) || DECL_GLOBAL_DTOR_P (t)) - dump_global_iord (t); + dump_global_iord (pp, t); else - dump_function_decl (t, flags); + dump_function_decl (pp, t, flags); break; case TEMPLATE_DECL: - dump_template_decl (t, flags); + dump_template_decl (pp, t, flags); break; case TEMPLATE_ID_EXPR: @@ -1157,71 +1162,71 @@ dump_decl (tree t, int flags) if (is_overloaded_fn (name)) name = DECL_NAME (get_first_fn (name)); - dump_decl (name, flags); - pp_cxx_begin_template_argument_list (cxx_pp); + dump_decl (pp, name, flags); + pp_cxx_begin_template_argument_list (pp); if (args == error_mark_node) - pp_string (cxx_pp, M_("