author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-06 22:51:05 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-06 22:51:05 +0000
commit     e1647522f93999450cc558341bb2066ca26e070f (patch)
tree       ec9704394836b7bb5123d7d8c1d9647eace77c5d
parent     035ef3e66f39f67a3fab95825e0fbc750bc8160d (diff)
download   gcc-e1647522f93999450cc558341bb2066ca26e070f.tar.gz
2009-11-06 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 153975
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@153981 138bc75d-0d04-0410-961f-82ee72b054a4
240 files changed, 14683 insertions, 7636 deletions
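Among the larger changes merged below, PR middle-end/41935 and PR c++/38699 rework fold_offsetof_1 in gcc/c-common.c: offsetof may now name one element past the end of an array, a constant index beyond that draws a -Warray-bounds warning, and a trailing array that looks like a poor man's flexible array member stays exempt. A minimal user-level sketch of the new behavior (illustrative only, not part of the patch; the warning assumes -Warray-bounds is enabled):

  #include <stddef.h>

  struct fixed {
      char buf[8];
      int after;               /* a field follows buf, so buf cannot be
                                  a flexible-array-member candidate */
  };

  struct tail {
      int len;
      char data[1];            /* trailing "poor man's" flexible array */
  };

  size_t ok   = offsetof (struct fixed, buf[8]);   /* one past the end: now accepted */
  size_t warn = offsetof (struct fixed, buf[9]);   /* warns: index 9 denotes an offset
                                                      greater than size of the struct's
                                                      array */
  size_t fam  = offsetof (struct tail, data[100]); /* exempt: data is the last member */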
diff --git a/ChangeLog b/ChangeLog index 414041c12c3..600e9149f73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2009-11-06 Ozkan Sezer <sezeroz@gmail.com> + + * configure.ac (FLAGS_FOR_TARGET): Add -L and -isystem + paths for *-w64-mingw* and x86_64-*mingw*. + * configure: Regenerated. + +2009-11-05 Joern Rennecke <amylaar@spamcop.net> + + * MAINTAINERS (Write After Approval): Add entry for my INRIA work. + 2009-11-02 Benjamin Kosnik <bkoz@redhat.com> * MAINTAINERS: Add Jonathan Wakely under Various Maintainers, move diff --git a/ChangeLog.MELT b/ChangeLog.MELT index 80ae228539b..72fb1bc4b7f 100644 --- a/ChangeLog.MELT +++ b/ChangeLog.MELT @@ -1,3 +1,6 @@ +2009-11-06 Basile Starynkevitch <basile@starynkevitch.net> + MELT branch merged with trunk rev 153975 + 2009-11-03 Basile Starynkevitch <basile@starynkevitch.net> MELT branch merged with trunk rev 153838 diff --git a/MAINTAINERS b/MAINTAINERS index e8202b45e50..033eb896c23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -438,6 +438,7 @@ Dwarakanath Rajagopal dwarak.rajagopal@amd.com Ramana Radhakrishnan ramana.r@gmail.com Rolf Rasmussen rolfwr@gcc.gnu.org Volker Reichelt v.reichelt@netcologne.de +Joern Rennecke amylaar@spamcop.net Bernhard Reutner-Fischer rep.dot.nop@gmail.com Tom Rix trix@redhat.com Craig Rodrigues rodrigc@gcc.gnu.org diff --git a/configure b/configure index 237708fdf31..994c6e77e65 100755 --- a/configure +++ b/configure @@ -7863,8 +7863,9 @@ case " $target_configdirs " in case " $target_configargs " in *" --with-newlib "*) case "$target" in - *-cygwin*) - FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup -L$$r/$(TARGET_SUBDIR)/winsup/cygwin -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/include -isystem $$s/winsup/cygwin/include -isystem $$s/winsup/w32api/include' ;; + *-cygwin*) + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup -L$$r/$(TARGET_SUBDIR)/winsup/cygwin -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/include -isystem $$s/winsup/cygwin/include -isystem $$s/winsup/w32api/include' + ;; esac # If we're not building GCC, don't discard standard headers. @@ -7920,12 +7921,17 @@ case " $target_configdirs " in esac ;; esac + case "$target" in -x86_64-*-mingw* | *-w64-mingw*) - ;; -*-mingw*) - # Can't be handled as Cygwin above since Mingw does not use newlib. - FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup/mingw -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/mingw/include -isystem $$s/winsup/w32api/include' ;; + x86_64-*mingw* | *-w64-mingw*) + # MinGW-w64 does not use newlib, nor does it use winsup. It may, + # however, use a symlink named 'mingw' in ${prefix} . + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L${prefix}/${target}/lib -L${prefix}/mingw/lib -isystem ${prefix}/${target}/include -isystem ${prefix}/mingw/include' + ;; + *-mingw*) + # MinGW can't be handled as Cygwin above since it does not use newlib. 
+ FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup/mingw -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/mingw/include -isystem $$s/winsup/w32api/include' + ;; esac # Allow the user to override the flags for diff --git a/configure.ac b/configure.ac index 105e5c6fe0e..43b1970e056 100644 --- a/configure.ac +++ b/configure.ac @@ -747,10 +747,10 @@ case "${target}" in i[[3456789]]86-*-mingw*) target_configdirs="$target_configdirs target-winsup" noconfigdirs="$noconfigdirs expect target-libgloss target-newlib ${libgcj}" - ;; + ;; x86_64-*-mingw*) noconfigdirs="$noconfigdirs expect target-libgloss target-newlib ${libgcj}" - ;; + ;; *-*-cygwin*) target_configdirs="$target_configdirs target-libtermcap target-winsup" noconfigdirs="$noconfigdirs target-gperf target-libgloss" @@ -760,7 +760,7 @@ case "${target}" in elif test -d "$srcdir/newlib"; then echo "Warning: winsup/cygwin is missing so newlib can't be built." fi - ;; + ;; i[[3456789]]86-moss-msdos | i[[3456789]]86-*-moss* | \ i[[3456789]]86-*-uwin* | i[[3456789]]86-*-interix* ) ;; @@ -2985,8 +2985,9 @@ case " $target_configdirs " in case " $target_configargs " in *" --with-newlib "*) case "$target" in - *-cygwin*) - FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup -L$$r/$(TARGET_SUBDIR)/winsup/cygwin -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/include -isystem $$s/winsup/cygwin/include -isystem $$s/winsup/w32api/include' ;; + *-cygwin*) + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup -L$$r/$(TARGET_SUBDIR)/winsup/cygwin -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/include -isystem $$s/winsup/cygwin/include -isystem $$s/winsup/w32api/include' + ;; esac # If we're not building GCC, don't discard standard headers. @@ -3042,12 +3043,17 @@ case " $target_configdirs " in esac ;; esac + case "$target" in -x86_64-*-mingw* | *-w64-mingw*) - ;; -*-mingw*) - # Can't be handled as Cygwin above since Mingw does not use newlib. - FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup/mingw -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/mingw/include -isystem $$s/winsup/w32api/include' ;; + x86_64-*mingw* | *-w64-mingw*) + # MinGW-w64 does not use newlib, nor does it use winsup. It may, + # however, use a symlink named 'mingw' in ${prefix} . + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L${prefix}/${target}/lib -L${prefix}/mingw/lib -isystem ${prefix}/${target}/include -isystem ${prefix}/mingw/include' + ;; + *-mingw*) + # MinGW can't be handled as Cygwin above since it does not use newlib. + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -L$$r/$(TARGET_SUBDIR)/winsup/mingw -L$$r/$(TARGET_SUBDIR)/winsup/w32api/lib -isystem $$s/winsup/mingw/include -isystem $$s/winsup/w32api/include' + ;; esac # Allow the user to override the flags for diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6ab0707c897..d0df7de35ed 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,599 @@ +2009-11-06 Michael Matz <matz@suse.de> + + PR middle-end/41963 + * tree-ssa-math-opts.c (execute_cse_reciprocals): Check all uses + of a potential reciprocal to really be reciprocals. + +2009-11-06 Jakub Jelinek <jakub@redhat.com> + + * config/i386/x86intrin.h: Include fma4intrin.h, xopintrin.h and + lwpintrin.h after immintrin.h. + * config/i386/fma4intrin.h (__v8sf, __v4df, __m256, __m256d): Remove + typedefs. 
+ + PR middle-end/41935 + * c-common.c (fold_offsetof_1) <case ARRAY_REF>: Don't crash for VLAs + or non-constant index, allow index one past the last element and + allow exceeding array bound in arrays that might be used as flexible + array members. + +2009-11-05 Richard Henderson <rth@redhat.com> + + * config/i386/ia32intrin.h: Protect CRC32 builtins with __SSE4_2__. + +2009-11-05 Paul Brook <paul@codesourcery.com> + + * config/arm/arm.c (arm_fp_model, arm_fpu_arch, arm_fpu_tune): Remove. + (arm_fpu_desc): New. + (all_fpus): Add FPU details. + (fp_model_for_fpu): Remove. + (arm_override_options): Set and use arm_fpu_desc and arm_fpu_attr. + (arm_output_epilogue, arm_save_coproc_regs): Use TARGET_FPA_EMU2. + (arm_file_start): Use arm_fpu_desc. + * config/arm/arm.h (TARGET_FPA, TARGET_MAVERICK, TARGET_VFP, + TARGET_VFPD32, TARGET_VFP3, TARGET_NEON_FP16, TARGET_NEON): Use + arm_fpu_desc. + (TARGET_FPA_EMU2): Define. + (arm_fp_model, fputype, arm_fpu_tune): Remove. + (vfp_reg_type, arm_fpu_desc): New. + * config/arm/arm.md (attr fpu): Simplify. + * config/arm/fpa.md (movxf_fpa): Use TARGET_FPA_EMU2. + * config/arm/linux-elf.h (FPUTYPE_DEFAULT): Use string value. + * config/arm/bpabi.h (FPUTYPE_DEFAULT): Use string value. + * config/arm/netbsd-elf.h (FPUTYPE_DEFAULT): Use string value. + * config/arm/vxworks.h (FPUTYPE_DEFAULT): Use string value. + +2009-11-05 Michael Matz <matz@suse.de> + + * config/i386/i386.c (ix86_builtin_reciprocal): Remove dependency + on TARGET_RECIP. + * doc/invoke.texi (-mrecip): Clarify that we don't need -mrecip + for 1/sqrtf. + +2009-11-04 Jason Merrill <jason@redhat.com> + + PR c++/36912 + * varasm.c (initializer_constant_valid_p): A PLUS_EXPR + or MINUS_EXPR of REAL_TYPE is not a valid constant initializer. + (output_constant): Avoid crash after error. + +2009-11-05 Martin Jambor <mjambor@suse.cz> + + * tree-sra.c (struct access): Changed comment of next_sibling field. + (analyze_modified_params): Loop over accesses of a group rather than + over all with the ame base, pass a common bitmap to + walk_aliased_vdefs. + (unmodified_by_ref_scalar_representative): Build link lists of + accesses of a group. + (splice_param_accesses): Likewise. + +2009-11-04 Kenneth Zadeck <zadeck@naturalbridge.com> + + * df-scan.c (df-uses-record): Add case zero_extract of mem. + +2009-11-04 Eric Botcazou <ebotcazou@adacore.com> + + PR target/10127 + PR ada/20548 + * config/i386/i386.md (probe_stack): New expander. + (logical operation peepholes): Do not split stack checking probes. + +2009-11-04 Harsha Jagasia <harsha.jagasia@amd.com> + Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> + + * doc/invoke.texi (-mlwp): Add documentation. + * doc/extend.texi (x86 intrinsics): Add LWP intrinsics. + * config.gcc (i[34567]86-*-*): Include lwpintrin.h. + (x86_64-*-*): Ditto. + * config/i386/lwpintrin.h: New file, provide x86 compiler + intrinisics for LWP. + * config/i386/cpuid.h (bit_LWP): Define LWP bit. + * config/i386/x86intrin.h: Add LWP check and lwpintrin.h. + * config/i386/i386-c.c (ix86_target_macros_internal): Check + ISA_FLAG for LWP. + * config/i386/i386.h (TARGET_LWP): New macro for LWP. + * config/i386/i386.opt (-mlwp): New switch for LWP support. + * config/i386/i386.c (OPTION_MASK_ISA_LWP_SET): New. + (OPTION_MASK_ISA_LWP_UNSET): New. + (ix86_handle_option): Handle -mlwp. + (isa_opts): Handle -mlwp. + (enum pta_flags): Add PTA_LWP. + (override_options): Add LWP support. + (IX86_BUILTIN_LLWPCB16): New for LWP intrinsic. + (IX86_BUILTIN_LLWPCB32): Ditto. 
+ (IX86_BUILTIN_LLWPCB64): Ditto. + (IX86_BUILTIN_SLWPCB16): Ditto. + (IX86_BUILTIN_SLWPCB32): Ditto. + (IX86_BUILTIN_SLWPCB64): Ditto. + (IX86_BUILTIN_LWPVAL16): Ditto. + (IX86_BUILTIN_LWPVAL32): Ditto. + (IX86_BUILTIN_LWPVAL64): Ditto. + (IX86_BUILTIN_LWPINS16): Ditto. + (IX86_BUILTIN_LWPINS32): Ditto. + (IX86_BUILTIN_LWPINS64): Ditto. + (enum ix86_special_builtin_type): Add LWP intrinsic support. + (builtin_description): Ditto. + (ix86_init_mmx_sse_builtins): Ditto. + (ix86_expand_special_args_builtin): Ditto. + * config/i386/i386.md (UNSPEC_LLWP_INTRINSIC): Add new UNSPEC for + LWP support. + (UNSPEC_SLWP_INTRINSIC): Ditto. + (UNSPECV_LWPVAL_INTRINSIC): Ditto. + (UNSPECV_LWPINS_INTRINSIC): Ditto. + (lwp_llwpcbhi1): New lwp pattern. + (lwp_llwpcbsi1): Ditto. + (lwp_llwpcbdi1): Ditto. + (lwp_slwpcbhi1): Ditto. + (lwp_slwpcbsi1): Ditto. + (lwp_slwpcbdi1): Ditto. + (lwp_lwpvalhi3): Ditto. + (lwp_lwpvalsi3): Ditto. + (lwp_lwpvaldi3): Ditto. + (lwp_lwpinshi3): Ditto. + (lwp_lwpinssi3): Ditto. + (lwp_lwpinsdi3): Ditto. + +2009-11-04 Andrew Pinski <andrew_pinski@playstation.sony.com> + Trevor Smigiel <Trevor_Smigiel@playstation.sony.com> + + PR rtl-opt/41833 + * simplify-rtx.c (simplify_binary_operation_1): Simplify vec_select of + a vec_duplicate. + +2009-11-04 Richard Guenther <rguenther@suse.de> + Rafael Avila de Espindola <espindola@google.com> + + * gcc.c (process_command): Handle arguments name@offset. + +2009-11-04 Harsha Jagasia <harsha.jagasia@amd.com> + Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> + + * config.gcc (i[34567]86-*-*): Include xopintrin.h. + (x86_64-*-*): Ditto. + * config/i386/xopintrin.h: New file, provide common x86 compiler + intrinisics for XOP. + * config/i386/cpuid.h (bit_XOP): Define XOP bit. + * config/i386/x86intrin.h: Add XOP check and xopintrin.h. + * config/i386/i386-c.c(ix86_target_macros_internal): Check + ISA_FLAG for XOP. + * config/i386/i386.h(TARGET_XOP): New macro for XOP. + * config/i386/i386.opt (-mxop): New switch for XOP support. + * config/i386/i386.md (UNSPEC_XOP_UNSIGNED_CMP) + (UNSPEC_XOP_TRUEFALSE) + (UNSPEC_XOP_PERMUTE) + (UNSPEC_FRCZ): Add new UNSPEC for XOP support. + (PPERM_*): New constants for vpperm instruction. + (xop_pcmov_<mode>): Add XOP conditional mov instructions. + * config/i386/i386.c (OPTION_MASK_ISA_XOP_SET): New. + (OPTION_MASK_ISA_XOP_UNSET): New. + (OPTION_MASK_ISA_XOP_UNSET): Change definition to + depend on XOP. + (ix86_handle_option): Handle -mxop. + (isa_opts): Handle -mxop. + (enum pta_flags): Add PTA_XOP. + (override_options): Add XOP support. + (print_operand): Add code for XOP compare instructions. + (ix86_expand_sse_movcc): Extend for XOP conditional move instruction. + (ix86_expand_int_vcond): Extend for XOP compare instruction. + + (IX86_BUILTIN_VPCMOV): New for XOP intrinsic. + (IX86_BUILTIN_VPCMOV_V2DI): Ditto. + (IX86_BUILTIN_VPCMOV_V4SI): Ditto. + (IX86_BUILTIN_VPCMOV_V8HI): Ditto. + (IX86_BUILTIN_VPCMOV_V16QI): Ditto. + (IX86_BUILTIN_VPCMOV_V4SF): Ditto. + (IX86_BUILTIN_VPCMOV_V2DF): Ditto. + + (IX86_BUILTIN_VPCMOV256): Ditto. + (IX86_BUILTIN_VPCMOV_V4DI256): Ditto. + (IX86_BUILTIN_VPCMOV_V8SI256): Ditto. + (IX86_BUILTIN_VPCMOV_V16HI256): Ditto. + (IX86_BUILTIN_VPCMOV_V32QI256): Ditto. + (IX86_BUILTIN_VPCMOV_V8SF256): Ditto. + (IX86_BUILTIN_VPCMOV_V4DF256): Ditto. + + (IX86_BUILTIN_VPPERM): Ditto. + + (IX86_BUILTIN_VPMACSSWW): Ditto. + (IX86_BUILTIN_VPMACSWW): Ditto. + (IX86_BUILTIN_VPMACSSWD): Ditto. + (IX86_BUILTIN_VPMACSWD): Ditto. + (IX86_BUILTIN_VPMACSSDD): Ditto. + (IX86_BUILTIN_VPMACSDD): Ditto. 
+ (IX86_BUILTIN_VPMACSSDQL): Ditto. + (IX86_BUILTIN_VPMACSSDQH): Ditto. + (IX86_BUILTIN_VPMACSDQL): Ditto. + (IX86_BUILTIN_VPMACSDQH): Ditto. + (IX86_BUILTIN_VPMADCSSWD): Ditto. + (IX86_BUILTIN_VPMADCSWD): Ditto. + + (IX86_BUILTIN_VPHADDBW): Ditto. + (IX86_BUILTIN_VPHADDBD): Ditto. + (IX86_BUILTIN_VPHADDBQ): Ditto. + (IX86_BUILTIN_VPHADDWD): Ditto. + (IX86_BUILTIN_VPHADDWQ): Ditto. + (IX86_BUILTIN_VPHADDDQ): Ditto. + (IX86_BUILTIN_VPHADDUBW): Ditto. + (IX86_BUILTIN_VPHADDUBD): Ditto. + (IX86_BUILTIN_VPHADDUBQ): Ditto. + (IX86_BUILTIN_VPHADDUWD): Ditto. + (IX86_BUILTIN_VPHADDUWQ): Ditto. + (IX86_BUILTIN_VPHADDUDQ): Ditto. + (IX86_BUILTIN_VPHSUBBW): Ditto. + (IX86_BUILTIN_VPHSUBWD): Ditto. + (IX86_BUILTIN_VPHSUBDQ): Ditto. + + (IX86_BUILTIN_VPROTB): Ditto. + (IX86_BUILTIN_VPROTW): Ditto. + (IX86_BUILTIN_VPROTD): Ditto. + (IX86_BUILTIN_VPROTQ): Ditto. + (IX86_BUILTIN_VPROTB_IMM): Ditto. + (IX86_BUILTIN_VPROTW_IMM): Ditto. + (IX86_BUILTIN_VPROTD_IMM): Ditto. + (IX86_BUILTIN_VPROTQ_IMM): Ditto. + + (IX86_BUILTIN_VPSHLB): Ditto. + (IX86_BUILTIN_VPSHLW): Ditto. + (IX86_BUILTIN_VPSHLD): Ditto. + (IX86_BUILTIN_VPSHLQ): Ditto. + (IX86_BUILTIN_VPSHAB): Ditto. + (IX86_BUILTIN_VPSHAW): Ditto. + (IX86_BUILTIN_VPSHAD): Ditto. + (IX86_BUILTIN_VPSHAQ): Ditto. + + (IX86_BUILTIN_VFRCZSS): Ditto. + (IX86_BUILTIN_VFRCZSD): Ditto. + (IX86_BUILTIN_VFRCZPS): Ditto. + (IX86_BUILTIN_VFRCZPD): Ditto. + (IX86_BUILTIN_VFRCZPS256): Ditto. + (IX86_BUILTIN_VFRCZPD256): Ditto. + + (IX86_BUILTIN_VPCOMEQUB): Ditto. + (IX86_BUILTIN_VPCOMNEUB): Ditto. + (IX86_BUILTIN_VPCOMLTUB): Ditto. + (IX86_BUILTIN_VPCOMLEUB): Ditto. + (IX86_BUILTIN_VPCOMGTUB): Ditto. + (IX86_BUILTIN_VPCOMGEUB): Ditto. + (IX86_BUILTIN_VPCOMFALSEUB): Ditto. + (IX86_BUILTIN_VPCOMTRUEUB): Ditto. + + (IX86_BUILTIN_VPCOMEQUW): Ditto. + (IX86_BUILTIN_VPCOMNEUW): Ditto. + (IX86_BUILTIN_VPCOMLTUW): Ditto. + (IX86_BUILTIN_VPCOMLEUW): Ditto. + (IX86_BUILTIN_VPCOMGTUW): Ditto. + (IX86_BUILTIN_VPCOMGEUW): Ditto. + (IX86_BUILTIN_VPCOMFALSEUW): Ditto. + (IX86_BUILTIN_VPCOMTRUEUW): Ditto. + + (IX86_BUILTIN_VPCOMEQUD): Ditto. + (IX86_BUILTIN_VPCOMNEUD): Ditto. + (IX86_BUILTIN_VPCOMLTUD): Ditto. + (IX86_BUILTIN_VPCOMLEUD): Ditto. + (IX86_BUILTIN_VPCOMGTUD): Ditto. + (IX86_BUILTIN_VPCOMGEUD): Ditto. + (IX86_BUILTIN_VPCOMFALSEUD): Ditto. + (IX86_BUILTIN_VPCOMTRUEUD): Ditto. + + (IX86_BUILTIN_VPCOMEQUQ): Ditto. + (IX86_BUILTIN_VPCOMNEUQ): Ditto. + (IX86_BUILTIN_VPCOMLTUQ): Ditto. + (IX86_BUILTIN_VPCOMLEUQ): Ditto. + (IX86_BUILTIN_VPCOMGTUQ): Ditto. + (IX86_BUILTIN_VPCOMGEUQ): Ditto. + (IX86_BUILTIN_VPCOMFALSEUQ): Ditto. + (IX86_BUILTIN_VPCOMTRUEUQ): Ditto. + + (IX86_BUILTIN_VPCOMEQB): Ditto. + (IX86_BUILTIN_VPCOMNEB): Ditto. + (IX86_BUILTIN_VPCOMLTB): Ditto. + (IX86_BUILTIN_VPCOMLEB): Ditto. + (IX86_BUILTIN_VPCOMGTB): Ditto. + (IX86_BUILTIN_VPCOMGEB): Ditto. + (IX86_BUILTIN_VPCOMFALSEB): Ditto. + (IX86_BUILTIN_VPCOMTRUEB): Ditto. + + (IX86_BUILTIN_VPCOMEQW): Ditto. + (IX86_BUILTIN_VPCOMNEW): Ditto. + (IX86_BUILTIN_VPCOMLTW): Ditto. + (IX86_BUILTIN_VPCOMLEW): Ditto. + (IX86_BUILTIN_VPCOMGTW): Ditto. + (IX86_BUILTIN_VPCOMGEW): Ditto. + (IX86_BUILTIN_VPCOMFALSEW): Ditto. + (IX86_BUILTIN_VPCOMTRUEW): Ditto. + + (IX86_BUILTIN_VPCOMEQD): Ditto. + (IX86_BUILTIN_VPCOMNED): Ditto. + (IX86_BUILTIN_VPCOMLTD): Ditto. + (IX86_BUILTIN_VPCOMLED): Ditto. + (IX86_BUILTIN_VPCOMGTD): Ditto. + (IX86_BUILTIN_VPCOMGED): Ditto. + (IX86_BUILTIN_VPCOMFALSED): Ditto. + (IX86_BUILTIN_VPCOMTRUED): Ditto. + + (IX86_BUILTIN_VPCOMEQQ): Ditto. + (IX86_BUILTIN_VPCOMNEQ): Ditto. 
+ (IX86_BUILTIN_VPCOMLTQ): Ditto. + (IX86_BUILTIN_VPCOMLEQ): Ditto. + (IX86_BUILTIN_VPCOMGTQ): Ditto. + (IX86_BUILTIN_VPCOMGEQ): Ditto. + (IX86_BUILTIN_VPCOMFALSEQ): Ditto. + (IX86_BUILTIN_VPCOMTRUEQ): Ditto. + + (enum multi_arg_type): New enum for describing the various XOP + intrinsic argument types. + (bdesc_multi_arg): New table for XOP intrinsics. + (ix86_init_mmx_sse_builtins): Add XOP intrinsic support. + (ix86_expand_multi_arg_builtin): New function for creating XOP + intrinsics. + + * config/i386/sse.md (sserotatemax): New mode attribute for XOP. + (xop_pmacsww): Ditto. + (xop_pmacssww): Ditto. + (xop_pmacsdd): Ditto. + (xop_pmacssdd): Ditto. + (xop_pmacssdql): Ditto. + (xop_pmacssdqh): Ditto. + (xop_pmacsdql): Ditto. + (xop_pmacsdql_mem): Ditto. + (xop_mulv2div2di3_low): Ditto. + (xop_pmacsdqh): Ditto. + (xop_pmacsdqh_mem): Ditto. + (xop_mulv2div2di3_high): Ditto. + (xop_pmacsswd): Ditto. + (xop_pmacswd): Ditto. + (xop_pmadcsswd): Ditto. + (xop_pmadcswd): Ditto. + (xop_pcmov_<mode>): Ditto. + (xop_pcmov_<mode>)256: Ditto. + (xop_phaddbw): Ditto. + (xop_phaddbd): Ditto. + (xop_phaddbq): Ditto. + (xop_phaddwd): Ditto. + (xop_phaddwq): Ditto. + (xop_phadddq): Ditto. + (xop_phaddubw): Ditto. + (xop_phaddubd): Ditto. + (xop_phaddubq): Ditto. + (xop_phadduwd): Ditto. + (xop_phadduwq): Ditto. + (xop_phaddudq): Ditto. + (xop_phsubbw): Ditto. + (xop_phsubwd): Ditto. + (xop_phsubdq): Ditto. + (xop_pperm): Ditto. + (rotl<mode>3): Ditto. + (rotr<mode>3): Ditto. + (xop_rotl<mode>3): Ditto. + (xop_rotr<mode>3): Ditto. + (vrotr<mode>3): Ditto. + (vrotl<mode>3): Ditto. + (xop_vrotl<mode>3): Ditto. + (vlshr<mode>3): Ditto. + (vashr<mode>3): Ditto. + (vashl<mode>3 + (xop_ashl<mode>3): Ditto. + (xop_lshl<mode>3): Ditto. + (ashlv16qi3): Ditto. + (lshlv16qi3): Ditto. + (ashrv16qi3): Ditto. + (ashrv2di3): Ditto. + (xop_frcz<mode>2): Ditto. + (xop_vmfrcz<mode>2): Ditto. + (xop_frcz<mode>2256): Ditto. + (xop_maskcmp<mode>3): Ditto. + (xop_maskcmp_uns<mode>3): Ditto. + (xop_maskcmp_uns2<mode>3): Ditto. + (xop_pcom_tf<mode>3): Ditto. + + * doc/invoke.texi (-mxop): Add documentation. + * doc/extend.texi (x86 intrinsics): Add XOP intrinsics. + +2009-11-03 Mark Mitchell <mark@codesourcery.com> + + PR driver/11810 + * gcc.c (SWITCHES_NEED_SPACES): Define to "o". + * config/alpha/osf.h (SWITCHES_NEED_SPACES): Remove here. + * config/mips/iris.h (SWITCHES_NEED_SPACES): Remove here. + +2009-11-04 Richard Earnshaw <rearnsha@arm.com> + + PR target/40835 + * arm.md (peephole2 patterns for move and compare): New. + +2009-11-04 Nick Clifton <nickc@redhat.com> + + * defaults.h (CONSTANT_ADDRESS_P): Provide a default definition. + Make sure that it does not allow CONST_DOUBLEs. + * doc/tm.texi (CONSTANT_ADDRESS_P): Update description. + * config/avr/avr.h (CONSTANT_ADDRESS_P): Delete. + * config/bfin/bfin.h (CONSTANT_ADDRESS_P): Delete. + * config/cris/cris.h (CONSTANT_ADDRESS_P): Delete. + * config/fr30/fr30.h (CONSTANT_ADDRESS_P): Delete. + * config/frv/frv.h (CONSTANT_ADDRESS_P): Delete. + * config/m32c/m32c.h (CONSTANT_ADDRESS_P): Delete. + * config/m68hc11/m68hc11.h (CONSTANT_ADDRESS_P): Delete. + * config/mep/mep.h (CONSTANT_ADDRESS_P): Delete. + * config/mn10300/mn10300.h (CONSTANT_ADDRESS_P): Delete. + * config/moxie/moxie.h (CONSTANT_ADDRESS_P): Delete. + * config/pdp11/pdp11.h (CONSTANT_ADDRESS_P): Delete. + * config/picochip/picochip.h (CONSTANT_ADDRESS_P): Delete. + * config/score/score.h (CONSTANT_ADDRESS_P): Delete. + * config/stormy16/stormy16.h (CONSTANT_ADDRESS_P): Delete. 
+ +2009-11-04 Richard Guenther <rguenther@suse.de> + + PR tree-optimization/41919 + * tree-vrp.c (test_for_singularity): Properly compare values. + +2009-11-04 Revital Eres <eres@il.ibm.com> + + * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): + Consider peeling for alignment only for stores and remove + redundant assignment. + +2009-11-04 Maxim Kuvyrkov <maxim@codesourcery.com> + + PR target/41302 + * config/m68k/m68k.c (m68k_reg_present_p): New static function. + (m68k_ok_for_sibcall_p): Handle different result return locations. + +2009-11-04 Richard Guenther <rguenther@suse.de> + + * c-opts.c (c_common_post_options): Move LTO option processing + code ... + * opts.c (decode_options): ... here. + +2009-11-04 Jakub Jelinek <jakub@redhat.com> + + * c-common.c (fold_offsetof_1): Use %wd instead of + HOST_WIDE_INT_PRINT_DEC. + +2009-11-04 Maciej W. Rozycki <macro@linux-mips.org> + + * config/vax/linux.h (TARGET_OS_CPP_BUILTINS): Don't define + __pic__ or __PIC__. + +2009-11-04 Maciej W. Rozycki <macro@linux-mips.org> + + * config.gcc (vax-*-linux*): Keep the original contents of + tmake_file while adding vax/t-linux. + +2009-11-03 Eric Botcazou <ebotcazou@adacore.com> + + PR target/10127 + PR ada/20548 + * expr.h (STACK_CHECK_PROBE_INTERVAL): Delete. + (STACK_CHECK_PROBE_INTERVAL_EXP): New macro. + (STACK_CHECK_MOVING_SP): Likewise. + * system.h (STACK_CHECK_PROBE_INTERVAL): Poison it. + * doc/tm.texi (Stack Checking): Delete STACK_CHECK_PROBE_INTERVAL. + Document STACK_CHECK_PROBE_INTERVAL_EXP and STACK_CHECK_MOVING_SP. + * doc/md.texi (Standard Pattern Names): Tweak entry of CHECK_STACK. + Document PROBE_STACK. + * explow.c (anti_adjust_stack_and_probe): New function. + (allocate_dynamic_stack_space): Do not directly allocate space if + STACK_CHECK_MOVING_SP, instead invoke above function. + (emit_stack_probe): Handle probe_stack insn. + (PROBE_INTERVAL): New macro. + (STACK_GROW_OPTAB): Likewise. + (STACK_GROW_OFF): Likewise. + (probe_stack_range): Use Pmode and memory_address consistently. Fix + loop condition in the small constant case. Rewrite in the general + case to be immune to wraparounds. Make sure the address of probes + is valid. Try to use [base + disp] addressing mode if possible. + * ira.c (setup_eliminable_regset): Set frame_pointer_needed if stack + checking is enabled and STACK_CHECK_MOVING_SP. + * rtlanal.c (may_trap_p_1) <MEM>: If stack checking is enabled, + return 1 for volatile references to the stack pointer. + * tree.c (build_common_builtin_nodes): Do not set ECF_NOTHROW on + __builtin_alloca if stack checking is enabled. + * unwind-dw2.c (uw_identify_context): Take into account whether the + context is that of a signal frame or not. + * config/i386/linux.h (STACK_CHECK_MOVING_SP): Define to 1. + * config/i386/linux64.h (STACK_CHECK_MOVING_SP): Likewise. + +2009-11-03 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/41917 + * rtlanal.c (num_sign_bit_copies1) <case UMOD>: If sign bit of second + operand isn't known to be 0, return 1. + +2009-11-03 Richard Sandiford <rdsandiford@googlemail.com> + + * config/mips/mips.md: Fix typos. + +2009-11-03 Richard Sandiford <rdsandiford@googlemail.com> + + * doc/invoke.texi: Fix typo. + +2009-11-03 Paul Brook <paul@codesourcery.com> + + * config/arm/neon.ml (vectype): Add T_floatSF. + (string_of_vectype): Ditto. + * config/arm/neon-gen.ml (signed_ctype): Add T_floatSF. + (deftypes): Use float for float32_t. + * config/arm/arm_neon.h: Regenerate. 
+ +2009-11-03 Nick Clifton <nickc@redhat.com> + Kevin Buettner <kevinb@redhat.com> + + * config/rx/predicates.md (rx_store_multiple_vector): Reverse + order of expected registers. + (rx_load_multiple_vector): Likewise. + (rx_rtsd_vector): Likewise. + * config/rx/rx.c (rx_cpu_type): New variable. + (rx_print_operand): Fix bug printing 64-bit constant values. + (rx_emit_stack_pushm): Reverse order of pushed registers. + (gen_rx_store_vector): Likewise. + (is_fast_interrupt_func): Only accept "fast_interrupt" as the + attribute name. + (is_exception_func): Rename to is_interrupt_func and only accept + "interrupt" as the attribute name. + (rx_get_stack_layout): Use new function name. + (rx_func_attr_inlinable): Likewise. + (rx_attribute_table): Remove "exception". + (rx_expand_prologue): If necessary push the accumulator register + in the prologue of interrupt functions. + (rx_expand_epilogue): If necessary pop the accumulator. + (rx_builtins): Add RX_BUILTIN_MVTIPL. + (rx_expand_builtin_stz): Remove. + (rx_expand_builtin_mvtipl): New function. + (rx_init_builtins): Handle RX_BUILTIN_MVTIPL. + (rx_expand_builtin): Likewise. + (rx_enable_fpu): New variable. + (rx_handle_option): Handle -fpu, -nofpu, -mcpu and -patch. + * config/rx/rx.h (TARGET_CPU_CPP_BUILTINS): Assert machine based + on rx_cpu_type. Define __RX_FPU_INSNS__ if FPU insns are allowed. + (enum rx_cpu_types): Define. + (ASM_SPEC): Pass -m32bit-doubles on to assembler. + (INCOMING_FRAME_SP_OFFSET): Define. + (ARG_POINTER_CFA_OFFSET): Define. + (FRAME_POINTER_CFA_OFFSET): Define. + (OVERRIDE_OPTIONS): Enable fast math if RX FPU insns are enabled. + (ALLOW_RX_FPU_INSNS): Define. + * config/rx/rx.md: Test ALLOW_RX_FPU_INSNS instead of + fast_math_flags_set_p. + (UNSPEC_BUILTIN_MVTIPL): Define. + (revl): Rename to bswapsi2. + (bswaphi2): New pattern. + (mvtachi): Mark as volatile because it uses a register unknown to GCC. + (mvtaclo): Likewise. + (racw): Likewise. + (mvtc): Remove clobber of cc0. + (mvtcp): Delete. + (opecp): Delete. + * config/rx/rx.opt (mieee): Remove. + (fpu): Add. + (nofpu): Add. + (mcpu=): Add. + (patch=): Add. + (msave-acc-in-interrupts): Add. + * config/rx/t-rx (MULTILIB_OPTIONS): Change default to 64bit doubles. + (MULTILIB_DIRS): Likewise. + (MULTILIB_MATCHES): Treat -fpu as an alias for -m32bit-doubles. + * doc/extend.texi: Remove description of "exception" function + attribute. + * doc/invoke.texi: Document -fpu, -nofpu, -mcpu=, -patch= and + -msave-acc-in-interrupts options. + +2009-11-03 Richard Guenther <rguenther@suse.de> + + * c-common.c (fold_offsetof_1): Use HOST_WIDE_INT_PRINT_DEC. + +2009-11-03 Dodji Seketeli <dodji@redhat.com> + + PR c++/38699 + * c-common.c (fold_offsetof_1): Issue errors when the member designator + of the offsetof expression is not legitimate. + +2009-11-03 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.md (*call_value_1_rex64_ms_sysv): Use register + names instead of numerical constants. + (sse_prologue_save): Ditto. + (*sse_prologue_save_insn): Ditto. + 2009-11-03 Uros Bizjak <ubizjak@gmail.com> PR target/41900 @@ -5,13 +601,13 @@ (TARGET_CALL_ESP): New define. * config/i386/i386.c (initial_ix86_tune_features): Initialize X86_ARCH_CALL_ESP. - * config/i386/i386.md - (*call_pop_1_esp, *call_1_esp, *call_value_pop_1_esp, - *call_value_1_esp): Rename from *call_pop_1, *call_1, - *call_value_pop_1 and *call_value_1. Depend on TARGET_CALL_ESP. 
+ * config/i386/i386.md (*call_pop_1_esp, *call_1_esp, + *call_value_pop_1_esp, *call_value_1_esp): Rename from *call_pop_1, + *call_1, *call_value_pop_1 and *call_value_1. Depend on + TARGET_CALL_ESP. (*call_pop_1, *call_1, *call_value_pop_1, *call_value_1): New patterns, use "lsm" as operand 1 constraint. - * config/i386/predicates.md (call_insn_operand): Depend on + * config/i386/predicates.md (call_insn_operand): Depend on index_register_operand for !TARGET_CALL_ESP to avoid %esp register. 2009-11-02 Ulrich Weigand <Ulrich.Weigand@de.ibm.com> @@ -76,8 +672,7 @@ * config/mn10300/mn10300.c (mn10300_function_value): Make static, add new 'outgoing' argument. - (mn10300_libcall_value, mn10300_function_value_regno_p): New - functions. + (mn10300_libcall_value, mn10300_function_value_regno_p): New functions. (TARGET_FUNCTION_VALUE, TARGET_LIBCALL_VALUE): Declare. * config/mn10300/mn10300.h: (FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, LIBCALL_VALUE): Remove. @@ -186,7 +781,7 @@ PR target/38018 * doc/tm.texi (OVERRIDE_OPTIONS): Update. (TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE): New. - * optc-gen.awk (cl_target_option_restore): Include call to + * optc-gen.awk (cl_target_option_restore): Include call to targetm.override_options_after_change. * target-def.h (TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE): New. * target.h (override_options_after_change): New. @@ -210,7 +805,7 @@ 2009-10-29 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> * config/arm/arm.c (find_best_start): Fix type of remainder to be - unsigned . + unsigned. 2009-10-29 Martin Jambor <mjambor@suse.cz> diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 07f35714d86..459615d7a95 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20091103 +20091106 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index fa278fe791b..68fc7d77a1e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2005,7 +2005,7 @@ c-convert.o : c-convert.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ c-pragma.o: c-pragma.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(TREE_H) $(FUNCTION_H) $(C_PRAGMA_H) $(TOPLEV_H) output.h $(GGC_H) $(TM_P_H) \ $(C_COMMON_H) $(TARGET_H) gt-c-pragma.h $(CPPLIB_H) $(FLAGS_H) $(DIAGNOSTIC_H) \ - opts.h + opts.h $(PLUGINS_H) graph.o: graph.c $(SYSTEM_H) coretypes.h $(TM_H) $(TOPLEV_H) $(FLAGS_H) output.h \ $(RTL_H) $(FUNCTION_H) hard-reg-set.h $(BASIC_BLOCK_H) graph.h $(OBSTACK_H) \ $(CONFIG_H) @@ -4330,7 +4330,8 @@ PLUGIN_HEADERS = $(TREE_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(host_xm_file_list) $(host_xm_include_list) $(xm_include_list) \ intl.h $(PLUGIN_VERSION_H) $(DIAGNOSTIC_H) $(C_COMMON_H) $(C_PRETTY_PRINT_H) \ tree-iterator.h $(PLUGIN_H) $(TREE_FLOW_H) langhooks.h incpath.h \ - tree-ssa-sccvn.h real.h output.h $(IPA_UTILS_H) + tree-ssa-sccvn.h real.h output.h $(IPA_UTILS_H) \ + $(C_PRAGMA_H) $(CPPLIB_H) $(FUNCTION_H) # Install the headers needed to build a plugin. install-plugin: installdirs lang.install-plugin diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 4dc41876af4..8c98429a2f3 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,12 @@ +2009-11-05 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/utils.c (gnat_type_for_mode): Handle vector modes. + +2009-11-05 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/trans.c (lvalue_required_p) <N_Unchecked_Conversion>: + New case. + 2009-10-30 Eric Botcazou <ebotcazou@adacore.com> * gcc-interface/utils.c (MAX_FIXED_MODE_SIZE): Delete. 
diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c index 58afbfddac6..41be8bb77af 100644 --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -657,17 +657,16 @@ gigi (Node_Id gnat_root, int max_gnat_node, int number_name, error_gnat_node = Empty; } -/* Return a positive value if an lvalue is required for GNAT_NODE. - GNU_TYPE is the type that will be used for GNAT_NODE in the - translated GNU tree. CONSTANT indicates whether the underlying - object represented by GNAT_NODE is constant in the Ada sense, - ALIASED whether it is aliased (but the latter doesn't affect - the outcome if CONSTANT is not true). - - The function climbs up the GNAT tree starting from the node and - returns 1 upon encountering a node that effectively requires an - lvalue downstream. It returns int instead of bool to facilitate - usage in non purely binary logic contexts. */ +/* Return a positive value if an lvalue is required for GNAT_NODE. GNU_TYPE + is the type that will be used for GNAT_NODE in the translated GNU tree. + CONSTANT indicates whether the underlying object represented by GNAT_NODE + is constant in the Ada sense, ALIASED whether it is aliased (but the latter + doesn't affect the outcome if CONSTANT is not true). + + The function climbs up the GNAT tree starting from the node and returns 1 + upon encountering a node that effectively requires an lvalue downstream. + It returns int instead of bool to facilitate usage in non-purely binary + logic contexts. */ static int lvalue_required_p (Node_Id gnat_node, tree gnu_type, bool constant, @@ -754,6 +753,13 @@ lvalue_required_p (Node_Id gnat_node, tree gnu_type, bool constant, || (Is_Composite_Type (Underlying_Type (Etype (gnat_node))) && Is_Atomic (Entity (Name (gnat_parent))))); + case N_Unchecked_Type_Conversion: + /* Returning 0 is very likely correct but we get better code if we + go through the conversion. */ + return lvalue_required_p (gnat_parent, + get_unpadded_type (Etype (gnat_parent)), + constant, aliased); + default: return 0; } diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c index 6ee5a912856..c79dd4e7a65 100644 --- a/gcc/ada/gcc-interface/utils.c +++ b/gcc/ada/gcc-interface/utils.c @@ -2177,16 +2177,28 @@ gnat_type_for_mode (enum machine_mode mode, int unsignedp) { if (mode == BLKmode) return NULL_TREE; - else if (mode == VOIDmode) + + if (mode == VOIDmode) return void_type_node; - else if (COMPLEX_MODE_P (mode)) + + if (COMPLEX_MODE_P (mode)) return NULL_TREE; - else if (SCALAR_FLOAT_MODE_P (mode)) + + if (SCALAR_FLOAT_MODE_P (mode)) return float_type_for_precision (GET_MODE_PRECISION (mode), mode); - else if (SCALAR_INT_MODE_P (mode)) + + if (SCALAR_INT_MODE_P (mode)) return gnat_type_for_size (GET_MODE_BITSIZE (mode), unsignedp); - else - return NULL_TREE; + + if (VECTOR_MODE_P (mode)) + { + enum machine_mode inner_mode = GET_MODE_INNER (mode); + tree inner_type = gnat_type_for_mode (inner_mode, unsignedp); + if (inner_type) + return build_vector_type_for_mode (inner_type, mode); + } + + return NULL_TREE; } /* Return the unsigned version of a TYPE_NODE, a scalar type. 
*/ diff --git a/gcc/c-common.c b/gcc/c-common.c index 8b85f66a2ad..20b24f0c3c2 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -8356,15 +8356,14 @@ fold_offsetof_1 (tree expr, tree stop_ref) error ("cannot apply %<offsetof%> when %<operator[]%> is overloaded"); return error_mark_node; - case INTEGER_CST: - gcc_assert (integer_zerop (expr)); - return size_zero_node; - case NOP_EXPR: case INDIRECT_REF: - base = fold_offsetof_1 (TREE_OPERAND (expr, 0), stop_ref); - gcc_assert (base == error_mark_node || base == size_zero_node); - return base; + if (!integer_zerop (TREE_OPERAND (expr, 0))) + { + error ("cannot apply %<offsetof%> to a non constant address"); + return error_mark_node; + } + return size_zero_node; case COMPONENT_REF: base = fold_offsetof_1 (TREE_OPERAND (expr, 0), stop_ref); @@ -8397,6 +8396,48 @@ fold_offsetof_1 (tree expr, tree stop_ref) } t = convert (sizetype, t); off = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (expr)), t); + + /* Check if the offset goes beyond the upper bound of the array. */ + if (code == PLUS_EXPR && TREE_CODE (t) == INTEGER_CST) + { + tree upbound = array_ref_up_bound (expr); + if (upbound != NULL_TREE + && TREE_CODE (upbound) == INTEGER_CST + && !tree_int_cst_equal (upbound, + TYPE_MAX_VALUE (TREE_TYPE (upbound)))) + { + upbound = size_binop (PLUS_EXPR, upbound, + build_int_cst (TREE_TYPE (upbound), 1)); + if (tree_int_cst_lt (upbound, t)) + { + tree v; + + for (v = TREE_OPERAND (expr, 0); + TREE_CODE (v) == COMPONENT_REF; + v = TREE_OPERAND (v, 0)) + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (v, 0))) + == RECORD_TYPE) + { + tree fld_chain = TREE_CHAIN (TREE_OPERAND (v, 1)); + for (; fld_chain; fld_chain = TREE_CHAIN (fld_chain)) + if (TREE_CODE (fld_chain) == FIELD_DECL) + break; + + if (fld_chain) + break; + } + /* Don't warn if the array might be considered a poor + man's flexible array member with a very permissive + definition thereof. */ + if (TREE_CODE (v) == ARRAY_REF + || TREE_CODE (v) == COMPONENT_REF) + warning (OPT_Warray_bounds, + "index %E denotes an offset " + "greater than size of %qT", + t, TREE_TYPE (TREE_OPERAND (expr, 0))); + } + } + } break; case COMPOUND_EXPR: diff --git a/gcc/c-opts.c b/gcc/c-opts.c index 6c2f5a59cd3..e026fd97dc7 100644 --- a/gcc/c-opts.c +++ b/gcc/c-opts.c @@ -1033,29 +1033,6 @@ c_common_post_options (const char **pfilename) C_COMMON_OVERRIDE_OPTIONS; #endif - if (flag_lto || flag_whopr) - { -#ifdef ENABLE_LTO - flag_generate_lto = 1; - - /* When generating IL, do not operate in whole-program mode. - Otherwise, symbols will be privatized too early, causing link - errors later. */ - flag_whole_program = 0; - - /* FIXME lto. Disable var-tracking until debug information - is properly handled in free_lang_data. */ - flag_var_tracking = 0; -#else - error ("LTO support has not been enabled in this configuration"); -#endif - } - - /* Reconcile -flto and -fwhopr. Set additional flags as appropriate and - check option consistency. */ - if (flag_lto && flag_whopr) - error ("-flto and -fwhopr are mutually exclusive"); - /* Excess precision other than "fast" requires front-end support. */ if (c_dialect_cxx ()) diff --git a/gcc/c-pragma.c b/gcc/c-pragma.c index f2816448707..f71399fa93e 100644 --- a/gcc/c-pragma.c +++ b/gcc/c-pragma.c @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. 
If not see #include "target.h" #include "diagnostic.h" #include "opts.h" +#include "plugin.h" #define GCC_BAD(gmsgid) \ do { warning (OPT_Wpragmas, gmsgid); return; } while (0) @@ -1450,6 +1451,9 @@ init_pragma (void) #ifdef REGISTER_TARGET_PRAGMAS REGISTER_TARGET_PRAGMAS (); #endif + + /* Allow plugins to register their own pragmas. */ + invoke_plugin_callbacks (PLUGIN_PRAGMAS, NULL); } #include "gt-c-pragma.h" diff --git a/gcc/config.gcc b/gcc/config.gcc index 4f388babe6a..1d3c3fc84f8 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -287,8 +287,8 @@ i[34567]86-*-*) extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h - immintrin.h x86intrin.h avxintrin.h - ia32intrin.h cross-stdarg.h" + immintrin.h x86intrin.h avxintrin.h xopintrin.h + ia32intrin.h cross-stdarg.h lwpintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -297,8 +297,8 @@ x86_64-*-*) extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h - immintrin.h x86intrin.h avxintrin.h - ia32intrin.h cross-stdarg.h" + immintrin.h x86intrin.h avxintrin.h xopintrin.h + ia32intrin.h cross-stdarg.h lwpintrin.h" need_64bit_hwint=yes ;; ia64-*-*) @@ -2503,7 +2503,7 @@ v850-*-*) ;; vax-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h svr4.h linux.h vax/elf.h vax/linux.h" - tmake_file=vax/t-linux + tmake_file="${tmake_file} vax/t-linux" ;; vax-*-netbsdelf*) tm_file="${tm_file} elfos.h netbsd.h netbsd-elf.h vax/elf.h vax/netbsd-elf.h" diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h index 2b5165c0754..81c12aa14fc 100644 --- a/gcc/config/alpha/osf.h +++ b/gcc/config/alpha/osf.h @@ -167,10 +167,6 @@ __enable_execute_stack (void *addr) \ #define LD_INIT_SWITCH "-init" #define LD_FINI_SWITCH "-fini" -/* The linker needs a space after "-o". This allows -oldstyle_liblookup to - be passed to ld. */ -#define SWITCHES_NEED_SPACES "o" - /* Select a format to encode pointers in exception handling data. CODE is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is true if the symbol may be affected by dynamic relocations. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 25e433cd3a4..4c7fcb65854 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -524,14 +524,11 @@ enum processor_type arm_tune = arm_none; /* The default processor used if not overridden by commandline. */ static enum processor_type arm_default_cpu = arm_none; -/* Which floating point model to use. */ -enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. */ -enum fputype arm_fpu_arch; - /* Which floating point hardware to schedule for. */ -enum fputype arm_fpu_tune; +int arm_fpu_attr; + +/* Which floating popint hardware to use. */ +const struct arm_fpu_desc *arm_fpu_desc; /* Whether to use floating point hardware. */ enum float_abi_type arm_float_abi; @@ -809,46 +806,21 @@ static struct arm_cpu_select arm_select[] = char arm_arch_name[] = "__ARM_ARCH_0UNK__"; -struct fpu_desc -{ - const char * name; - enum fputype fpu; -}; - - /* Available values for -mfpu=. 
*/ -static const struct fpu_desc all_fpus[] = -{ - {"fpa", FPUTYPE_FPA}, - {"fpe2", FPUTYPE_FPA_EMU2}, - {"fpe3", FPUTYPE_FPA_EMU2}, - {"maverick", FPUTYPE_MAVERICK}, - {"vfp", FPUTYPE_VFP}, - {"vfp3", FPUTYPE_VFP3}, - {"vfpv3", FPUTYPE_VFP3}, - {"vfpv3-d16", FPUTYPE_VFP3D16}, - {"neon", FPUTYPE_NEON}, - {"neon-fp16", FPUTYPE_NEON_FP16} -}; - - -/* Floating point models used by the different hardware. - See fputype in arm.h. */ - -static const enum arm_fp_model fp_model_for_fpu[] = -{ - /* No FP hardware. */ - ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */ - ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ - ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */ - ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */ +static const struct arm_fpu_desc all_fpus[] = +{ + {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false}, + {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false}, + {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false}, + {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false}, + {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, + {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, + {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, + {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, + {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, + /* Compatibility aliases. */ + {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, }; @@ -1615,7 +1587,6 @@ arm_override_options (void) if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) error ("iwmmxt abi requires an iwmmxt capable cpu"); - arm_fp_model = ARM_FP_MODEL_UNKNOWN; if (target_fpu_name == NULL && target_fpe_name != NULL) { if (streq (target_fpe_name, "2")) @@ -1626,46 +1597,52 @@ arm_override_options (void) error ("invalid floating point emulation option: -mfpe=%s", target_fpe_name); } - if (target_fpu_name != NULL) - { - /* The user specified a FPU. */ - for (i = 0; i < ARRAY_SIZE (all_fpus); i++) - { - if (streq (all_fpus[i].name, target_fpu_name)) - { - arm_fpu_arch = all_fpus[i].fpu; - arm_fpu_tune = arm_fpu_arch; - arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; - break; - } - } - if (arm_fp_model == ARM_FP_MODEL_UNKNOWN) - error ("invalid floating point option: -mfpu=%s", target_fpu_name); - } - else + + if (target_fpu_name == NULL) { #ifdef FPUTYPE_DEFAULT - /* Use the default if it is specified for this platform. */ - arm_fpu_arch = FPUTYPE_DEFAULT; - arm_fpu_tune = FPUTYPE_DEFAULT; + target_fpu_name = FPUTYPE_DEFAULT; #else - /* Pick one based on CPU type. */ - /* ??? Some targets assume FPA is the default. 
- if ((insn_flags & FL_VFP) != 0) - arm_fpu_arch = FPUTYPE_VFP; - else - */ if (arm_arch_cirrus) - arm_fpu_arch = FPUTYPE_MAVERICK; + target_fpu_name = "maverick"; else - arm_fpu_arch = FPUTYPE_FPA_EMU2; + target_fpu_name = "fpe2"; #endif - if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2) - arm_fpu_tune = FPUTYPE_FPA; + } + + arm_fpu_desc = NULL; + for (i = 0; i < ARRAY_SIZE (all_fpus); i++) + { + if (streq (all_fpus[i].name, target_fpu_name)) + { + arm_fpu_desc = &all_fpus[i]; + break; + } + } + if (!arm_fpu_desc) + error ("invalid floating point option: -mfpu=%s", target_fpu_name); + + switch (arm_fpu_desc->model) + { + case ARM_FP_MODEL_FPA: + if (arm_fpu_desc->rev == 2) + arm_fpu_attr = FPU_FPE2; + else if (arm_fpu_desc->rev == 3) + arm_fpu_attr = FPU_FPE3; else - arm_fpu_tune = arm_fpu_arch; - arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; - gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN); + arm_fpu_attr = FPU_FPA; + break; + + case ARM_FP_MODEL_MAVERICK: + arm_fpu_attr = FPU_MAVERICK; + break; + + case ARM_FP_MODEL_VFP: + arm_fpu_attr = FPU_VFP; + break; + + default: + gcc_unreachable(); } if (target_float_abi_name != NULL) @@ -1687,7 +1664,7 @@ arm_override_options (void) arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; if (TARGET_AAPCS_BASED - && (arm_fp_model == ARM_FP_MODEL_FPA)) + && (arm_fpu_desc->model == ARM_FP_MODEL_FPA)) error ("FPA is unsupported in the AAPCS"); if (TARGET_AAPCS_BASED) @@ -1715,7 +1692,7 @@ arm_override_options (void) /* If soft-float is specified then don't use FPU. */ if (TARGET_SOFT_FLOAT) - arm_fpu_arch = FPUTYPE_NONE; + arm_fpu_attr = FPU_NONE; if (TARGET_AAPCS_BASED) { @@ -1742,8 +1719,7 @@ arm_override_options (void) /* For arm2/3 there is no need to do any scheduling if there is only a floating point emulator, or we are doing software floating-point. */ if ((TARGET_SOFT_FLOAT - || arm_fpu_tune == FPUTYPE_FPA_EMU2 - || arm_fpu_tune == FPUTYPE_FPA_EMU3) + || (TARGET_FPA && arm_fpu_desc->rev)) && (tune_flags & FL_MODE32) == 0) flag_schedule_insns = flag_schedule_insns_after_reload = 0; @@ -13305,7 +13281,7 @@ arm_output_epilogue (rtx sibling) /* This variable is for the Virtual Frame Pointer, not VFP regs. */ int vfp_offset = offsets->frame; - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -13528,7 +13504,7 @@ arm_output_epilogue (rtx sibling) SP_REGNUM, HARD_FRAME_POINTER_REGNUM); } - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -14254,7 +14230,7 @@ arm_save_coproc_regs(void) /* Save any floating point call-saved registers used by this function. 
*/ - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -19736,45 +19712,8 @@ arm_file_start (void) } else { - int set_float_abi_attributes = 0; - switch (arm_fpu_arch) - { - case FPUTYPE_FPA: - fpu_name = "fpa"; - break; - case FPUTYPE_FPA_EMU2: - fpu_name = "fpe2"; - break; - case FPUTYPE_FPA_EMU3: - fpu_name = "fpe3"; - break; - case FPUTYPE_MAVERICK: - fpu_name = "maverick"; - break; - case FPUTYPE_VFP: - fpu_name = "vfp"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3D16: - fpu_name = "vfpv3-d16"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3: - fpu_name = "vfpv3"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON: - fpu_name = "neon"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON_FP16: - fpu_name = "neon-fp16"; - set_float_abi_attributes = 1; - break; - default: - abort(); - } - if (set_float_abi_attributes) + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) { if (TARGET_HARD_FLOAT) asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 9272ca51cba..2dfd22df45c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -190,9 +190,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) /* Use hardware floating point calling convention. */ #define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) -#define TARGET_FPA (arm_fp_model == ARM_FP_MODEL_FPA) -#define TARGET_MAVERICK (arm_fp_model == ARM_FP_MODEL_MAVERICK) -#define TARGET_VFP (arm_fp_model == ARM_FP_MODEL_VFP) +#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA) +#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) #define TARGET_IWMMXT (arm_arch_iwmmxt) #define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) #define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) @@ -216,6 +216,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) /* Thumb-1 only. */ #define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) +/* FPA emulator without LFM. */ +#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently @@ -223,27 +225,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void); to be more careful with TARGET_NEON as noted below. */ /* FPU is has the full VFPv3/NEON register file of 32 D registers. */ -#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) /* FPU supports VFPv3 instructions. */ -#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3D16 \ - || TARGET_VFPD32)) +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) /* FPU supports NEON/VFP half-precision floating-point. */ -#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16) +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) /* FPU supports Neon instructions. 
The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ - && arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) + && TARGET_VFP && arm_fpu_desc->neon) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ @@ -300,42 +296,25 @@ enum arm_fp_model ARM_FP_MODEL_VFP }; -extern enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. Also update - fp_model_for_fpu in arm.c when adding entries to this list. */ -enum fputype +enum vfp_reg_type { - /* No FP hardware. */ - FPUTYPE_NONE, - /* Full FPA support. */ - FPUTYPE_FPA, - /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */ - FPUTYPE_FPA_EMU2, - /* Emulated FPA hardware, Issue 3 emulator. */ - FPUTYPE_FPA_EMU3, - /* Cirrus Maverick floating point co-processor. */ - FPUTYPE_MAVERICK, - /* VFP. */ - FPUTYPE_VFP, - /* VFPv3-D16. */ - FPUTYPE_VFP3D16, - /* VFPv3. */ - FPUTYPE_VFP3, - /* Neon. */ - FPUTYPE_NEON, - /* Neon with half-precision float extensions. */ - FPUTYPE_NEON_FP16 + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE }; -/* Recast the floating point class to be the floating point attribute. */ -#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune) - -/* What type of floating point to tune for */ -extern enum fputype arm_fpu_tune; - -/* What type of floating point instructions are available */ -extern enum fputype arm_fpu_arch; +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. */ +extern int arm_fpu_attr; enum float_abi_type { diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index b8bf700242b..52edcbaa17b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -160,7 +160,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). -(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -6770,6 +6770,7 @@ (const_int 6) (const_int 8))))] ) + (define_insn "*movsi_cbranchsi4" [(set (pc) (if_then_else @@ -6833,6 +6834,45 @@ (const_int 10)))))] ) +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 1) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +;; Sigh! This variant shouldn't be needed, but combine often fails to +;; merge cases like this because the op1 is a hard register in +;; CLASS_LIKELY_SPILLED_P. 
+(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + (define_insn "*negated_cbranchsi4" [(set (pc) (if_then_else diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index faaaf7bca39..ccfc7426077 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -61,7 +61,7 @@ typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_sf float32_t; +typedef float float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; @@ -5085,7 +5085,7 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5151,7 +5151,7 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5283,7 +5283,7 @@ vdup_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vdup_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5349,7 +5349,7 @@ vdupq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vdupq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5415,7 +5415,7 @@ vmov_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmov_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5481,7 +5481,7 @@ vmovq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmovq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -6591,7 +6591,7 @@ vmul_n_s32 (int32x2_t __a, int32_t __b) __extension__ static __inline 
float32x2_t __attribute__ ((__always_inline__)) vmul_n_f32 (float32x2_t __a, float32_t __b) { - return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3); + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6621,7 +6621,7 @@ vmulq_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmulq_n_f32 (float32x4_t __a, float32_t __b) { - return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3); + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6735,7 +6735,7 @@ vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6765,7 +6765,7 @@ vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6831,7 +6831,7 @@ vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6861,7 +6861,7 @@ vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -7851,7 +7851,7 @@ vld1_s64 (const int64_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1v2sf (__a); + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -7917,7 +7917,7 @@ vld1q_s64 (const int64_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1v4sf (__a); + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -7977,7 +7977,7 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) { - return 
(float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8043,7 +8043,7 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8109,7 +8109,7 @@ vld1_dup_s32 (const int32_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a); + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8175,7 +8175,7 @@ vld1q_dup_s32 (const int32_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a); + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8247,7 +8247,7 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { - __builtin_neon_vst1v2sf (__a, __b); + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8313,7 +8313,7 @@ vst1q_s64 (int64_t * __a, int64x2_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { - __builtin_neon_vst1v4sf (__a, __b); + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8373,7 +8373,7 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { - __builtin_neon_vst1_lanev2sf (__a, __b, __c); + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8439,7 +8439,7 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { - __builtin_neon_vst1_lanev4sf (__a, __b, __c); + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8512,7 +8512,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2v2sf (__a); + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8600,7 +8600,7 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_f32 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } 
__rv; - __rv.__o = __builtin_neon_vld2v4sf (__a); + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8676,7 +8676,7 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8748,7 +8748,7 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8807,7 +8807,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_dup_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_dupv2sf (__a); + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8892,7 +8892,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_f32 (float32_t * __a, float32x2x2_t __b) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2v2sf (__a, __bu.__o); + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8969,7 +8969,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_f32 (float32_t * __a, float32x4x2_t __b) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2v4sf (__a, __bu.__o); + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9032,7 +9032,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9088,7 +9088,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9140,7 +9140,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3v2sf (__a); + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9228,7 +9228,7 @@ __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_f32 (const float32_t * __a) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3v4sf (__a); + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf 
*) __a); return __rv.__i; } @@ -9304,7 +9304,7 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9376,7 +9376,7 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9435,7 +9435,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_dup_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_dupv2sf (__a); + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9520,7 +9520,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_f32 (float32_t * __a, float32x2x3_t __b) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3v2sf (__a, __bu.__o); + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9597,7 +9597,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_f32 (float32_t * __a, float32x4x3_t __b) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3v4sf (__a, __bu.__o); + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9660,7 +9660,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9716,7 +9716,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9768,7 +9768,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4v2sf (__a); + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9856,7 +9856,7 @@ __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_f32 (const float32_t * __a) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4v4sf (__a); + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9932,7 +9932,7 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const 
int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10004,7 +10004,7 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10063,7 +10063,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_dup_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_dupv2sf (__a); + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10148,7 +10148,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_f32 (float32_t * __a, float32x2x4_t __b) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2sf (__a, __bu.__o); + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10225,7 +10225,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_f32 (float32_t * __a, float32x4x4_t __b) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4sf (__a, __bu.__o); + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10288,7 +10288,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10344,7 +10344,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index 8d3afbf55fd..ba206022b75 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -30,7 +30,7 @@ /* Section 4.1 of the AAPCS requires the use of VFP format. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" /* TARGET_BIG_ENDIAN_DEFAULT is set in config.gcc for big endian configurations. 
*/ diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md index fcd92b002d7..515de43d28b 100644 --- a/gcc/config/arm/fpa.md +++ b/gcc/config/arm/fpa.md @@ -599,10 +599,10 @@ { default: case 0: return \"mvf%?e\\t%0, %1\"; - case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 1: if (TARGET_FPA_EMU2) return \"ldf%?e\\t%0, %1\"; return \"lfm%?\\t%0, 1, %1\"; - case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 2: if (TARGET_FPA_EMU2) return \"stf%?e\\t%1, %0\"; return \"sfm%?\\t%1, 1, %0\"; } diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h index 07455ee87fd..9fdca414e8e 100644 --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -98,7 +98,7 @@ /* NWFPE always understands FPA instructions. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3 +#define FPUTYPE_DEFAULT "fpe3" /* Call the function profiler with a given profile label. */ #undef ARM_FUNCTION_PROFILER diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml index 9c8e2a89b86..112c8be6e3b 100644 --- a/gcc/config/arm/neon-gen.ml +++ b/gcc/config/arm/neon-gen.ml @@ -122,6 +122,7 @@ let rec signed_ctype = function | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) @@ -320,7 +321,7 @@ let deftypes () = typeinfo; Format.print_newline (); (* Extra types not in <stdint.h>. *) - Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef float float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 10393b33ebc..114097d22a7 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -50,7 +50,7 @@ type vectype = T_int8x8 | T_int8x16 | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI + | T_intDI | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). @@ -1693,6 +1693,7 @@ let string_of_vectype vt = | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in affix (Printf.sprintf "%sx%d" basename num) diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h index 4c06fa1cb3b..9cf186b338d 100644 --- a/gcc/config/arm/netbsd-elf.h +++ b/gcc/config/arm/netbsd-elf.h @@ -153,5 +153,5 @@ do \ while (0) #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 8879fedb7d7..aa7e197bc5d 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -97,7 +97,7 @@ along with GCC; see the file COPYING3. If not see /* There is no default multilib. 
*/ #undef MULTILIB_DEFAULTS -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" #undef FUNCTION_PROFILER #define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 0927e3928c1..782ad11627b 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -406,8 +406,6 @@ extern int avr_reg_order[]; #define HAVE_POST_INCREMENT 1 #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #define REG_OK_FOR_BASE_NOSTRICT_P(X) \ diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 03a279036f3..365680ee9fa 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -911,9 +911,6 @@ typedef struct { /* Addressing Modes */ -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Nonzero if the constant value X is a legitimate general operand. symbol_ref are not legitimate and will be put into constant pool. See force_const_mem(). diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index 0fea7d77b39..3c426b74ae5 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -950,8 +950,6 @@ struct cum_args {int regs;}; #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Must be a compile-time constant, so we go with the highest value among all CRIS variants. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h index 20e157173d8..5e6237895b5 100644 --- a/gcc/config/fr30/fr30.h +++ b/gcc/config/fr30/fr30.h @@ -741,16 +741,6 @@ enum reg_class /*}}}*/ /*{{{ Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */ diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h index d48aa1ef17d..d5a7a4a6670 100644 --- a/gcc/config/frv/frv.h +++ b/gcc/config/frv/frv.h @@ -1927,16 +1927,6 @@ __asm__("\n" \ /* Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. 
*/ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 49acfa780e4..21f0e3184ef 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -46,9 +46,11 @@ /* Extended Features */ /* %ecx */ +#define bit_FMA4 (1 << 16) #define bit_LAHF_LM (1 << 0) +#define bit_LWP (1 << 15) #define bit_SSE4a (1 << 6) -#define bit_FMA4 (1 << 16) +#define bit_XOP (1 << 11) /* %edx */ #define bit_LM (1 << 29) diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h index 42782ade0ed..2bd411a0f05 100644 --- a/gcc/config/i386/fma4intrin.h +++ b/gcc/config/i386/fma4intrin.h @@ -35,15 +35,6 @@ /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ #include <ammintrin.h> -/* Internal data types for implementing the intrinsics. */ -typedef float __v8sf __attribute__ ((__vector_size__ (32))); -typedef double __v4df __attribute__ ((__vector_size__ (32))); - -typedef float __m256 __attribute__ ((__vector_size__ (32), - __may_alias__)); -typedef double __m256d __attribute__ ((__vector_size__ (32), - __may_alias__)); - /* 128b Floating point multiply/add type instructions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 12a3f1759a8..5a5311fba0f 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -232,6 +232,10 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__SSE4A__"); if (isa_flag & OPTION_MASK_ISA_FMA4) def_or_undef (parse_in, "__FMA4__"); + if (isa_flag & OPTION_MASK_ISA_XOP) + def_or_undef (parse_in, "__XOP__"); + if (isa_flag & OPTION_MASK_ISA_LWP) + def_or_undef (parse_in, "__LWP__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) def_or_undef (parse_in, "__SSE_MATH__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c29a7848ae1..2031dfb6e98 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1964,6 +1964,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_FMA4_SET \ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \ | OPTION_MASK_ISA_AVX_SET) +#define OPTION_MASK_ISA_XOP_SET \ + (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET) +#define OPTION_MASK_ISA_LWP_SET \ + OPTION_MASK_ISA_LWP /* AES and PCLMUL need SSE2 because they use xmm registers */ #define OPTION_MASK_ISA_AES_SET \ @@ -2015,7 +2019,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE4A_UNSET \ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET) -#define OPTION_MASK_ISA_FMA4_UNSET OPTION_MASK_ISA_FMA4 +#define OPTION_MASK_ISA_FMA4_UNSET \ + (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET) +#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP +#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL 
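
The bit_XOP and bit_LWP definitions added above can be tested at run time through the __get_cpuid helper this header already provides. A minimal sketch (hypothetical function name; assumes both bits are reported in %ecx of extended leaf 0x80000001, as their placement above indicates):

#include <cpuid.h>

/* Return nonzero when the CPU advertises both XOP and LWP.  */
static int
cpu_has_xop_and_lwp (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
    return 0;                        /* extended leaf unavailable */
  return (ecx & bit_XOP) && (ecx & bit_LWP);
}

@@ -2263,6 +2270,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mxop: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET; + } + return true; + + case OPT_mlwp: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET; + } + else + { + 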
ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET; + } + return true; + case OPT_mabm: if (value) { @@ -2391,6 +2424,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { { "-m64", OPTION_MASK_ISA_64BIT }, { "-mfma4", OPTION_MASK_ISA_FMA4 }, + { "-mxop", OPTION_MASK_ISA_XOP }, + { "-mlwp", OPTION_MASK_ISA_LWP }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2621,7 +2656,9 @@ override_options (bool main_args_p) PTA_AVX = 1 << 18, PTA_FMA = 1 << 19, PTA_MOVBE = 1 << 20, - PTA_FMA4 = 1 << 21 + PTA_FMA4 = 1 << 21, + PTA_XOP = 1 << 22, + PTA_LWP = 1 << 23 }; static struct pta @@ -2967,6 +3004,12 @@ override_options (bool main_args_p) if (processor_alias_table[i].flags & PTA_FMA4 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) ix86_isa_flags |= OPTION_MASK_ISA_FMA4; + if (processor_alias_table[i].flags & PTA_XOP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) + ix86_isa_flags |= OPTION_MASK_ISA_XOP; + if (processor_alias_table[i].flags & PTA_LWP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) + ix86_isa_flags |= OPTION_MASK_ISA_LWP; if (processor_alias_table[i].flags & PTA_ABM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) ix86_isa_flags |= OPTION_MASK_ISA_ABM; @@ -3649,6 +3692,8 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) IX86_ATTR_ISA ("sse4a", OPT_msse4a), IX86_ATTR_ISA ("ssse3", OPT_mssse3), IX86_ATTR_ISA ("fma4", OPT_mfma4), + IX86_ATTR_ISA ("xop", OPT_mxop), + IX86_ATTR_ISA ("lwp", OPT_mlwp), /* string options */ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), @@ -11290,6 +11335,7 @@ get_some_local_dynamic_name (void) X -- don't print any sort of PIC '@' suffix for a symbol. & -- print some in-use local-dynamic symbol name. H -- print a memory address offset by 8; used for sse high-parts + Y -- print condition for XOP pcom* instruction. + -- print a branch hint as 'cs' or 'ds' prefix ; -- print a semicolon (after prefixes due to bug in older gas). */ @@ -11707,6 +11753,61 @@ print_operand (FILE *file, rtx x, int code) return; } + case 'Y': + switch (GET_CODE (x)) + { + case NE: + fputs ("neq", file); + break; + case EQ: + fputs ("eq", file); + break; + case GE: + case GEU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); + break; + case GT: + case GTU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? 
"gt" : "unle", file); + break; + case LE: + case LEU: + fputs ("le", file); + break; + case LT: + case LTU: + fputs ("lt", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case ORDERED: + fputs ("ord", file); + break; + case UNEQ: + fputs ("ueq", file); + break; + case UNGE: + fputs ("nlt", file); + break; + case UNGT: + fputs ("nle", file); + break; + case UNLE: + fputs ("ule", file); + break; + case UNLT: + fputs ("ult", file); + break; + case LTGT: + fputs ("une", file); + break; + default: + output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); + return; + } + return; + case ';': #if TARGET_MACHO fputs (" ; ", file); @@ -15916,6 +16017,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } + else if (TARGET_XOP) + { + rtx pcmov = gen_rtx_SET (mode, dest, + gen_rtx_IF_THEN_ELSE (mode, cmp, + op_true, + op_false)); + emit_insn (pcmov); + } else { op_true = force_reg (mode, op_true); @@ -16038,6 +16147,9 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = operands[4]; cop1 = operands[5]; + /* XOP supports all of the comparisons on all vector int types. */ + if (!TARGET_XOP) + { /* Canonicalize the comparison to EQ, GT, GTU. */ switch (code) { @@ -16148,6 +16260,7 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = x; cop1 = CONST0_RTX (mode); } + } x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, operands[1+negate], operands[2-negate]); @@ -20806,7 +20919,7 @@ enum ix86_builtins IX86_BUILTIN_CVTUDQ2PS, - /* FMA4 instructions. */ + /* FMA4 and XOP instructions. */ IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDPS, @@ -20839,6 +20952,164 @@ enum ix86_builtins IX86_BUILTIN_VFNMADDPD256, IX86_BUILTIN_VFNMSUBPS256, IX86_BUILTIN_VFNMSUBPD256, + + IX86_BUILTIN_VPCMOV, + IX86_BUILTIN_VPCMOV_V2DI, + IX86_BUILTIN_VPCMOV_V4SI, + IX86_BUILTIN_VPCMOV_V8HI, + IX86_BUILTIN_VPCMOV_V16QI, + IX86_BUILTIN_VPCMOV_V4SF, + IX86_BUILTIN_VPCMOV_V2DF, + IX86_BUILTIN_VPCMOV256, + IX86_BUILTIN_VPCMOV_V4DI256, + IX86_BUILTIN_VPCMOV_V8SI256, + IX86_BUILTIN_VPCMOV_V16HI256, + IX86_BUILTIN_VPCMOV_V32QI256, + IX86_BUILTIN_VPCMOV_V8SF256, + IX86_BUILTIN_VPCMOV_V4DF256, + + IX86_BUILTIN_VPPERM, + + IX86_BUILTIN_VPMACSSWW, + IX86_BUILTIN_VPMACSWW, + IX86_BUILTIN_VPMACSSWD, + IX86_BUILTIN_VPMACSWD, + IX86_BUILTIN_VPMACSSDD, + IX86_BUILTIN_VPMACSDD, + IX86_BUILTIN_VPMACSSDQL, + IX86_BUILTIN_VPMACSSDQH, + IX86_BUILTIN_VPMACSDQL, + IX86_BUILTIN_VPMACSDQH, + IX86_BUILTIN_VPMADCSSWD, + IX86_BUILTIN_VPMADCSWD, + + IX86_BUILTIN_VPHADDBW, + IX86_BUILTIN_VPHADDBD, + IX86_BUILTIN_VPHADDBQ, + IX86_BUILTIN_VPHADDWD, + IX86_BUILTIN_VPHADDWQ, + IX86_BUILTIN_VPHADDDQ, + IX86_BUILTIN_VPHADDUBW, + IX86_BUILTIN_VPHADDUBD, + IX86_BUILTIN_VPHADDUBQ, + IX86_BUILTIN_VPHADDUWD, + IX86_BUILTIN_VPHADDUWQ, + IX86_BUILTIN_VPHADDUDQ, + IX86_BUILTIN_VPHSUBBW, + IX86_BUILTIN_VPHSUBWD, + IX86_BUILTIN_VPHSUBDQ, + + IX86_BUILTIN_VPROTB, + IX86_BUILTIN_VPROTW, + IX86_BUILTIN_VPROTD, + IX86_BUILTIN_VPROTQ, + IX86_BUILTIN_VPROTB_IMM, + IX86_BUILTIN_VPROTW_IMM, + IX86_BUILTIN_VPROTD_IMM, + IX86_BUILTIN_VPROTQ_IMM, + + IX86_BUILTIN_VPSHLB, + IX86_BUILTIN_VPSHLW, + IX86_BUILTIN_VPSHLD, + IX86_BUILTIN_VPSHLQ, + IX86_BUILTIN_VPSHAB, + IX86_BUILTIN_VPSHAW, + IX86_BUILTIN_VPSHAD, + IX86_BUILTIN_VPSHAQ, + + IX86_BUILTIN_VFRCZSS, + IX86_BUILTIN_VFRCZSD, + IX86_BUILTIN_VFRCZPS, + IX86_BUILTIN_VFRCZPD, + IX86_BUILTIN_VFRCZPS256, + IX86_BUILTIN_VFRCZPD256, + + IX86_BUILTIN_VPCOMEQUB, + 
IX86_BUILTIN_VPCOMNEUB, + IX86_BUILTIN_VPCOMLTUB, + IX86_BUILTIN_VPCOMLEUB, + IX86_BUILTIN_VPCOMGTUB, + IX86_BUILTIN_VPCOMGEUB, + IX86_BUILTIN_VPCOMFALSEUB, + IX86_BUILTIN_VPCOMTRUEUB, + + IX86_BUILTIN_VPCOMEQUW, + IX86_BUILTIN_VPCOMNEUW, + IX86_BUILTIN_VPCOMLTUW, + IX86_BUILTIN_VPCOMLEUW, + IX86_BUILTIN_VPCOMGTUW, + IX86_BUILTIN_VPCOMGEUW, + IX86_BUILTIN_VPCOMFALSEUW, + IX86_BUILTIN_VPCOMTRUEUW, + + IX86_BUILTIN_VPCOMEQUD, + IX86_BUILTIN_VPCOMNEUD, + IX86_BUILTIN_VPCOMLTUD, + IX86_BUILTIN_VPCOMLEUD, + IX86_BUILTIN_VPCOMGTUD, + IX86_BUILTIN_VPCOMGEUD, + IX86_BUILTIN_VPCOMFALSEUD, + IX86_BUILTIN_VPCOMTRUEUD, + + IX86_BUILTIN_VPCOMEQUQ, + IX86_BUILTIN_VPCOMNEUQ, + IX86_BUILTIN_VPCOMLTUQ, + IX86_BUILTIN_VPCOMLEUQ, + IX86_BUILTIN_VPCOMGTUQ, + IX86_BUILTIN_VPCOMGEUQ, + IX86_BUILTIN_VPCOMFALSEUQ, + IX86_BUILTIN_VPCOMTRUEUQ, + + IX86_BUILTIN_VPCOMEQB, + IX86_BUILTIN_VPCOMNEB, + IX86_BUILTIN_VPCOMLTB, + IX86_BUILTIN_VPCOMLEB, + IX86_BUILTIN_VPCOMGTB, + IX86_BUILTIN_VPCOMGEB, + IX86_BUILTIN_VPCOMFALSEB, + IX86_BUILTIN_VPCOMTRUEB, + + IX86_BUILTIN_VPCOMEQW, + IX86_BUILTIN_VPCOMNEW, + IX86_BUILTIN_VPCOMLTW, + IX86_BUILTIN_VPCOMLEW, + IX86_BUILTIN_VPCOMGTW, + IX86_BUILTIN_VPCOMGEW, + IX86_BUILTIN_VPCOMFALSEW, + IX86_BUILTIN_VPCOMTRUEW, + + IX86_BUILTIN_VPCOMEQD, + IX86_BUILTIN_VPCOMNED, + IX86_BUILTIN_VPCOMLTD, + IX86_BUILTIN_VPCOMLED, + IX86_BUILTIN_VPCOMGTD, + IX86_BUILTIN_VPCOMGED, + IX86_BUILTIN_VPCOMFALSED, + IX86_BUILTIN_VPCOMTRUED, + + IX86_BUILTIN_VPCOMEQQ, + IX86_BUILTIN_VPCOMNEQ, + IX86_BUILTIN_VPCOMLTQ, + IX86_BUILTIN_VPCOMLEQ, + IX86_BUILTIN_VPCOMGTQ, + IX86_BUILTIN_VPCOMGEQ, + IX86_BUILTIN_VPCOMFALSEQ, + IX86_BUILTIN_VPCOMTRUEQ, + + /* LWP instructions. */ + IX86_BUILTIN_LLWPCB16, + IX86_BUILTIN_LLWPCB32, + IX86_BUILTIN_LLWPCB64, + IX86_BUILTIN_SLWPCB16, + IX86_BUILTIN_SLWPCB32, + IX86_BUILTIN_SLWPCB64, + IX86_BUILTIN_LWPVAL16, + IX86_BUILTIN_LWPVAL32, + IX86_BUILTIN_LWPVAL64, + IX86_BUILTIN_LWPINS16, + IX86_BUILTIN_LWPINS32, + IX86_BUILTIN_LWPINS64, + IX86_BUILTIN_MAX }; @@ -21052,7 +21323,13 @@ enum ix86_special_builtin_type VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, VOID_FTYPE_PV4SF_V4SF_V4SF, - VOID_FTYPE_PV2DF_V2DF_V2DF + VOID_FTYPE_PV2DF_V2DF_V2DF, + VOID_FTYPE_USHORT_UINT_USHORT, + VOID_FTYPE_UINT_UINT_UINT, + VOID_FTYPE_UINT64_UINT_UINT, + UCHAR_FTYPE_USHORT_UINT_USHORT, + UCHAR_FTYPE_UINT_UINT_UINT, + UCHAR_FTYPE_UINT64_UINT_UINT }; /* Builtin types */ @@ -21299,6 +21576,22 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, 
CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, + };
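
A minimal sketch of how the scalar LWP builtins in the table above come out in user code (the event payload arguments are illustrative only, and the last operand is written as a literal since LWPVAL/LWPINS encode an immediate; requires -mlwp):

/* Log a sample value into the current LWP ring buffer, then insert an
   event record; lwpins reports ring-buffer state in its return value.
   Signatures follow the UINT_UINT_UINT function types declared above.  */
static unsigned char
lwp_log_event (unsigned int data2, unsigned int data1)
{
  __builtin_ia32_lwpval32 (data2, data1, 0);
  return __builtin_ia32_lwpins32 (data2, data1, 0);
}

/* Builtins with variable number of arguments. */ @@ -21912,13 +22205,58 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, }; -/* FMA4. */ +/* FMA4 and XOP. */ enum multi_arg_type { MULTI_ARG_UNKNOWN, MULTI_ARG_3_SF, MULTI_ARG_3_DF, MULTI_ARG_3_SF2, - MULTI_ARG_3_DF2 + MULTI_ARG_3_DF2, + MULTI_ARG_3_DI, + MULTI_ARG_3_SI, + MULTI_ARG_3_SI_DI, + MULTI_ARG_3_HI, + MULTI_ARG_3_HI_SI, + MULTI_ARG_3_QI, + MULTI_ARG_3_DI2, + MULTI_ARG_3_SI2, + MULTI_ARG_3_HI2, + MULTI_ARG_3_QI2, + MULTI_ARG_2_SF, + MULTI_ARG_2_DF, + MULTI_ARG_2_DI, + MULTI_ARG_2_SI, + MULTI_ARG_2_HI, + MULTI_ARG_2_QI, + MULTI_ARG_2_DI_IMM, + MULTI_ARG_2_SI_IMM, + MULTI_ARG_2_HI_IMM, + MULTI_ARG_2_QI_IMM, + MULTI_ARG_2_DI_CMP, + MULTI_ARG_2_SI_CMP, + MULTI_ARG_2_HI_CMP, + MULTI_ARG_2_QI_CMP, + MULTI_ARG_2_DI_TF, + MULTI_ARG_2_SI_TF, + MULTI_ARG_2_HI_TF, + MULTI_ARG_2_QI_TF, + MULTI_ARG_2_SF_TF, + MULTI_ARG_2_DF_TF, + MULTI_ARG_1_SF, + MULTI_ARG_1_DF, + MULTI_ARG_1_SF2, + MULTI_ARG_1_DF2, + MULTI_ARG_1_DI, + MULTI_ARG_1_SI, + MULTI_ARG_1_HI, + MULTI_ARG_1_QI, + MULTI_ARG_1_SI_DI, + MULTI_ARG_1_HI_DI, + MULTI_ARG_1_HI_SI, + MULTI_ARG_1_QI_DI, + MULTI_ARG_1_QI_SI, + MULTI_ARG_1_QI_HI + }; static const struct builtin_description bdesc_multi_arg[] = @@ -21959,7 +22297,160 @@ static const struct builtin_description bdesc_multi_arg[] = { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, - { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 } + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", 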
IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", 
IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, 
"__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, }; @@ -22341,51 +22832,6 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); - - tree v2di_ftype_v2di - = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); - - tree v16qi_ftype_v8hi_v8hi - = build_function_type_list (V16QI_type_node, - V8HI_type_node, V8HI_type_node, - NULL_TREE); - tree v8hi_ftype_v4si_v4si - = build_function_type_list (V8HI_type_node, - V4SI_type_node, V4SI_type_node, - NULL_TREE); - tree v8hi_ftype_v16qi_v16qi - = build_function_type_list (V8HI_type_node, - V16QI_type_node, V16QI_type_node, - NULL_TREE); - tree v4hi_ftype_v8qi_v8qi - = build_function_type_list (V4HI_type_node, - V8QI_type_node, V8QI_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_uchar - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_char_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_ushort - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_unsigned - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - tree uint64_ftype_uint64_uint64 - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - NULL_TREE); - tree float_ftype_float - = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); - /* AVX builtins */ tree V32QI_type_node = build_vector_type_for_mode (char_type_node, V32QImode); @@ -22397,6 +22843,8 @@ ix86_init_mmx_sse_builtins (void) V4DImode); tree V4DF_type_node = build_vector_type_for_mode (double_type_node, V4DFmode); + tree V16HI_type_node = build_vector_type_for_mode (intHI_type_node, + V16HImode); tree v8sf_ftype_v8sf = build_function_type_list (V8SF_type_node, V8SF_type_node, @@ -22641,6 +23089,138 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DI_type_node, NULL_TREE); + /* XOP instructions */ + tree v2di_ftype_v2di_v2di_v2di + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v4di_ftype_v4di_v4di_v4di + = build_function_type_list (V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v4si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v8si_ftype_v8si_v8si_v8si + = build_function_type_list (V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + NULL_TREE); + + tree 
v32qi_ftype_v32qi_v32qi_v32qi + = build_function_type_list (V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v2di + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v8hi + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + NULL_TREE); + + tree v16hi_ftype_v16hi_v16hi_v16hi + = build_function_type_list (V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v4si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v2di_ftype_v2di_si + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + integer_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + integer_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + integer_type_node, + NULL_TREE); + + tree v16qi_ftype_v16qi_si + = build_function_type_list (V16QI_type_node, + V16QI_type_node, + integer_type_node, + NULL_TREE); + + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); + + tree v16qi_ftype_v8hi_v8hi + = build_function_type_list (V16QI_type_node, + V8HI_type_node, V8HI_type_node, + NULL_TREE); + tree v8hi_ftype_v4si_v4si + = build_function_type_list (V8HI_type_node, + V4SI_type_node, V4SI_type_node, + NULL_TREE); + tree v8hi_ftype_v16qi_v16qi + = build_function_type_list (V8HI_type_node, + V16QI_type_node, V16QI_type_node, + NULL_TREE); + tree v4hi_ftype_v8qi_v8qi + = build_function_type_list (V4HI_type_node, + V8QI_type_node, V8QI_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_uchar + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_char_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_ushort + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_unsigned + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree uint64_ftype_uint64_uint64 + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + tree float_ftype_float + = build_function_type_list (float_type_node, + float_type_node, + NULL_TREE); + /* Integer intrinsics. */ tree uint64_ftype_void = build_function_type (long_long_unsigned_type_node, @@ -22670,6 +23250,50 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); + /* LWP instructions. 
*/ + + tree void_ftype_ushort_unsigned_ushort + = build_function_type_list (void_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree void_ftype_unsigned_unsigned_unsigned + = build_function_type_list (void_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree void_ftype_uint64_unsigned_unsigned + = build_function_type_list (void_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_ushort_unsigned_ushort + = build_function_type_list (unsigned_char_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_unsigned_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_uint64_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree ftype; /* Add all special builtins with variable number of operands. */ @@ -22783,6 +23407,25 @@ ix86_init_mmx_sse_builtins (void) case VOID_FTYPE_PV2DF_V2DF_V2DF: type = void_ftype_pv2df_v2df_v2df; break; + case VOID_FTYPE_USHORT_UINT_USHORT: + type = void_ftype_ushort_unsigned_ushort; + break; + case VOID_FTYPE_UINT_UINT_UINT: + type = void_ftype_unsigned_unsigned_unsigned; + break; + case VOID_FTYPE_UINT64_UINT_UINT: + type = void_ftype_uint64_unsigned_unsigned; + break; + case UCHAR_FTYPE_USHORT_UINT_USHORT: + type = uchar_ftype_ushort_unsigned_ushort; + break; + case UCHAR_FTYPE_UINT_UINT_UINT: + type = uchar_ftype_unsigned_unsigned_unsigned; + break; + case UCHAR_FTYPE_UINT64_UINT_UINT: + type = uchar_ftype_uint64_unsigned_unsigned; + break; + default: gcc_unreachable (); } @@ -23409,6 +24052,50 @@ ix86_init_mmx_sse_builtins (void) case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; case MULTI_ARG_3_SF2: mtype = v8sf_ftype_v8sf_v8sf_v8sf; break; case MULTI_ARG_3_DF2: mtype = v4df_ftype_v4df_v4df_v4df; break; + case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; + case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; + case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; + case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; + case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; + case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; + case MULTI_ARG_3_DI2: mtype = v4di_ftype_v4di_v4di_v4di; break; + case MULTI_ARG_3_SI2: mtype = v8si_ftype_v8si_v8si_v8si; break; + case MULTI_ARG_3_HI2: mtype = v16hi_ftype_v16hi_v16hi_v16hi; break; + case MULTI_ARG_3_QI2: mtype = v32qi_ftype_v32qi_v32qi_v32qi; break; + case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; + case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; + case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; + case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; + case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_CMP: mtype = 
v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; + case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; + case MULTI_ARG_1_SF2: mtype = v8sf_ftype_v8sf; break; + case MULTI_ARG_1_DF2: mtype = v4df_ftype_v4df; break; + case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; + case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; + case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; + case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; + case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; + case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; + case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; + case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; + case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; + case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; case MULTI_ARG_UNKNOWN: default: @@ -23628,9 +24315,71 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, case MULTI_ARG_3_DF: case MULTI_ARG_3_SF2: case MULTI_ARG_3_DF2: + case MULTI_ARG_3_DI: + case MULTI_ARG_3_SI: + case MULTI_ARG_3_SI_DI: + case MULTI_ARG_3_HI: + case MULTI_ARG_3_HI_SI: + case MULTI_ARG_3_QI: + case MULTI_ARG_3_DI2: + case MULTI_ARG_3_SI2: + case MULTI_ARG_3_HI2: + case MULTI_ARG_3_QI2: nargs = 3; break; + case MULTI_ARG_2_SF: + case MULTI_ARG_2_DF: + case MULTI_ARG_2_DI: + case MULTI_ARG_2_SI: + case MULTI_ARG_2_HI: + case MULTI_ARG_2_QI: + nargs = 2; + break; + + case MULTI_ARG_2_DI_IMM: + case MULTI_ARG_2_SI_IMM: + case MULTI_ARG_2_HI_IMM: + case MULTI_ARG_2_QI_IMM: + nargs = 2; + last_arg_constant = true; + break; + + case MULTI_ARG_1_SF: + case MULTI_ARG_1_DF: + case MULTI_ARG_1_SF2: + case MULTI_ARG_1_DF2: + case MULTI_ARG_1_DI: + case MULTI_ARG_1_SI: + case MULTI_ARG_1_HI: + case MULTI_ARG_1_QI: + case MULTI_ARG_1_SI_DI: + case MULTI_ARG_1_HI_DI: + case MULTI_ARG_1_HI_SI: + case MULTI_ARG_1_QI_DI: + case MULTI_ARG_1_QI_SI: + case MULTI_ARG_1_QI_HI: + nargs = 1; + break; + + case MULTI_ARG_2_DI_CMP: + case MULTI_ARG_2_SI_CMP: + case MULTI_ARG_2_HI_CMP: + case MULTI_ARG_2_QI_CMP: + nargs = 2; + comparison_p = true; + break; + + case MULTI_ARG_2_SF_TF: + case MULTI_ARG_2_DF_TF: + case MULTI_ARG_2_DI_TF: + case MULTI_ARG_2_SI_TF: + case MULTI_ARG_2_HI_TF: + case MULTI_ARG_2_QI_TF: + nargs = 2; + tf_p = true; + break; + case MULTI_ARG_UNKNOWN: default: gcc_unreachable (); @@ -24568,6 +25317,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, /* Reserve memory operand for target. */ memory = ARRAY_SIZE (args); break; + case VOID_FTYPE_USHORT_UINT_USHORT: + case VOID_FTYPE_UINT_UINT_UINT: + case VOID_FTYPE_UINT64_UINT_UINT: + case UCHAR_FTYPE_USHORT_UINT_USHORT: + case UCHAR_FTYPE_UINT_UINT_UINT: + case UCHAR_FTYPE_UINT64_UINT_UINT: + nargs = 3; + klass = store; + memory = 0; + break; default: gcc_unreachable (); } @@ -25311,7 +26070,7 @@ static tree ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool sqrt ATTRIBUTE_UNUSED) { - if (! 
(TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () + if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations)) return NULL_TREE; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b412604dbd8..4bc8ef18500 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -55,6 +55,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_FMA OPTION_ISA_FMA #define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_FMA4 OPTION_ISA_FMA4 +#define TARGET_XOP OPTION_ISA_XOP +#define TARGET_LWP OPTION_ISA_LWP #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM #define TARGET_POPCNT OPTION_ISA_POPCNT diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dc605abde06..82f5352597c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -57,6 +57,7 @@ ;; X -- don't print any sort of PIC '@' suffix for a symbol. ;; & -- print some in-use local-dynamic symbol name. ;; H -- print a memory address offset by 8; used for sse high-parts +;; Y -- print condition for XOP pcom* instruction. ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). @@ -199,6 +200,15 @@ (UNSPEC_FMA4_INTRINSIC 150) (UNSPEC_FMA4_FMADDSUB 151) (UNSPEC_FMA4_FMSUBADD 152) + (UNSPEC_XOP_UNSIGNED_CMP 151) + (UNSPEC_XOP_TRUEFALSE 152) + (UNSPEC_XOP_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_LLWP_INTRINSIC 155) + (UNSPEC_SLWP_INTRINSIC 156) + (UNSPECV_LWPVAL_INTRINSIC 157) + (UNSPECV_LWPINS_INTRINSIC 158) + ; For AES support (UNSPEC_AESENC 159) (UNSPEC_AESENCLAST 160) @@ -254,6 +264,20 @@ (COM_TRUE_P 5) ]) +;; Constants used in the XOP pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + ;; Registers by name. (define_constants [(AX_REG 0) @@ -333,7 +357,7 @@ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, - ssemuladd,sse4arg, + ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -19676,6 +19700,20 @@ [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) +;; All moves in XOP pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; XOP conditional move +(define_insn "*xop_pcmov_<mode>" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "register_operand" "x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + ;; These versions of the min/max patterns are intentionally ignorant of ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
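As a concrete picture of that asymmetry (an editorial sketch in plain C, not part of the patch): per element, SSE min is exactly the expression below, in which the second operand wins whenever the comparison is false, so operand order is visible for -0.0 and NaN.

/* Model of what minsd/minpd compute per element.  The test a < b
   is false for (+0.0, -0.0) and whenever either input is NaN, so
   the second operand comes through in those cases:
   min (-0.0, +0.0) == +0.0 but min (+0.0, -0.0) == -0.0, and a
   NaN in the first position is discarded.  */
static double
sse_min_model (double a, double b)
{
  return a < b ? a : b;
}

Marking the operands commutative, as these patterns do, is therefore only safe because the -0.0 and NaN cases are deliberately left unspecified.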
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator @@ -19985,6 +20023,18 @@ DONE; }) +;; Use IOR for stack probes, this is shorter. +(define_expand "probe_stack" + [(match_operand 0 "memory_operand" "")] + "" +{ + if (GET_MODE (operands[0]) == DImode) + emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx)); + else + emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx)); + DONE; +}) + (define_expand "builtin_setjmp_receiver" [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" @@ -20488,7 +20538,9 @@ [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) @@ -20503,7 +20555,9 @@ [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) @@ -21252,19 +21306,19 @@ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" ""))) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) - (clobber (reg:TI 27)) - (clobber (reg:TI 28)) - (clobber (reg:TI 45)) - (clobber (reg:TI 46)) - (clobber (reg:TI 47)) - (clobber (reg:TI 48)) - (clobber (reg:TI 49)) - (clobber (reg:TI 50)) - (clobber (reg:TI 51)) - (clobber (reg:TI 52)) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] - "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "TARGET_64BIT && !SIBLING_CALL_P (insn)" { if (constant_call_address_operand (operands[1], Pmode)) return "call\t%P1"; @@ -21303,14 +21357,14 @@ (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -21320,14 +21374,14 @@ (define_insn "*sse_prologue_save_insn" [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 
"register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] @@ -21804,6 +21858,120 @@ [(set_attr "type" "other") (set_attr "length" "3")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; LWP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "lwp_llwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_llwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_llwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_slwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_slwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_slwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpvalhi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpvalsi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpvaldi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpinshi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpinssi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpinsdi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + (include 
"mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9668ff6504d..dd47b7d1dc5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -314,6 +314,14 @@ mfma4 Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save Support FMA4 built-in functions and code generation +mxop +Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save +Support XOP built-in functions and code generation + +mlwp +Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save +Support LWP built-in functions and code generation + mabm Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Support code generation of Advanced Bit Manipulation (ABM) instructions. diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index e701b19e2a8..540bc3f09ee 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -49,6 +49,7 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } +#ifdef __SSE4_2__ /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -70,6 +71,7 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } +#endif /* SSE4.2 */ /* 32bit popcnt */ extern __inline int diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h index 5e2e0136fcb..5d8e5ad2cbe 100644 --- a/gcc/config/i386/linux.h +++ b/gcc/config/i386/linux.h @@ -207,6 +207,9 @@ along with GCC; see the file COPYING3. If not see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index cfa3f49e870..d07547a804f 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -110,6 +110,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h new file mode 100644 index 00000000000..e5137ec24f4 --- /dev/null +++ b/gcc/config/i386/lwpintrin.h @@ -0,0 +1,109 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _LWPINTRIN_H_INCLUDED +#define _LWPINTRIN_H_INCLUDED + +#ifndef __LWP__ +# error "LWP instruction set not enabled" +#else + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb16 (void *pcbAddress) +{ + __builtin_ia32_llwpcb16 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb32 (void *pcbAddress) +{ + __builtin_ia32_llwpcb32 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb64 (void *pcbAddress) +{ + __builtin_ia32_llwpcb64 (pcbAddress); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb16 (void) +{ + return __builtin_ia32_slwpcb16 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb32 (void) +{ + return __builtin_ia32_slwpcb32 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb64 (void) +{ + return __builtin_ia32_slwpcb64 (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + __builtin_ia32_lwpval16 (data2, data1, flags); +} +/* +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval32 (data2, data1, flags); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval64 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + return __builtin_ia32_lwpins16 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins32 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins64 (data2, data1, flags); +} +*/ +#endif /* __LWP__ */ + +#endif /* _LWPINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e90296512ad..bad39bb69c8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -86,6 +86,9 @@ (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) +;; Mapping of the max integer size for xop rotate immediate constraint +(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) + ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") (V16QI "QI") (V8HI "HI") @@ -1455,7 +1458,8 @@ (match_operator:SSEMODEF4 3 "sse_comparison_operator" [(match_operand:SSEMODEF4 1 "register_operand" "0") (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" + "!TARGET_XOP + && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" 
"cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -5614,7 +5618,7 @@ (match_operand:V4SI 2 "register_operand" "")))] "TARGET_SSE2" { - if (TARGET_SSE4_1) + if (TARGET_SSE4_1 || TARGET_XOP) ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); }) @@ -5643,7 +5647,7 @@ [(set (match_operand:V4SI 0 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE4_1 + "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP && can_create_pseudo_p ()" "#" "&& 1" @@ -5705,6 +5709,42 @@ rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx op0, op1, op2; + if (TARGET_XOP) + { + /* op1: A,B,C,D, op2: E,F,G,H */ + op0 = operands[0]; + op1 = gen_lowpart (V4SImode, operands[1]); + op2 = gen_lowpart (V4SImode, operands[2]); + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + t4 = gen_reg_rtx (V2DImode); + t5 = gen_reg_rtx (V2DImode); + + /* t1: B,A,D,C */ + emit_insn (gen_sse2_pshufd_1 (t1, op1, + GEN_INT (1), + GEN_INT (0), + GEN_INT (3), + GEN_INT (2))); + + /* t2: 0 */ + emit_move_insn (t2, CONST0_RTX (V4SImode)); + + /* t3: (B*E),(A*F),(D*G),(C*H) */ + emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2)); + + /* t4: (B*E)+(A*F), (D*G)+(C*H) */ + emit_insn (gen_xop_phadddq (t4, t3)); + + /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ + emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); + + /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ + emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5)); + DONE; + } + op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; @@ -5820,6 +5860,56 @@ DONE; }) +(define_expand "vec_widen_smult_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + DONE; +}) + (define_expand "vec_widen_umult_hi_v4si" [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") @@ -6217,7 +6307,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP " "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*avx_eq<mode>3" @@ -6240,7 +6330,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE2 && !TARGET_XOP && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") @@ -6286,7 +6376,7 @@ (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP" "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "1") @@ -10364,6 +10454,1445 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; XOP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; XOP parallel integer multiply/add instructions. +;; Note the instruction does not allow the value being added to be a memory +;; operation. However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (plus:V8HI + (mult:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") + (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsww with two memory operands into a load and the pmacsww. +(define_split + [(set (match_operand:V8HI 0 "register_operand" "") + (plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")) + (match_operand:V8HI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V8HImode); + emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (ss_plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Note the instruction does not allow the value being added to be a memory +;; operation. 
However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsdd with two memory operands into a load and the pmacsdd. +(define_split + [(set (match_operand:V4SI 0 "register_operand" "") + (plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:V4SI 2 "nonimmediate_operand" "")) + (match_operand:V4SI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V4SImode); + emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn 
"xop_pmacsdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdql_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as operands 1/2 +(define_insn_and_split "xop_mulv2div2di3_low" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacsdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdqh_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
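The trick, modelled per lane in scalar C (an editorial sketch; vpmacsdql and vpmacsdqh perform it on two lanes at once, one variant per element pairing):

#include <stdint.h>

/* A widening signed 32x32->64 multiply written as a
   multiply/accumulate into a zeroed accumulator, which is what
   these splits emit: first set the destination to zero
   (operands[3] = CONST0_RTX), then feed it to the pmacs insn as
   the value being added.  */
static int64_t
widen_smul_model (int32_t x, int32_t y)
{
  int64_t acc = 0;
  acc += (int64_t) x * (int64_t) y;
  return acc;
}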
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as either operands[1] or operands[2] +(define_insn_and_split "xop_mulv2div2di3_high" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel integer multiply/add instructions for the intrinsics +(define_insn "xop_pmacsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) 
+ (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel XMM conditional moves +(define_insn "xop_pcmov_<mode>" + [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x") + (if_then_else:SSEMODE + (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,x") + (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +(define_insn "xop_pcmov_<mode>256" + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") + (if_then_else:AVX256MODE + (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:AVX256MODE 1 "vector_move_operand" "x,xm,x") + (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +;; XOP horizontal add/subtract instructions +(define_insn "xop_phaddbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + 
(sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddbd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddbq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphaddwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphaddwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadddq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphadddq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (zero_extend:V8HI + 
(vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddubw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddubd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddubq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphadduwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphadduwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddudq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 
2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphaddudq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (minus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphsubbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (minus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphsubwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (minus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphsubdq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +;; XOP permute instructions +(define_insn "xop_pperm" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (unspec:V16QI + [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,xm") + (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP pack instructions that combine two vectors into a smaller vector +(define_insn "xop_pperm_pack_v2di_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (vec_concat:V4SI + (truncate:V2SI + (match_operand:V2DI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V2SI + (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v4si_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (vec_concat:V8HI + (truncate:V4HI + (match_operand:V4SI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v8hi_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (vec_concat:V16QI + (truncate:V8QI + (match_operand:V8HI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V8QI + 
(match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP packed rotate instructions +(define_expand "rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; + } +}) + +(define_expand "rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx neg = gen_reg_rtx (<MODE>mode); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_neg<mode>2 (neg, reg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); + DONE; + } +}) + +(define_insn "xop_rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" +{ + operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); + return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; +} + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_expand "vrotr<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") + (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx reg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (reg, operands[2])); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "vrotl<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") 
+ (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_vrotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (rotatert:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; XOP packed shift instructions. +;; FIXME: add V2DI back in +(define_expand "vlshr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashl<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_ashl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (ashiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +(define_insn "xop_lshl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (lshiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; SSE2 doesn't have some shift variants, so define versions for XOP
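(The vpsha/vpshl patterns above encode XOP's sign convention for variable shifts: a non-negative count in an element shifts that element left, a negative count shifts it right. A minimal usage sketch through the intrinsics this patch adds below; the helper name is illustrative, a compiler built with this support and -mxop is assumed, and _mm_set_epi32 is plain SSE2:)

#include <x86intrin.h>

/* Per-element arithmetic shift: lanes with count 2 shift left
   (16<<2 = 64, -16<<2 = -64); lanes with count -2 shift right
   arithmetically (16>>2 = 4, -16>>2 = -4).  */
__m128i sha_demo (void)
{
  __m128i vals   = _mm_set_epi32 (-16, 16, -16, 16);
  __m128i counts = _mm_set_epi32 (2, 2, -2, -2);
  return _mm_sha_epi32 (vals, counts);
}

+(define_expand "ashlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL 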
(V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "lshlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "ashrv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + rtx ele = ((CONST_INT_P (operands[2])) + ? GEN_INT (- INTVAL (operands[2])) + : operands[2]); + + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = ele; + + emit_insn (gen_vec_initv16qi (reg, par)); + + if (!CONST_INT_P (operands[2])) + { + rtx neg = gen_reg_rtx (V16QImode); + emit_insn (gen_negv16qi2 (neg, reg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg)); + } + else + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + + DONE; +}) + +(define_expand "ashrv2di3" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V2DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (2); + rtx par = gen_rtx_PARALLEL (V2DImode, vs); + rtx reg = gen_reg_rtx (V2DImode); + rtx ele; + + if (CONST_INT_P (operands[2])) + ele = GEN_INT (- INTVAL (operands[2])); + else if (GET_MODE (operands[2]) != DImode) + { + rtx move = gen_reg_rtx (DImode); + ele = gen_reg_rtx (DImode); + convert_move (move, operands[2], false); + emit_insn (gen_negdi2 (ele, move)); + } + else + { + ele = gen_reg_rtx (DImode); + emit_insn (gen_negdi2 (ele, operands[2])); + } + + RTVEC_ELT (vs, 0) = ele; + RTVEC_ELT (vs, 1) = ele; + emit_insn (gen_vec_initv2di (reg, par)); + emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg)); + DONE; +}) + +;; XOP FRCZ support +;; parallel insns +(define_insn "xop_frcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +;; scalar insns +(define_insn "xop_vmfrcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ) + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (const_int 1)))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_frcz<mode>2256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x") + (unspec:FMA4MODEF4 + [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_maskcmp<mode>3" + [(set 
(match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_maskcmp_uns<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* +;; and pcomneu* not to be converted to the signed ones in case somebody needs +;; the exact instruction generated for the intrinsic. +(define_insn "xop_maskcmp_uns2<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] + UNSPEC_XOP_UNSIGNED_CMP))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Pcomtrue and pcomfalse support. These are useless instructions, but are +;; being added here to be complete. +(define_insn "xop_pcom_tf<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operand:SSEMODE1248 1 "register_operand" "x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_int_operand" "n")] + UNSPEC_XOP_TRUEFALSE))] + "TARGET_XOP" +{ + return ((INTVAL (operands[3]) != 0) + ? 
"vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); +} + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "*avx_aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 7bc47f8f15d..ac7e21fd6f7 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -54,10 +54,6 @@ #include <smmintrin.h> #endif -#ifdef __FMA4__ -#include <fma4intrin.h> -#endif - #if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> #endif @@ -69,4 +65,16 @@ #include <mm3dnow.h> #endif +#ifdef __FMA4__ +#include <fma4intrin.h> +#endif + +#ifdef __XOP__ +#include <xopintrin.h> +#endif + +#ifdef __LWP__ +#include <lwpintrin.h> +#endif + #endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h new file mode 100644 index 00000000000..803417a6a45 --- /dev/null +++ b/gcc/config/i386/xopintrin.h @@ -0,0 +1,771 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _XOPMMINTRIN_H_INCLUDED +#define _XOPMMINTRIN_H_INCLUDED + +#ifndef __XOP__ +# error "XOP instruction set not enabled" +#else + +#include <fma4intrin.h> + +/* Integer multiply/add intructions. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +/* Packed Integer Horizontal Add and Subtract */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); +} + +extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); +} + +/* Vector conditional move and permute */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +/* Packed Integer Rotates and Shifts + Rotates - Non-Immediate form */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi64(__m128i 
__A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); +} + +/* Rotates - Immediate form */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi8(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi16(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi32(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi64(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); +} +#else +#define _mm_roti_epi8(A, N) \ + ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi16(A, N) \ + ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi32(A, N) \ + ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) +#define _mm_roti_epi64(A, N) \ + ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) +#endif + +/* Shifts */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); +} + + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); +} + +/* Compare and Predicate Generation + pcom (integer, unsigned bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); +} +
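(Each vpcom predicate returns a full-width lane mask, all ones where the relation holds, so its result can feed bitwise selection directly. A small sketch under the same -mxop assumption, combining the unsigned byte compare above with _mm_cmov_si128 from earlier in this header; the helper name is illustrative:)

/* Per-byte unsigned clamp: keep bytes of a that are below limit,
   replace the rest with limit, i.e. min(a, limit) per byte.  */
static __m128i
clamp_u8 (__m128i a, __m128i limit)
{
  __m128i lt = _mm_comlt_epu8 (a, limit);  /* 0xFF where a < limit */
  return _mm_cmov_si128 (a, limit, lt);    /* lt ? a : limit */
}

+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) 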
__builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); +} + +/* pcom (integer, unsigned words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); +} + +/* pcom (integer, unsigned double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); +} +
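(SSE2's pcmpgt family only covers signed elements, so the unsigned double-word forms here close a real gap: no more xor-with-0x80000000 sign-bias trick. A sketch, again assuming -mxop and an illustrative helper name:)

/* Unsigned per-lane minimum via one compare and one vpcmov.  */
static __m128i
min_u32 (__m128i a, __m128i b)
{
  __m128i lt = _mm_comlt_epu32 (a, b);  /* all-ones where a < b */
  return _mm_cmov_si128 (a, b, lt);     /* lt ? a : b */
}

+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return 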
(__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); +} + +/* pcom (integer, unsigned quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); +} + +/* pcom (integer, signed bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); +} +
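(Combined with vpcmov, the signed byte compare above gives branchless idioms such as absolute value without SSSE3's pabsb. A minimal sketch under the same -mxop assumption; abs_i8 is an illustrative name and _mm_setzero_si128/_mm_sub_epi8 are pre-existing SSE2 intrinsics:)

/* |a| per signed byte: negate where a < 0, else keep a.
   Like pabsb, -128 stays -128.  */
static __m128i
abs_i8 (__m128i a)
{
  __m128i zero = _mm_setzero_si128 ();
  __m128i neg  = _mm_sub_epi8 (zero, a);    /* -a */
  __m128i lt0  = _mm_comlt_epi8 (a, zero);  /* 0xFF where a < 0 */
  return _mm_cmov_si128 (neg, a, lt0);      /* lt0 ? -a : a */
}

+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) 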
__builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); +} + +/*pcom (integer, signed words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); +} + +/*pcom (integer, signed double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); +} + +/*pcom (integer, signed quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); +} + +/* FRCZ */ + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_pd (__m128d __A) +{ + return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); +} + +extern __inline __m128 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_frcz_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B); +} + +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_ps (__m256 __A) +{ + return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); +} + +extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_pd (__m256d __A) +{ + return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* _XOPMMINTRIN_H_INCLUDED */
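(The vfrcz forms extract the fractional part of each element in a single instruction, replacing a round-and-subtract sequence. A short usage sketch, assuming -mxop as before; the helper name is illustrative:)

#include <x86intrin.h>

/* frac(x) = x - trunc(x) per lane, e.g. frac(3.75) = 0.75 and
   frac(-3.75) = -0.75, done by vfrczps.  */
__m128 frac_ps (__m128 x)
{
  return _mm_frcz_ps (x);
}

diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h index c0914d3be02..78e3115c291 100644 --- a/gcc/config/m32c/m32c.h +++ b/gcc/config/m32c/m32c.h @@ -560,7 +560,6 @@ typedef struct m32c_cumulative_args #define HAVE_PRE_DECREMENT 1 #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) #define MAX_REGS_PER_ADDRESS 1 /* This is passed to the macros below, so that they can be implemented diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h index ee0f9f67fca..278ba15c4fa 100644 --- a/gcc/config/m68hc11/m68hc11.h +++ b/gcc/config/m68hc11/m68hc11.h @@ -1108,9 +1108,6 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER]; && (GET_CODE (XEXP (operand, 0)) == POST_INC) \ && (SP_REG_P (XEXP (XEXP (operand, 0), 0)))) -/* 1 if X is an rtx for a constant that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Maximum number of registers that can appear in a valid memory address */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c index 0862936b1b4..8db98fc4f46 100644 --- a/gcc/config/m68k/m68k.c +++ b/gcc/config/m68k/m68k.c @@ -1399,6 +1399,30 @@ flags_in_68881 (void) return cc_status.flags & CC_IN_68881; } +/* Return true if PARALLEL contains register REGNO. */ +static bool +m68k_reg_present_p (const_rtx parallel, unsigned int regno) +{ + int i; + + if (REG_P (parallel) && REGNO (parallel) == regno) + return true; + + if (GET_CODE (parallel) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (parallel, 0); ++i) + { + const_rtx x; + + x = XEXP (XVECEXP (parallel, 0, i), 0); + if (REG_P (x) && REGNO (x) == regno) + return true; + } + + return false; +} + /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */ static bool @@ -1411,6 +1435,26 @@ m68k_ok_for_sibcall_p (tree decl, tree exp) if (CALL_EXPR_STATIC_CHAIN (exp)) return false; + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a D0 register but then need to transfer it to an A0 register. */ + rtx cfun_value; + rtx call_value; + + cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl); + call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl); + + /* Check that the values are equal or that the result the callee + function returns is a superset of what the current function returns. 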
*/ + if (!(rtx_equal_p (cfun_value, call_value) + || (REG_P (cfun_value) + && m68k_reg_present_p (call_value, REGNO (cfun_value))))) + return false; + } + kind = m68k_get_function_kind (current_function_decl); if (kind == m68k_fk_normal_function) /* We can always sibcall from a normal function, because it's @@ -5188,6 +5232,9 @@ m68k_libcall_value (enum machine_mode mode) return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG); } +/* Location in which function value is returned. + NOTE: Due to differences in ABIs, don't call this function directly, + use FUNCTION_VALUE instead. */ rtx m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED) { diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h index 8b00a444ce2..9d286e33b94 100644 --- a/gcc/config/mep/mep.h +++ b/gcc/config/mep/mep.h @@ -567,8 +567,6 @@ typedef struct #define TRAMPOLINE_SIZE 20 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/mips/iris.h b/gcc/config/mips/iris.h index fce82174e66..373691ee6e1 100644 --- a/gcc/config/mips/iris.h +++ b/gcc/config/mips/iris.h @@ -63,9 +63,6 @@ along with GCC; see the file COPYING3. If not see #undef ASM_FINISH_DECLARE_OBJECT #define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object -/* The linker needs a space after "-o". */ -#define SWITCHES_NEED_SPACES "o" - /* Specify wchar_t types. */ #undef WCHAR_TYPE #define WCHAR_TYPE (Pmode == DImode ? "int" : "long int") diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 5005bf7f0fb..76fc37bd479 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -1930,7 +1930,7 @@ (set (match_dup 0) (match_dup 5)) (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI)) - ;; Zero-extend OP4. + ;; Zero-extend OP0. (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) @@ -1938,7 +1938,7 @@ (lshiftrt:DI (match_dup 0) (const_int 32))) - ;; Shift OP0 into place. + ;; Shift OP4 into place. (set (match_dup 4) (ashift:DI (match_dup 4) (const_int 32))) diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h index bdbc948ac12..c732aa07180 100644 --- a/gcc/config/mn10300/mn10300.h +++ b/gcc/config/mn10300/mn10300.h @@ -600,10 +600,6 @@ struct cum_arg {int nbytes; }; ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \ : (rtx) 0) -/* 1 if X is an rtx for a constant that is a valid address. */ - -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && GET_CODE (X) != CONST_DOUBLE) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h index f1b77eaf0c2..384bce4a986 100644 --- a/gcc/config/moxie/moxie.h +++ b/gcc/config/moxie/moxie.h @@ -475,10 +475,6 @@ enum reg_class an immediate operand on the target machine. */ #define LEGITIMATE_CONSTANT_P(X) 1 -/* A C expression that is 1 if the RTX X is a constant which is a - valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - /* A number, the maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h index 8997612ba5a..fe8c9e8aed3 100644 --- a/gcc/config/pdp11/pdp11.h +++ b/gcc/config/pdp11/pdp11.h @@ -594,10 +594,6 @@ extern int may_call_alloca; #define MAX_REGS_PER_ADDRESS 1 -/* Recognize any constant value that is a valid address. 
*/ - -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Nonzero if the constant value X is a legitimate general operand. It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h index 44559f22333..4d0c96278e0 100644 --- a/gcc/config/picochip/picochip.h +++ b/gcc/config/picochip/picochip.h @@ -471,8 +471,6 @@ extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER]; /* Addressing Modes */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - #define MAX_REGS_PER_ADDRESS 1 /* Legitimize reload address tries machine dependent means of diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md index f15b586afb5..52bf7df3621 100644 --- a/gcc/config/rx/constraints.md +++ b/gcc/config/rx/constraints.md @@ -55,7 +55,7 @@ ;; This constraint is used by the SUBSI3 pattern because the ;; RX SUB instruction can only take a 4-bit unsigned integer -;; value. +;; value. Also used by the MVTIPL instruction. (define_constraint "Uint04" "@internal An unsigned 4-bit immediate value" (and (match_code "const_int") diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md index 75cf8ebaed8..d7a363ebb88 100644 --- a/gcc/config/rx/predicates.md +++ b/gcc/config/rx/predicates.md @@ -117,16 +117,22 @@ /* Check that the next element is the first push. */ element = XVECEXP (op, 0, 1); if ( ! SET_P (element) + || ! REG_P (SET_SRC (element)) + || GET_MODE (SET_SRC (element)) != SImode || ! MEM_P (SET_DEST (element)) - || ! REG_P (XEXP (SET_DEST (element), 0)) - || REGNO (XEXP (SET_DEST (element), 0)) != SP_REG - || ! REG_P (SET_SRC (element))) + || GET_MODE (SET_DEST (element)) != SImode + || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS + || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0)) + || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG + || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) + != GET_MODE_SIZE (SImode)) return false; src_regno = REGNO (SET_SRC (element)); /* Check that the remaining elements use SP-<disp> - addressing and incremental register numbers. */ + addressing and decreasing register numbers. */ for (i = 2; i < count; i++) { element = XVECEXP (op, 0, i); @@ -134,7 +140,7 @@ if ( ! SET_P (element) || ! REG_P (SET_SRC (element)) || GET_MODE (SET_SRC (element)) != SImode - || REGNO (SET_SRC (element)) != src_regno + (i - 1) + || REGNO (SET_SRC (element)) != src_regno - (i - 1) || ! MEM_P (SET_DEST (element)) || GET_MODE (SET_DEST (element)) != SImode || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS @@ -142,7 +148,7 @@ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) - != (i - 1) * GET_MODE_SIZE (SImode)) + != i * GET_MODE_SIZE (SImode)) return false; } return true; diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index cf2b098e83c..885f52581de 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -51,6 +51,8 @@ #include "target-def.h" #include "langhooks.h" +enum rx_cpu_types rx_cpu_type = RX600; + /* Return true if OP is a reference to an object in a small data area. */ static bool @@ -249,7 +251,6 @@ rx_is_mode_dependent_addr (rtx addr) } } - /* A C compound statement to output to stdio stream FILE the assembler syntax for an instruction operand that is a memory reference whose address is ADDR. 
*/ @@ -445,8 +446,13 @@ rx_print_operand (FILE * file, rtx op, int letter) fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]); else if (CONST_INT_P (op)) { + HOST_WIDE_INT v = INTVAL (op); + fprintf (file, "#"); - rx_print_integer (file, INTVAL (op) >> 32); + /* Trickery to avoid problems with shifting 32 bits at a time. */ + v = v >> 16; + v = v >> 16; + rx_print_integer (file, v); } else { @@ -840,22 +846,20 @@ has_func_attr (const_tree decl, const char * func_attr) return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE; } -/* Returns true if the provided function has - the "[fast_]interrupt" attribute. */ +/* Returns true if the provided function has the "fast_interrupt" attribute. */ static inline bool is_fast_interrupt_func (const_tree decl) { - return has_func_attr (decl, "interrupt") - || has_func_attr (decl, "fast_interrupt") ; + return has_func_attr (decl, "fast_interrupt"); } -/* Returns true if the provided function has the "exception" attribute. */ +/* Returns true if the provided function has the "interrupt" attribute. */ static inline bool -is_exception_func (const_tree decl) +is_interrupt_func (const_tree decl) { - return has_func_attr (decl, "exception"); + return has_func_attr (decl, "interrupt"); } /* Returns true if the provided function has the "naked" attribute. */ @@ -945,8 +949,8 @@ rx_set_current_function (tree fndecl) { /* Remember the last target of rx_set_current_function. */ static tree rx_previous_fndecl; - bool prev_was_interrupt; - bool current_is_interrupt; + bool prev_was_fast_interrupt; + bool current_is_fast_interrupt; /* Only change the context if the function changes. This hook is called several times in the course of compiling a function, and we don't want @@ -954,18 +958,19 @@ rx_set_current_function (tree fndecl) if (fndecl == rx_previous_fndecl) return; - prev_was_interrupt + prev_was_fast_interrupt = rx_previous_fndecl ? is_fast_interrupt_func (rx_previous_fndecl) : false; - current_is_interrupt + + current_is_fast_interrupt = fndecl ? is_fast_interrupt_func (fndecl) : false; - if (prev_was_interrupt != current_is_interrupt) + if (prev_was_fast_interrupt != current_is_fast_interrupt) { - use_fixed_regs = current_is_interrupt; + use_fixed_regs = current_is_fast_interrupt; target_reinit (); } - + rx_previous_fndecl = fndecl; } @@ -1057,8 +1062,8 @@ rx_get_stack_layout (unsigned int * lowest, if (df_regs_ever_live_p (reg) && (! call_used_regs[reg] /* Even call clobbered registers must - be pushed inside exception handlers. */ - || is_exception_func (NULL_TREE))) + be pushed inside interrupt handlers. */ + || is_interrupt_func (NULL_TREE))) { if (low == 0) low = reg; @@ -1142,9 +1147,8 @@ rx_emit_stack_pushm (rtx * operands) gcc_assert (REG_P (first_push)); asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n", - reg_names [REGNO (first_push)], - reg_names [REGNO (first_push) + last_reg]); - + reg_names [REGNO (first_push) - last_reg], + reg_names [REGNO (first_push)]); } /* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */ @@ -1167,14 +1171,30 @@ gen_rx_store_vector (unsigned int low, unsigned int high) XVECEXP (vector, 0, i + 1) = gen_rtx_SET (SImode, gen_rtx_MEM (SImode, - i == 0 ? stack_pointer_rtx - : gen_rtx_MINUS (SImode, stack_pointer_rtx, - GEN_INT (i * UNITS_PER_WORD))), - gen_rtx_REG (SImode, low + i)); - + gen_rtx_MINUS (SImode, stack_pointer_rtx, + GEN_INT ((i + 1) * UNITS_PER_WORD))), + gen_rtx_REG (SImode, high - i)); return vector; } +/* Mark INSN as being frame related. 
If it is a PARALLEL + then mark each element as being frame related as well. */ + +static void +mark_frame_related (rtx insn) +{ + RTX_FRAME_RELATED_P (insn) = 1; + insn = PATTERN (insn); + + if (GET_CODE (insn) == PARALLEL) + { + unsigned int i; + + for (i = 0; i < XVECLEN (insn, 0); i++) + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1; + } +} + void rx_expand_prologue (void) { @@ -1183,6 +1203,7 @@ unsigned int mask; unsigned int low; unsigned int high; + unsigned int reg; rtx insn; /* Naked functions use their own, programmer provided prologues. */ @@ -1196,14 +1217,12 @@ /* If we use any of the callee-saved registers, save them now. */ if (mask) { - unsigned int reg; - /* Push registers in reverse order. */ for (reg = FIRST_PSEUDO_REGISTER; reg --;) if (mask & (1 << reg)) { insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); } } else if (low) @@ -1214,7 +1233,57 @@ insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD), gen_rx_store_vector (low, high))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); + } + + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) + { + unsigned int acc_high, acc_low; + + /* Interrupt handlers have to preserve the accumulator + register if so requested by the user. Use the first + two pushed registers as intermediaries. */ + if (mask) + { + acc_low = acc_high = 0; + + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high != 0); + + /* Note - the bottom 16 bits of the accumulator are inaccessible. + We just assume that they are zero. */ + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high))); + } + else + { + acc_low = low; + acc_high = low + 1; + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high <= high); + + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_store_vector (acc_low, acc_high))); + } + + frame_size += 2 * UNITS_PER_WORD; } /* If needed, set up the frame pointer. */ @@ -1270,8 +1339,8 @@ rx_output_function_prologue (FILE * file, if (is_fast_interrupt_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n"); - if (is_exception_func (NULL_TREE)) - asm_fprintf (file, "\t; Note: Exception Handler\n"); + if (is_interrupt_func (NULL_TREE)) + asm_fprintf (file, "\t; Note: Interrupt Handler\n"); if (is_naked_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Naked Function\n"); @@ -1382,6 +1451,7 @@ rx_expand_epilogue (bool is_sibcall) unsigned int stack_size; unsigned int register_mask; unsigned int regs_size; + unsigned int reg; unsigned HOST_WIDE_INT total_size; if (is_naked_func (NULL_TREE)) @@ -1407,14 +1477,14 @@ their caller. Instead they branch to their sibling and allow their return instruction to return to this function's parent.
- - Fast interrupt and exception handling functions have to use special + - Fast and normal interrupt handling functions have to use special return instructions. - Functions where we have pushed a fragmented set of registers into the call-save area must have the same set of registers popped. */ if (is_sibcall || is_fast_interrupt_func (NULL_TREE) - || is_exception_func (NULL_TREE) + || is_interrupt_func (NULL_TREE) || register_mask) { /* Cannot use the special instructions - deconstruct by hand. */ @@ -1422,10 +1492,47 @@ rx_expand_epilogue (bool is_sibcall) emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (total_size))); - if (register_mask) + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) { - unsigned int reg; + unsigned int acc_low, acc_high; + + /* Reverse the saving of the accumulator register onto the stack. + Note we must adjust the saved "low" accumulator value as it + is really the middle 32-bits of the accumulator. */ + if (register_mask) + { + acc_low = acc_high = 0; + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (register_mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low))); + } + else + { + acc_low = low; + acc_high = low + 1; + emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_popm_vector (acc_low, acc_high))); + } + + emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low), + gen_rtx_REG (SImode, acc_low), + GEN_INT (16))); + emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high))); + } + if (register_mask) + { for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg ++) if (register_mask & (1 << reg)) emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg))); @@ -1441,7 +1548,7 @@ rx_expand_epilogue (bool is_sibcall) if (is_fast_interrupt_func (NULL_TREE)) emit_jump_insn (gen_fast_interrupt_return ()); - else if (is_exception_func (NULL_TREE)) + else if (is_interrupt_func (NULL_TREE)) emit_jump_insn (gen_exception_return ()); else if (! is_sibcall) emit_jump_insn (gen_simple_return ()); @@ -1670,6 +1777,7 @@ enum rx_builtin RX_BUILTIN_MVTACHI, RX_BUILTIN_MVTACLO, RX_BUILTIN_MVTC, + RX_BUILTIN_MVTIPL, RX_BUILTIN_RACW, RX_BUILTIN_REVW, RX_BUILTIN_RMPA, @@ -1725,6 +1833,7 @@ rx_init_builtins (void) ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void); ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer); ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer); + ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer); ADD_RX_BUILTIN1 (RACW, "racw", void, integer); ADD_RX_BUILTIN1 (ROUND, "round", intSI, float); ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI); @@ -1733,20 +1842,6 @@ rx_init_builtins (void) } static rtx -rx_expand_builtin_stz (rtx arg, rtx target, rtx (* gen_func)(rtx, rtx)) -{ - if (! CONST_INT_P (arg)) - return NULL_RTX; - - if (target == NULL_RTX || ! REG_P (target)) - target = gen_reg_rtx (SImode); - - emit_insn (gen_func (target, arg)); - - return target; -} - -static rtx rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg) { if (reg && ! REG_P (arg)) @@ -1791,6 +1886,21 @@ rx_expand_builtin_mvfc (tree t_arg, rtx target) } static rtx +rx_expand_builtin_mvtipl (rtx arg) +{ + /* The RX610 does not support the MVTIPL instruction. */ + if (rx_cpu_type == RX610) + return NULL_RTX; + + if (! CONST_INT_P (arg) || ! 
IN_RANGE (arg, 0, (1 << 4) - 1)) + return NULL_RTX; + + emit_insn (gen_mvtipl (arg)); + + return NULL_RTX; +} + +static rtx rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx)) { rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); @@ -1887,6 +1997,7 @@ rx_expand_builtin (tree exp, case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX; case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target); case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp); + case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op); case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg (op, gen_racw, false); case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target); @@ -1945,7 +2056,7 @@ rx_elf_asm_destructor (rtx symbol, int priority) rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false); } -/* Check "interrupt", "exception" and "naked" attributes. */ +/* Check "fast_interrupt", "interrupt" and "naked" attributes. */ static tree rx_handle_func_attribute (tree * node, @@ -1975,9 +2086,8 @@ rx_handle_func_attribute (tree * node, const struct attribute_spec rx_attribute_table[] = { /* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */ - { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, - { "exception", 0, 0, true, false, false, rx_handle_func_attribute }, + { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "naked", 0, 0, true, false, false, rx_handle_func_attribute }, { NULL, 0, 0, false, false, false, NULL } }; @@ -1993,7 +2103,7 @@ static bool rx_func_attr_inlinable (const_tree decl) { return ! is_fast_interrupt_func (decl) - && ! is_exception_func (decl) + && ! is_interrupt_func (decl) && ! is_naked_func (decl); } @@ -2115,6 +2225,20 @@ rx_is_legitimate_constant (rtx x) ( 1 << (rx_max_constant_size * 8))); } +/* This is a tri-state variable. The default value of 0 means that the user + has specified neither -fpu nor -nofpu on the command line. In this case + the selection of RX FPU instructions is entirely based upon the size of + the floating point object and whether unsafe math optimizations were + enabled. If 32-bit doubles have been enabled then both floats and doubles + can make use of FPU instructions, otherwise only floats may do so. + + If the value is 1 then the user has specified -fpu and the FPU + instructions should be used. Unsafe math optimizations will automatically + be enabled and doubles set to 32-bits. If the value is -1 then -nofpu + has been specified and FPU instructions will not be used, even if unsafe + math optimizations have been enabled. */ +int rx_enable_fpu = 0; + /* Extra processing for target specific command line options. */ static bool @@ -2122,6 +2246,27 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) { switch (code) { + /* -fpu enables the use of RX FPU instructions. This implies the use + of 32-bit doubles and also the enabling of fast math optimizations, + since the RX FPU instructions are not IEEE compliant. The -nofpu + option disables the use of RX FPU instructions, but does not + place any constraints on the size of doubles or the use of fast math + optimizations. + + The selection of 32-bit vs 64-bit doubles is handled by the setting + of the 32BIT_DOUBLES mask in the rx.opt file.
Enabling fast math + optimizations is performed in OVERRIDE_OPTIONS since if it was done + here it could be overridden by a -fno-fast-math option specified + *earlier* on the command line. (Target specific options are + processed before generic ones). */ + case OPT_fpu: + rx_enable_fpu = 1; + break; + + case OPT_nofpu: + rx_enable_fpu = -1; + break; + case OPT_mint_register_: switch (value) { @@ -2145,12 +2290,21 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) break; case OPT_mmax_constant_size_: - /* Make sure that the the -mmax-constant_size option is in range. */ + /* Make sure that the -mmax-constant_size option is in range. */ return IN_RANGE (value, 0, 4); + case OPT_mcpu_: + case OPT_patch_: + if (strcasecmp (arg, "RX610") == 0) + rx_cpu_type = RX610; + /* FIXME: Should we check for non-RX cpu names here ? */ + break; + default: - return true; + break; } + + return true; } static int diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index a01e194910b..bb7cf7f1e3e 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -24,18 +24,24 @@ { \ builtin_define ("__RX__"); \ builtin_assert ("cpu=RX"); \ - builtin_assert ("machine=RX"); \ + if (rx_cpu_type == RX610) \ + builtin_assert ("machine=RX610"); \ + else \ + builtin_assert ("machine=RX600"); \ \ if (TARGET_BIG_ENDIAN_DATA) \ builtin_define ("__RX_BIG_ENDIAN__"); \ else \ builtin_define ("__RX_LITTLE_ENDIAN__");\ \ - if (TARGET_64BIT_DOUBLES) \ - builtin_define ("__RX_64BIT_DOUBLES__");\ - else \ + if (TARGET_32BIT_DOUBLES) \ builtin_define ("__RX_32BIT_DOUBLES__");\ + else \ + builtin_define ("__RX_64BIT_DOUBLES__");\ \ + if (ALLOW_RX_FPU_INSNS) \ + builtin_define ("__RX_FPU_INSNS__"); \ + \ if (TARGET_AS100_SYNTAX) \ builtin_define ("__RX_AS100_SYNTAX__"); \ else \ @@ -43,6 +49,17 @@ } \ while (0) +enum rx_cpu_types +{ + RX600, + RX610 +}; + +extern enum rx_cpu_types rx_cpu_type; + +#undef CC1_SPEC +#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" + #undef STARTFILE_SPEC #define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s" @@ -52,7 +69,8 @@ #undef ASM_SPEC #define ASM_SPEC "\ %{mbig-endian-data:-mbig-endian-data} \ -%{m64bit-doubles:-m64bit-doubles} \ +%{m32bit-doubles:-m32bit-doubles} \ +%{!m32bit-doubles:-m64bit-doubles} \ %{msmall-data-limit*:-msmall-data-limit} \ %{mrelax:-relax} \ " @@ -88,16 +106,17 @@ #define LONG_LONG_TYPE_SIZE 64 #define FLOAT_TYPE_SIZE 32 -#define DOUBLE_TYPE_SIZE (TARGET_64BIT_DOUBLES ? 64 : 32) +#define DOUBLE_TYPE_SIZE (TARGET_32BIT_DOUBLES ? 32 : 64) #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE -#ifdef __RX_64BIT_DOUBLES__ -#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_HAS_DF_MODE 1 -#else +#ifdef __RX_32BIT_DOUBLES__ +#define LIBGCC2_HAS_DF_MODE 0 #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32 #define LIBGCC2_DOUBLE_TYPE_SIZE 32 +#else +#define LIBGCC2_HAS_DF_MODE 1 +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#define LIBGCC2_DOUBLE_TYPE_SIZE 64 #endif #define DEFAULT_SIGNED_CHAR 0 @@ -591,7 +610,6 @@ typedef unsigned int CUMULATIVE_ARGS; #define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ rx_print_operand_address (FILE, ADDR) - #define CC_NO_CARRY 0400 #define NOTICE_UPDATE_CC(EXP, INSN) rx_notice_update_cc (EXP, INSN) @@ -614,19 +632,28 @@ extern int rx_float_compare_mode; #define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \ ? 
DBX_DEBUG : DWARF2_DEBUG) -#undef CC1_SPEC -#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" +#define INCOMING_FRAME_SP_OFFSET 4 +#define ARG_POINTER_CFA_OFFSET(FNDECL) 4 +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4 + +extern int rx_enable_fpu; /* For some unknown reason LTO compression is not working, at least on my local system. So set the default compression - level to none, for now. */ + level to none, for now. + + For an explanation of rx_enable_fpu see rx_handle_option(). */ #define OVERRIDE_OPTIONS \ do \ { \ if (flag_lto_compression_level == -1) \ flag_lto_compression_level = 0; \ + \ + if (rx_enable_fpu == 1) \ + set_fast_math_flags (true); \ } \ while (0) /* This macro is used to decide when RX FPU instructions can be used. */ -#define ALLOW_RX_FPU_INSNS flag_unsafe_math_optimizations +#define ALLOW_RX_FPU_INSNS ((rx_enable_fpu != -1) \ + && flag_unsafe_math_optimizations) diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 165da4f41a1..360f6235558 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -27,8 +27,8 @@ ;; This code iterator is used for sign- and zero- extensions. (define_mode_iterator small_int_modes [(HI "") (QI "")]) -;; We do not handle DFmode here because by default it is -;; the same as SFmode, and if -m64bit-doubles is active +;; We do not handle DFmode here because it is either +;; the same as SFmode, or if -m64bit-doubles is active ;; then all operations on doubles have to be handled by ;; library functions. (define_mode_iterator register_modes @@ -75,15 +75,14 @@ (UNSPEC_BUILTIN_MVTACHI 41) (UNSPEC_BUILTIN_MVTACLO 42) (UNSPEC_BUILTIN_MVTC 43) - (UNSPEC_BUILTIN_MVTCP 44) - (UNSPEC_BUILTIN_OPEPC 45) - (UNSPEC_BUILTIN_RACW 46) - (UNSPEC_BUILTIN_REVW 47) - (UNSPEC_BUILTIN_RMPA 48) - (UNSPEC_BUILTIN_ROUND 49) - (UNSPEC_BUILTIN_SAT 50) - (UNSPEC_BUILTIN_SETPSW 51) - (UNSPEC_BUILTIN_WAIT 52) + (UNSPEC_BUILTIN_MVTIPL 44) + (UNSPEC_BUILTIN_RACW 45) + (UNSPEC_BUILTIN_REVW 46) + (UNSPEC_BUILTIN_RMPA 47) + (UNSPEC_BUILTIN_ROUND 48) + (UNSPEC_BUILTIN_SAT 49) + (UNSPEC_BUILTIN_SETPSW 50) + (UNSPEC_BUILTIN_WAIT 51) ] ) @@ -1002,10 +1001,8 @@ (set_attr "timings" "11,11,11,11,11,33") (set_attr "length" "3,4,5,6,7,6")] ) - + ;; Floating Point Instructions -;; These patterns are only enabled with -ffast-math because the RX FPU -;; cannot handle sub-normal values. (define_insn "addsf3" [(set (match_operand:SF 0 "register_operand" "=r,r,r") @@ -1298,7 +1295,6 @@ [(set_attr "length" "3,6") (set_attr "timings" "22")] ) - ;; Block move functions.
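To make the new gating concrete, a hedged user-level sketch (the function below is illustrative only, not part of the patch): with -fpu, which selects 32-bit doubles and enables unsafe math optimizations, single-precision arithmetic like this can be matched by the floating point patterns above, while with -nofpu, or without unsafe math optimizations, it is routed through the soft-float library instead.

  /* Illustrative only: a candidate for the RX floating point patterns
     when ALLOW_RX_FPU_INSNS holds, and a libcall otherwise.  */
  float
  scale (float x, float y)
  {
    return x * y + 1.0f;
  }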
@@ -1580,8 +1576,8 @@ ;; Move to Accumulator (high) (define_insn "mvtachi" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACHI)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACHI)] "" "mvtachi\t%0" [(set_attr "length" "3")] @@ -1589,8 +1585,8 @@ ;; Move to Accumulator (low) (define_insn "mvtaclo" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACLO)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACLO)] "" "mvtaclo\t%0" [(set_attr "length" "3")] @@ -1598,8 +1594,8 @@ ;; Round Accumulator (define_insn "racw" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")] - UNSPEC_BUILTIN_RACW)] + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] + UNSPEC_BUILTIN_RACW)] "" "racw\t%0" [(set_attr "length" "3")] @@ -1679,7 +1675,7 @@ ;; Move from control register (define_insn "mvfc" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_BUILTIN_MVFC))] "" @@ -1691,13 +1687,24 @@ (define_insn "mvtc" [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") (match_operand:SI 1 "nonmemory_operand" "r,i")] - UNSPEC_BUILTIN_MVTC) - (clobber (cc0))] + UNSPEC_BUILTIN_MVTC)] "" "mvtc\t%1, %C0" - [(set_attr "length" "3,7") - (set_attr "cc" "clobber")] ;; Just in case the control - ;; register selected is the psw. + [(set_attr "length" "3,7")] + ;; Ignore possible clobbering of the comparison flags in the + ;; PSW register. This is a cc0 target so any cc0 setting + ;; instruction will always be paired with a cc0 user, without + ;; the possibility of this instruction being placed in between + ;; them. +) + +;; Move to interrupt priority level +(define_insn "mvtipl" + [(unspec:SI [(match_operand:SI 0 "immediate_operand" "Uint04")] + UNSPEC_BUILTIN_MVTIPL)] + "" + "mvtipl\t%0" + [(set_attr "length" "3")] ) ;;---------- Interrupts ------------------------ @@ -1748,27 +1755,6 @@ [(set_attr "length" "5")] ) -;; Move to co-processor register -(define_insn "mvtcp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") - (match_operand:SI 1 "nonmemory_operand" "i,r") - (match_operand:SI 2 "immediate_operand" "i,i")] - UNSPEC_BUILTIN_MVTCP)] - "" - "; mvtcp\t%0, %1, %2" - [(set_attr "length" "7,5")] -) - -;; Co-processor operation -(define_insn "opecp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i") - (match_operand:SI 1 "immediate_operand" "i")] - UNSPEC_BUILTIN_OPEPC)] - "" - "; opecp\t%0, %1" - [(set_attr "length" "5")] -) - ;;---------- Misc ------------------------ ;; Required by cfglayout.c... diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt index 83e75bfba76..768d565b478 100644 --- a/gcc/config/rx/rx.opt +++ b/gcc/config/rx/rx.opt @@ -19,13 +19,31 @@ ; <http://www.gnu.org/licenses/>. ;--------------------------------------------------- +m32bit-doubles +Target RejectNegative Mask(32BIT_DOUBLES) +Stores doubles in 32 bits. + m64bit-doubles -Target RejectNegative Mask(64BIT_DOUBLES) -Store doubles in 64 bits. +Target RejectNegative InverseMask(32BIT_DOUBLES) +Store doubles in 64 bits. This is the default. -m32bit-doubles -Target RejectNegative InverseMask(64BIT_DOUBLES) -Stores doubles in 32 bits. This is the default. +fpu +Target RejectNegative Mask(32BIT_DOUBLES) MaskExists +Enable the use of RX FPU instructions. 
+ +nofpu +Target RejectNegative InverseMask(32BIT_DOUBLES) MaskExists +Disable the use of RX FPU instructions. + +;--------------------------------------------------- + +mcpu= +Target RejectNegative Joined Var(rx_cpu_name) +Specify the target RX cpu type. + +patch= +Target RejectNegative Joined Var(rx_cpu_name) +Alias for -mcpu. ;--------------------------------------------------- @@ -72,3 +90,9 @@ Maximum size in bytes of constant values allowed as operands. mint-register= Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0) Specifies the number of registers to reserve for interrupt handlers. + +;--------------------------------------------------- + +msave-acc-in-interrupts +Target Mask(SAVE_ACC_REGISTER) +Specifies whether interrupt functions should save and restore the accumulator register. diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx index 39cda72af57..eb1ca48d3a3 100644 --- a/gcc/config/rx/t-rx +++ b/gcc/config/rx/t-rx @@ -20,9 +20,9 @@ # Enable multilibs: -MULTILIB_OPTIONS = m64bit-doubles mbig-endian-data -MULTILIB_DIRNAMES = 64fp big-endian-data -MULTILIB_MATCHES = m64bit-doubles=mieee +MULTILIB_OPTIONS = m32bit-doubles mbig-endian-data +MULTILIB_DIRNAMES = 32fp big-endian-data +MULTILIB_MATCHES = m32bit-doubles=fpu MULTILIB_EXCEPTIONS = MULTILIB_EXTRA_OPTS = diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h index 0b7af7b2739..cde9c222546 100644 --- a/gcc/config/score/score.h +++ b/gcc/config/score/score.h @@ -688,9 +688,6 @@ typedef struct score_args #define HAVE_PRE_MODIFY_REG 0 #define HAVE_POST_MODIFY_REG 0 -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h index 682f7e6f466..fa97e8becdc 100644 --- a/gcc/config/stormy16/stormy16.h +++ b/gcc/config/stormy16/stormy16.h @@ -522,8 +522,6 @@ enum reg_class #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h index 1087069adbb..dccbe9cc8ee 100644 --- a/gcc/config/vax/linux.h +++ b/gcc/config/vax/linux.h @@ -21,17 +21,7 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)"); -#define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ - LINUX_TARGET_OS_CPP_BUILTINS(); \ - if (flag_pic) \ - { \ - builtin_define ("__PIC__"); \ - builtin_define ("__pic__"); \ - } \ - } \ - while (0) +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() /* We use GAS, G-float double and want new DI patterns. */ #undef TARGET_DEFAULT diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 10fa16a2421..6d7fd34cb61 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,77 @@ +2009-11-06 Andrew Pinski <andrew_pinski@playstation.sony.com> + + PR c++/41536 + * optimize.c (maybe_clone_body): Copy DECL_ATTRIBUTES and + DECL_DISREGARD_INLINE_LIMITS also. + +2009-11-06 Jakub Jelinek <jakub@redhat.com> + + PR c++/41967 + * parser.c (cp_parser_omp_for_loop): After diagnosing not perfectly + nested loop and parsing statements, don't cp_parser_require }, instead + exit the loop if next token is CPP_EOF. + +2009-11-05 Jason Merrill <jason@redhat.com> + + PR c++/34180 + * method.c (do_build_copy_constructor): Don't drop cv-quals from + the field type. 
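As a concrete illustration of the PR c++/40687 entry below (see the do_auto_deduction hunk in pt.c), a minimal hedged sketch, compiled with -std=c++0x:

  // Every declarator in a single declaration must deduce the same
  // type for 'auto'; the second line is now rejected with
  // "inconsistent deduction for 'auto': 'int' and then 'double'".
  auto i = 1, j = 2;      // OK: both declarators deduce int
  // auto k = 1, d = 2.0; // error after this change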
+ + PR c++/7046 + * class.c (finish_struct): Store maximum_field_alignment in + TYPE_PRECISION. + * pt.c (instantiate_class_template): Set maximum_field_alignment. + + PR c++/34870 + * name-lookup.c (arg_assoc_class): Call complete_type. + * pt.c (instantiate_class_template): Call uses_template_parms + instead of dependent_type_p. + + PR c++/41703 + * pt.c (check_undeduced_parms): New subroutine of... + (more_specialized_fn): ...here. Undeduced template parms can make + a template less specialized than another. + +2009-11-04 Jason Merrill <jason@redhat.com> + + PR c++/39413 + * search.c (lookup_base): Don't complete_type (base). + + PR c++/35067 + * method.c (use_thunk): Check DECL_WEAK as well as + DECL_ONE_ONLY. + + PR c++/17365, DR 218 + * name-lookup.c (add_function): Ignore non-functions. + +2009-11-03 Jason Merrill <jason@redhat.com> + + PR c++/36959 + * decl2.c (cxx_callgraph_analyze_expr): Don't reference a function + just because a static variable in it is needed unless -frepo. + + PR c++/41876 + * parser.c (cp_parser_type_specifier_seq): Rename is_condition to + is_declaration. + (cp_parser_exception_declaration): Pass true. + (cp_parser_omp_for_loop): Likewise. + + PR c++/41927 + * typeck.c (build_x_binary_op): Don't do warn_parentheses + if we're in a SFINAE context. + + PR c++/41815 + * call.c (build_call_a): Strip cv-quals from rvalue result. + + PR c++/40944 + * call.c (initialize_reference): Add complain parm. + * typeck.c (convert_for_initialization): Pass it. + * decl.c (grok_reference_init): Likewise. + * cp-tree.h: Declare it. + + PR c++/40687 + * pt.c (do_auto_deduction): Diagnose inconsistent deduction. + 2009-11-02 Dodji Seketeli <dodji@redhat.com> PR c++/37093 diff --git a/gcc/cp/call.c b/gcc/cp/call.c index 463257cd5af..0979f3ab0ff 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -313,6 +313,9 @@ build_call_a (tree function, int n, tree *argarray) gcc_assert (TREE_CODE (fntype) == FUNCTION_TYPE || TREE_CODE (fntype) == METHOD_TYPE); result_type = TREE_TYPE (fntype); + /* An rvalue has no cv-qualifiers. */ + if (SCALAR_TYPE_P (result_type) || VOID_TYPE_P (result_type)) + result_type = cv_unqualified (result_type); if (TREE_CODE (function) == ADDR_EXPR && TREE_CODE (TREE_OPERAND (function, 0)) == FUNCTION_DECL) @@ -7617,7 +7620,8 @@ set_up_extended_ref_temp (tree decl, tree expr, tree *cleanup, tree *initp) Return the converted expression. 
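For example, assuming the usual diagnostic wording in this file, the ill-formed binding
  int &r = 1;
is diagnosed here as an invalid initialization of a non-const reference from an rvalue, while a caller that passes a COMPLAIN value without tf_error set (as SFINAE contexts may) gets a silent error_mark_node instead.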
*/ tree -initialize_reference (tree type, tree expr, tree decl, tree *cleanup) +initialize_reference (tree type, tree expr, tree decl, tree *cleanup, + tsubst_flags_t complain) { conversion *conv; void *p; @@ -7632,16 +7636,19 @@ initialize_reference (tree type, tree expr, tree decl, tree *cleanup) LOOKUP_NORMAL); if (!conv || conv->bad_p) { - if (!(TYPE_QUALS (TREE_TYPE (type)) & TYPE_QUAL_CONST) - && !TYPE_REF_IS_RVALUE (type) - && !real_lvalue_p (expr)) - error ("invalid initialization of non-const reference of " - "type %qT from an rvalue of type %qT", - type, TREE_TYPE (expr)); - else - error ("invalid initialization of reference of type " - "%qT from expression of type %qT", type, - TREE_TYPE (expr)); + if (complain & tf_error) + { + if (!(TYPE_QUALS (TREE_TYPE (type)) & TYPE_QUAL_CONST) + && !TYPE_REF_IS_RVALUE (type) + && !real_lvalue_p (expr)) + error ("invalid initialization of non-const reference of " + "type %qT from an rvalue of type %qT", + type, TREE_TYPE (expr)); + else + error ("invalid initialization of reference of type " + "%qT from expression of type %qT", type, + TREE_TYPE (expr)); + } return error_mark_node; } diff --git a/gcc/cp/class.c b/gcc/cp/class.c index dc4c6b39c9a..4020144e815 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -5516,6 +5516,9 @@ finish_struct (tree t, tree attributes) if (DECL_PURE_VIRTUAL_P (x)) VEC_safe_push (tree, gc, CLASSTYPE_PURE_VIRTUALS (t), x); complete_vars (t); + + /* Remember current #pragma pack value. */ + TYPE_PRECISION (t) = maximum_field_alignment; } else finish_struct_1 (t); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 79655142eeb..c4b088beb56 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4429,7 +4429,7 @@ extern tree type_passed_as (tree); extern tree convert_for_arg_passing (tree, tree); extern bool is_properly_derived_from (tree, tree); extern tree set_up_extended_ref_temp (tree, tree, tree *, tree *); -extern tree initialize_reference (tree, tree, tree, tree *); +extern tree initialize_reference (tree, tree, tree, tree *, tsubst_flags_t); extern tree make_temporary_var_for_ref_to_temp (tree, tree); extern tree strip_top_quals (tree); extern bool reference_related_p (tree, tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index de29d0bdb63..97f1ac1f4f3 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -4390,7 +4390,7 @@ grok_reference_init (tree decl, tree type, tree init, tree *cleanup) DECL_INITIAL for local references (instead assigning to them explicitly); we need to allow the temporary to be initialized first. */ - tmp = initialize_reference (type, init, decl, cleanup); + tmp = initialize_reference (type, init, decl, cleanup, tf_warning_or_error); if (tmp == error_mark_node) return NULL_TREE; diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index b1fe4b9e31c..dbb9fb47a64 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -3310,6 +3310,7 @@ cxx_callgraph_analyze_expr (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED) mark_decl_referenced (vtbl); } else if (DECL_CONTEXT (t) + && flag_use_repository && TREE_CODE (DECL_CONTEXT (t)) == FUNCTION_DECL) /* If we need a static variable in a function, then we need the containing function. 
*/ diff --git a/gcc/cp/method.c b/gcc/cp/method.c index 266406c7cd0..47f9e424dbd 100644 --- a/gcc/cp/method.c +++ b/gcc/cp/method.c @@ -378,7 +378,7 @@ use_thunk (tree thunk_fndecl, bool emit_p) DECL_VISIBILITY (thunk_fndecl) = DECL_VISIBILITY (function); DECL_VISIBILITY_SPECIFIED (thunk_fndecl) = DECL_VISIBILITY_SPECIFIED (function); - if (DECL_ONE_ONLY (function)) + if (DECL_ONE_ONLY (function) || DECL_WEAK (function)) make_decl_one_only (thunk_fndecl, cxx_comdat_group (thunk_fndecl)); if (flag_syntax_only) @@ -622,6 +622,7 @@ do_build_copy_constructor (tree fndecl) if (DECL_MUTABLE_P (field)) quals &= ~TYPE_QUAL_CONST; + quals |= TYPE_QUALS (expr_type); expr_type = cp_build_qualified_type (expr_type, quals); } diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 25c8ac0aa62..14f97873484 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -4565,26 +4565,15 @@ add_function (struct arg_lookup *k, tree fn) total number of functions being compared, which should usually be the case. */ - /* We must find only functions, or exactly one non-function. */ - if (!k->functions) + if (!is_overloaded_fn (fn)) + /* All names except those of (possibly overloaded) functions and + function templates are ignored. */; + else if (!k->functions) k->functions = fn; else if (fn == k->functions) ; - else if (is_overloaded_fn (k->functions) && is_overloaded_fn (fn)) - k->functions = build_overload (fn, k->functions); else - { - tree f1 = OVL_CURRENT (k->functions); - tree f2 = fn; - if (is_overloaded_fn (f1)) - { - fn = f1; f1 = f2; f2 = fn; - } - error ("%q+D is not a function,", f1); - error (" conflict with %q+D", f2); - error (" in call to %qD", k->name); - return true; - } + k->functions = build_overload (fn, k->functions); return false; } @@ -4791,6 +4780,8 @@ arg_assoc_class (struct arg_lookup *k, tree type) if (arg_assoc_namespace (k, context)) return true; + complete_type (type); + if (TYPE_BINFO (type)) { /* Process baseclasses. */ diff --git a/gcc/cp/optimize.c b/gcc/cp/optimize.c index 58d5b9001d2..662bd4a22a3 100644 --- a/gcc/cp/optimize.c +++ b/gcc/cp/optimize.c @@ -199,6 +199,8 @@ maybe_clone_body (tree fn) DECL_VISIBILITY (clone) = DECL_VISIBILITY (fn); DECL_VISIBILITY_SPECIFIED (clone) = DECL_VISIBILITY_SPECIFIED (fn); DECL_DLLIMPORT_P (clone) = DECL_DLLIMPORT_P (fn); + DECL_ATTRIBUTES (clone) = copy_list (DECL_ATTRIBUTES (fn)); + DECL_DISREGARD_INLINE_LIMITS (clone) = DECL_DISREGARD_INLINE_LIMITS (fn); /* Adjust the parameter names and locations. */ parm = DECL_ARGUMENTS (fn); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index b24d6b1c0e9..1d677cb92eb 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -5803,7 +5803,7 @@ cp_parser_new_type_id (cp_parser* parser, tree *nelts) parser->type_definition_forbidden_message = "types may not be defined in a new-type-id"; /* Parse the type-specifier-seq. */ - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/false, /*is_trailing_return=*/false, &type_specifier_seq); /* Restore the old message. */ @@ -8049,7 +8049,7 @@ cp_parser_condition (cp_parser* parser) parser->type_definition_forbidden_message = "types may not be defined in conditions"; /* Parse the type-specifier-seq. */ - cp_parser_type_specifier_seq (parser, /*is_condition==*/true, + cp_parser_type_specifier_seq (parser, /*is_declaration==*/true, /*is_trailing_return=*/false, &type_specifiers); /* Restore the saved message. 
*/ @@ -9691,7 +9691,7 @@ cp_parser_conversion_type_id (cp_parser* parser) /* Parse the attributes. */ attributes = cp_parser_attributes_opt (parser); /* Parse the type-specifiers. */ - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/false, /*is_trailing_return=*/false, &type_specifiers); /* If that didn't work, stop. */ @@ -12561,7 +12561,7 @@ cp_parser_enum_specifier (cp_parser* parser) cp_lexer_consume_token (parser->lexer); /* Parse the type-specifier-seq. */ - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/false, /*is_trailing_return=*/false, &type_specifiers); @@ -14531,7 +14531,7 @@ cp_parser_type_id_1 (cp_parser* parser, bool is_template_arg, cp_declarator *abstract_declarator; /* Parse the type-specifier-seq. */ - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/false, is_trailing_return, &type_specifier_seq); if (type_specifier_seq.type == error_mark_node) @@ -14593,8 +14593,8 @@ static tree cp_parser_trailing_type_id (cp_parser *parser) type-specifier-seq: attributes type-specifier-seq [opt] - If IS_CONDITION is true, we are at the start of a "condition", - e.g., we've just seen "if (". + If IS_DECLARATION is true, we are at the start of a "condition" or + exception-declaration, so we might be followed by a declarator-id. If IS_TRAILING_RETURN is true, we are in a trailing-return-type, i.e. we've just seen "->". @@ -14603,7 +14603,7 @@ static tree cp_parser_trailing_type_id (cp_parser *parser) static void cp_parser_type_specifier_seq (cp_parser* parser, - bool is_condition, + bool is_declaration, bool is_trailing_return, cp_decl_specifier_seq *type_specifier_seq) { @@ -14679,7 +14679,7 @@ cp_parser_type_specifier_seq (cp_parser* parser, would be clearer just to allow a decl-specifier-seq here, and then add a semantic restriction that if any decl-specifiers that are not type-specifiers appear, the program is invalid. */ - if (is_condition && !is_cv_qualifier) + if (is_declaration && !is_cv_qualifier) flags |= CP_PARSER_FLAGS_NO_USER_DEFINED_TYPES; } @@ -17330,7 +17330,7 @@ cp_parser_exception_declaration (cp_parser* parser) = "types may not be defined in exception-declarations"; /* Parse the type-specifier-seq. */ - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/true, /*is_trailing_return=*/false, &type_specifiers); /* If it's a `)', then there is no declarator. */ @@ -22104,7 +22104,7 @@ cp_parser_omp_for_loop (cp_parser *parser, tree clauses, tree *par_clauses) cp_parser_condition, from whence the bulk of this is copied. 
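For instance, with
  #pragma omp for
  for (int i = 0; i < n; i++)
the init-statement 'int i = 0' is parsed here, which is why the call below now passes is_declaration=true: unlike a trailing-return-type, this position can be followed by a declarator-id.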
*/ cp_parser_parse_tentatively (parser); - cp_parser_type_specifier_seq (parser, /*is_condition=*/false, + cp_parser_type_specifier_seq (parser, /*is_declaration=*/true, /*is_trailing_return=*/false, &type_specifiers); if (cp_parser_parse_definitely (parser)) @@ -22424,7 +22424,8 @@ cp_parser_omp_for_loop (cp_parser *parser, tree clauses, tree *par_clauses) } collapse_err = true; cp_parser_statement_seq_opt (parser, NULL); - cp_parser_require (parser, CPP_CLOSE_BRACE, "%<}%>"); + if (cp_lexer_next_token_is (parser->lexer, CPP_EOF)) + break; } } diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 5af348a1390..75180eabbe8 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -7352,13 +7352,14 @@ instantiate_class_template (tree type) tree typedecl; tree pbinfo; tree base_list; + unsigned int saved_maximum_field_alignment; if (type == error_mark_node) return error_mark_node; if (TYPE_BEING_DEFINED (type) || COMPLETE_TYPE_P (type) - || dependent_type_p (type)) + || uses_template_parms (type)) return type; /* Figure out which template is being instantiated. */ @@ -7412,6 +7413,9 @@ instantiate_class_template (tree type) push_deferring_access_checks (dk_no_deferred); push_to_top_level (); + /* Use #pragma pack from the template context. */ + saved_maximum_field_alignment = maximum_field_alignment; + maximum_field_alignment = TYPE_PRECISION (pattern); SET_CLASSTYPE_INTERFACE_UNKNOWN (type); @@ -7827,6 +7831,7 @@ instantiate_class_template (tree type) perform_typedefs_access_check (pattern, args); perform_deferred_access_checks (); pop_nested_class (); + maximum_field_alignment = saved_maximum_field_alignment; pop_from_top_level (); pop_deferring_access_checks (); pop_tinst_level (); @@ -14854,6 +14859,35 @@ mark_decl_instantiated (tree result, int extern_p) DECL_INTERFACE_KNOWN (result) = 1; } +/* Subroutine of more_specialized_fn: check whether TARGS is missing any + important template arguments. If any are missing, we check whether + they're important by using error_mark_node for substituting into any + args that were used for partial ordering (the ones between ARGS and END) + and seeing if it bubbles up. */ + +static bool +check_undeduced_parms (tree targs, tree args, tree end) +{ + bool found = false; + int i; + for (i = TREE_VEC_LENGTH (targs) - 1; i >= 0; --i) + if (TREE_VEC_ELT (targs, i) == NULL_TREE) + { + found = true; + TREE_VEC_ELT (targs, i) = error_mark_node; + } + if (found) + { + for (; args != end; args = TREE_CHAIN (args)) + { + tree substed = tsubst (TREE_VALUE (args), targs, tf_none, NULL_TREE); + if (substed == error_mark_node) + return true; + } + } + return false; +} + /* Given two function templates PAT1 and PAT2, return: 1 if PAT1 is more specialized than PAT2 as described in [temp.func.order]. @@ -14877,8 +14911,12 @@ mark_decl_instantiated (tree result, int extern_p) neither is more cv-qualified, they both are equal). Unlike regular deduction, after all the arguments have been deduced in this way, we do *not* verify the deduced template argument values can be - substituted into non-deduced contexts, nor do we have to verify - that all template arguments have been deduced. */ + substituted into non-deduced contexts. + + The logic can be a bit confusing here, because we look at deduce1 and + targs1 to see if pat2 is at least as specialized, and vice versa; if we + can find template arguments for pat1 to make arg1 look like arg2, that + means that arg2 is at least as specialized as arg1. 
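A standard concrete case, for orientation: given
  template <class T> void f (T);     (#1)
  template <class T> void f (T *);   (#2)
deduction of #1's T against the transformed argument U * of #2 succeeds (T = U *), while deducing #2's T * against a plain U fails, so #2 is more specialized and wins for a call such as f (&n).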
*/ int more_specialized_fn (tree pat1, tree pat2, int len) @@ -14891,8 +14929,9 @@ more_specialized_fn (tree pat1, tree pat2, int len) tree tparms2 = DECL_INNERMOST_TEMPLATE_PARMS (pat2); tree args1 = TYPE_ARG_TYPES (TREE_TYPE (decl1)); tree args2 = TYPE_ARG_TYPES (TREE_TYPE (decl2)); - int better1 = 0; - int better2 = 0; + tree origs1, origs2; + bool lose1 = false; + bool lose2 = false; /* Remove the this parameter from non-static member functions. If one is a non-static member function and the other is not a static @@ -14931,6 +14970,9 @@ more_specialized_fn (tree pat1, tree pat2, int len) processing_template_decl++; + origs1 = args1; + origs2 = args2; + while (len-- /* Stop when an ellipsis is seen. */ && args1 != NULL_TREE && args2 != NULL_TREE) @@ -15065,28 +15107,37 @@ more_specialized_fn (tree pat1, tree pat2, int len) deduce2 = !unify (tparms2, targs2, arg2, arg1, UNIFY_ALLOW_NONE); } + /* If we couldn't deduce arguments for tparms1 to make arg1 match + arg2, then arg2 is not as specialized as arg1. */ if (!deduce1) - better2 = -1; + lose2 = true; if (!deduce2) - better1 = -1; - if (better1 < 0 && better2 < 0) - /* We've failed to deduce something in either direction. - These must be unordered. */ - break; - - if (deduce1 && deduce2 && quals1 >= 0 && quals2 >= 0) + lose1 = true; + + /* "If, for a given type, deduction succeeds in both directions + (i.e., the types are identical after the transformations above) + and if the type from the argument template is more cv-qualified + than the type from the parameter template (as described above) + that type is considered to be more specialized than the other. If + neither type is more cv-qualified than the other then neither type + is more specialized than the other." + + We check same_type_p explicitly because deduction can also succeed + in both directions when there is a nondeduced context. */ + if (deduce1 && deduce2 + && quals1 != quals2 && quals1 >= 0 && quals2 >= 0 + && same_type_p (arg1, arg2)) { - /* Deduces in both directions, see if quals can - disambiguate. Pretend the worse one failed to deduce. */ if ((quals1 & quals2) == quals2) - deduce1 = 0; + lose2 = true; if ((quals1 & quals2) == quals1) - deduce2 = 0; + lose1 = true; } - if (deduce1 && !deduce2 && !better2) - better2 = 1; - if (deduce2 && !deduce1 && !better1) - better1 = 1; + + if (lose1 && lose2) + /* We've failed to deduce something in either direction. + These must be unordered. */ + break; if (TREE_CODE (arg1) == TYPE_PACK_EXPANSION || TREE_CODE (arg2) == TYPE_PACK_EXPANSION) @@ -15098,22 +15149,38 @@ more_specialized_fn (tree pat1, tree pat2, int len) args2 = TREE_CHAIN (args2); } + /* "In most cases, all template parameters must have values in order for + deduction to succeed, but for partial ordering purposes a template + parameter may remain without a value provided it is not used in the + types being used for partial ordering." + + Thus, if we are missing any of the targs1 we need to substitute into + origs1, then pat2 is not as specialized as pat1. This can happen when + there is a nondeduced context. */ + if (!lose2 && check_undeduced_parms (targs1, origs1, args1)) + lose2 = true; + if (!lose1 && check_undeduced_parms (targs2, origs2, args2)) + lose1 = true; + processing_template_decl--; /* All things being equal, if the next argument is a pack expansion for one function but not for the other, prefer the - non-variadic function. */ - if ((better1 > 0) - (better2 > 0) == 0 + non-variadic function. FIXME this is bogus; see c++/41958. 
*/ + if (lose1 == lose2 && args1 && TREE_VALUE (args1) && args2 && TREE_VALUE (args2)) { - if (TREE_CODE (TREE_VALUE (args1)) == TYPE_PACK_EXPANSION) - return TREE_CODE (TREE_VALUE (args2)) == TYPE_PACK_EXPANSION ? 0 : -1; - else if (TREE_CODE (TREE_VALUE (args2)) == TYPE_PACK_EXPANSION) - return 1; + lose1 = TREE_CODE (TREE_VALUE (args1)) == TYPE_PACK_EXPANSION; + lose2 = TREE_CODE (TREE_VALUE (args2)) == TYPE_PACK_EXPANSION; } - return (better1 > 0) - (better2 > 0); + if (lose1 == lose2) + return 0; + else if (!lose1) + return 1; + else + return -1; } /* Determine which of two partial specializations is more specialized. @@ -17796,10 +17863,7 @@ make_args_non_dependent (VEC(tree,gc) *args) tree make_auto (void) { - tree au; - - /* ??? Is it worth caching this for multiple autos at the same level? */ - au = cxx_make_type (TEMPLATE_TYPE_PARM); + tree au = cxx_make_type (TEMPLATE_TYPE_PARM); TYPE_NAME (au) = build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("auto"), au); TYPE_STUB_DECL (au) = TYPE_NAME (au); @@ -17877,6 +17941,19 @@ do_auto_deduction (tree type, tree init, tree auto_node) return error_mark_node; } + /* If the list of declarators contains more than one declarator, the type + of each declared variable is determined as described above. If the + type deduced for the template parameter U is not the same in each + deduction, the program is ill-formed. */ + if (TREE_TYPE (auto_node) + && !same_type_p (TREE_TYPE (auto_node), TREE_VEC_ELT (targs, 0))) + { + error ("inconsistent deduction for %qT: %qT and then %qT", + auto_node, TREE_TYPE (auto_node), TREE_VEC_ELT (targs, 0)); + return error_mark_node; + } + TREE_TYPE (auto_node) = TREE_VEC_ELT (targs, 0); + if (processing_template_decl) targs = add_to_template_args (current_template_args (), targs); return tsubst (type, targs, tf_warning_or_error, NULL_TREE); diff --git a/gcc/cp/search.c b/gcc/cp/search.c index d6521fb6f82..3adf9e0a1ab 100644 --- a/gcc/cp/search.c +++ b/gcc/cp/search.c @@ -214,9 +214,12 @@ lookup_base (tree t, tree base, base_access access, base_kind *kind_ptr) t_binfo = TYPE_BINFO (t); } - base = complete_type (TYPE_MAIN_VARIANT (base)); + base = TYPE_MAIN_VARIANT (base); - if (t_binfo) + /* If BASE is incomplete, it can't be a base of T--and instantiating it + might cause an error. */ + if (t_binfo && CLASS_TYPE_P (base) + && (COMPLETE_TYPE_P (base) || TYPE_BEING_DEFINED (base))) { struct lookup_base_data_s data; diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c index 25c257f94a9..7cafc8ab224 100644 --- a/gcc/cp/typeck.c +++ b/gcc/cp/typeck.c @@ -3245,6 +3245,7 @@ build_x_binary_op (enum tree_code code, tree arg1, enum tree_code arg1_code, misinterpret. But don't warn about obj << x + y, since that is a common idiom for I/O. */ if (warn_parentheses + && (complain & tf_warning) && !processing_template_decl && !error_operand_p (arg1) && !error_operand_p (arg2) @@ -6879,7 +6880,7 @@ convert_for_initialization (tree exp, tree type, tree rhs, int flags, if (fndecl) savew = warningcount, savee = errorcount; rhs = initialize_reference (type, rhs, /*decl=*/NULL_TREE, - /*cleanup=*/NULL); + /*cleanup=*/NULL, complain); if (fndecl) { if (warningcount > savew) diff --git a/gcc/defaults.h b/gcc/defaults.h index f1d96833070..182de95685c 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -1154,4 +1154,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define GO_IF_MODE_DEPENDENT_ADDRESS(X, WIN) #endif +/* For most ports anything that evaluates to a constant symbolic + or integer value is acceptable as a constant address. */ +#ifndef CONSTANT_ADDRESS_P +#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && GET_CODE (X) != CONST_DOUBLE) +#endif + #endif /* ! GCC_DEFAULTS_H */ diff --git a/gcc/df-scan.c b/gcc/df-scan.c index 45df29ecc2b..101234b55bc 100644 --- a/gcc/df-scan.c +++ b/gcc/df-scan.c @@ -3248,10 +3248,23 @@ df_uses_record (enum df_ref_class cl, struct df_collection_rec *collection_rec, width = INTVAL (XEXP (dst, 1)); offset = INTVAL (XEXP (dst, 2)); mode = GET_MODE (dst); - df_uses_record (DF_REF_EXTRACT, collection_rec, &XEXP (dst, 0), - DF_REF_REG_USE, bb, insn_info, - DF_REF_READ_WRITE | DF_REF_ZERO_EXTRACT, - width, offset, mode); + if (GET_CODE (XEXP (dst,0)) == MEM) + { + /* Handle the case of zero_extract(mem(...)) in the set dest. + This special case is allowed only if the mem is a single byte and + is useful to set a bitfield in memory. */ + df_uses_record (DF_REF_EXTRACT, collection_rec, &XEXP (XEXP (dst,0), 0), + DF_REF_REG_MEM_STORE, bb, insn_info, + DF_REF_ZERO_EXTRACT, + width, offset, mode); + } + else + { + df_uses_record (DF_REF_EXTRACT, collection_rec, &XEXP (dst, 0), + DF_REF_REG_USE, bb, insn_info, + DF_REF_READ_WRITE | DF_REF_ZERO_EXTRACT, + width, offset, mode); + } } else { diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index cb764c600eb..e09c9ee6685 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -2270,13 +2270,6 @@ on data in the eight bit data area. Note the eight bit data area is limited to You must use GAS and GLD from GNU binutils version 2.7 or later for this attribute to work correctly. -@item exception -@cindex exception handler functions on the RX processor -Use this attribute on the RX to indicate that the specified function -is an exception handler. The compiler will generate function entry and -exit sequences suitable for use in an exception handler when this -attribute is present. - @item exception_handler @cindex exception handler functions on the Blackfin processor Use this attribute on the Blackfin to indicate that the specified function @@ -3214,6 +3207,16 @@ Enable/disable the generation of the SSE4A instructions. @cindex @code{target("fma4")} attribute Enable/disable the generation of the FMA4 instructions. +@item xop +@itemx no-xop +@cindex @code{target("xop")} attribute +Enable/disable the generation of the XOP instructions. + +@item lwp +@itemx no-lwp +@cindex @code{target("lwp")} attribute +Enable/disable the generation of the LWP instructions. + @item ssse3 @itemx no-ssse3 @cindex @code{target("ssse3")} attribute @@ -8935,6 +8938,134 @@ v2di __builtin_ia32_insertq (v2di, v2di) v2di __builtin_ia32_insertqi (v2di, v2di, const unsigned int, const unsigned int) @end smallexample +The following built-in functions are available when @option{-mxop} is used. 
+@smallexample +v2df __builtin_ia32_vfrczpd (v2df) +v4sf __builtin_ia32_vfrczps (v4sf) +v2df __builtin_ia32_vfrczsd (v2df, v2df) +v4sf __builtin_ia32_vfrczss (v4sf, v4sf) +v4df __builtin_ia32_vfrczpd256 (v4df) +v8sf __builtin_ia32_vfrczps256 (v8sf) +v2di __builtin_ia32_vpcmov (v2di, v2di, v2di) +v2di __builtin_ia32_vpcmov_v2di (v2di, v2di, v2di) +v4si __builtin_ia32_vpcmov_v4si (v4si, v4si, v4si) +v8hi __builtin_ia32_vpcmov_v8hi (v8hi, v8hi, v8hi) +v16qi __builtin_ia32_vpcmov_v16qi (v16qi, v16qi, v16qi) +v2df __builtin_ia32_vpcmov_v2df (v2df, v2df, v2df) +v4sf __builtin_ia32_vpcmov_v4sf (v4sf, v4sf, v4sf) +v4di __builtin_ia32_vpcmov_v4di256 (v4di, v4di, v4di) +v8si __builtin_ia32_vpcmov_v8si256 (v8si, v8si, v8si) +v16hi __builtin_ia32_vpcmov_v16hi256 (v16hi, v16hi, v16hi) +v32qi __builtin_ia32_vpcmov_v32qi256 (v32qi, v32qi, v32qi) +v4df __builtin_ia32_vpcmov_v4df256 (v4df, v4df, v4df) +v8sf __builtin_ia32_vpcmov_v8sf256 (v8sf, v8sf, v8sf) +v16qi __builtin_ia32_vpcomeqb (v16qi, v16qi) +v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi) +v4si __builtin_ia32_vpcomeqd (v4si, v4si) +v2di __builtin_ia32_vpcomeqq (v2di, v2di) +v16qi __builtin_ia32_vpcomequb (v16qi, v16qi) +v4si __builtin_ia32_vpcomequd (v4si, v4si) +v2di __builtin_ia32_vpcomequq (v2di, v2di) +v8hi __builtin_ia32_vpcomequw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi) +v16qi __builtin_ia32_vpcomfalseb (v16qi, v16qi) +v4si __builtin_ia32_vpcomfalsed (v4si, v4si) +v2di __builtin_ia32_vpcomfalseq (v2di, v2di) +v16qi __builtin_ia32_vpcomfalseub (v16qi, v16qi) +v4si __builtin_ia32_vpcomfalseud (v4si, v4si) +v2di __builtin_ia32_vpcomfalseuq (v2di, v2di) +v8hi __builtin_ia32_vpcomfalseuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomfalsew (v8hi, v8hi) +v16qi __builtin_ia32_vpcomgeb (v16qi, v16qi) +v4si __builtin_ia32_vpcomged (v4si, v4si) +v2di __builtin_ia32_vpcomgeq (v2di, v2di) +v16qi __builtin_ia32_vpcomgeub (v16qi, v16qi) +v4si __builtin_ia32_vpcomgeud (v4si, v4si) +v2di __builtin_ia32_vpcomgeuq (v2di, v2di) +v8hi __builtin_ia32_vpcomgeuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomgew (v8hi, v8hi) +v16qi __builtin_ia32_vpcomgtb (v16qi, v16qi) +v4si __builtin_ia32_vpcomgtd (v4si, v4si) +v2di __builtin_ia32_vpcomgtq (v2di, v2di) +v16qi __builtin_ia32_vpcomgtub (v16qi, v16qi) +v4si __builtin_ia32_vpcomgtud (v4si, v4si) +v2di __builtin_ia32_vpcomgtuq (v2di, v2di) +v8hi __builtin_ia32_vpcomgtuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomgtw (v8hi, v8hi) +v16qi __builtin_ia32_vpcomleb (v16qi, v16qi) +v4si __builtin_ia32_vpcomled (v4si, v4si) +v2di __builtin_ia32_vpcomleq (v2di, v2di) +v16qi __builtin_ia32_vpcomleub (v16qi, v16qi) +v4si __builtin_ia32_vpcomleud (v4si, v4si) +v2di __builtin_ia32_vpcomleuq (v2di, v2di) +v8hi __builtin_ia32_vpcomleuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomlew (v8hi, v8hi) +v16qi __builtin_ia32_vpcomltb (v16qi, v16qi) +v4si __builtin_ia32_vpcomltd (v4si, v4si) +v2di __builtin_ia32_vpcomltq (v2di, v2di) +v16qi __builtin_ia32_vpcomltub (v16qi, v16qi) +v4si __builtin_ia32_vpcomltud (v4si, v4si) +v2di __builtin_ia32_vpcomltuq (v2di, v2di) +v8hi __builtin_ia32_vpcomltuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomltw (v8hi, v8hi) +v16qi __builtin_ia32_vpcomneb (v16qi, v16qi) +v4si __builtin_ia32_vpcomned (v4si, v4si) +v2di __builtin_ia32_vpcomneq (v2di, v2di) +v16qi __builtin_ia32_vpcomneub (v16qi, v16qi) +v4si __builtin_ia32_vpcomneud (v4si, v4si) +v2di __builtin_ia32_vpcomneuq (v2di, v2di) +v8hi __builtin_ia32_vpcomneuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomnew (v8hi, v8hi) +v16qi __builtin_ia32_vpcomtrueb (v16qi, v16qi) +v4si 
__builtin_ia32_vpcomtrued (v4si, v4si) +v2di __builtin_ia32_vpcomtrueq (v2di, v2di) +v16qi __builtin_ia32_vpcomtrueub (v16qi, v16qi) +v4si __builtin_ia32_vpcomtrueud (v4si, v4si) +v2di __builtin_ia32_vpcomtrueuq (v2di, v2di) +v8hi __builtin_ia32_vpcomtrueuw (v8hi, v8hi) +v8hi __builtin_ia32_vpcomtruew (v8hi, v8hi) +v4si __builtin_ia32_vphaddbd (v16qi) +v2di __builtin_ia32_vphaddbq (v16qi) +v8hi __builtin_ia32_vphaddbw (v16qi) +v2di __builtin_ia32_vphadddq (v4si) +v4si __builtin_ia32_vphaddubd (v16qi) +v2di __builtin_ia32_vphaddubq (v16qi) +v8hi __builtin_ia32_vphaddubw (v16qi) +v2di __builtin_ia32_vphaddudq (v4si) +v4si __builtin_ia32_vphadduwd (v8hi) +v2di __builtin_ia32_vphadduwq (v8hi) +v4si __builtin_ia32_vphaddwd (v8hi) +v2di __builtin_ia32_vphaddwq (v8hi) +v8hi __builtin_ia32_vphsubbw (v16qi) +v2di __builtin_ia32_vphsubdq (v4si) +v4si __builtin_ia32_vphsubwd (v8hi) +v4si __builtin_ia32_vpmacsdd (v4si, v4si, v4si) +v2di __builtin_ia32_vpmacsdqh (v4si, v4si, v2di) +v2di __builtin_ia32_vpmacsdql (v4si, v4si, v2di) +v4si __builtin_ia32_vpmacssdd (v4si, v4si, v4si) +v2di __builtin_ia32_vpmacssdqh (v4si, v4si, v2di) +v2di __builtin_ia32_vpmacssdql (v4si, v4si, v2di) +v4si __builtin_ia32_vpmacsswd (v8hi, v8hi, v4si) +v8hi __builtin_ia32_vpmacssww (v8hi, v8hi, v8hi) +v4si __builtin_ia32_vpmacswd (v8hi, v8hi, v4si) +v8hi __builtin_ia32_vpmacsww (v8hi, v8hi, v8hi) +v4si __builtin_ia32_vpmadcsswd (v8hi, v8hi, v4si) +v4si __builtin_ia32_vpmadcswd (v8hi, v8hi, v4si) +v16qi __builtin_ia32_vpperm (v16qi, v16qi, v16qi) +v16qi __builtin_ia32_vprotb (v16qi, v16qi) +v4si __builtin_ia32_vprotd (v4si, v4si) +v2di __builtin_ia32_vprotq (v2di, v2di) +v8hi __builtin_ia32_vprotw (v8hi, v8hi) +v16qi __builtin_ia32_vpshab (v16qi, v16qi) +v4si __builtin_ia32_vpshad (v4si, v4si) +v2di __builtin_ia32_vpshaq (v2di, v2di) +v8hi __builtin_ia32_vpshaw (v8hi, v8hi) +v16qi __builtin_ia32_vpshlb (v16qi, v16qi) +v4si __builtin_ia32_vpshld (v4si, v4si) +v2di __builtin_ia32_vpshlq (v2di, v2di) +v8hi __builtin_ia32_vpshlw (v8hi, v8hi) +@end smallexample + The following built-in functions are available when @option{-mfma4} is used. All of them generate the machine instruction that is part of the name with MMX registers. @@ -8975,6 +9106,23 @@ v8sf __builtin_ia32_fmsubaddps256 (v8sf, v8sf, v8sf) @end smallexample +The following built-in functions are available when @option{-mlwp} is used. + +@smallexample +void __builtin_ia32_llwpcb16 (void *); +void __builtin_ia32_llwpcb32 (void *); +void __builtin_ia32_llwpcb64 (void *); +void * __builtin_ia32_llwpcb16 (void); +void * __builtin_ia32_llwpcb32 (void); +void * __builtin_ia32_llwpcb64 (void); +void __builtin_ia32_lwpval16 (unsigned short, unsigned int, unsigned short) +void __builtin_ia32_lwpval32 (unsigned int, unsigned int, unsigned int) +void __builtin_ia32_lwpval64 (unsigned __int64, unsigned int, unsigned int) +unsigned char __builtin_ia32_lwpins16 (unsigned short, unsigned int, unsigned short) +unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int) +unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned int, unsigned int) +@end smallexample + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 98634f98a46..4b7f0fd58be 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -595,7 +595,7 @@ Objective-C and Objective-C++ Dialects}. 
-mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-maes -mpclmul @gol
--msse4a -m3dnow -mpopcnt -mabm -mfma4 @gol
+-msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop -mlwp @gol
 -mthreads -mno-align-stringops -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
@@ -785,14 +785,16 @@ See RS/6000 and PowerPC Options.
 -msdata=@var{opt} -mvxworks -G @var{num} -pthread}
 
 @emph{RX Options}
-@gccoptlist{-m64bit-doubles -m32bit-doubles -mieee -mno-ieee@gol
+@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
+-mcpu= -patch=@gol
 -mbig-endian-data -mlittle-endian-data @gol
 -msmall-data @gol
 -msim -mno-sim@gol
 -mas100-syntax -mno-as100-syntax@gol
 -mrelax@gol
 -mmax-constant-size=@gol
--mint-register=}
+-mint-register=@gol
+-msave-acc-in-interrupts}
 
 @emph{S/390 and zSeries Options}
 @gccoptlist{-mtune=@var{cpu-type} -march=@var{cpu-type} @gol
@@ -12008,6 +12010,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mno-sse4a
 @itemx -mfma4
 @itemx -mno-fma4
+@itemx -mxop
+@itemx -mno-xop
+@itemx -mlwp
+@itemx -mno-lwp
 @itemx -m3dnow
 @itemx -mno-3dnow
 @itemx -mpopcnt
@@ -12021,8 +12027,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex m3dnow
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX,
-SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, FMA4, ABM or
-3DNow!@: extended instruction sets.
+SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, FMA4, XOP,
+LWP, ABM or 3DNow!@: extended instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
 disabled by these switches.
@@ -12095,6 +12101,10 @@ Note that while the throughput of the sequence is higher than the throughput
 of the non-reciprocal instruction, the precision of the sequence can be
 decreased by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994).
 
+Note that GCC implements 1.0f/sqrtf(x) in terms of RSQRTSS (or RSQRTPS)
+already with @option{-ffast-math} (or the above option combination), and
+doesn't need @option{-mrecip}.
+
 @item -mveclibabi=@var{type}
 @opindex mveclibabi
 Specifies the ABI type to use for vectorizing intrinsics using an
@@ -14219,7 +14229,7 @@ assembler and the linker alone without help from the compiler.
 @opindex mmcount-ra-address
 @opindex mno-mcount-ra-address
 Emit (do not emit) code that allows @code{_mcount} to modify the
-colling function's return address.  When enabled, this option extends
+calling function's return address.  When enabled, this option extends
 the usual @code{_mcount} interface with a new @var{ra-address}
 parameter, which has type @code{intptr_t *} and is passed in register
 @code{$12}.  @code{_mcount} can then modify the return address by
@@ -15413,16 +15423,37 @@ These @option{-m} options are defined for RX implementations:
 @table @gcctabopt
 @item -m64bit-doubles
 @itemx -m32bit-doubles
+@itemx -fpu
+@itemx -nofpu
 @opindex m64bit-doubles
 @opindex m32bit-doubles
+@opindex fpu
+@opindex nofpu
 Make the @code{double} data type be 64-bits (@option{-m64bit-doubles})
 or 32-bits (@option{-m32bit-doubles}) in size.  The default is
-@option{-m32bit-doubles}.
+@option{-m64bit-doubles}.
 @emph{Note} the RX's hardware floating point instructions are only used
 for 32-bit floating point values, and then only if @option{-ffast-math}
 has been specified on the command line.  This is because the RX FPU
 instructions do not properly support denormal (or sub-normal) values.
+The options @option{-fpu} and @option{-nofpu} have been provided at
+the request of Renesas for compatibility with their toolchain.  The
+@option{-fpu} option enables the use of RX FPU instructions by
+selecting 32-bit doubles and enabling unsafe math optimizations.  The
+@option{-nofpu} option disables the use of RX FPU instructions, even
+if @option{-m32bit-doubles} is active and unsafe math optimizations
+have been enabled.
+
+@item -mcpu=@var{name}
+@itemx -patch=@var{name}
+@opindex mcpu
+@opindex patch
+Selects the type of RX CPU to be targeted.  Currently only two types are
+supported, the generic @var{RX600} and the specific @var{RX610}.  The
+only difference between them is that the @var{RX610} does not support
+the @code{MVTIPL} instruction.
+
 @item -mbig-endian-data
 @itemx -mlittle-endian-data
 @opindex mbig-endian-data
@@ -15498,6 +15529,15 @@ of fast interrupt handlers.  A value of 2 reserves @code{r13} and
 @code{r12}.  A value of 3 reserves @code{r13}, @code{r12} and
 @code{r11}, and a value of 4 reserves @code{r13} through @code{r10}.
 A value of 0, the default, does not reserve any registers.
+
+@item -msave-acc-in-interrupts
+@opindex msave-acc-in-interrupts
+Specifies that interrupt handler functions should preserve the
+accumulator register.  This is only necessary if normal code might use
+the accumulator register, for example because it performs 64-bit
+multiplications.  The default is to ignore the accumulator as this
+makes the interrupt handlers faster.
+
 @end table
 
 @emph{Note:} The generic GCC command line @option{-ffixed-@var{reg}}
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index dcfba921207..2974dcfb20c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5037,11 +5037,19 @@ operations in addition to updating the stack pointer.
 @item @samp{check_stack}
 If stack checking cannot be done on your system by probing the stack
 with a load or store instruction (@pxref{Stack Checking}), define this pattern
-to perform the needed check and signaling an error if the stack
-has overflowed.  The single operand is the location in the stack furthest
-from the current stack pointer that you need to validate.  Normally,
-on machines where this pattern is needed, you would obtain the stack
-limit from a global or thread-specific variable or register.
+to perform the needed check and signal an error if the stack has overflowed.
+The single operand is the address in the stack furthest from the current
+stack pointer that you need to validate.  Normally, on machines where this
+pattern is needed, you would obtain the stack limit from a global or
+thread-specific variable or register.
+
+@cindex @code{probe_stack} instruction pattern
+@item @samp{probe_stack}
+If stack checking can be done on your system by probing the stack but doing
+it with a load or store instruction is not optimal (@pxref{Stack Checking}),
+define this pattern to do the probing differently and signal an error if
+the stack has overflowed.  The single operand is the memory location in the
+stack that needs to be probed.
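+
+For example, a port can probe by @samp{or}-ing zero into the operand,
+which touches the location without changing it.  The following sketch
+is purely illustrative (the @code{unspec} constant and the output
+template are hypothetical, not taken from any port in this patch) and
+only shows the general shape of such a pattern on an x86-like target:
+
+@smallexample
+(define_insn "probe_stack"
+  [(set (match_operand 0 "memory_operand" "=m")
+        (unspec [(const_int 0)] UNSPEC_PROBE_STACK))]
+  ""
+  "orl\t$0, %0")
+@end smallexample
+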
@cindex @code{nonlocal_goto} instruction pattern
@item @samp{nonlocal_goto}
diff --git a/gcc/doc/plugins.texi b/gcc/doc/plugins.texi
index f784953b5f4..123f67075ad 100644
--- a/gcc/doc/plugins.texi
+++ b/gcc/doc/plugins.texi
@@ -136,6 +136,7 @@ enum plugin_event
   PLUGIN_REGISTER_GGC_CACHES,   /* Register an extra GGC cache table. */
   PLUGIN_ATTRIBUTES,            /* Called during attribute registration */
   PLUGIN_START_UNIT,            /* Called before processing a translation unit. */
+  PLUGIN_PRAGMAS,               /* Called during pragma registration. */
   PLUGIN_EVENT_LAST             /* Dummy event used for indexing callback array. */
 @};
@@ -156,6 +157,11 @@ For the PLUGIN_PASS_MANAGER_SETUP, PLUGIN_INFO, PLUGIN_REGISTER_GGC_ROOTS
 and PLUGIN_REGISTER_GGC_CACHES pseudo-events the @code{callback} should be
 null, and the @code{user_data} is specific.
 
+When the PLUGIN_PRAGMAS event is triggered (with a null
+pointer as data from GCC), plugins may register their own pragmas
+using functions like @code{c_register_pragma} or
+@code{c_register_pragma_with_expansion}.
+
 @section Interacting with the pass manager
 
 There needs to be a way to add/reorder/remove passes dynamically. This
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 984bbd70c1e..c69ef0c73ab 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3556,11 +3556,12 @@ like to do static stack checking in some more efficient way than the
 generic approach.  The default value of this macro is zero.
 @end defmac
 
-@defmac STACK_CHECK_PROBE_INTERVAL
-An integer representing the interval at which GCC must generate stack
-probe instructions.  You will normally define this macro to be no larger
-than the size of the ``guard pages'' at the end of a stack area.  The
-default value of 4096 is suitable for most systems.
+@defmac STACK_CHECK_PROBE_INTERVAL_EXP
+An integer specifying the interval at which GCC must generate stack probe
+instructions, defined as 2 raised to this integer.  You will normally
+define this macro so that the interval is no larger than the size of
+the ``guard pages'' at the end of a stack area.  The default value
+of 12 (4096-byte interval) is suitable for most systems.
 @end defmac
 
 @defmac STACK_CHECK_PROBE_LOAD
@@ -3569,6 +3570,15 @@
 as a load instruction and zero if GCC should use a store instruction.
 The default is zero, which is the most efficient choice on most systems.
 @end defmac
 
+@defmac STACK_CHECK_MOVING_SP
+An integer which is nonzero if GCC should move the stack pointer page by page
+when doing probes.  This can be necessary on systems where the stack pointer
+contains the bottom address of the memory area accessible to the executing
+thread at any point in time.  In this situation an alternate signal stack
+is required in order to be able to recover from a stack overflow.  The
+default value of this macro is zero.
+@end defmac
+
 @defmac STACK_CHECK_PROTECT
 The number of bytes of stack needed to recover from a stack overflow, for
 languages where such a recovery is supported.  The default value of
diff --git a/gcc/explow.c b/gcc/explow.c
index c38682d4ce5..0bbbc003f6d 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 
 static rtx break_out_memory_refs (rtx);
 static void emit_stack_probe (rtx);
+static void anti_adjust_stack_and_probe (rtx);
 
 /* Truncate and perhaps sign-extend C as appropriate for MODE.
 */
@@ -1233,9 +1234,11 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align)
   gcc_assert (!(stack_pointer_delta
 		% (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)));
 
-  /* If needed, check that we have the required amount of stack.
-     Take into account what has already been checked.  */
-  if (flag_stack_check == GENERIC_STACK_CHECK)
+  /* If needed, check that we have the required amount of stack.  Take into
+     account what has already been checked.  */
+  if (STACK_CHECK_MOVING_SP)
+    ;
+  else if (flag_stack_check == GENERIC_STACK_CHECK)
     probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
 		       size);
   else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
@@ -1304,7 +1307,10 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align)
       emit_label (space_available);
     }
 
-  anti_adjust_stack (size);
+  if (flag_stack_check && STACK_CHECK_MOVING_SP)
+    anti_adjust_stack_and_probe (size);
+  else
+    anti_adjust_stack (size);
 
 #ifdef STACK_GROWS_DOWNWARD
   emit_move_insn (target, virtual_stack_dynamic_rtx);
@@ -1355,6 +1361,12 @@ emit_stack_probe (rtx address)
 
   MEM_VOLATILE_P (memref) = 1;
 
+  /* See if we have an insn to probe the stack.  */
+#ifdef HAVE_probe_stack
+  if (HAVE_probe_stack)
+    emit_insn (gen_probe_stack (memref));
+  else
+#endif
   if (STACK_CHECK_PROBE_LOAD)
     emit_move_insn (gen_reg_rtx (word_mode), memref);
   else
@@ -1362,15 +1374,20 @@
 }
 
 /* Probe a range of stack addresses from FIRST to FIRST+SIZE, inclusive.
-   FIRST is a constant and size is a Pmode RTX.  These are offsets from the
-   current stack pointer.  STACK_GROWS_DOWNWARD says whether to add or
-   subtract from the stack.  If SIZE is constant, this is done
-   with a fixed number of probes.  Otherwise, we must make a loop.  */
+   FIRST is a constant and SIZE is a Pmode RTX.  These are offsets from
+   the current stack pointer.  STACK_GROWS_DOWNWARD says whether to add
+   or subtract them from the stack pointer.  */
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
 
 #ifdef STACK_GROWS_DOWNWARD
 #define STACK_GROW_OP MINUS
+#define STACK_GROW_OPTAB sub_optab
+#define STACK_GROW_OFF(off) -(off)
 #else
 #define STACK_GROW_OP PLUS
+#define STACK_GROW_OPTAB add_optab
+#define STACK_GROW_OFF(off) (off)
 #endif
 
 void
@@ -1380,113 +1397,272 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
-  /* Next see if the front end has set up a function for us to call to
-     check the stack.  */
-  if (stack_check_libfunc != 0)
+  /* Next see if we have a function to check the stack.  */
+  if (stack_check_libfunc)
     {
-      rtx addr = memory_address (QImode,
+      rtx addr = memory_address (Pmode,
 				 gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
 						 stack_pointer_rtx,
 						 plus_constant (size, first)));
-
-      addr = convert_memory_address (ptr_mode, addr);
-      emit_library_call (stack_check_libfunc, LCT_NORMAL, VOIDmode, 1, addr,
-			 ptr_mode);
+      emit_library_call (stack_check_libfunc, LCT_NORMAL, VOIDmode, 1, addr,
+			 Pmode);
     }
 
-  /* Next see if we have an insn to check the stack.  Use it if so.  */
+  /* Next see if we have an insn to check the stack.  */
 #ifdef HAVE_check_stack
   else if (HAVE_check_stack)
     {
-      insn_operand_predicate_fn pred;
-      rtx last_addr
-	= force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
-					 stack_pointer_rtx,
-					 plus_constant (size, first)),
-			 NULL_RTX);
-
-      pred = insn_data[(int) CODE_FOR_check_stack].operand[0].predicate;
-      if (pred && !((*pred) (last_addr, Pmode)))
-	last_addr = copy_to_mode_reg (Pmode, last_addr);
+      rtx addr = memory_address (Pmode,
+				 gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
						 stack_pointer_rtx,
						 plus_constant (size, first)));
+      insn_operand_predicate_fn pred
+	= insn_data[(int) CODE_FOR_check_stack].operand[0].predicate;
+      if (pred && !((*pred) (addr, Pmode)))
+	addr = copy_to_mode_reg (Pmode, addr);
 
-      emit_insn (gen_check_stack (last_addr));
+      emit_insn (gen_check_stack (addr));
     }
 #endif
 
-  /* If we have to generate explicit probes, see if we have a constant
-     small number of them to generate.  If so, that's the easy case.  */
-  else if (CONST_INT_P (size)
-	   && INTVAL (size) < 10 * STACK_CHECK_PROBE_INTERVAL)
+  /* Otherwise we have to generate explicit probes.  If we have a constant
+     small number of them to generate, that's the easy case.  */
+  else if (CONST_INT_P (size) && INTVAL (size) < 7 * PROBE_INTERVAL)
     {
-      HOST_WIDE_INT offset;
-
-      /* Start probing at FIRST + N * STACK_CHECK_PROBE_INTERVAL
-	 for values of N from 1 until it exceeds LAST.  If only one
-	 probe is needed, this will not generate any code.  Then probe
-	 at LAST.  */
-      for (offset = first + STACK_CHECK_PROBE_INTERVAL;
-	   offset < INTVAL (size);
-	   offset = offset + STACK_CHECK_PROBE_INTERVAL)
-	emit_stack_probe (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
-					  stack_pointer_rtx,
-					  GEN_INT (offset)));
-
-      emit_stack_probe (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
-					stack_pointer_rtx,
-					plus_constant (size, first)));
+      HOST_WIDE_INT isize = INTVAL (size), i;
+      rtx addr;
+
+      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+	 it exceeds SIZE.  If only one probe is needed, this will not
+	 generate any code.  Then probe at FIRST + SIZE.  */
+      for (i = PROBE_INTERVAL; i < isize; i += PROBE_INTERVAL)
+	{
+	  addr = memory_address (Pmode,
+				 plus_constant (stack_pointer_rtx,
						STACK_GROW_OFF (first + i)));
+	  emit_stack_probe (addr);
+	}
+
+      addr = memory_address (Pmode,
+			     plus_constant (stack_pointer_rtx,
					    STACK_GROW_OFF (first + isize)));
+      emit_stack_probe (addr);
     }
 
-  /* In the variable case, do the same as above, but in a loop.  We emit loop
-     notes so that loop optimization can be done.  */
+  /* In the variable case, do the same as above, but in a loop.  Note that we
+     must be extra careful with variables wrapping around because we might be
+     at the very top (or the very bottom) of the address space and we have to
+     be able to handle this case properly; in particular, we use an equality
+     test for the loop condition.  */
   else
     {
-      rtx test_addr
-	= force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
-					 stack_pointer_rtx,
-					 GEN_INT (first + STACK_CHECK_PROBE_INTERVAL)),
-			 NULL_RTX);
-      rtx last_addr
-	= force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
-					 stack_pointer_rtx,
-					 plus_constant (size, first)),
-			 NULL_RTX);
-      rtx incr = GEN_INT (STACK_CHECK_PROBE_INTERVAL);
+      rtx rounded_size, rounded_size_op, test_addr, last_addr, temp;
       rtx loop_lab = gen_label_rtx ();
-      rtx test_lab = gen_label_rtx ();
       rtx end_lab = gen_label_rtx ();
-      rtx temp;
 
-      if (!REG_P (test_addr)
-	  || REGNO (test_addr) < FIRST_PSEUDO_REGISTER)
-	test_addr = force_reg (Pmode, test_addr);
 
-      emit_jump (test_lab);
+      /* Step 1: round SIZE to the previous multiple of the interval.  */
+
+      /* ROUNDED_SIZE = SIZE & -PROBE_INTERVAL  */
+      rounded_size
+	= simplify_gen_binary (AND, Pmode, size, GEN_INT (-PROBE_INTERVAL));
+      rounded_size_op = force_operand (rounded_size, NULL_RTX);
+
+
+      /* Step 2: compute initial and final value of the loop counter.  */
+
+      /* TEST_ADDR = SP + FIRST.
 */
+      test_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
						 stack_pointer_rtx,
						 GEN_INT (first)), NULL_RTX);
+
+      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
+      last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
						 test_addr,
						 rounded_size_op), NULL_RTX);
+
+
+      /* Step 3: the loop
+
+	  while (TEST_ADDR != LAST_ADDR)
+	    {
+	      TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+	      probe at TEST_ADDR
+	    }
+
+	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+	 until it is equal to ROUNDED_SIZE.  */
 
       emit_label (loop_lab);
-      emit_stack_probe (test_addr);
 
-#ifdef STACK_GROWS_DOWNWARD
-#define CMP_OPCODE GTU
-      temp = expand_binop (Pmode, sub_optab, test_addr, incr, test_addr,
-			   1, OPTAB_WIDEN);
-#else
-#define CMP_OPCODE LTU
-      temp = expand_binop (Pmode, add_optab, test_addr, incr, test_addr,
+      /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
+      emit_cmp_and_jump_insns (test_addr, last_addr, EQ, NULL_RTX, Pmode, 1,
			       end_lab);
+
+      /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
+      temp = expand_binop (Pmode, STACK_GROW_OPTAB, test_addr,
			   GEN_INT (PROBE_INTERVAL), test_addr,
 			   1, OPTAB_WIDEN);
-#endif
 
       gcc_assert (temp == test_addr);
 
-      emit_label (test_lab);
-      emit_cmp_and_jump_insns (test_addr, last_addr, CMP_OPCODE,
-			       NULL_RTX, Pmode, 1, loop_lab);
-      emit_jump (end_lab);
+      /* Probe at TEST_ADDR.  */
+      emit_stack_probe (test_addr);
+
+      emit_jump (loop_lab);
+
       emit_label (end_lab);
 
-      emit_stack_probe (last_addr);
+
+      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+	 that SIZE is equal to ROUNDED_SIZE.  */
+
+      /* TEMP = SIZE - ROUNDED_SIZE.  */
+      temp = simplify_gen_binary (MINUS, Pmode, size, rounded_size);
+      if (temp != const0_rtx)
+	{
+	  rtx addr;
+
+	  if (GET_CODE (temp) == CONST_INT)
+	    {
+	      /* Use [base + disp] addressing mode if supported.  */
+	      HOST_WIDE_INT offset = INTVAL (temp);
+	      addr = memory_address (Pmode,
				     plus_constant (last_addr,
						    STACK_GROW_OFF (offset)));
+	    }
+	  else
+	    {
+	      /* Manual CSE if the difference is not known at compile-time.  */
+	      temp = gen_rtx_MINUS (Pmode, size, rounded_size_op);
+	      addr = memory_address (Pmode,
				     gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
						     last_addr, temp));
+	    }
+
+	  emit_stack_probe (addr);
+	}
     }
 }
-
+
+/* Adjust the stack by SIZE bytes while probing it.  Note that we skip the
+   probe for the first interval + a small dope of 4 words and instead probe
+   that many bytes past the specified size to maintain a protection area.  */
+
+static void
+anti_adjust_stack_and_probe (rtx size)
+{
+  const int dope = 4 * UNITS_PER_WORD;
+
+  /* First ensure SIZE is Pmode.  */
+  if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
+    size = convert_to_mode (Pmode, size, 1);
+
+  /* If we have a constant small number of probes to generate, that's the
+     easy case.  */
+  if (GET_CODE (size) == CONST_INT && INTVAL (size) < 7 * PROBE_INTERVAL)
+    {
+      HOST_WIDE_INT isize = INTVAL (size), i;
+      bool first_probe = true;
+
+      /* Adjust SP and probe to PROBE_INTERVAL + N * PROBE_INTERVAL for
+	 values of N from 1 until it exceeds SIZE.  If only one probe is
+	 needed, this will not generate any code.  Then adjust and probe
+	 to PROBE_INTERVAL + SIZE.
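+
+	 As a purely illustrative example, with the default 4096-byte
+	 PROBE_INTERVAL and 64-bit words (so dope = 32), SIZE = 10000
+	 emits: adjust by 2*4096 + 32 = 8224 then probe, adjust by 4096
+	 then probe, adjust by 10000 + 4096 - 12288 = 1808 then probe.
+	 SP has then moved by SIZE + PROBE_INTERVAL + dope bytes in
+	 total, and the final adjustment at the bottom of the function
+	 moves it back by PROBE_INTERVAL + dope.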
*/ + for (i = PROBE_INTERVAL; i < isize; i += PROBE_INTERVAL) + { + if (first_probe) + { + anti_adjust_stack (GEN_INT (2 * PROBE_INTERVAL + dope)); + first_probe = false; + } + else + anti_adjust_stack (GEN_INT (PROBE_INTERVAL)); + emit_stack_probe (stack_pointer_rtx); + } + + if (first_probe) + anti_adjust_stack (plus_constant (size, PROBE_INTERVAL + dope)); + else + anti_adjust_stack (plus_constant (size, PROBE_INTERVAL - i)); + emit_stack_probe (stack_pointer_rtx); + } + + /* In the variable case, do the same as above, but in a loop. Note that we + must be extra careful with variables wrapping around because we might be + at the very top (or the very bottom) of the address space and we have to + be able to handle this case properly; in particular, we use an equality + test for the loop condition. */ + else + { + rtx rounded_size, rounded_size_op, last_addr, temp; + rtx loop_lab = gen_label_rtx (); + rtx end_lab = gen_label_rtx (); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + /* ROUNDED_SIZE = SIZE & -PROBE_INTERVAL */ + rounded_size + = simplify_gen_binary (AND, Pmode, size, GEN_INT (-PROBE_INTERVAL)); + rounded_size_op = force_operand (rounded_size, NULL_RTX); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* SP = SP_0 + PROBE_INTERVAL. */ + anti_adjust_stack (GEN_INT (PROBE_INTERVAL + dope)); + + /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ + last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode, + stack_pointer_rtx, + rounded_size_op), NULL_RTX); + + + /* Step 3: the loop + + while (SP != LAST_ADDR) + { + SP = SP + PROBE_INTERVAL + probe at SP + } + + adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for + values of N from 1 until it is equal to ROUNDED_SIZE. */ + + emit_label (loop_lab); + + /* Jump to END_LAB if SP == LAST_ADDR. */ + emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX, + Pmode, 1, end_lab); + + /* SP = SP + PROBE_INTERVAL and probe at SP. */ + anti_adjust_stack (GEN_INT (PROBE_INTERVAL)); + emit_stack_probe (stack_pointer_rtx); + + emit_jump (loop_lab); + + emit_label (end_lab); + + + /* Step 4: adjust SP and probe to PROBE_INTERVAL + SIZE if we cannot + assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ + + /* TEMP = SIZE - ROUNDED_SIZE. */ + temp = simplify_gen_binary (MINUS, Pmode, size, rounded_size); + if (temp != const0_rtx) + { + /* Manual CSE if the difference is not known at compile-time. */ + if (GET_CODE (temp) != CONST_INT) + temp = gen_rtx_MINUS (Pmode, size, rounded_size_op); + anti_adjust_stack (temp); + emit_stack_probe (stack_pointer_rtx); + } + } + + /* Adjust back to account for the additional first interval. */ + adjust_stack (GEN_INT (PROBE_INTERVAL + dope)); +} + /* Return an rtx representing the register or memory location in which a scalar value of data type VALTYPE was returned by a function call to function FUNC. diff --git a/gcc/expr.h b/gcc/expr.h index 0eceb6e45be..e84779639b5 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -218,9 +218,9 @@ do { \ #define STACK_CHECK_STATIC_BUILTIN 0 #endif -/* The default interval is one page. */ -#ifndef STACK_CHECK_PROBE_INTERVAL -#define STACK_CHECK_PROBE_INTERVAL 4096 +/* The default interval is one page (4096 bytes). */ +#ifndef STACK_CHECK_PROBE_INTERVAL_EXP +#define STACK_CHECK_PROBE_INTERVAL_EXP 12 #endif /* The default is to do a store into the stack. */ @@ -228,6 +228,11 @@ do { \ #define STACK_CHECK_PROBE_LOAD 0 #endif +/* The default is not to move the stack pointer. 
 */
+#ifndef STACK_CHECK_MOVING_SP
+#define STACK_CHECK_MOVING_SP 0
+#endif
+
 /* This is a kludge to try to capture the discrepancy between the old
    mechanism (generic stack checking) and the new mechanism (static
    builtin stack checking).  STACK_CHECK_PROTECT needs to be bumped
@@ -252,7 +257,7 @@ do { \
    one probe per function.  */
 #ifndef STACK_CHECK_MAX_FRAME_SIZE
 #define STACK_CHECK_MAX_FRAME_SIZE \
-  (STACK_CHECK_PROBE_INTERVAL - UNITS_PER_WORD)
+  ((1 << STACK_CHECK_PROBE_INTERVAL_EXP) - UNITS_PER_WORD)
 #endif
 
 /* This is arbitrary, but should be large enough everywhere.  */
@@ -779,10 +784,9 @@ extern void update_nonlocal_goto_save_area (void);
 extern rtx allocate_dynamic_stack_space (rtx, rtx, int);
 
 /* Probe a range of stack addresses from FIRST to FIRST+SIZE, inclusive.
-   FIRST is a constant and size is a Pmode RTX.  These are offsets from the
-   current stack pointer.  STACK_GROWS_DOWNWARD says whether to add or
-   subtract from the stack.  If SIZE is constant, this is done
-   with a fixed number of probes.  Otherwise, we must make a loop.  */
+   FIRST is a constant and SIZE is a Pmode RTX.  These are offsets from
+   the current stack pointer.  STACK_GROWS_DOWNWARD says whether to add
+   or subtract them from the stack pointer.  */
 extern void probe_stack_range (HOST_WIDE_INT, rtx);
 
 /* Return an rtx that refers to the value returned by a library call
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 717ffa025cb..eef8cf8d5a0 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,43 @@
+2009-11-05  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/41918
+	* trans-decl.c: Silence intent(out) warning for derived type
+	dummy arguments with default initialization.
+
+2009-11-05  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	* interface.c (matching_typebound_op, gfc_extend_assign): Handle CLASS
+	variables.
+
+2009-11-05  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	PR fortran/41873
+	* resolve.c (resolve_function, resolve_call): Prevent abstract
+	interfaces from being called, but allow deferred type-bound procedures
+	with abstract interface.
+
+2009-11-04  Tobias Burnus  <burnus@gcc.gnu.org>
+	    Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	PR fortran/41937
+	* interface.c (gfc_check_operator_interface): Handle CLASS arguments.
+	* resolve.c (resolve_allocate_expr): Handle allocatable components of
+	CLASS variables.
+
+2009-11-04  Richard Guenther  <rguenther@suse.de>
+
+	* options.c (gfc_post_options): Rely on common code processing
+	LTO options.  Only enable -fwhole-file here.
+
+2009-11-03  Tobias Burnus  <burnus@net-b.de>
+
+	PR fortran/41907
+	* trans-expr.c (gfc_conv_procedure_call): Fix presence check
+	for optional arguments.
+
 2009-11-01  Tobias Burnus  <burnus@net-b.de>
 
 	PR fortran/41872
diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index 0fd4742a1de..866a81ca1d8 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -626,6 +626,7 @@ gfc_check_operator_interface (gfc_symbol *sym, gfc_intrinsic_op op,
      - Types and kinds do not conform, and
      - First argument is of derived type.
 */
   if (sym->formal->sym->ts.type != BT_DERIVED
+      && sym->formal->sym->ts.type != BT_CLASS
       && (r1 == 0 || r1 == r2)
       && (sym->formal->sym->ts.type == sym->formal->next->sym->ts.type
	  || (gfc_numeric_ts (&sym->formal->sym->ts)
@@ -2573,13 +2574,16 @@ matching_typebound_op (gfc_expr** tb_base,
   gfc_actual_arglist* base;
 
   for (base = args; base; base = base->next)
-    if (base->expr->ts.type == BT_DERIVED)
+    if (base->expr->ts.type == BT_DERIVED || base->expr->ts.type == BT_CLASS)
       {
	gfc_typebound_proc* tb;
	gfc_symbol* derived;
	gfc_try result;
 
-	derived = base->expr->ts.u.derived;
+	if (base->expr->ts.type == BT_CLASS)
+	  derived = base->expr->ts.u.derived->components->ts.u.derived;
+	else
+	  derived = base->expr->ts.u.derived;
 
	if (op == INTRINSIC_USER)
	  {
@@ -2836,7 +2840,7 @@ gfc_extend_assign (gfc_code *c, gfc_namespace *ns)
   rhs = c->expr2;
 
   /* Don't allow an intrinsic assignment to be replaced.  */
-  if (lhs->ts.type != BT_DERIVED
+  if (lhs->ts.type != BT_DERIVED && lhs->ts.type != BT_CLASS
       && (rhs->rank == 0 || rhs->rank == lhs->rank)
       && (lhs->ts.type == rhs->ts.type
	  || (gfc_numeric_ts (&lhs->ts)
	      && gfc_numeric_ts (&rhs->ts))))
diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c
index d2c6d9ba849..3742addb6b1 100644
--- a/gcc/fortran/options.c
+++ b/gcc/fortran/options.c
@@ -242,27 +242,9 @@ gfc_post_options (const char **pfilename)
   if (flag_whole_program)
     gfc_option.flag_whole_file = 1;
 
+  /* Enable whole-file mode if LTO is in effect.  */
   if (flag_lto || flag_whopr)
-    {
-#ifdef ENABLE_LTO
-      flag_generate_lto = 1;
-
-      /* When generating IL, do not operate in whole-program mode.
-	 Otherwise, symbols will be privatized too early, causing link
-	 errors later.  */
-      flag_whole_program = 0;
-
-      /* But do enable whole-file mode.  */
-      gfc_option.flag_whole_file = 1;
-#else
-      error ("LTO support has not been enabled in this configuration");
-#endif
-    }
-
-  /* Reconcile -flto and -fwhopr.  Set additional flags as appropriate and
-     check option consistency.  */
-  if (flag_lto && flag_whopr)
-    error ("-flto and -fwhopr are mutually exclusive");
+    gfc_option.flag_whole_file = 1;
 
   /* -fbounds-check is equivalent to -fcheck=bounds */
   if (flag_bounds_check)
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 5a5fcccc1f8..a721d944b33 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -2526,7 +2526,9 @@ resolve_function (gfc_expr *expr)
	  return FAILURE;
	}
 
-  if (sym && sym->attr.abstract)
+  /* If this is a deferred TBP with an abstract interface (which may
+     of course be referenced), expr->value.function.name will be set.  */
+  if (sym && sym->attr.abstract && !expr->value.function.name)
     {
       gfc_error ("ABSTRACT INTERFACE '%s' must not be referenced at %L",
		 sym->name, &expr->where);
@@ -3138,6 +3140,15 @@ resolve_call (gfc_code *c)
	}
     }
 
+  /* If this is a deferred TBP with an abstract interface
+     (which may of course be referenced), c->expr1 will be set.  */
+  if (csym && csym->attr.abstract && !c->expr1)
+    {
+      gfc_error ("ABSTRACT INTERFACE '%s' must not be referenced at %L",
+		 csym->name, &c->loc);
+      return FAILURE;
+    }
+
   /* Subroutines without the RECURSIVE attribution are not allowed to
    * call themselves.  */
   if (csym && is_illegal_recursion (csym, gfc_current_ns))
@@ -6198,7 +6209,7 @@ check_symbols:
	  sym = a->expr->symtree->n.sym;
 
	  /* TODO - check derived type components.
 */
-	  if (sym->ts.type == BT_DERIVED)
+	  if (sym->ts.type == BT_DERIVED || sym->ts.type == BT_CLASS)
	    continue;
 
	  if ((ar->start[i] != NULL
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 8ac6b9acc19..200c3f5654c 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -3776,8 +3776,12 @@ generate_local_decl (gfc_symbol * sym)
       else if (warn_unused_variable
	       && sym->attr.dummy
	       && sym->attr.intent == INTENT_OUT)
-	gfc_warning ("Dummy argument '%s' at %L was declared INTENT(OUT) but was not set",
-		     sym->name, &sym->declared_at);
+	{
+	  if (!(sym->ts.type == BT_DERIVED
+		&& sym->ts.u.derived->components->initializer))
+	    gfc_warning ("Dummy argument '%s' at %L was declared INTENT(OUT) "
+			 "but was not set", sym->name, &sym->declared_at);
+	}
       /* Specific warning for unused dummy arguments. */
       else if (warn_unused_variable && sym->attr.dummy)
	gfc_warning ("Unused dummy argument '%s' at %L", sym->name,
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index d8f8303fdbd..5a45f4f6368 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -2998,16 +2998,19 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
	     only needed when passing an array to an elemental procedure
	     as then array elements are accessed - or no NULL pointer is
	     allowed and a "1" or "0" should be passed if not present.
-	     When passing a deferred array to a non-deferred array dummy,
-	     the array needs to be packed and a check needs thus to be
-	     inserted.  */
+	     When passing a non-array-descriptor full array to a
+	     non-array-descriptor dummy, no check is needed.  For
+	     array-descriptor actual to array-descriptor dummy, see
+	     PR 41911 for why a check has to be inserted.
+	     fsym == NULL is checked as intrinsics require the descriptor
+	     but do not always set fsym.  */
	  if (e->expr_type == EXPR_VARIABLE
	      && e->symtree->n.sym->attr.optional
	      && ((e->rank > 0 && sym->attr.elemental)
		  || e->representation.length || e->ts.type == BT_CHARACTER
-		  || (e->rank > 0 && (fsym == NULL
-				      || (fsym->as->type != AS_ASSUMED_SHAPE
-					  && fsym->as->type != AS_DEFERRED)))))
+		  || (e->rank > 0
+		      && (fsym == NULL || fsym->as->type == AS_ASSUMED_SHAPE
+			  || fsym->as->type == AS_DEFERRED))))
	    gfc_conv_missing_dummy (&parmse, e, fsym ? fsym->ts : e->ts,
				    e->representation.length);
	}
diff --git a/gcc/gcc-plugin.h b/gcc/gcc-plugin.h
index 1792c0393a2..2e36f486262 100644
--- a/gcc/gcc-plugin.h
+++ b/gcc/gcc-plugin.h
@@ -43,6 +43,7 @@ enum plugin_event
   PLUGIN_REGISTER_GGC_CACHES,   /* Register an extra GGC cache table. */
   PLUGIN_ATTRIBUTES,            /* Called during attribute registration.  */
   PLUGIN_START_UNIT,            /* Called before processing a translation unit.  */
+  PLUGIN_PRAGMAS,               /* Called during pragma registration.  */
   PLUGIN_EVENT_LAST             /* Dummy event used for indexing callback array.  */
 };
diff --git a/gcc/gcc.c b/gcc/gcc.c
index b033d62ff13..6bc8e150a67 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -684,9 +684,15 @@ proper position among the other output files.  */
 #endif
 
 /* config.h can define SWITCHES_NEED_SPACES to control which options
-   require spaces between the option and the argument.  */
+   require spaces between the option and the argument.
+
+   We define SWITCHES_NEED_SPACES to include "o" by default.  This
+   causes "-ofoo.o" to be split into "-o foo.o" during the initial
+   processing of the command-line, before being seen by the specs
+   machinery.  This makes sure we record "foo.o" as the temporary file
+   to be deleted in the case of error, rather than "-ofoo.o".
 */
 #ifndef SWITCHES_NEED_SPACES
-#define SWITCHES_NEED_SPACES ""
+#define SWITCHES_NEED_SPACES "o"
 #endif
 
 /* config.h can define ENDFILE_SPEC to override the default crtn files.  */
@@ -4562,20 +4568,32 @@ process_command (int argc, const char **argv)
	}
       else
	{
+	  const char *p = strchr (argv[i], '@');
+	  char *fname;
 #ifdef HAVE_TARGET_OBJECT_SUFFIX
	  argv[i] = convert_filename (argv[i], 0, access (argv[i], F_OK));
 #endif
+	  if (!p)
+	    fname = xstrdup (argv[i]);
+	  else
+	    {
+	      fname = (char *) xmalloc (p - argv[i] + 1);
+	      memcpy (fname, argv[i], p - argv[i]);
+	      fname[p - argv[i]] = '\0';
+	    }
+
+	  if (strcmp (fname, "-") != 0 && access (fname, F_OK) < 0)
+	    {
+	      perror_with_name (fname);
+	      error_count++;
+	    }
+	  else
+	    {
+	      infiles[n_infiles].language = spec_lang;
+	      infiles[n_infiles++].name = argv[i];
+	    }
 
-	  if (strcmp (argv[i], "-") != 0 && access (argv[i], F_OK) < 0)
-	    {
-	      perror_with_name (argv[i]);
-	      error_count++;
-	    }
-	  else
-	    {
-	      infiles[n_infiles].language = spec_lang;
-	      infiles[n_infiles++].name = argv[i];
-	    }
+	  free (fname);
	}
     }
diff --git a/gcc/ira.c b/gcc/ira.c
index 962d0994c36..a3e899f8313 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -1442,6 +1442,9 @@ ira_setup_eliminable_regset (void)
   int need_fp
     = (! flag_omit_frame_pointer
        || (cfun->calls_alloca && EXIT_IGNORE_STACK)
+       /* We need the frame pointer to catch stack overflow exceptions
+	  if the stack pointer is moving.  */
+       || (flag_stack_check && STACK_CHECK_MOVING_SP)
        || crtl->accesses_prior_frames
        || crtl->stack_realign_needed
        || targetm.frame_pointer_required ());
diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog
index 0d464ec232e..3334de7eb95 100644
--- a/gcc/lto/ChangeLog
+++ b/gcc/lto/ChangeLog
@@ -1,3 +1,9 @@
+2009-11-04  Richard Guenther  <rguenther@suse.de>
+	    Rafael Avila de Espindola  <espindola@google.com>
+
+	* lto-elf.c (lto_elf_build_section_table): Add the base offset.
+	(lto_elf_file_open): Handle offsets in arguments name@offset.
+
 2009-10-30  Richard Guenther  <rguenther@suse.de>
 
	PR lto/41858
diff --git a/gcc/lto/lto-elf.c b/gcc/lto/lto-elf.c
index 190430b646c..ee587f7a750 100644
--- a/gcc/lto/lto-elf.c
+++ b/gcc/lto/lto-elf.c
@@ -167,9 +167,11 @@ lto_elf_build_section_table (lto_file *lto_file)
   lto_elf_file *elf_file = (lto_elf_file *)lto_file;
   htab_t section_hash_table;
   Elf_Scn *section;
+  size_t base_offset;
 
   section_hash_table = htab_create (37, hash_name, eq_name, free);
 
+  base_offset = elf_getbase (elf_file->elf);
   for (section = elf_getscn (elf_file->elf, 0);
        section;
        section = elf_nextscn (elf_file->elf, section))
@@ -206,7 +208,7 @@ lto_elf_build_section_table (lto_file *lto_file)
       new_slot->name = new_name;
       /* The offset into the file for this section.  */
-      new_slot->start = shdr->sh_offset;
+      new_slot->start = base_offset + shdr->sh_offset;
       new_slot->len = shdr->sh_size;
       *slot = new_slot;
     }
@@ -530,7 +532,6 @@ init_ehdr (lto_elf_file *elf_file)
     }
 }
 
-
 /* Open ELF file FILENAME.  If WRITABLE is true, the file is opened
    for write and, if necessary, created.  Otherwise, the file is
    opened for reading.  Returns the opened file.
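   FILENAME may also have the form "file@offset", where OFFSET is the
   decimal byte position of the ELF object inside an ar(1) archive; the
   '@' handling below seeks to that archive member (via elf_rand, after
   subtracting the 60-byte ar header) before reading.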
 */
@@ -540,18 +541,42 @@ lto_elf_file_open (const char *filename, bool writable)
 {
   lto_elf_file *elf_file;
   lto_file *result;
+  off_t offset;
+  const char *offset_p;
+  char *fname;
+
+  offset_p = strchr (filename, '@');
+  if (!offset_p)
+    {
+      fname = xstrdup (filename);
+      offset = 0;
+    }
+  else
+    {
+      int64_t t;
+      fname = (char *) xmalloc (offset_p - filename + 1);
+      memcpy (fname, filename, offset_p - filename);
+      fname[offset_p - filename] = '\0';
+      offset_p++;
+      sscanf (offset_p, "%" PRId64, &t);
+      offset = t;
+      /* elf_rand expects the offset to point to the ar header, not the
+	 object itself.  Subtract the size of the ar header (60 bytes).
+	 We don't use sizeof (struct ar_hdr) to avoid including ar.h.  */
+      offset -= 60;
+    }
 
   /* Set up.  */
   elf_file = XCNEW (lto_elf_file);
   result = (lto_file *) elf_file;
-  lto_file_init (result, filename);
+  lto_file_init (result, fname);
   elf_file->fd = -1;
 
   /* Open the file.  */
-  elf_file->fd = open (filename, writable ? O_WRONLY|O_CREAT : O_RDONLY, 0666);
+  elf_file->fd = open (fname, writable ? O_WRONLY|O_CREAT : O_RDONLY, 0666);
   if (elf_file->fd == -1)
     {
-      error ("could not open file %s", filename);
+      error ("could not open file %s", fname);
       goto fail;
     }
@@ -571,6 +596,26 @@ lto_elf_file_open (const char *filename, bool writable)
       goto fail;
     }
 
+  if (offset != 0)
+    {
+      Elf *e;
+      off_t t = elf_rand (elf_file->elf, offset);
+      if (t != offset)
+	{
+	  error ("could not seek in archive");
+	  goto fail;
+	}
+
+      e = elf_begin (elf_file->fd, ELF_C_READ, elf_file->elf);
+      if (e == NULL)
+	{
+	  error ("could not find archive member");
+	  goto fail;
+	}
+      elf_end (elf_file->elf);
+      elf_file->elf = e;
+    }
+
   if (writable)
     {
       init_ehdr (elf_file);
diff --git a/gcc/opts.c b/gcc/opts.c
index 72411b662cc..b2b6c4454ee 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1117,6 +1117,28 @@ decode_options (unsigned int argc, const char **argv)
       PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) = 40;
     }
 
+  if (flag_lto || flag_whopr)
+    {
+#ifdef ENABLE_LTO
+      flag_generate_lto = 1;
+
+      /* When generating IL, do not operate in whole-program mode.
+	 Otherwise, symbols will be privatized too early, causing link
+	 errors later.  */
+      flag_whole_program = 0;
+
+      /* FIXME lto.  Disable var-tracking until debug information
+	 is properly handled in free_lang_data.  */
+      flag_var_tracking = 0;
+#else
+      error ("LTO support has not been enabled in this configuration");
+#endif
+    }
+
+  /* Reconcile -flto and -fwhopr.  Set additional flags as appropriate and
+     check option consistency.
*/ + if (flag_lto && flag_whopr) + error ("-flto and -fwhopr are mutually exclusive"); } #define LEFT_COLUMN 27 diff --git a/gcc/plugin.c b/gcc/plugin.c index 18b7c8aecad..2d64422787e 100644 --- a/gcc/plugin.c +++ b/gcc/plugin.c @@ -58,7 +58,9 @@ const char *plugin_event_name[] = "PLUGIN_GGC_END", "PLUGIN_REGISTER_GGC_ROOTS", "PLUGIN_REGISTER_GGC_CACHES", - "PLUGIN_START_UNIT", + "PLUGIN_ATTRIBUTES", + "PLUGIN_START_UNIT", + "PLUGIN_PRAGMAS", "PLUGIN_EVENT_LAST" }; @@ -325,6 +327,7 @@ register_callback (const char *plugin_name, case PLUGIN_GGC_MARKING: case PLUGIN_GGC_END: case PLUGIN_ATTRIBUTES: + case PLUGIN_PRAGMAS: case PLUGIN_FINISH: { struct callback_info *new_callback; @@ -344,7 +347,7 @@ register_callback (const char *plugin_name, break; case PLUGIN_EVENT_LAST: default: - error ("Unkown callback event registered by plugin %s", + error ("Unknown callback event registered by plugin %s", plugin_name); } } @@ -368,6 +371,7 @@ invoke_plugin_callbacks (enum plugin_event event, void *gcc_data) case PLUGIN_FINISH_UNIT: case PLUGIN_CXX_CP_PRE_GENERICIZE: case PLUGIN_ATTRIBUTES: + case PLUGIN_PRAGMAS: case PLUGIN_FINISH: case PLUGIN_GGC_START: case PLUGIN_GGC_MARKING: diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index d14bbe58bf7..ab88f23a379 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -2252,6 +2252,11 @@ may_trap_p_1 (const_rtx x, unsigned flags) /* Memory ref can trap unless it's a static var or a stack slot. */ case MEM: + /* Recognize specific pattern of stack checking probes. */ + if (flag_stack_check + && MEM_VOLATILE_P (x) + && XEXP (x, 0) == stack_pointer_rtx) + return 1; if (/* MEM_NOTRAP_P only relates to the actual position of the memory reference; moving it out of context such as when moving code when optimizing, might cause its address to become invalid. */ @@ -4517,8 +4522,16 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x, known_x, known_mode, known_ret); case UMOD: - /* The result must be <= the second operand. */ - return cached_num_sign_bit_copies (XEXP (x, 1), mode, + /* The result must be <= the second operand. If the second operand + has (or just might have) the high bit set, we know nothing about + the number of sign bit copies. */ + if (bitwidth > HOST_BITS_PER_WIDE_INT) + return 1; + else if ((nonzero_bits (XEXP (x, 1), mode) + & ((HOST_WIDE_INT) 1 << (bitwidth - 1))) != 0) + return 1; + else + return cached_num_sign_bit_copies (XEXP (x, 1), mode, known_x, known_mode, known_ret); case DIV: diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 39a791d9890..ee119bcd65c 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -2946,6 +2946,9 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, tmp_op, gen_rtx_PARALLEL (VOIDmode, vec)); return tmp; } + if (GET_CODE (trueop0) == VEC_DUPLICATE + && GET_MODE (XEXP (trueop0, 0)) == mode) + return XEXP (trueop0, 0); } else { diff --git a/gcc/system.h b/gcc/system.h index 0c846cfbdd6..03910d0152f 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -761,7 +761,7 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN; TARGET_ASM_EXCEPTION_SECTION TARGET_ASM_EH_FRAME_SECTION \ SMALL_ARG_MAX ASM_OUTPUT_SHARED_BSS ASM_OUTPUT_SHARED_COMMON \ ASM_OUTPUT_SHARED_LOCAL UNALIGNED_WORD_ASM_OP \ - ASM_MAKE_LABEL_LINKONCE + ASM_MAKE_LABEL_LINKONCE STACK_CHECK_PROBE_INTERVAL /* Hooks that are no longer used. 
 */
 #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 672d36df211..bf743708f87 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,251 @@
+2009-11-06  Basile Starynkevitch  <basile@starynkevitch.net>
+
+	* g++.dg/plugin/pragma_plugin-test-1.C: New testcase for
+	PLUGIN_PRAGMAS.
+	* g++.dg/plugin/pragma_plugin.c: New test plugin for
+	PLUGIN_PRAGMAS.
+	* g++.dg/plugin/plugin.exp (plugin_test_list): Add pragma_plugin.c and
+	pragma_plugin-test-1.C.
+
+2009-11-06  Andrew Pinski  <andrew_pinski@playstation.sony.com>
+
+	PR c++/41536
+	* g++.dg/ext/always_inline-5.C: New test.
+
+2009-11-06  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/41967
+	* g++.dg/gomp/pr41967.C: New test.
+
+2009-11-06  Michael Matz  <matz@suse.de>
+
+	PR middle-end/41963
+	* gcc.dg/pr41963.c: New test.
+
+2009-11-06  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/41935
+	* gcc.dg/pr41935.c: New test.
+	* c-c++-common/pr41935.c: New test.
+	* c-c++-common/builtin-offsetof.c (f0): Allow index one past the last
+	element.
+	* gcc.c-torture/execute/pr41935.c: New test.
+
+2009-11-05  Jason Merrill  <jason@redhat.com>
+
+	PR c++/34180
+	* g++.dg/init/synth2.C: New.
+
+	PR c++/7046
+	* g++.dg/abi/pragma-pack1.C: New.
+
+	PR c++/34870
+	* g++.dg/lookup/koenig7.C: New.
+
+	PR c++/41703
+	* g++.dg/template/partial6.C: New.
+
+2009-11-05  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	* gfortran.dg/class_12.f03: New test.
+
+2009-11-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* gcc.target/i386/i386.exp (check_effective_target_xop): Fix typo
+	in builtin's name.
+
+2009-11-04  Jason Merrill  <jason@redhat.com>
+
+	PR c++/36912
+	* g++.dg/init/static-init2.C: New.
+
+2009-11-05  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	PR fortran/41873
+	* gfortran.dg/interface_abstract_4.f90: New test.
+
+2009-11-05  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+	* gcc.target/m68k/pr41302.c: Fix target triplet.
+
+2009-11-04  Jason Merrill  <jason@redhat.com>
+
+	PR c++/39413
+	* g++.dg/template/overload11.C: New.
+	* g++.dg/template/nested3.C: Adjust.
+
+2009-11-04  Eric Botcazou  <ebotcazou@adacore.com>
+
+	PR ada/20548
+	* ada/acats/norun.lst: Remove the stack checking tests.
+	* ada/acats/run_acats: Limit the stack to 8MB.
+
+2009-11-04  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/41556
+	PR fortran/41937
+	* gfortran.dg/class_11.f03: New test.
+
+2009-11-04  Jason Merrill  <jason@redhat.com>
+
+	PR c++/35067
+	* g++.dg/abi/thunk5.C: New.
+
+2009-11-04  Andrew Pinski  <andrew_pinski@playstation.sony.com>
+
+	PR rtl-opt/41833
+	* gcc.target/powerpc/altivec-33.c: New testcase.
+
+2009-11-04  Jason Merrill  <jason@redhat.com>
+
+	PR c++/17365, DR 218
+	* g++.dg/lookup/koenig6.C: New.
+	* g++.dg/lookup/koenig5.C: Adjust.
+	* g++.dg/template/crash56.C: Adjust.
+	* g++.old-deja/g++.ns/koenig5.C: Adjust.
+
+2009-11-04  Harsha Jagasia  <harsha.jagasia@amd.com>
+	    Dwarakanath Rajagopal  <dwarak.rajagopal@amd.com>
+
+	* gcc.target/i386/xop-check.h: New file.
+	* gcc.target/i386/xop-hadduX.c: Ditto.
+	* gcc.target/i386/xop-haddX.c: Ditto.
+	* gcc.target/i386/xop-hsubX.c: Ditto.
+	* gcc.target/i386/xop-imul32widen-vector.c: Ditto.
+	* gcc.target/i386/xop-pcmov2.c: Ditto.
+	* gcc.target/i386/xop-pcmov.c: Ditto.
+	* gcc.target/i386/xop-rotate1-vector.c: Ditto.
+	* gcc.target/i386/xop-rotate2-vector.c: Ditto.
+	* gcc.target/i386/xop-rotate3-vector.c: Ditto.
+	* gcc.target/i386/xop-shift1-vector.c: Ditto.
+	* gcc.target/i386/xop-shift2-vector.c: Ditto.
+	* gcc.target/i386/xop-shift3-vector.c: Ditto.
+	* gcc.target/i386/i386.exp: Add check_effective_target_xop.
+	* gcc.target/i386/sse-12.c: Update with new compile options to
+	activate and check xopintrin.h intrinsic file.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-14.c: Ditto.
+	* gcc.target/i386/sse-22.c: Ditto.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* g++.dg/other/i386-2.C: Ditto.
+	* g++.dg/other/i386-3.C: Ditto.
+	* g++.dg/other/i386-5.C: Ditto.
+	* g++.dg/other/i386-6.C: Ditto.
+
+2009-11-04  Wei Guozhi  <carrot@google.com>
+
+	PR target/40835
+	* gcc.target/arm/pr40835: New testcase.
+
+2009-11-04  Revital Eres  <eres@il.ibm.com>
+
+	* gcc.target/powerpc/vsx-vectorize-3.c: Adjust testcase following
+	change in decision of peeling for alignment.
+	* gcc.target/powerpc/vsx-vectorize-5.c: Likewise.
+	* gcc.dg/vect/vect-50.c: Likewise.
+	* gcc.dg/vect/vect-42.c: Likewise.
+	* gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c:
+	Likewise.
+	* gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c:
+	Likewise.
+	* gcc.dg/vect/vect-multitypes-6.c: Likewise.
+	* gcc.dg/vect/vect-60.c: Likewise.
+	* gcc.dg/vect/vect-52.c: Likewise.
+	* gcc.dg/vect/vect-44.c: Likewise.
+	* gcc.dg/vect/vect-27.c: Likewise.
+	* gcc.dg/vect/vect-29.c: Likewise.
+	* gcc.dg/vect/vect-72.c: Likewise.
+	* gcc.dg/vect/vect-56.c: Likewise.
+	* gcc.dg/vect/vect-48.c: Likewise.
+	* gcc.dg/vect/vect-multitypes-3.c: Likewise.
+	* gfortran.dg/vect/vect-2.f90: Likewise.
+	* gfortran.dg/vect/vect-5.f90: Likewise.
+
+2009-11-04  Richard Guenther  <rguenther@suse.de>
+
+	PR tree-optimization/41919
+	* gcc.c-torture/execute/pr41919.c: New testcase.
+
+2009-11-04  Carlos O'Donell  <carlos@codesourcery.com>
+
+	PR target/41302
+	* gcc.target/m68k/pr41302.c: New test.
+
+2009-11-03  Jason Merrill  <jason@redhat.com>
+
+	PR c++/36959
+	* g++.dg/opt/inline16.C: New.
+
+2009-11-03  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* gnat.dg/stack_check1.adb: New test.
+	* gnat.dg/stack_check2.adb: Likewise.
+
+2009-11-03  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/41917
+	* gcc.c-torture/execute/pr41917.c: New test.
+
+2009-11-03  Uros Bizjak  <ubizjak@gmail.com>
+
+	* gcc.target/i386/pr41900.c: Make test compile only.  Scan assembler
+	dump to not include "call *%esp".
+
+2009-11-03  Jason Merrill  <jason@redhat.com>
+
+	PR c++/39786
+	* g++.dg/lookup/using22.C: New.
+
+	PR c++/41876
+	* g++.dg/parse/eh-decl.C: New.
+
+	PR c++/41927
+	* g++.dg/template/sfinae16.C: New.
+
+	PR c++/41815
+	* g++.dg/cpp0x/rv-return.C: New.
+	* g++.dg/cpp0x/deduce.C: Adjust.
+
+	PR c++/40944
+	* g++.dg/template/sfinae15.C: New.
+
+	PR c++/40687
+	* g++.dg/cpp0x/auto3.C: Remove xfail.
+
+2009-11-03  Tobias Burnus  <burnus@net-b.de>
+
+	PR fortran/41907
+	* gfortran.dg/missing_optional_dummy_6.f90: New test.
+
+2009-11-03  Nick Clifton  <nickc@redhat.com>
+
+	* gcc.target/rx/builtins.c: Remove redundant tests.
+	Add test of MVTIPL instruction.
+	* gcc.target/rx/interrupts.c: Use fast_interrupt and interrupt
+	function attributes.  Add -msave-acc-in-interrupts option to the
+	command line.
+
+2009-11-03  Andrew Stubbs  <ams@codesourcery.com>
+	    Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+	* g++.dg/torture/pr36191.C: Don't run with -fomit-frame-pointer on
+	sh, m68k and fido.
+
+2009-11-03  Dodji Seketeli  <dodji@redhat.com>
+
+	PR c++/38699
+	* c-c++-common/dfp/builtin-offsetof.c: Moved to ...
+	* c-c++-common/builtin-offsetof.c: ... here.
+ +2009-11-03 Dodji Seketeli <dodji@redhat.com> + + PR c++/38699 + * c-c++-common/dfp/builtin-offsetof.c: New test. + * g++.dg/other/offsetof6.C: Likewise. + 2009-11-03 Uros Bizjak <ubizjak@gmail.com> PR target/41900 @@ -1400,18 +1648,15 @@ * gcc.c-torture/execute/builtins/builtins.exp: Likewise. * gcc.c-torture/execute/ieee/ieee.exp: Likewise. * gcc.c-torture/unsorted/unsorted.exp: Likewise. - * gcc.target/i386/math-torture/math-torture.exp: - Likewise. + * gcc.target/i386/math-torture/math-torture.exp: Likewise. * gcc.dg/lto/lto.exp: New. * gfortran.dg/lto/lto.exp: New. * lib/target-supports.exp (check_effective_target_lto): New. - * lib/c-torture.exp: Load target-supports.exp. - Define LTO_TORTURE_OPTIONS if check_effective_target_lto - returns nonzero. + * lib/c-torture.exp: Load target-supports.exp. Define + LTO_TORTURE_OPTIONS if check_effective_target_lto returns nonzero. * lib/gcc-dg.exp: Likewise. * lib/lto.exp: New. - * lib/torture-options.exp: Add support for a third - argument. + * lib/torture-options.exp: Add support for a third argument. 2009-10-03 Uros Bizjak <ubizjak@gmail.com> diff --git a/gcc/testsuite/ada/acats/norun.lst b/gcc/testsuite/ada/acats/norun.lst index 8441024b241..c0b08e42c07 100644 --- a/gcc/testsuite/ada/acats/norun.lst +++ b/gcc/testsuite/ada/acats/norun.lst @@ -1,10 +1,2 @@ -c52103x -c52104x -c52104y -cb1010a -cb1010c -cb1010d templat # Tests must be sorted in alphabetical order -# c52103x, c52104x, c52104y: -fstack-check doesn't work, PR middle-end/20548 -# cb1010a, cb1010c, cb1010d: likewise diff --git a/gcc/testsuite/ada/acats/run_acats b/gcc/testsuite/ada/acats/run_acats index 05f3ff57df5..acb6ab57151 100755 --- a/gcc/testsuite/ada/acats/run_acats +++ b/gcc/testsuite/ada/acats/run_acats @@ -52,4 +52,7 @@ echo exec gnatmake '"$@"' >> host_gnatmake chmod +x host_gnatmake +# Limit the stack to 8MB for stack checking +ulimit -s 8192 + exec $testdir/run_all.sh ${1+"$@"} diff --git a/gcc/testsuite/c-c++-common/builtin-offsetof.c b/gcc/testsuite/c-c++-common/builtin-offsetof.c new file mode 100644 index 00000000000..6d97775467d --- /dev/null +++ b/gcc/testsuite/c-c++-common/builtin-offsetof.c @@ -0,0 +1,29 @@ +// Contributed by Dodji Seketeli <dodji@redhat.com> +// Origin PR c++/38699 +// { dg-options "-Warray-bounds" } +// { dg-do compile } + +struct A +{ + const char *p; +}; + +struct B +{ + char p[10]; + struct A a; +}; + +void +f0 () +{ + __builtin_offsetof(struct A, p); // OK + __builtin_offsetof(struct A, p[0]); // { dg-error "non constant address" } + __builtin_offsetof(struct B, p[0]); // OK + __builtin_offsetof(struct B, p[9]); // OK + __builtin_offsetof(struct B, p[10]); // OK + __builtin_offsetof(struct B, p[11]); // { dg-warning "greater than size" } + __builtin_offsetof(struct B, a.p); // OK + __builtin_offsetof(struct B, p[0]); // OK + __builtin_offsetof(struct B, a.p[0]); // { dg-error "non constant address" } +} diff --git a/gcc/testsuite/c-c++-common/pr41935.c b/gcc/testsuite/c-c++-common/pr41935.c new file mode 100644 index 00000000000..3279e75593d --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr41935.c @@ -0,0 +1,70 @@ +/* { dg-options "-Warray-bounds" } */ +/* { dg-do compile } */ + +struct A +{ + int i; + char p[1]; +}; + +struct B +{ + struct A a; + int i; +}; + +struct C +{ + int i; + struct A a; +}; + +union D +{ + char p[1]; + struct A a; + struct B b; + struct C c; +}; + +struct E +{ + int i; + union D d; +}; + +struct F +{ + union D d; + int i; +}; + +union G +{ + int i; + union D d; +}; + +void +f0 () +{ + __builtin_offsetof 
(struct A, p[4]); /* OK */ + __builtin_offsetof (struct B, a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (struct C, a.p[4]); /* OK */ + __builtin_offsetof (union D, p[4]); /* OK */ + __builtin_offsetof (union D, a.p[4]); /* OK */ + __builtin_offsetof (union D, b.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (union D, c.a.p[4]); /* OK */ + __builtin_offsetof (struct E, d.p[4]); /* OK */ + __builtin_offsetof (struct E, d.a.p[4]); /* OK */ + __builtin_offsetof (struct E, d.b.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (struct E, d.c.a.p[4]); /* OK */ + __builtin_offsetof (struct F, d.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (struct F, d.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (struct F, d.b.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (struct F, d.c.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (union G, d.p[4]); /* OK */ + __builtin_offsetof (union G, d.a.p[4]); /* OK */ + __builtin_offsetof (union G, d.b.a.p[4]); /* { dg-warning "greater than size" } */ + __builtin_offsetof (union G, d.c.a.p[4]); /* OK */ +} diff --git a/gcc/testsuite/g++.dg/abi/pragma-pack1.C b/gcc/testsuite/g++.dg/abi/pragma-pack1.C new file mode 100644 index 00000000000..d90fc200cbf --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/pragma-pack1.C @@ -0,0 +1,37 @@ +// PR c++/7046 + +extern "C" int printf (const char *, ...); + +#pragma pack(4) + +template <typename X > +struct T +{ + char x1; /* Usually 3 padding bytes are added after x1 member. */ + int x2; +}; + +template <class T> +int f() +{ + struct A { char i1; int i2; }; + return sizeof (A); +} + +#pragma pack(1) +template struct T<int>; /* T<int> is instantiated here */ +template int f<int>(); + +#pragma pack(4) +template struct T<float>; /* T<float> is instantiated here */ +template int f<double>(); + +int main() +{ + printf("sizeof T<int> = %d\n", sizeof(T<int>)); + printf("sizeof T<float> = %d\n", sizeof(T<float>)); + printf("f<int>() = %d\n", f<int>()); + printf("f<float>() = %d\n", f<float>()); + return (sizeof(T<int>) != sizeof(T<float>) + || f<int>() != f<float>()); +} diff --git a/gcc/testsuite/g++.dg/abi/thunk5.C b/gcc/testsuite/g++.dg/abi/thunk5.C new file mode 100644 index 00000000000..15526bf9ee3 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/thunk5.C @@ -0,0 +1,13 @@ +// PR c++/35067 +// The thunks should be weak even on targets without one-only support. +// { dg-require-weak "" } +// { dg-final { scan-assembler "weak.*ZTv" } } + +struct A +{ + virtual ~A() { } +}; + +struct B: virtual A { }; + +B b; diff --git a/gcc/testsuite/g++.dg/cpp0x/auto3.C b/gcc/testsuite/g++.dg/cpp0x/auto3.C index 3cea856e601..f792c07b169 100644 --- a/gcc/testsuite/g++.dg/cpp0x/auto3.C +++ b/gcc/testsuite/g++.dg/cpp0x/auto3.C @@ -7,7 +7,7 @@ auto x; // { dg-error "auto" } // If the type deduced for the template parameter U is not the same in each // deduction, the program is ill-formed. 
-auto i = 42, j = 42.0; // { dg-error "" "" { xfail *-*-* } } +auto i = 42, j = 42.0; // { dg-error "auto" } // New CWG issue auto a[2] = { 1, 2 }; // { dg-error "auto" } diff --git a/gcc/testsuite/g++.dg/cpp0x/deduce.C b/gcc/testsuite/g++.dg/cpp0x/deduce.C index 6bd05160898..635228cca08 100644 --- a/gcc/testsuite/g++.dg/cpp0x/deduce.C +++ b/gcc/testsuite/g++.dg/cpp0x/deduce.C @@ -5,7 +5,7 @@ template<typename T> struct same_type<T, T> {}; int lval_int; int rval_int(); int const lval_const_int=0; -int const rval_const_int(); +int const&& rval_const_int(); template <typename T> void deduce_lval_int(T && t) { diff --git a/gcc/testsuite/g++.dg/cpp0x/rv-return.C b/gcc/testsuite/g++.dg/cpp0x/rv-return.C new file mode 100644 index 00000000000..e52101feae1 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/rv-return.C @@ -0,0 +1,18 @@ +// PR c++/41815 +// { dg-options -std=c++0x } + +template<typename T, typename U> struct same_type; +template<typename T> struct same_type<T, T> {}; + +int const f() { return 0; } + +int &&r = f(); // binding "int&&" to "int" should succeed +same_type<decltype(f()), int const> s1; +same_type<decltype(0,f()), int> s2; + +template <class T> +T const g() { return 0; } + +int &&r2 = g<int>(); +same_type<decltype(g<int>()), int const> s3; +same_type<decltype(0,g<int>()), int> s4; diff --git a/gcc/testsuite/g++.dg/ext/always_inline-5.C b/gcc/testsuite/g++.dg/ext/always_inline-5.C new file mode 100644 index 00000000000..73caa094f47 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/always_inline-5.C @@ -0,0 +1,28 @@ +// { dg-do compile } +struct f +{ + inline f(void); + inline void f1(void); + int a; +}; + +inline __attribute__((always_inline)) f::f(void) +{ + a++; +} + +inline __attribute__((always_inline)) void f::f1(void) +{ + a++; +} + +void g(void) +{ + f a, b, c, d; + a.f1(); +} + +// f::f() should be inlined even at -O0 +// { dg-final { scan-assembler-not "_ZN1fC1Ev" } } +// Likewise for f::f1() +// { dg-final { scan-assembler-not "_ZN1f2f1Ev" } } diff --git a/gcc/testsuite/g++.dg/gomp/pr41967.C b/gcc/testsuite/g++.dg/gomp/pr41967.C new file mode 100644 index 00000000000..0eb489e8bee --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/pr41967.C @@ -0,0 +1,17 @@ +// PR c++/41967 +// { dg-do compile } +// { dg-options "-fopenmp" } + +int +foo () +{ + int sum = 0; +#pragma omp for collapse(2) + for (int i = 0; i < 5; ++i) + { + for (int j = 0; j < 5; ++j) + ++sum; + ++sum; // { dg-error "collapsed loops not perfectly nested" } + } + return sum; +} diff --git a/gcc/testsuite/g++.dg/init/static-init2.C b/gcc/testsuite/g++.dg/init/static-init2.C new file mode 100644 index 00000000000..34bf2b2388b --- /dev/null +++ b/gcc/testsuite/g++.dg/init/static-init2.C @@ -0,0 +1,3 @@ +// PR c++/36912 +// { dg-options -frounding-math } +const double c = .1, d = c+1; diff --git a/gcc/testsuite/g++.dg/init/synth2.C b/gcc/testsuite/g++.dg/init/synth2.C new file mode 100644 index 00000000000..795ce42893c --- /dev/null +++ b/gcc/testsuite/g++.dg/init/synth2.C @@ -0,0 +1,17 @@ +// PR c++/34180 + +struct G { + G(); // { dg-message "" "candidate" } + G(G&); // { dg-message "" "candidate" } +}; + +class A +{ // { dg-error "no match" } + const G g; +}; + +int main() +{ + A a; + A b = a; // { dg-message "required here" } +} diff --git a/gcc/testsuite/g++.dg/lookup/koenig5.C b/gcc/testsuite/g++.dg/lookup/koenig5.C index 139e3b86684..6ecc25daadb 100644 --- a/gcc/testsuite/g++.dg/lookup/koenig5.C +++ b/gcc/testsuite/g++.dg/lookup/koenig5.C @@ -8,39 +8,39 @@ namespace N { struct A {}; - void One (...); // { 
dg-error "conflict with" "" } - void (*Two) (...); // { dg-error "not a function" "" } - namespace Three {} // { dg-error "lookup finds|not a function" "" } + void One (...); + void (*Two) (...); + namespace Three {} } namespace M { struct B {}; - struct One {}; // { dg-error "lookup finds|not a function" "" } - void (*Two) (...); // { dg-error "conflict with" "" } - void Three (...); // { dg-error "conflict with" "" } + struct One {}; + void (*Two) (...); + void Three (...); } namespace O { struct C {}; - void Two (...); // { dg-error "conflict with" "" } + void Two (...); } void g (N::A *a, M::B *b, O::C *c) { One (a); // ok - One (b); // { dg-error "in call to" "" } - One (a, b); // { dg-error "in call to" "" } + One (a, b); // ok + One (b); // { dg-error "not declared" } - Two (a); // ok - Two (a, a); // ok - Two (b); // ok Two (c); // ok - Two (a, b); // { dg-error "in call to" "" } - Two (a, c); // { dg-error "in call to" "" } + Two (a, c); // ok + Two (a); // { dg-error "not declared" } + Two (a, a); // error masked by earlier error + Two (b); // error masked by earlier error + Two (a, b); // error masked by earlier error - Three (a); // { dg-error "in call to" "" } Three (b); // ok - Three (a, b); // { dg-error "in call to" "" } + Three (a, b); // ok + Three (a); // { dg-error "not declared" } } diff --git a/gcc/testsuite/g++.dg/lookup/koenig6.C b/gcc/testsuite/g++.dg/lookup/koenig6.C new file mode 100644 index 00000000000..9fdf771e0cf --- /dev/null +++ b/gcc/testsuite/g++.dg/lookup/koenig6.C @@ -0,0 +1,18 @@ +// PR c++/17365 +// ADL should not find B::N. + +namespace A +{ + namespace B + { + template <typename T> struct N {int n_;}; + } + template <typename T> int N( T p ) { return p->n_; } + template <typename T> void f( T p ) { N(p); } // #1 +} +int main() +{ + A::B::N<int> n; + A::f(&n); + return 0; +} diff --git a/gcc/testsuite/g++.dg/lookup/koenig7.C b/gcc/testsuite/g++.dg/lookup/koenig7.C new file mode 100644 index 00000000000..bc54ba96b6b --- /dev/null +++ b/gcc/testsuite/g++.dg/lookup/koenig7.C @@ -0,0 +1,12 @@ +// PR c++/34870 + +template <typename T> +struct Foo +{ + friend void func(const Foo &) {} +}; + +void check(const Foo<int> & x) +{ + func(x); +} diff --git a/gcc/testsuite/g++.dg/lookup/using22.C b/gcc/testsuite/g++.dg/lookup/using22.C new file mode 100644 index 00000000000..2396f9ab185 --- /dev/null +++ b/gcc/testsuite/g++.dg/lookup/using22.C @@ -0,0 +1,17 @@ +// PR c++/39786 + +namespace A { + char (*f(char *p))[13] { return 0; } +} + +namespace B { + namespace C { + char (*f(int p))[42] { return 0; } + } + using namespace C; +} + +using namespace B; +using namespace A; + +char x[sizeof *::f(0) == 42 ? 1 : -1]; diff --git a/gcc/testsuite/g++.dg/opt/inline16.C b/gcc/testsuite/g++.dg/opt/inline16.C new file mode 100644 index 00000000000..6ee6d76d925 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/inline16.C @@ -0,0 +1,19 @@ +// PR c++/36959 +// We shouldn't have to emit fromSlotB just because we need shuf_BZZZ. 
+// { dg-options -O } +// { dg-final { scan-assembler-not "_ZL9fromSlotBv" } } + +static inline int *fromSlotB(void) +{ + static int shuf_BZZZ = 1; + return &shuf_BZZZ; +} + +int *p; + +int main(void) +{ + p = fromSlotB(); + return (*p != 1); +} + diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 4c9579d07cd..1ef6b2775d8 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,8 +1,9 @@ -/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and +/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -pedantic-errors. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */ #include <x86intrin.h> int dummy; + diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index b9e89169ccb..df33af8bbd9 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,6 +1,6 @@ -/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and +/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h, xopintrin.h and mm_malloc.h are usable with -O -fkeep-inline-functions. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/g++.dg/other/i386-5.C b/gcc/testsuite/g++.dg/other/i386-5.C index 6dcb2d3b0d0..383aae365bb 100644 --- a/gcc/testsuite/g++.dg/other/i386-5.C +++ b/gcc/testsuite/g++.dg/other/i386-5.C @@ -1,6 +1,6 @@ -/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and +/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -fkeep-inline-functions. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/g++.dg/other/i386-6.C b/gcc/testsuite/g++.dg/other/i386-6.C index 4c9579d07cd..2bd4609d671 100644 --- a/gcc/testsuite/g++.dg/other/i386-6.C +++ b/gcc/testsuite/g++.dg/other/i386-6.C @@ -1,7 +1,7 @@ -/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and +/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -pedantic-errors. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/g++.dg/other/i386-7.C b/gcc/testsuite/g++.dg/other/i386-7.C new file mode 100644 index 00000000000..e2ad51e528d --- /dev/null +++ b/gcc/testsuite/g++.dg/other/i386-7.C @@ -0,0 +1,8 @@ +/* Test that x86intrin.h is usable with -O -pedantic-errors. */ +/* We were using SSE4.2 builtins without the extension available. 
*/ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O -pedantic-errors" } */ + +#include <x86intrin.h> + +int dummy; diff --git a/gcc/testsuite/g++.dg/other/offsetof6.C b/gcc/testsuite/g++.dg/other/offsetof6.C new file mode 100644 index 00000000000..b77d1b99a77 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/offsetof6.C @@ -0,0 +1,26 @@ +// Contributed by Dodji Seketeli <dodji@redhat.com> +// Origin PR c++/38699 +// { dg-do compile } + +template<class T> +struct A +{ + const T *p; +}; + +struct B +{ + A<int> a; +}; + +template class A<char>; + +void +f0 () +{ + __builtin_offsetof(A<char>, p); // OK + __builtin_offsetof(A<char>, p[1]); // { dg-error "non constant address" } + __builtin_offsetof(B, a.p); // OK + __builtin_offsetof(B, a.p[1]); // { dg-error "non constant address" } +} + diff --git a/gcc/testsuite/g++.dg/parse/eh-decl.C b/gcc/testsuite/g++.dg/parse/eh-decl.C new file mode 100644 index 00000000000..1c72fd39f55 --- /dev/null +++ b/gcc/testsuite/g++.dg/parse/eh-decl.C @@ -0,0 +1,8 @@ +// PR c++/41876 + +struct A; + +void foo() +{ + try {} catch(int A) {} +} diff --git a/gcc/testsuite/g++.dg/plugin/plugin.exp b/gcc/testsuite/g++.dg/plugin/plugin.exp index 4ba73e53dac..72de92dfd42 100644 --- a/gcc/testsuite/g++.dg/plugin/plugin.exp +++ b/gcc/testsuite/g++.dg/plugin/plugin.exp @@ -48,6 +48,7 @@ load_lib plugin-support.exp # plugin_test_list={ {plugin1 test1 test2 ...} {plugin2 test1 ...} ... } set plugin_test_list [list \ { attribute_plugin.c attribute_plugin-test-1.C } \ + { pragma_plugin.c pragma_plugin-test-1.C } \ { selfassign.c self-assign-test-1.C self-assign-test-2.C self-assign-test-3.C } \ { dumb_plugin.c dumb-plugin-test-1.C } \ { header_plugin.c header-plugin-test.C } ] diff --git a/gcc/testsuite/g++.dg/plugin/pragma_plugin-test-1.C b/gcc/testsuite/g++.dg/plugin/pragma_plugin-test-1.C new file mode 100644 index 00000000000..3c084208b71 --- /dev/null +++ b/gcc/testsuite/g++.dg/plugin/pragma_plugin-test-1.C @@ -0,0 +1,18 @@ +// { dg-warning "Callback to register pragmas" "" { target *-*-* } 0 } + +int some_func (int c); + +#pragma GCCPLUGIN sayhello "here" // { dg-warning "'pragma GCCPLUGIN sayhello' outside of function: here" } + +int some_func (const char* s) +{ +#pragma GCCPLUGIN sayhello "at start" // { dg-warning "'pragma GCCPLUGIN sayhello' from function 'some_func': at start" } + +#define DO_PRAGMA(x) _Pragma(#x) + if (!s) + { + DO_PRAGMA(GCCPLUGIN sayhello "in block"); // { dg-warning "'pragma GCCPLUGIN sayhello' from function 'some_func': in block" } + return 0; + } + return 1; +} diff --git a/gcc/testsuite/g++.dg/plugin/pragma_plugin.c b/gcc/testsuite/g++.dg/plugin/pragma_plugin.c new file mode 100644 index 00000000000..237fcdddfa9 --- /dev/null +++ b/gcc/testsuite/g++.dg/plugin/pragma_plugin.c @@ -0,0 +1,60 @@ +/* Demonstrates how to add custom pragmas */ + +#include "gcc-plugin.h" +#include <stdlib.h> +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "function.h" +#include "c-pragma.h" +#include "cpplib.h" +#include "tree-pass.h" +#include "intl.h" + +int plugin_is_GPL_compatible; + + +/* handler of #pragma GCCPLUGIN sayhello "message" is quite similar to + handler of #pragma GCC message...*/ + +static void +handle_pragma_sayhello (cpp_reader *dummy) +{ + tree message = 0; + if (pragma_lex (&message) != CPP_STRING) + { + warning (OPT_Wpragmas, "%<#pragma GCCPLUGIN sayhello%> is not a string"); + return; + } + if (TREE_STRING_LENGTH (message) > 1) + if (cfun) 
+ warning (OPT_Wpragmas, + "%<pragma GCCPLUGIN sayhello%> from function %qE: %s", + cfun->decl, TREE_STRING_POINTER (message)); + else + warning (OPT_Wpragmas, + "%<pragma GCCPLUGIN sayhello%> outside of function: %s", + TREE_STRING_POINTER (message)); +} + +/* Plugin callback called during pragma registration */ + +static void +register_my_pragma (void *event_data, void *data) +{ + warning (0, G_("Callback to register pragmas")); + c_register_pragma ("GCCPLUGIN", "sayhello", handle_pragma_sayhello); +} + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + const char *plugin_name = plugin_info->base_name; + + register_callback (plugin_name, PLUGIN_PRAGMAS, register_my_pragma, NULL); + return 0; +} diff --git a/gcc/testsuite/g++.dg/template/crash56.C b/gcc/testsuite/g++.dg/template/crash56.C index 1efa3500d8a..03bddf42a57 100644 --- a/gcc/testsuite/g++.dg/template/crash56.C +++ b/gcc/testsuite/g++.dg/template/crash56.C @@ -7,10 +7,10 @@ namespace N { - struct A { A (A*); }; // { dg-error "lookup finds" "" } + struct A { A (A*); }; } template<typename T> void g (N::A *p) { - (void) A (p); // { dg-error "in call" "" } + (void) A (p); // { dg-message "" "" } } diff --git a/gcc/testsuite/g++.dg/template/nested3.C b/gcc/testsuite/g++.dg/template/nested3.C index 1ae4bf7647d..5652e178a70 100644 --- a/gcc/testsuite/g++.dg/template/nested3.C +++ b/gcc/testsuite/g++.dg/template/nested3.C @@ -5,13 +5,13 @@ class A { int _k; }; T1 _t1; - T2 _t2; // { dg-message "instantiated" } + T2 _t2; }; template <class U> -class B { // { dg-error "declaration" } +class B { class SubB1 { - B _i; // { dg-error "incomplete type" } + B _i; }; class SubB2 { @@ -19,7 +19,6 @@ class B { // { dg-error "declaration" } }; A<U,SubB1>::SubA<SubB2> _a; // { dg-error "not a base type" "not base" } // { dg-message "note" "note" { target *-*-* } 20 } - // { dg-message "instantiated" "inst" { target *-*-* } 20 } // { dg-error "non-template" "non-template" { target *-*-* } 20 } }; diff --git a/gcc/testsuite/g++.dg/template/overload11.C b/gcc/testsuite/g++.dg/template/overload11.C new file mode 100644 index 00000000000..d7b0a7c9f1c --- /dev/null +++ b/gcc/testsuite/g++.dg/template/overload11.C @@ -0,0 +1,27 @@ +// PR c++/39413 +// We don't need to instantiate Wrapper<int> to check the +// foo(const Thingy&) overload. + +template <class T> struct Incomplete; + +template <typename T> class Wrapper +{ + Incomplete<T> i; +}; + +template <typename T> struct Thingy +{ + Thingy(); + Thingy(const Wrapper<T>& v); + + template <typename X> void foo(const Thingy<X>&); + void foo(const Thingy&); +}; + +int main() +{ + Thingy<int> ap1; + Thingy<float> bp1; + + ap1.foo(bp1); +} diff --git a/gcc/testsuite/g++.dg/template/partial6.C b/gcc/testsuite/g++.dg/template/partial6.C new file mode 100644 index 00000000000..80bbfe3c138 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/partial6.C @@ -0,0 +1,31 @@ +// PR c++/41703 +// The second GetAllSize template is more specialized because even though +// deduction on each parameter type succeeds, we never get a template +// argument for its X to make it match the first template. 
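// (Editorial gloss, not part of the committed test: the second overload's
// parameter type, 'const typename TSizeEnabler<X, &X::func>::TClass &',
// is a nondeduced context, so during partial ordering deduction against
// it never produces an argument for that X; per the reasoning above, the
// second template is therefore the more specialized one and is selected
// by the call in main below.)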
+ +template <typename T, int (T::*)() const> +struct TSizeEnabler +{ + typedef T TClass; +}; + +template <typename X> +int +GetAllSize(const X &Var) +{ return sizeof(Var); } + +template <typename X> +int +GetAllSize(const typename TSizeEnabler<X, &X::func>::TClass &Var) +{ return Var.func(); } + +struct H +{ + int func() const; +}; + +int main() +{ + H b; + return GetAllSize< H >(b); +} diff --git a/gcc/testsuite/g++.dg/template/sfinae15.C b/gcc/testsuite/g++.dg/template/sfinae15.C new file mode 100644 index 00000000000..27bce255db3 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/sfinae15.C @@ -0,0 +1,23 @@ +// PR c++/40944 +// { dg-options -std=c++0x } +// { dg-do run } + +template<typename T> +struct make { static T&& it(); }; + +void (*pf)(int&) = 0; + +template< typename T > +int bar(T const& x, + decltype( pf(make<T const&>::it()) )* = 0 // SFINAE! + ) { + return 1; +} + +int bar(...) { + return 0; +} + +int main() { + return bar(42); +} diff --git a/gcc/testsuite/g++.dg/template/sfinae16.C b/gcc/testsuite/g++.dg/template/sfinae16.C new file mode 100644 index 00000000000..5ea564c9f86 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/sfinae16.C @@ -0,0 +1,34 @@ +// PR c++/41927 +// { dg-options "-std=c++0x -Wall" } + +// We were getting a spurious ||/&& warning about the enable_if with the +// source position of d1. + +template<typename Tp> + struct is_int + { static const bool value = true; }; + +template<bool, typename Tp = void> + struct enable_if + { }; + +template<typename Tp> + struct enable_if<true, Tp> + { typedef Tp type; }; + +template<typename Rep> + struct duration + { + duration() { } + + template<typename Rep2, typename = typename + enable_if<false || (true && is_int<Rep2>::value)>::type> + duration(const duration<Rep2>&) { } + }; + +int main() +{ + duration<int> d0; + duration<int> d1 = d0; +} + diff --git a/gcc/testsuite/g++.dg/torture/pr36191.C b/gcc/testsuite/g++.dg/torture/pr36191.C index 18051cedf7a..125d8a12205 100644 --- a/gcc/testsuite/g++.dg/torture/pr36191.C +++ b/gcc/testsuite/g++.dg/torture/pr36191.C @@ -1,6 +1,7 @@ // PR c++/36191 // { dg-do compile } // { dg-options "-fnon-call-exceptions" } +// { dg-skip-if "Frame pointer required for unwind tables" { sh*-*-* m68k*-*-* fido*-*-* } "-fomit-frame-pointer" "" } __complex__ double foo (__complex__ double x, double y) diff --git a/gcc/testsuite/g++.old-deja/g++.ns/koenig5.C b/gcc/testsuite/g++.old-deja/g++.ns/koenig5.C index d84fc8dbf41..33061ad0bd8 100644 --- a/gcc/testsuite/g++.old-deja/g++.ns/koenig5.C +++ b/gcc/testsuite/g++.old-deja/g++.ns/koenig5.C @@ -1,5 +1,5 @@ // { dg-do assemble } -// To find function pointers in Koenig lookup is ok as long as we only find one. +// Function pointers are ignored in Koenig lookup. (DR 218) namespace A{ void foo(); struct X{}; @@ -14,5 +14,5 @@ void g() foo(new X); // ok -- DR 218 says that we find the global // foo variable first, and therefore do not // perform argument-dependent lookup. - bar(new X); // ok + bar(new X); // { dg-error "not declared" } } diff --git a/gcc/testsuite/gcc.c-torture/execute/pr41917.c b/gcc/testsuite/gcc.c-torture/execute/pr41917.c new file mode 100644 index 00000000000..4a9ada921c4 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr41917.c @@ -0,0 +1,21 @@ +/* PR rtl-optimization/41917 */ + +extern void abort (void); +unsigned int a = 1; + +int +main (void) +{ + unsigned int b, c, d; + + if (sizeof (int) != 4 || (int) 0xc7d24b5e > 0) + return 0; + + c = 0xc7d24b5e; + d = a | -2; + b = (d == 0) ? 
c : (c % d); + if (b != c) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr41919.c b/gcc/testsuite/gcc.c-torture/execute/pr41919.c new file mode 100644 index 00000000000..4ca09141b5e --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr41919.c @@ -0,0 +1,39 @@ +extern void abort (void); + +#define assert(x) if(!(x)) abort() + +struct S1 +{ + char f0; +}; + +int g_23 = 0; + +static struct S1 +foo (void) +{ + int *l_100 = &g_23; + int **l_110 = &l_100; + struct S1 l_128 = { 1 }; + assert (l_100 == &g_23); + assert (l_100 == &g_23); + assert (l_100 == &g_23); + assert (l_100 == &g_23); + assert (l_100 == &g_23); + assert (l_100 == &g_23); + assert (l_100 == &g_23); + return l_128; +} + +static char bar(char si1, char si2) +{ + return (si1 <= 0) ? si1 : (si2 * 2); +} +int main (void) +{ + struct S1 s = foo(); + if (bar(0x99 ^ (s.f0 && 1), 1) != -104) + abort (); + return 0; +} + diff --git a/gcc/testsuite/gcc.c-torture/execute/pr41935.c b/gcc/testsuite/gcc.c-torture/execute/pr41935.c new file mode 100644 index 00000000000..ef8d08ce023 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr41935.c @@ -0,0 +1,25 @@ +/* PR middle-end/41935 */ + +extern void abort (void); + +long int +foo (int n, int i, int j) +{ + typedef int T[n]; + struct S { int a; T b[n]; }; + return __builtin_offsetof (struct S, b[i][j]); +} + +int +main (void) +{ + typedef int T[5]; + struct S { int a; T b[5]; }; + if (foo (5, 2, 3) + != __builtin_offsetof (struct S, b) + (5 * 2 + 3) * sizeof (int)) + abort (); + if (foo (5, 5, 5) + != __builtin_offsetof (struct S, b) + (5 * 5 + 5) * sizeof (int)) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr41935.c b/gcc/testsuite/gcc.dg/pr41935.c new file mode 100644 index 00000000000..e6a1b28670a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr41935.c @@ -0,0 +1,25 @@ +/* PR middle-end/41935 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +extern void abort (void); +struct A { int a; int b[10]; }; + +int +foo (struct A *p) +{ + return __builtin_offsetof (struct A, b[p->a]); +} + +int +main () +{ + struct A a; + a.a = 7; + if (foo (&a) != 7 * sizeof (int) + __builtin_offsetof (struct A, b)) + abort (); + a.a = 2; + if (foo (&a) != 2 * sizeof (int) + __builtin_offsetof (struct A, b)) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr41963.c b/gcc/testsuite/gcc.dg/pr41963.c new file mode 100644 index 00000000000..f8bf4a1b81c --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr41963.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math" } */ +#include <math.h> + +extern float sqrtf(float); + +static __attribute__((noinline)) void f (float *dst, float *src) +{ + int i, j; + for (i = 0; i < 2; i++) + { + float len; + dst[0] = src[0]; + dst[1] = src[1]; + len = sqrtf (dst[0] * dst[0] + dst[1] * dst[1]); + if (len > 0.5f) + { + len = 1.0f / len; + dst[0] *= len; + dst[1] *= len; + } + } +} + +extern void abort (void); + +int main() +{ + float dst[2], src[2]; + src[0] = 2.0f; + src[1] = 5.0f; + f (dst, src); + if (fabsf (dst[0] * dst[0] + dst[1] * dst[1] - 1.0f) > 0.01f) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c index ffbd220c38b..5627c265523 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c @@ -35,6 +35,6 @@ int main() return 0; } -/* { dg-final 
{ scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c index a2ad28b14d8..5627c265523 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c @@ -35,6 +35,6 @@ int main() return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c index e117196c1f5..4a2da227e3c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-27.c +++ b/gcc/testsuite/gcc.dg/vect/vect-27.c @@ -46,5 +46,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail {! vect_hw_misalign} } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c index 1645a2dfcca..0ad28488056 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-29.c +++ b/gcc/testsuite/gcc.dg/vect/vect-29.c @@ -51,6 +51,6 @@ int main (void) /* The initialization induction loop (with aligned access) is also vectorized. */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" {target vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c index ebed418e338..3ba1c6f7fde 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-42.c +++ b/gcc/testsuite/gcc.dg/vect/vect-42.c @@ -65,6 +65,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { ! vector_alignment_reachable } && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { { vect_no_align || vect_hw_misalign } || { ! vector_alignment_reachable } } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align || vect_hw_misalign } || { ! 
vector_alignment_reachable } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c index 5f2228fdd71..ef1a4635bfa 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-44.c +++ b/gcc/testsuite/gcc.dg/vect/vect-44.c @@ -66,7 +66,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align || {! vect_hw_misalign } } || {! vector_alignment_reachable} } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || {! vector_alignment_reachable} } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { {! vector_alignment_reachable} && {{! vect_no_align} && {! vect_hw_misalign} } } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c index f66a8622ec0..e47ee00de91 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-48.c +++ b/gcc/testsuite/gcc.dg/vect/vect-48.c @@ -55,6 +55,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail {! vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c index 74de0fb4fa8..068c804a168 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-50.c +++ b/gcc/testsuite/gcc.dg/vect/vect-50.c @@ -63,7 +63,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target vect_hw_misalign } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align || {! vect_hw_misalign } } || {! vector_alignment_reachable} } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || {! vector_alignment_reachable} } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { {! vector_alignment_reachable} && { {! 
vect_no_align } && {! vect_hw_misalign } } } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c index 426092e3915..af485abbd14 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-52.c @@ -56,6 +56,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" {xfail {! vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c index 945d7caa6c7..7b7da123591 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-56.c +++ b/gcc/testsuite/gcc.dg/vect/vect-56.c @@ -68,6 +68,6 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail {! vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c index 90234dd0db4..cbdf63db123 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-60.c +++ b/gcc/testsuite/gcc.dg/vect/vect-60.c @@ -69,6 +69,6 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c index 1a9f13ca869..67a19751952 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-72.c +++ b/gcc/testsuite/gcc.dg/vect/vect-72.c @@ -47,5 +47,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c index 2cbc6216354..3346e71e523 100644 --- 
a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c @@ -54,6 +54,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" {xfail { vect_no_align || vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" {xfail { vect_no_align } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c index 522ed30ead3..450855137a6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c @@ -61,6 +61,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 6 "vect" { target vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 "vect" {xfail { vect_no_align || vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 "vect" {xfail { vect_no_align } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/arm/pr40835.c b/gcc/testsuite/gcc.target/arm/pr40835.c new file mode 100644 index 00000000000..baf94032101 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr40835.c @@ -0,0 +1,55 @@ +/* { dg-options "-mthumb -Os -march=armv5te" } */ +/* { dg-final { scan-assembler-not "cmp" } } */ + +int bar(); +void goo(int, int); + +void eq() +{ + int v = bar(); + if (v == 0) + return; + goo(1, v); +} + +void ge() +{ + int v = bar(); + if (v >= 0) + return; + goo(1, v); +} + +void gt() +{ + int v = bar(); + if (v > 0) + return; + goo(1, v); +} + +void lt() +{ + int v = bar(); + if (v < 0) + return; + goo(1, v); +} + +void le() +{ + int v = bar(); + if (v <= 0) + return; + goo(1, v); +} + +unsigned int foo(); + +void leu() +{ + unsigned int v = foo(); + if (v <= 0) + return; + goo(1, v); +} diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index 3bfac8d6f71..3ef9df66ca5 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -146,6 +146,20 @@ proc check_effective_target_fma4 { } { } "-O2 -mfma4" ] } +# Return 1 if xop instructions can be compiled. +proc check_effective_target_xop { } { + return [check_no_compiler_messages xop object { + typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + typedef short __v8hi __attribute__ ((__vector_size__ (16))); + __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) + { + return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A, + (__v8hi)__B, + (__v8hi)__C); + } + } "-O2 -mxop" ] +} + # If a testcase doesn't have special options, use these. 
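# (Editorial note: with the check_effective_target_xop proc above in
# place, an individual test gates on runtime XOP support with
#   { dg-require-effective-target xop }
# as the new xop-*.c run tests added elsewhere in this patch do.)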
global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/pr41900.c b/gcc/testsuite/gcc.target/i386/pr41900.c index ac5f8636bbd..55f712d1fa4 100644 --- a/gcc/testsuite/gcc.target/i386/pr41900.c +++ b/gcc/testsuite/gcc.target/i386/pr41900.c @@ -1,11 +1,13 @@ -/* { dg-do run } */ +/* { dg-do compile } */ /* { dg-require-effective-target ilp32 } */ /* { dg-options "-O2 -fomit-frame-pointer -mpreferred-stack-boundary=2" } */ int main () { - unsigned code = 0xc3; + volatile unsigned code = 0xc3; ((void (*)(void)) &code) (); return 0; } + +/* { dg-final { scan-assembler-not "call\[ \\t\]+\\*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 85c36c8be31..d03c41bf10a 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -1,7 +1,7 @@ -/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are +/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -maes -mpclmul" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 1ce9d960884..2ef63d5fc68 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,10 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -maes -mpclmul" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul" } */ #include <mm_malloc.h> /* Test that the intrinsics compile with optimization. All of them are - defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -125,4 +125,10 @@ #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) +/* xopintrin.h */ +#define __builtin_ia32_vprotbi(A, N) __builtin_ia32_vprotbi (A,1) +#define __builtin_ia32_vprotwi(A, N) __builtin_ia32_vprotwi (A,1) +#define __builtin_ia32_vprotdi(A, N) __builtin_ia32_vprotdi (A,1) +#define __builtin_ia32_vprotqi(A, N) __builtin_ia32_vprotqi (A,1) + #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index c1ddb96e5c3..783cd0af106 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,10 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -msse4a -maes -mpclmul" } */ #include <mm_malloc.h> /* Test that the intrinsics compile without optimization. All of them are - defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h that reference the proper builtin functions. 
Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -155,3 +155,10 @@ test_2 (_m_pinsrw, __m64, __m64, int, 1) test_1 (_mm_shuffle_pi16, __m64, __m64, 1) test_1 (_m_pshufw, __m64, __m64, 1) test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) + +/* xopintrin.h */ +test_1 ( _mm_roti_epi8, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi16, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi32, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi64, __m128i, __m128i, 1) + diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index eeae0fcab75..541cad4d439 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -5,7 +5,7 @@ #include <mm_malloc.h> /* Test that the intrinsics compile without optimization. All of them are - defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -37,7 +37,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul") +#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop") #endif /* Following intrinsics require immediate arguments. They @@ -159,3 +159,13 @@ test_1 (_mm_round_pd, __m128d, __m128d, 1) test_1 (_mm_round_ps, __m128, __m128, 1) test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) test_2 (_mm_round_ss, __m128, __m128, __m128, 1) + +/* xopintrin.h (XOP). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC target ("xop") +#endif +#include <x86intrin.h> +test_1 ( _mm_roti_epi8, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi16, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi32, __m128i, __m128i, 1) +test_1 ( _mm_roti_epi64, __m128i, __m128i, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 63cb811d042..3e0fa1f5ca4 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -4,7 +4,7 @@ #include <mm_malloc.h> /* Test that the intrinsics compile with optimization. All of them are - defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. 
*/ @@ -93,14 +93,13 @@ #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) -/* bmmintrin.h */ -#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1) -#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1) -#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) -#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) +/* xopintrin.h */ +#define __builtin_ia32_vprotbi(A, B) __builtin_ia32_vprotbi(A,1) +#define __builtin_ia32_vprotwi(A, B) __builtin_ia32_vprotwi(A,1) +#define __builtin_ia32_vprotdi(A, B) __builtin_ia32_vprotdi(A,1) +#define __builtin_ia32_vprotqi(A, B) __builtin_ia32_vprotqi(A,1) - -#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul") +#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul,xop") #include <wmmintrin.h> #include <smmintrin.h> #include <mm3dnow.h> diff --git a/gcc/testsuite/gcc.target/i386/xop-check.h b/gcc/testsuite/gcc.target/i386/xop-check.h new file mode 100644 index 00000000000..2dede33d851 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-check.h @@ -0,0 +1,20 @@ +#include <stdlib.h> + +#include "cpuid.h" + +static void xop_test (void); + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx)) + return 0; + + /* Run XOP test only if host has XOP support. */ + if (ecx & bit_XOP) + xop_test (); + + exit (0); +} diff --git a/gcc/testsuite/gcc.target/i386/xop-haddX.c b/gcc/testsuite/gcc.target/i386/xop-haddX.c new file mode 100644 index 00000000000..7d3220baffe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-haddX.c @@ -0,0 +1,206 @@ +/* { dg-do run } */ +/* { dg-require-effective-target xop } */ +/* { dg-options "-O2 -mxop" } */ + +#include "xop-check.h" + +#include <x86intrin.h> +#include <string.h> + +#define NUM 10 + +union +{ + __m128i x[NUM]; + signed char ssi[NUM * 16]; + short si[NUM * 8]; + int li[NUM * 4]; + long long lli[NUM * 2]; +} dst, res, src1; + +static void +init_sbyte () +{ + int i; + for (i=0; i < NUM * 16; i++) + src1.ssi[i] = i; +} + +static void +init_sword () +{ + int i; + for (i=0; i < NUM * 8; i++) + src1.si[i] = i; +} + + +static void +init_sdword () +{ + int i; + for (i=0; i < NUM * 4; i++) + src1.li[i] = i; +} + +static int +check_sbyte2word () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 8; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; + if (res.si[s] != dst.si[s]) + check_fails++; + } + } +} + +static int +check_sbyte2dword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 4; j++) + { + t = i + (4 * j); + s = (i / 4) + j; + res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] + + src1.ssi[t + 3]); + if (res.li[s] != dst.li[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_sbyte2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 2; j++) + { + t = i + (8 * j); + s = (i / 8) + j; + res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] + + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) + + (src1.ssi[t + 6] + src1.ssi[t + 7])); + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_sword2dword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < (NUM * 8); i = i + 8) + { + for (j = 0; j < 4; j++) + { + t = i + (2 * j); + s = (i / 2) + 
j; + res.li[s] = src1.si[t] + src1.si[t + 1] ; + if (res.li[s] != dst.li[s]) + check_fails++; + } + } +} + +static int +check_sword2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 8; i = i + 8) + { + for (j = 0; j < 2; j++) + { + t = i + (4 * j); + s = (i / 4) + j; + res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] + + src1.si[t + 3]); + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_dword2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < (NUM * 4); i = i + 4) + { + for (j = 0; j < 2; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.lli[s] = src1.li[t] + src1.li[t + 1] ; + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } +} + +static void +xop_test (void) +{ + int i; + + init_sbyte (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddw_epi8 (src1.x[i]); + + if (check_sbyte2word()) + abort (); + + + for (i = 0; i < (NUM ); i++) + dst.x[i] = _mm_haddd_epi8 (src1.x[i]); + + if (check_sbyte2dword()) + abort (); + + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epi8 (src1.x[i]); + + if (check_sbyte2qword()) + abort (); + + + init_sword (); + + for (i = 0; i < (NUM ); i++) + dst.x[i] = _mm_haddd_epi16 (src1.x[i]); + + if (check_sword2dword()) + abort (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epi16 (src1.x[i]); + + if (check_sword2qword()) + abort (); + + + init_sdword (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epi32 (src1.x[i]); + + if (check_dword2qword()) + abort (); + +} diff --git a/gcc/testsuite/gcc.target/i386/xop-hadduX.c b/gcc/testsuite/gcc.target/i386/xop-hadduX.c new file mode 100644 index 00000000000..9c7ea9a2a60 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-hadduX.c @@ -0,0 +1,207 @@ +/* { dg-do run } */ +/* { dg-require-effective-target xop } */ +/* { dg-options "-O2 -mxop" } */ + +#include "xop-check.h" + +#include <x86intrin.h> +#include <string.h> + +#define NUM 10 + +union +{ + __m128i x[NUM]; + unsigned char ssi[NUM * 16]; + unsigned short si[NUM * 8]; + unsigned int li[NUM * 4]; + unsigned long long lli[NUM * 2]; +} dst, res, src1; + +static void +init_byte () +{ + int i; + for (i=0; i < NUM * 16; i++) + src1.ssi[i] = i; +} + +static void +init_word () +{ + int i; + for (i=0; i < NUM * 8; i++) + src1.si[i] = i; +} + + +static void +init_dword () +{ + int i; + for (i=0; i < NUM * 4; i++) + src1.li[i] = i; +} + +static int +check_byte2word () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 8; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; + if (res.si[s] != dst.si[s]) + check_fails++; + } + } +} + +static int +check_byte2dword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 4; j++) + { + t = i + (4 * j); + s = (i / 4) + j; + res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] + + src1.ssi[t + 3]); + if (res.li[s] != dst.li[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_byte2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 2; j++) + { + t = i + (8 * j); + s = (i / 8) + j; + res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] + + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) + + (src1.ssi[t + 6] + src1.ssi[t + 7])); + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_word2dword () +{ + int i, j, s, 
t, check_fails = 0; + for (i = 0; i < (NUM * 8); i = i + 8) + { + for (j = 0; j < 4; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.li[s] = src1.si[t] + src1.si[t + 1] ; + if (res.li[s] != dst.li[s]) + check_fails++; + } + } +} + +static int +check_word2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 8; i = i + 8) + { + for (j = 0; j < 2; j++) + { + t = i + (4 * j); + s = (i / 4) + j; + res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] + + src1.si[t + 3]); + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } + return check_fails++; +} + +static int +check_dword2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < (NUM * 4); i = i + 4) + { + for (j = 0; j < 2; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.lli[s] = src1.li[t] + src1.li[t + 1] ; + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } +} + +static void +xop_test (void) +{ + int i; + + /* Check haddubw */ + init_byte (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddw_epu8 (src1.x[i]); + + if (check_byte2word()) + abort (); + + /* Check haddubd */ + for (i = 0; i < (NUM ); i++) + dst.x[i] = _mm_haddd_epu8 (src1.x[i]); + + if (check_byte2dword()) + abort (); + + /* Check haddubq */ + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epu8 (src1.x[i]); + + if (check_byte2qword()) + abort (); + + /* Check hadduwd */ + init_word (); + + for (i = 0; i < (NUM ); i++) + dst.x[i] = _mm_haddd_epu16 (src1.x[i]); + + if (check_word2dword()) + abort (); + + /* Check haddbuwq */ + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epu16 (src1.x[i]); + + if (check_word2qword()) + abort (); + + /* Check hadudq */ + init_dword (); + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_haddq_epu32 (src1.x[i]); + + if (check_dword2qword()) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/xop-hsubX.c b/gcc/testsuite/gcc.target/i386/xop-hsubX.c new file mode 100644 index 00000000000..f0fa9b312f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-hsubX.c @@ -0,0 +1,128 @@ +/* { dg-do run } */ +/* { dg-require-effective-target xop } */ +/* { dg-options "-O2 -mxop" } */ + +#include "xop-check.h" + +#include <x86intrin.h> +#include <string.h> + +#define NUM 10 + +union +{ + __m128i x[NUM]; + signed char ssi[NUM * 16]; + short si[NUM * 8]; + int li[NUM * 4]; + long long lli[NUM * 2]; +} dst, res, src1; + +static void +init_sbyte () +{ + int i; + for (i=0; i < NUM * 16; i++) + src1.ssi[i] = i; +} + +static void +init_sword () +{ + int i; + for (i=0; i < NUM * 8; i++) + src1.si[i] = i; +} + + +static void +init_sdword () +{ + int i; + for (i=0; i < NUM * 4; i++) + src1.li[i] = i; +} + +static int +check_sbyte2word () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < NUM * 16; i = i + 16) + { + for (j = 0; j < 8; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.si[s] = src1.ssi[t] - src1.ssi[t + 1] ; + if (res.si[s] != dst.si[s]) + check_fails++; + } + } +} + +static int +check_sword2dword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < (NUM * 8); i = i + 8) + { + for (j = 0; j < 4; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.li[s] = src1.si[t] - src1.si[t + 1] ; + if (res.li[s] != dst.li[s]) + check_fails++; + } + } +} + +static int +check_dword2qword () +{ + int i, j, s, t, check_fails = 0; + for (i = 0; i < (NUM * 4); i = i + 4) + { + for (j = 0; j < 2; j++) + { + t = i + (2 * j); + s = (i / 2) + j; + res.lli[s] = src1.li[t] - src1.li[t + 1] ; + if (res.lli[s] != dst.lli[s]) + check_fails++; + } + } +} + +static void +xop_test (void) +{ + int i; + + /* 
Check hsubbw */ + init_sbyte (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_hsubw_epi8 (src1.x[i]); + + if (check_sbyte2word()) + abort (); + + + /* Check hsubwd */ + init_sword (); + + for (i = 0; i < (NUM ); i++) + dst.x[i] = _mm_hsubd_epi16 (src1.x[i]); + + if (check_sword2dword()) + abort (); + + /* Check hsubdq */ + init_sdword (); + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_hsubq_epi32 (src1.x[i]); + + if (check_dword2qword()) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c b/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c new file mode 100644 index 00000000000..0406d023df5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c @@ -0,0 +1,36 @@ +/* Test that the compiler properly optimizes floating point multiply and add + instructions vector into pmacsdd/etc. on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + int i32[SIZE]; + long i64[SIZE]; +} a, b, c, d; + +void +imul32_to_64 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.i64[i] = ((long)b.i32[i]) * ((long)c.i32[i]); +} + +int main () +{ + imul32_to_64 (); + exit (0); +} + +/* { dg-final { scan-assembler "vpmacsdql" } } */ +/* { dg-final { scan-assembler "vpmacsdqh" } } */ diff --git a/gcc/testsuite/gcc.target/i386/xop-imul64-vector.c b/gcc/testsuite/gcc.target/i386/xop-imul64-vector.c new file mode 100644 index 00000000000..738cac04105 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-imul64-vector.c @@ -0,0 +1,36 @@ +/* Test that the compiler properly optimizes floating point multiply and add + instructions vector into pmacsdd/etc. on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + long i64[SIZE]; +} a, b, c, d; + +void +imul64 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.i64[i] = b.i64[i] * c.i64[i]; +} + +int main () +{ + imul64 (); + exit (0); +} + +/* { dg-final { scan-assembler "vpmacsdd" } } */ +/* { dg-final { scan-assembler "vphadddq" } } */ +/* { dg-final { scan-assembler "vpmacsdql" } } */ diff --git a/gcc/testsuite/gcc.target/i386/xop-pcmov.c b/gcc/testsuite/gcc.target/i386/xop-pcmov.c new file mode 100644 index 00000000000..d6375b1fd50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-pcmov.c @@ -0,0 +1,23 @@ +/* Test that the compiler properly optimizes conditional floating point moves + into the pcmov instruction on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop" } */ + +extern void exit (int); + +double dbl_test (double a, double b, double c, double d) +{ + return (a > b) ? 
c : d; +} + +double dbl_a = 1, dbl_b = 2, dbl_c = 3, dbl_d = 4, dbl_e; + +int main() +{ + dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d); + exit (0); +} + +/* { dg-final { scan-assembler "vpcmov" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-pcmov2.c b/gcc/testsuite/gcc.target/i386/xop-pcmov2.c new file mode 100644 index 00000000000..617da39da98 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-pcmov2.c @@ -0,0 +1,23 @@ +/* Test that the compiler properly optimizes conditional floating point moves + into the pcmov instruction on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop" } */ + +extern void exit (int); + +float flt_test (float a, float b, float c, float d) +{ + return (a > b) ? c : d; +} + +float flt_a = 1, flt_b = 2, flt_c = 3, flt_d = 4, flt_e; + +int main() +{ + flt_e = flt_test (flt_a, flt_b, flt_c, flt_d); + exit (0); +} + +/* { dg-final { scan-assembler "vpcmov" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-rotate1-vector.c b/gcc/testsuite/gcc.target/i386/xop-rotate1-vector.c new file mode 100644 index 00000000000..e3ae644d0b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-rotate1-vector.c @@ -0,0 +1,35 @@ +/* Test that the compiler properly optimizes vector rotate instructions + into vprot on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + unsigned u32[SIZE]; +} a, b, c; + +void +left_rotate32 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.u32[i] = (b.u32[i] << ((sizeof (int) * 8) - 4)) | (b.u32[i] >> 4); +} + +int +main () +{ + left_rotate32 (); + exit (0); +} + +/* { dg-final { scan-assembler "vprotd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-rotate2-vector.c b/gcc/testsuite/gcc.target/i386/xop-rotate2-vector.c new file mode 100644 index 00000000000..9996279bc0f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-rotate2-vector.c @@ -0,0 +1,35 @@ +/* Test that the compiler properly optimizes vector rotate instructions + into vprot on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + unsigned u32[SIZE]; +} a, b, c; + +void +right_rotate32_b (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - 4)) | (b.u32[i] << 4); +} + +int +main () +{ + right_rotate32_b (); + exit (0); +} + +/* { dg-final { scan-assembler "vprot" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-rotate3-vector.c b/gcc/testsuite/gcc.target/i386/xop-rotate3-vector.c new file mode 100644 index 00000000000..73d52f5f3f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-rotate3-vector.c @@ -0,0 +1,34 @@ +/* Test that the compiler properly optimizes vector rotate instructions + into vprot on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + unsigned u32[SIZE]; +} a, b, c; + +void +vector_rotate32 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - c.u32[i])) | (b.u32[i] << c.u32[i]); +} + +int main () +{ + vector_rotate32 (); + exit (0); +} + +/* { dg-final { scan-assembler "vprotd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-shift1-vector.c b/gcc/testsuite/gcc.target/i386/xop-shift1-vector.c new file mode 100644 index 00000000000..eb84439c496 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-shift1-vector.c @@ -0,0 +1,35 @@ +/* Test that the compiler properly optimizes vector shift instructions into + psha/pshl on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + int i32[SIZE]; + unsigned u32[SIZE]; +} a, b, c; + +void +left_shift32 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.i32[i] = b.i32[i] << c.i32[i]; +} + +int main () +{ + left_shift32 (); + exit (0); +} + +/* { dg-final { scan-assembler "vpshad" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-shift2-vector.c b/gcc/testsuite/gcc.target/i386/xop-shift2-vector.c new file mode 100644 index 00000000000..e59c30d021b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-shift2-vector.c @@ -0,0 +1,35 @@ +/* Test that the compiler properly optimizes vector shift instructions into + psha/pshl on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + int i32[SIZE]; + unsigned u32[SIZE]; +} a, b, c; + +void +right_sign_shift32 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.i32[i] = b.i32[i] >> c.i32[i]; +} + +int main () +{ + right_sign_shift32 (); + exit (0); +} + +/* { dg-final { scan-assembler "vpshad" } } */
diff --git a/gcc/testsuite/gcc.target/i386/xop-shift3-vector.c b/gcc/testsuite/gcc.target/i386/xop-shift3-vector.c new file mode 100644 index 00000000000..2b9302db52d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-shift3-vector.c @@ -0,0 +1,35 @@ +/* Test that the compiler properly optimizes vector shift instructions into + psha/pshl on XOP systems. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mxop -ftree-vectorize" } */ + +extern void exit (int); + +typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#define SIZE 10240 + +union { + __m128i i_align; + int i32[SIZE]; + unsigned u32[SIZE]; +} a, b, c; + +void +right_uns_shift32 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a.u32[i] = b.u32[i] >> c.i32[i]; +} + +int main () +{ + right_uns_shift32 (); + exit (0); +} + +/* { dg-final { scan-assembler "vpshld" } } */
diff --git a/gcc/testsuite/gcc.target/m68k/pr41302.c b/gcc/testsuite/gcc.target/m68k/pr41302.c new file mode 100644 index 00000000000..c3679923e65 --- /dev/null +++ b/gcc/testsuite/gcc.target/m68k/pr41302.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "move.l \%d0,\%a0" { target *-*-*linux* } } } */ + +struct pts { + int c; +}; + +unsigned int bar (struct pts *a, int b); + +struct pts * foo (struct pts *a, int b) +{ + return (struct pts *) bar (a, b); +}
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-33.c b/gcc/testsuite/gcc.target/powerpc/altivec-33.c new file mode 100644 index 00000000000..c1c935a1c59 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-33.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O2 -maltivec" } */ + +/* We should only produce one vspltw as we already splatted the value. */ +/* { dg-final { scan-assembler-times "vspltw" 1 } } */ + +#include <altivec.h> + +vector float f(vector float a) +{ + vector float b = vec_splat (a, 2); + return vec_splat (b, 0); +} + +
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c index b7e1329a151..b99bcca49f4 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c @@ -54,7 +54,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" {xfail {! vect_hw_misalign } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 0 "vect" {xfail {! vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c index a5449ffdd9e..32d05b29829 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c @@ -54,7 +54,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" {xfail {! vect_hw_misalign } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 0 "vect" {xfail {! 
vect_hw_misalign } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/rx/builtins.c b/gcc/testsuite/gcc.target/rx/builtins.c index 07448024b44..2a6241d7cce 100644 --- a/gcc/testsuite/gcc.target/rx/builtins.c +++ b/gcc/testsuite/gcc.target/rx/builtins.c @@ -17,7 +17,6 @@ to correctly set the psw flags. */ int saturate_add (int, int) __attribute__((__noinline__)); -int subtract_with_borrow (int, int, int) __attribute__((__noinline__)); int exchange (int, int) __attribute__((__noinline__)); int @@ -33,6 +32,13 @@ saturate_add (int arg1, int arg2) return __builtin_rx_sat (arg1); } +int +exchange (int arg1, int arg2) +{ + arg1 = __builtin_rx_xchg (arg2); + return arg1; +} + long multiply_and_accumulate (long arg1, long arg2, long arg3) { @@ -157,3 +163,9 @@ rmpa (int * multiplicand, int * multiplier, int num) { __builtin_rx_rmpa (); } + +void +set_interrupts (void) +{ + __builtin_mvtipl (3); +} diff --git a/gcc/testsuite/gcc.target/rx/interrupts.c b/gcc/testsuite/gcc.target/rx/interrupts.c index 910e870f11b..cdc4903ded8 100644 --- a/gcc/testsuite/gcc.target/rx/interrupts.c +++ b/gcc/testsuite/gcc.target/rx/interrupts.c @@ -1,10 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-mint-register=3" } */ +/* { dg-options "-mint-register=3 -msave-acc-in-interrupts" } */ /* Verify that the RX specific function attributes work. */ +void fast_interrupt (void) __attribute__((__fast_interrupt__)); void interrupt (void) __attribute__((__interrupt__)); -void exception (void) __attribute__((__exception__)); int naked (int) __attribute__((__naked__)); int flag = 0; @@ -13,16 +13,16 @@ int flag = 0; by the -fixed-xxx gcc command line option. Returns via RTFI. */ void -interrupt (void) +fast_interrupt (void) { flag = 1; } -/* Exception handler. Must preserve any register it uses, even +/* Interrupt handler. Must preserve any register it uses, even call clobbered ones. Returns via RTE. */ void -exception (void) +interrupt (void) { switch (flag) { diff --git a/gcc/testsuite/gcc.target/rx/rx-abi-function-tests.c b/gcc/testsuite/gcc.target/rx/rx-abi-function-tests.c index 0c4ec3f6b05..e07ff71a007 100644 --- a/gcc/testsuite/gcc.target/rx/rx-abi-function-tests.c +++ b/gcc/testsuite/gcc.target/rx/rx-abi-function-tests.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-options "-msim" } */ -/* Note: The -msim abiove is actually there to override the default +/* Note: The -msim above is actually there to override the default options which include -ansi -pendantic and -Wlong-long... */ extern int printf (const char *, ...); diff --git a/gcc/testsuite/gfortran.dg/class_11.f03 b/gcc/testsuite/gfortran.dg/class_11.f03 new file mode 100644 index 00000000000..bf80c4e00e6 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/class_11.f03 @@ -0,0 +1,37 @@ +! { dg-do compile } +! +! PR 41556 +! Contributed by Damian Rouson <damian@rouson.net> + + implicit none + + type ,abstract :: object + contains + procedure(assign_interface) ,deferred :: assign + generic :: assignment(=) => assign + end type + + abstract interface + subroutine assign_interface(lhs,rhs) + import :: object + class(object) ,intent(inout) :: lhs + class(object) ,intent(in) :: rhs + end subroutine + end interface + +! PR 41937 +! 
Contributed by Juergen Reuter <reuter@physik.uni-freiburg.de> + + type, abstract :: cuba_abstract_type + integer :: dim_f = 1 + real, dimension(:), allocatable :: integral + end type cuba_abstract_type + +contains + + subroutine cuba_abstract_alloc_dim_f(this) + class(cuba_abstract_type) :: this + allocate(this%integral(this%dim_f)) + end subroutine cuba_abstract_alloc_dim_f + +end diff --git a/gcc/testsuite/gfortran.dg/class_12.f03 b/gcc/testsuite/gfortran.dg/class_12.f03 new file mode 100644 index 00000000000..56c68a57787 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/class_12.f03 @@ -0,0 +1,45 @@ +! { dg-do compile } +! +! PR 41556: [OOP] Errors in applying operator/assignment to an abstract type +! +! Contributed by Damian Rouson <damian@rouson.net> + +module abstract_algebra + implicit none + private + public :: rescale + public :: object + + type ,abstract :: object + contains + procedure(assign_interface) ,deferred :: assign + procedure(product_interface) ,deferred :: product + generic :: assignment(=) => assign + generic :: operator(*) => product + end type + + abstract interface + function product_interface(lhs,rhs) result(product) + import :: object + class(object) ,intent(in) :: lhs + class(object) ,allocatable :: product + real ,intent(in) :: rhs + end function + subroutine assign_interface(lhs,rhs) + import :: object + class(object) ,intent(inout) :: lhs + class(object) ,intent(in) :: rhs + end subroutine + end interface + +contains + + subroutine rescale(operand,scale) + class(object) :: operand + real ,intent(in) :: scale + operand = operand*scale + operand = operand%product(scale) + end subroutine +end module + +! { dg-final { cleanup-modules "abstract_algebra" } } diff --git a/gcc/testsuite/gfortran.dg/interface_abstract_4.f90 b/gcc/testsuite/gfortran.dg/interface_abstract_4.f90 new file mode 100644 index 00000000000..50f101577e6 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/interface_abstract_4.f90 @@ -0,0 +1,35 @@ +! { dg-do compile } +! +! PR 41873: Bogus Error: ABSTRACT INTERFACE must not be referenced... +! +! Contributed by Harald Anlauf <anlauf@gmx.de> + + implicit none + + type, abstract :: abstype + contains + procedure(f), nopass, deferred :: f_bound + procedure(s), nopass, deferred :: s_bound + end type + + abstract interface + real function f () + end function + end interface + + abstract interface + subroutine s + end subroutine + end interface + +contains + + subroutine cg (c) + class(abstype) :: c + print *, f() ! { dg-error "must not be referenced" } + call s ! { dg-error "must not be referenced" } + print *, c%f_bound () + call c%s_bound () + end subroutine + +end diff --git a/gcc/testsuite/gfortran.dg/missing_optional_dummy_6.f90 b/gcc/testsuite/gfortran.dg/missing_optional_dummy_6.f90 new file mode 100644 index 00000000000..408582289f1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/missing_optional_dummy_6.f90 @@ -0,0 +1,60 @@ +! { dg-do run } +! { dg-options "-fdump-tree-original" } +! +! PR fortran/41907 +! +program test + implicit none + call scalar1 () + call assumed_shape1 () + call explicit_shape1 () +contains + + ! 
Calling functions + subroutine scalar1 (slr1) + integer, optional :: slr1 + call scalar2 (slr1) + end subroutine scalar1 + + subroutine assumed_shape1 (as1) + integer, dimension(:), optional :: as1 + call assumed_shape2 (as1) + call explicit_shape2 (as1) + end subroutine assumed_shape1 + + subroutine explicit_shape1 (es1) + integer, dimension(5), optional :: es1 + call assumed_shape2 (es1) + call explicit_shape2 (es1) + end subroutine explicit_shape1 + + + ! Called functions + subroutine assumed_shape2 (as2) + integer, dimension(:),optional :: as2 + if (present (as2)) call abort() + end subroutine assumed_shape2 + + subroutine explicit_shape2 (es2) + integer, dimension(5),optional :: es2 + if (present (es2)) call abort() + end subroutine explicit_shape2 + + subroutine scalar2 (slr2) + integer, optional :: slr2 + if (present (slr2)) call abort() + end subroutine scalar2 + +end program test + +! { dg-final { scan-tree-dump-times "scalar2 \\(slr1" 1 "original" } } + +! { dg-final { scan-tree-dump-times "= es1 != 0B" 1 "original" } } +! { dg-final { scan-tree-dump-times "assumed_shape2 \\(es1" 0 "original" } } +! { dg-final { scan-tree-dump-times "explicit_shape2 \\(es1" 1 "original" } } + +! { dg-final { scan-tree-dump-times "= as1 != 0B" 2 "original" } } +! { dg-final { scan-tree-dump-times "assumed_shape2 \\(as1" 0 "original" } } +! { dg-final { scan-tree-dump-times "explicit_shape2 \\(as1" 0 "original" } } + +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/vect/vect-2.f90 b/gcc/testsuite/gfortran.dg/vect/vect-2.f90 index bc904a04917..0f45a70c53b 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-2.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-2.f90 @@ -15,7 +15,7 @@ END ! support unaligned loads). ! { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } -! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { { vect_no_align || { ! vect_hw_misalign } } || { ! vector_alignment_reachable } } } } } +! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } ! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target { vect_no_align && { ! vector_alignment_reachable } } } } } ! { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } ! { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" {target { vect_no_align || { { ! vector_alignment_reachable } && { ! vect_hw_misalign } } } } } } diff --git a/gcc/testsuite/gfortran.dg/vect/vect-5.f90 b/gcc/testsuite/gfortran.dg/vect/vect-5.f90 index 25eb88e2cab..72776a6fb49 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-5.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-5.f90 @@ -36,7 +36,7 @@ end ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } -! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align || {! vect_hw_misalign } } || {! vector_alignment_reachable} } } } } +! { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || {! vector_alignment_reachable} } } } } ! { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align } } } } ! { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target { vect_no_align } } } } ! 
{ dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { {! vector_alignment_reachable} && {! vect_hw_misalign} } } } } diff --git a/gcc/testsuite/gnat.dg/stack_check1.adb b/gcc/testsuite/gnat.dg/stack_check1.adb new file mode 100644 index 00000000000..51ee1a633b3 --- /dev/null +++ b/gcc/testsuite/gnat.dg/stack_check1.adb @@ -0,0 +1,38 @@ +-- { dg-do run } +-- { dg-options "-fstack-check" } + +-- This test requires architecture- and OS-specific support code for unwinding +-- through signal frames (typically located in *-unwind.h) to pass. Feel free +-- to disable it if this code hasn't been implemented yet. + +procedure Stack_Check1 is + + type A is Array (1..2048) of Integer; + + procedure Consume_Stack (N : Integer) is + My_A : A; -- 8 KB static + begin + My_A (1) := 0; + if N <= 0 then + return; + end if; + Consume_Stack (N-1); + end; + + Task T; + + Task body T is + begin + begin + Consume_Stack (Integer'Last); + raise Program_Error; + exception + when Storage_Error => null; + end; + + Consume_Stack (128); + end; + +begin + null; +end; diff --git a/gcc/testsuite/gnat.dg/stack_check2.adb b/gcc/testsuite/gnat.dg/stack_check2.adb new file mode 100644 index 00000000000..4a3008ba02b --- /dev/null +++ b/gcc/testsuite/gnat.dg/stack_check2.adb @@ -0,0 +1,43 @@ +-- { dg-do run } +-- { dg-options "-fstack-check" } + +-- This test requires architecture- and OS-specific support code for unwinding +-- through signal frames (typically located in *-unwind.h) to pass. Feel free +-- to disable it if this code hasn't been implemented yet. + +procedure Stack_Check2 is + + function UB return Integer is + begin + return 2048; + end; + + type A is Array (Positive range <>) of Integer; + + procedure Consume_Stack (N : Integer) is + My_A : A (1..UB); -- 8 KB dynamic + begin + My_A (1) := 0; + if N <= 0 then + return; + end if; + Consume_Stack (N-1); + end; + + Task T; + + Task body T is + begin + begin + Consume_Stack (Integer'Last); + raise Program_Error; + exception + when Storage_Error => null; + end; + + Consume_Stack (128); + end; + +begin + null; +end; diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 12b19909db9..67001a64564 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -144,7 +144,9 @@ struct access points to the first one. */ struct access *first_child; - /* Pointer to the next sibling in the access tree as described above. */ + /* In intraprocedural SRA, pointer to the next sibling in the access tree as + described above. In IPA-SRA this is a pointer to the next access + belonging to the same group (having the same representative). */ struct access *next_sibling; /* Pointers to the first and last element in the linked list of assign @@ -2824,33 +2826,28 @@ analyze_modified_params (VEC (access_p, heap) *representatives) repr; repr = repr->next_grp) { - VEC (access_p, heap) *access_vec; - int j, access_count; - tree parm; + struct access *access; + bitmap visited; + ao_ref ar; if (no_accesses_p (repr)) continue; - parm = repr->base; - if (!POINTER_TYPE_P (TREE_TYPE (parm)) + if (!POINTER_TYPE_P (TREE_TYPE (repr->base)) || repr->grp_maybe_modified) continue; - access_vec = get_base_access_vector (parm); - access_count = VEC_length (access_p, access_vec); - for (j = 0; j < access_count; j++) + ao_ref_init (&ar, repr->expr); + visited = BITMAP_ALLOC (NULL); + for (access = repr; access; access = access->next_sibling) { - struct access *access; - ao_ref ar; - /* All accesses are read ones, otherwise grp_maybe_modified would be trivially set. 
*/ - access = VEC_index (access_p, access_vec, j); - ao_ref_init (&ar, access->expr); walk_aliased_vdefs (&ar, gimple_vuse (access->stmt), - mark_maybe_modified, repr, NULL); + mark_maybe_modified, repr, &visited); if (repr->grp_maybe_modified) break; } + BITMAP_FREE (visited); } } } @@ -3019,24 +3016,30 @@ static struct access * unmodified_by_ref_scalar_representative (tree parm) { int i, access_count; - struct access *access; + struct access *repr; VEC (access_p, heap) *access_vec; access_vec = get_base_access_vector (parm); gcc_assert (access_vec); - access_count = VEC_length (access_p, access_vec); + repr = VEC_index (access_p, access_vec, 0); + if (repr->write) + return NULL; + repr->group_representative = repr; - for (i = 0; i < access_count; i++) + access_count = VEC_length (access_p, access_vec); + for (i = 1; i < access_count; i++) { - access = VEC_index (access_p, access_vec, i); + struct access *access = VEC_index (access_p, access_vec, i); if (access->write) return NULL; + access->group_representative = repr; + access->next_sibling = repr->next_sibling; + repr->next_sibling = access; } - access = VEC_index (access_p, access_vec, 0); - access->grp_read = 1; - access->grp_scalar_ptr = 1; - return access; + repr->grp_read = 1; + repr->grp_scalar_ptr = 1; + return repr; } /* Sort collected accesses for parameter PARM, identify representatives for @@ -3091,6 +3094,9 @@ splice_param_accesses (tree parm, bool *ro_grp) return NULL; modification |= ac2->write; + ac2->group_representative = access; + ac2->next_sibling = access->next_sibling; + access->next_sibling = ac2; j++; } diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index f0fcc2b0a21..c0ddc8afa30 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -531,7 +531,9 @@ execute_cse_reciprocals (void) || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)) { enum built_in_function code; - bool md_code; + bool md_code, fail; + imm_use_iterator ui; + use_operand_p use_p; code = DECL_FUNCTION_CODE (fndecl); md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; @@ -540,12 +542,36 @@ execute_cse_reciprocals (void) if (!fndecl) continue; + /* Check that all uses of the SSA name are divisions, + otherwise replacing the defining statement will do + the wrong thing. */ + fail = false; + FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) + { + gimple stmt2 = USE_STMT (use_p); + if (is_gimple_debug (stmt2)) + continue; + if (!is_gimple_assign (stmt2) + || gimple_assign_rhs_code (stmt2) != RDIV_EXPR + || gimple_assign_rhs1 (stmt2) == arg1 + || gimple_assign_rhs2 (stmt2) != arg1) + { + fail = true; + break; + } + } + if (fail) + continue; + gimple_call_set_fndecl (stmt1, fndecl); update_stmt (stmt1); - gimple_assign_set_rhs_code (stmt, MULT_EXPR); - fold_stmt_inplace (stmt); - update_stmt (stmt); + FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) + { + gimple_assign_set_rhs_code (stmt, MULT_EXPR); + fold_stmt_inplace (stmt); + update_stmt (stmt); + } } } } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index b4c1f401bb5..c13c2750270 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1176,7 +1176,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { stmt = DR_STMT (dr); stmt_info = vinfo_for_stmt (stmt); - supportable_dr_alignment = vect_supportable_dr_alignment (dr); /* For interleaving, only the alignment of the first access matters. 
*/ @@ -1184,7 +1183,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; - if (!aligned_access_p (dr)) + if (!DR_IS_READ (dr) && !aligned_access_p (dr)) { do_peeling = vector_alignment_reachable_p (dr); if (do_peeling) diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 0cb227abbd1..5adc0da84da 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -6748,13 +6748,9 @@ test_for_singularity (enum tree_code cond_code, tree op0, value range information we have for op0. */ if (min && max) { - if (compare_values (vr->min, min) == -1) - min = min; - else + if (compare_values (vr->min, min) == 1) min = vr->min; - if (compare_values (vr->max, max) == 1) - max = max; - else + if (compare_values (vr->max, max) == -1) max = vr->max; /* If the new min/max values have converged to a single value, diff --git a/gcc/tree.c b/gcc/tree.c index 60797254fa1..f3970dd3a55 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -9009,7 +9009,8 @@ build_common_builtin_nodes (void) tmp = tree_cons (NULL_TREE, size_type_node, void_list_node); ftype = build_function_type (ptr_type_node, tmp); local_define_builtin ("__builtin_alloca", ftype, BUILT_IN_ALLOCA, - "alloca", ECF_NOTHROW | ECF_MALLOC); + "alloca", + ECF_MALLOC | (flag_stack_check ? 0 : ECF_NOTHROW)); } tmp = tree_cons (NULL_TREE, ptr_type_node, void_list_node); diff --git a/gcc/unwind-dw2.c b/gcc/unwind-dw2.c index 82958b0086a..2208f17dc1d 100644 --- a/gcc/unwind-dw2.c +++ b/gcc/unwind-dw2.c @@ -1559,7 +1559,13 @@ uw_install_context_1 (struct _Unwind_Context *current, static inline _Unwind_Ptr uw_identify_context (struct _Unwind_Context *context) { - return _Unwind_GetCFA (context); + /* The CFA is not sufficient to disambiguate the context of a function + interrupted by a signal before establishing its frame and the context + of the signal itself. */ + if (STACK_GROWS_DOWNWARD) + return _Unwind_GetCFA (context) - _Unwind_IsSignalFrame (context); + else + return _Unwind_GetCFA (context) + _Unwind_IsSignalFrame (context); } diff --git a/gcc/varasm.c b/gcc/varasm.c index b6ff4ae149d..c9953d128da 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -4322,6 +4322,10 @@ initializer_constant_valid_p (tree value, tree endtype) case POINTER_PLUS_EXPR: case PLUS_EXPR: + /* Any valid floating-point constants will have been folded by now; + with -frounding-math we hit this with addition of two constants. */ + if (TREE_CODE (endtype) == REAL_TYPE) + return NULL_TREE; if (! INTEGRAL_TYPE_P (endtype) || TYPE_PRECISION (endtype) >= int_or_pointer_precision (TREE_TYPE (value))) @@ -4345,6 +4349,8 @@ initializer_constant_valid_p (tree value, tree endtype) break; case MINUS_EXPR: + if (TREE_CODE (endtype) == REAL_TYPE) + return NULL_TREE; if (! INTEGRAL_TYPE_P (endtype) || TYPE_PRECISION (endtype) >= int_or_pointer_precision (TREE_TYPE (value))) @@ -4560,8 +4566,8 @@ output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align) case REAL_TYPE: if (TREE_CODE (exp) != REAL_CST) error ("initializer for floating value is not a floating constant"); - - assemble_real (TREE_REAL_CST (exp), TYPE_MODE (TREE_TYPE (exp)), align); + else + assemble_real (TREE_REAL_CST (exp), TYPE_MODE (TREE_TYPE (exp)), align); break; case COMPLEX_TYPE: diff --git a/include/ChangeLog b/include/ChangeLog index 6cff29517f6..000eeda414f 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,9 @@ +2009-11-06 Jonas Maebe <jonas.maebe@elis.ugent.be> + + Add DWARF attribute value for the "Borland fastcall" calling + convention. 
+ * elf/dwarf2.h: Add DW_CC_GNU_borland_fastcall_i386 constant. + 2009-10-23 Kai Tietz <kai.tietz@onevision.com> * splay-tree.h (libi_uhostptr_t): Add gcc specific diff --git a/include/dwarf2.h b/include/dwarf2.h index 7a8e030a2bf..559b82d7876 100644 --- a/include/dwarf2.h +++ b/include/dwarf2.h @@ -721,7 +721,8 @@ enum dwarf_calling_convention DW_CC_lo_user = 0x40, DW_CC_hi_user = 0xff, - DW_CC_GNU_renesas_sh = 0x40 + DW_CC_GNU_renesas_sh = 0x40, + DW_CC_GNU_borland_fastcall_i386 = 0x41 }; /* Inline attribute. */ diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 589671b470b..ec39612f8f5 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,71 @@ +2009-11-06 Paolo Carlini <paolo.carlini@oracle.com> + + * include/parallel/multiway_merge.h: Simple formatting and + uglification fixes. + * include/parallel/find_selectors.h: Likewise. + * include/parallel/losertree.h: Likewise. + * include/parallel/list_partition.h: Likewise. + * include/parallel/for_each.h: Likewise. + * include/parallel/multiseq_selection.h: Likewise. + * include/parallel/workstealing.h: Likewise. + * include/parallel/par_loop.h: Likewise. + * include/parallel/numeric: Likewise. + * include/parallel/quicksort.h: Likewise. + * include/parallel/equally_split.h: Likewise. + * include/parallel/omp_loop_static.h: Likewise. + * include/parallel/random_shuffle.h: Likewise. + * include/parallel/balanced_quicksort.h: Likewise. + * include/parallel/tags.h: Likewise. + * include/parallel/set_operations.h: Likewise. + * include/parallel/merge.h: Likewise. + * include/parallel/unique_copy.h: Likewise. + * include/parallel/multiway_mergesort.h: Likewise. + * include/parallel/search.h: Likewise. + * include/parallel/partition.h: Likewise. + * include/parallel/partial_sum.h: Likewise. + * include/parallel/find.h: Likewise. + * include/parallel/queue.h: Likewise. + * include/parallel/omp_loop.h: Likewise. + * include/parallel/checkers.h: Likewise. + * include/parallel/sort.h: Likewise. + +2009-11-06 Jonathan Wakely <jwakely.gcc@gmail.com> + + PR libstdc++/41949 + * include/std/ostream: Adjust link. + +2009-11-05 Paolo Carlini <paolo.carlini@oracle.com> + + * include/parallel/multiway_merge.h: Simple formatting and + uglification fixes. + * include/parallel/losertree.h: Likewise. + * include/parallel/base.h: Likewise. + * include/parallel/par_loop.h: Likewise. + * include/parallel/omp_loop_static.h: Likewise. + * include/parallel/multiway_mergesort.h: Likewise. + * include/parallel/partial_sum.h: Likewise. + * include/parallel/omp_loop.h: Likewise. + +2009-11-04 Benjamin Kosnik <bkoz@redhat.com> + + * testsuite/25_algorithms/fill/5.cc: Move... + * testsuite/25_algorithms/fill_n/1.cc: ...here. + +2009-11-04 Paolo Carlini <paolo.carlini@oracle.com> + + * include/parallel/multiway_merge.h: Simple formatting and + uglification fixes. + * include/parallel/losertree.h: Likewise. + +2009-11-03 David Krauss <potswa@mac.com> + Paolo Carlini <paolo.carlini@oracle.com> + + PR libstdc++/41351 + * include/bits/stl_algo.h (__rotate(_RandomAccessIterator, + _RandomAccessIterator, _RandomAccessIterator, + random_access_iterator_tag)): Rewrite to use only std::swap in + general and std::copy/std::copy_backward when safe. + 2009-11-02 Benjamin Kosnik <bkoz@redhat.com> * include/std/future: Use base class with nested types. 
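The PR libstdc++/41351 entry above is implemented by the stl_algo.h hunk that follows: the old __rotate for random-access iterators moved one value at a time around gcd(n, k) cycles, while the rewritten version repeatedly swaps the smaller block past the larger one and shrinks the problem modulo the block size, so it needs nothing beyond std::swap (plus a std::copy/std::copy_backward-style shortcut when the block length is 1 and the value type is POD). A rough C sketch of that block-swap scheme on an int array, written for illustration only (the function names here are not from the library):

#include <stddef.h>

static void
swap_ints (int *a, int *b)
{
  int t = *a;
  *a = *b;
  *b = t;
}

/* Rotate v[0..n) left by k positions using only swaps.  */
void
rotate_left (int *v, size_t n, size_t k)
{
  size_t p = 0;                 /* start of the not-yet-finished window */

  if (n == 0)
    return;
  k %= n;
  if (k == 0)
    return;

  for (;;)
    {
      if (k < n - k)
        {
          /* Left block is smaller: swap each of the first n-k elements
             with the one k slots to its right.  That finalizes the
             first n-k positions and leaves a size-k subproblem.  */
          size_t i, r;
          for (i = 0; i < n - k; i++)
            swap_ints (&v[p + i], &v[p + i + k]);
          p += n - k;
          r = n % k;
          if (r == 0)
            return;
          n = k;
          k = n - r;            /* remaining rotation inside the block */
        }
      else
        {
          /* Right block is smaller: mirror image, swapping backwards.
             That finalizes the last positions and leaves a subproblem
             at the front of the window.  */
          size_t m = n - k;
          size_t i, r;
          for (i = 0; i < n - m; i++)
            swap_ints (&v[p + n - 1 - i], &v[p + n - 1 - i - m]);
          r = n % m;
          if (r == 0)
            return;
          n = m;
          k = r;
        }
    }
}

For example, rotate_left on {0, 1, 2, 3, 4} with k = 2 performs three forward swaps, leaving {2, 3, 4, 1, 0} with a two-element subproblem, then one backward swap to reach {2, 3, 4, 0, 1}; every element is placed by swaps alone, which is the point of the rewrite for types whose assignment is expensive or unsafe to duplicate.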
diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h index 70cde1a4bf0..9b6f2afb9ec 100644 --- a/libstdc++-v3/include/bits/stl_algo.h +++ b/libstdc++-v3/include/bits/stl_algo.h @@ -1647,53 +1647,64 @@ _GLIBCXX_BEGIN_NAMESPACE(std) typedef typename iterator_traits<_RandomAccessIterator>::value_type _ValueType; - const _Distance __n = __last - __first; - const _Distance __k = __middle - __first; - const _Distance __l = __n - __k; + _Distance __n = __last - __first; + _Distance __k = __middle - __first; - if (__k == __l) + if (__k == __n - __k) { std::swap_ranges(__first, __middle, __middle); return; } - const _Distance __d = std::__gcd(__n, __k); + _RandomAccessIterator __p = __first; - for (_Distance __i = 0; __i < __d; __i++) + for (;;) { - _ValueType __tmp = _GLIBCXX_MOVE(*__first); - _RandomAccessIterator __p = __first; - - if (__k < __l) + if (__k < __n - __k) { - for (_Distance __j = 0; __j < __l / __d; __j++) + if (__is_pod(_ValueType) && __k == 1) + { + _ValueType __t = _GLIBCXX_MOVE(*__p); + _GLIBCXX_MOVE3(__p + 1, __p + __n, __p); + *(__p + __n - 1) = _GLIBCXX_MOVE(__t); + return; + } + _RandomAccessIterator __q = __p + __k; + for (_Distance __i = 0; __i < __n - __k; ++ __i) { - if (__p > __first + __l) - { - *__p = _GLIBCXX_MOVE(*(__p - __l)); - __p -= __l; - } - - *__p = _GLIBCXX_MOVE(*(__p + __k)); - __p += __k; + std::iter_swap(__p, __q); + ++__p; + ++__q; } + __n %= __k; + if (__n == 0) + return; + std::swap(__n, __k); + __k = __n - __k; } else { - for (_Distance __j = 0; __j < __k / __d - 1; __j ++) + __k = __n - __k; + if (__is_pod(_ValueType) && __k == 1) { - if (__p < __last - __k) - { - *__p = _GLIBCXX_MOVE(*(__p + __k)); - __p += __k; - } - *__p = _GLIBCXX_MOVE(*(__p - __l)); - __p -= __l; + _ValueType __t = _GLIBCXX_MOVE(*(__p + __n - 1)); + _GLIBCXX_MOVE_BACKWARD3(__p, __p + __n - 1, __p + __n); + *__p = _GLIBCXX_MOVE(__t); + return; } + _RandomAccessIterator __q = __p + __n; + __p = __q - __k; + for (_Distance __i = 0; __i < __n - __k; ++ __i) + { + --__p; + --__q; + std::iter_swap(__p, __q); + } + __n %= __k; + if (__n == 0) + return; + std::swap(__n, __k); } - - *__p = _GLIBCXX_MOVE(__tmp); - ++__first; } } diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h index 85e4d699e26..3107d97a45b 100644 --- a/libstdc++-v3/include/parallel/balanced_quicksort.h +++ b/libstdc++-v3/include/parallel/balanced_quicksort.h @@ -57,436 +57,435 @@ namespace __gnu_parallel { -/** @brief Information local to one thread in the parallel quicksort run. */ -template<typename _RAIter> - struct _QSBThreadLocal - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - - /** @brief Continuous part of the sequence, described by an - iterator pair. */ - typedef std::pair<_RAIter, _RAIter> _Piece; - - /** @brief Initial piece to work on. */ - _Piece _M_initial; - - /** @brief Work-stealing queue. */ - _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; - - /** @brief Number of threads involved in this algorithm. */ - _ThreadIndex _M_num_threads; - - /** @brief Pointer to a counter of elements left over to sort. */ - volatile _DifferenceType* _M_elements_leftover; - - /** @brief The complete sequence to sort. */ - _Piece _M_global; - - /** @brief Constructor. - * @param __queue_size size of the work-stealing queue. 
*/ - _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } - }; - -/** @brief Balanced quicksort divide step. - * @param __begin Begin iterator of subsequence. - * @param __end End iterator of subsequence. - * @param __comp Comparator. - * @param __num_threads Number of threads that are allowed to work on - * this part. - * @pre @__c (__end-__begin)>=1 */ -template<typename _RAIter, typename _Compare> - typename std::iterator_traits<_RAIter>::difference_type - __qsb_divide(_RAIter __begin, _RAIter __end, - _Compare __comp, _ThreadIndex __num_threads) - { - _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); - - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _RAIter __pivot_pos = - __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, - __end - 1, __comp); + /** @brief Information local to one thread in the parallel quicksort run. */ + template<typename _RAIter> + struct _QSBThreadLocal + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + + /** @brief Continuous part of the sequence, described by an + iterator pair. */ + typedef std::pair<_RAIter, _RAIter> _Piece; + + /** @brief Initial piece to work on. */ + _Piece _M_initial; + + /** @brief Work-stealing queue. */ + _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; + + /** @brief Number of threads involved in this algorithm. */ + _ThreadIndex _M_num_threads; + + /** @brief Pointer to a counter of elements left over to sort. */ + volatile _DifferenceType* _M_elements_leftover; + + /** @brief The complete sequence to sort. */ + _Piece _M_global; + + /** @brief Constructor. + * @param __queue_size size of the work-stealing queue. */ + _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } + }; + + /** @brief Balanced quicksort divide step. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. + * @param __num_threads Number of threads that are allowed to work on + * this part. + * @pre @__c (__end-__begin)>=1 */ + template<typename _RAIter, typename _Compare> + typename std::iterator_traits<_RAIter>::difference_type + __qsb_divide(_RAIter __begin, _RAIter __end, + _Compare __comp, _ThreadIndex __num_threads) + { + _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _RAIter __pivot_pos = + __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, + __end - 1, __comp); #if defined(_GLIBCXX_ASSERTIONS) - // Must be in between somewhere. - _DifferenceType __n = __end - __begin; - - _GLIBCXX_PARALLEL_ASSERT( - (!__comp(*__pivot_pos, *__begin) && - !__comp(*(__begin + __n / 2), *__pivot_pos)) - || (!__comp(*__pivot_pos, *__begin) && - !__comp(*(__end - 1), *__pivot_pos)) - || (!__comp(*__pivot_pos, *(__begin + __n / 2)) && - !__comp(*__begin, *__pivot_pos)) - || (!__comp(*__pivot_pos, *(__begin + __n / 2)) && - !__comp(*(__end - 1), *__pivot_pos)) - || (!__comp(*__pivot_pos, *(__end - 1)) && - !__comp(*__begin, *__pivot_pos)) - || (!__comp(*__pivot_pos, *(__end - 1)) && - !__comp(*(__begin + __n / 2), *__pivot_pos))); + // Must be in between somewhere. 
+ _DifferenceType __n = __end - __begin; + + _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin) + && !__comp(*(__begin + __n / 2), + *__pivot_pos)) + || (!__comp(*__pivot_pos, *__begin) + && !__comp(*(__end - 1), *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__begin + __n / 2)) + && !__comp(*__begin, *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__begin + __n / 2)) + && !__comp(*(__end - 1), *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__end - 1)) + && !__comp(*__begin, *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__end - 1)) + && !__comp(*(__begin + __n / 2), + *__pivot_pos))); #endif - // Swap pivot value to end. - if (__pivot_pos != (__end - 1)) - std::swap(*__pivot_pos, *(__end - 1)); - __pivot_pos = __end - 1; + // Swap pivot value to end. + if (__pivot_pos != (__end - 1)) + std::swap(*__pivot_pos, *(__end - 1)); + __pivot_pos = __end - 1; - __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> - __pred(__comp, *__pivot_pos); + __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); - // Divide, returning __end - __begin - 1 in the worst case. - _DifferenceType __split_pos = __parallel_partition( - __begin, __end - 1, __pred, __num_threads); + // Divide, returning __end - __begin - 1 in the worst case. + _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1, + __pred, + __num_threads); - // Swap back pivot to middle. - std::swap(*(__begin + __split_pos), *__pivot_pos); - __pivot_pos = __begin + __split_pos; + // Swap back pivot to middle. + std::swap(*(__begin + __split_pos), *__pivot_pos); + __pivot_pos = __begin + __split_pos; #if _GLIBCXX_ASSERTIONS - _RAIter __r; - for (__r = __begin; __r != __pivot_pos; ++__r) - _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); - for (; __r != __end; ++__r) - _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); + _RAIter __r; + for (__r = __begin; __r != __pivot_pos; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); + for (; __r != __end; ++__r) + _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); #endif - return __split_pos; - } - -/** @brief Quicksort conquer step. - * @param __tls Array of thread-local storages. - * @param __begin Begin iterator of subsequence. - * @param __end End iterator of subsequence. - * @param __comp Comparator. - * @param __iam Number of the thread processing this function. - * @param __num_threads - * Number of threads that are allowed to work on this part. */ -template<typename _RAIter, typename _Compare> - void - __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls, - _RAIter __begin, _RAIter __end, - _Compare __comp, - _ThreadIndex __iam, _ThreadIndex __num_threads, - bool __parent_wait) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _DifferenceType __n = __end - __begin; - - if (__num_threads <= 1 || __n <= 1) - { - __tls[__iam]->_M_initial.first = __begin; - __tls[__iam]->_M_initial.second = __end; + return __split_pos; + } - __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); + /** @brief Quicksort conquer step. + * @param __tls Array of thread-local storages. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. + * @param __iam Number of the thread processing this function. + * @param __num_threads + * Number of threads that are allowed to work on this part. 
*/ + template<typename _RAIter, typename _Compare> + void + __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls, + _RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __iam, _ThreadIndex __num_threads, + bool __parent_wait) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - return; - } + _DifferenceType __n = __end - __begin; + + if (__num_threads <= 1 || __n <= 1) + { + __tls[__iam]->_M_initial.first = __begin; + __tls[__iam]->_M_initial.second = __end; + + __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); + + return; + } - // Divide step. - _DifferenceType __split_pos = - __qsb_divide(__begin, __end, __comp, __num_threads); + // Divide step. + _DifferenceType __split_pos = + __qsb_divide(__begin, __end, __comp, __num_threads); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && - __split_pos < (__end - __begin)); + _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && + __split_pos < (__end - __begin)); #endif - _ThreadIndex __num_threads_leftside = - std::max<_ThreadIndex>(1, std::min<_ThreadIndex>( - __num_threads - 1, __split_pos * __num_threads / __n)); + _ThreadIndex + __num_threads_leftside = std::max<_ThreadIndex> + (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos + * __num_threads / __n)); -# pragma omp atomic - *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; +# pragma omp atomic + *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; - // Conquer step. -# pragma omp parallel num_threads(2) - { - bool __wait; - if(omp_get_num_threads() < 2) - __wait = false; - else - __wait = __parent_wait; - -# pragma omp sections - { + // Conquer step. +# pragma omp parallel num_threads(2) + { + bool __wait; + if(omp_get_num_threads() < 2) + __wait = false; + else + __wait = __parent_wait; + +# pragma omp sections + { # pragma omp section - { - __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, - __iam, - __num_threads_leftside, - __wait); - __wait = __parent_wait; - } - // The pivot_pos is left in place, to ensure termination. + { + __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, + __iam, __num_threads_leftside, __wait); + __wait = __parent_wait; + } + // The pivot_pos is left in place, to ensure termination. # pragma omp section - { - __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, - __iam + __num_threads_leftside, - __num_threads - __num_threads_leftside, - __wait); - __wait = __parent_wait; - } - } + { + __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, + __iam + __num_threads_leftside, + __num_threads - __num_threads_leftside, __wait); + __wait = __parent_wait; + } + } + } } - } - -/** - * @brief Quicksort step doing load-balanced local sort. - * @param __tls Array of thread-local storages. - * @param __comp Comparator. - * @param __iam Number of the thread processing this function. 
- */ -template<typename _RAIter, typename _Compare> - void - __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, - _Compare& __comp, int __iam, bool __wait) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef std::pair<_RAIter, _RAIter> _Piece; - - _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam]; - - _DifferenceType __base_case_n = - _Settings::get().sort_qsb_base_case_maximal_n; - if (__base_case_n < 2) - __base_case_n = 2; - _ThreadIndex __num_threads = __tl._M_num_threads; - - // Every thread has its own random number generator. - _RandomNumber __rng(__iam + 1); - - _Piece __current = __tl._M_initial; - - _DifferenceType __elements_done = 0; + + /** + * @brief Quicksort step doing load-balanced local sort. + * @param __tls Array of thread-local storages. + * @param __comp Comparator. + * @param __iam Number of the thread processing this function. + */ + template<typename _RAIter, typename _Compare> + void + __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, + _Compare& __comp, int __iam, bool __wait) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::pair<_RAIter, _RAIter> _Piece; + + _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam]; + + _DifferenceType + __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n; + if (__base_case_n < 2) + __base_case_n = 2; + _ThreadIndex __num_threads = __tl._M_num_threads; + + // Every thread has its own random number generator. + _RandomNumber __rng(__iam + 1); + + _Piece __current = __tl._M_initial; + + _DifferenceType __elements_done = 0; #if _GLIBCXX_ASSERTIONS - _DifferenceType __total_elements_done = 0; + _DifferenceType __total_elements_done = 0; #endif - for (;;) - { - // Invariant: __current must be a valid (maybe empty) range. - _RAIter __begin = __current.first, __end = __current.second; - _DifferenceType __n = __end - __begin; - - if (__n > __base_case_n) - { - // Divide. - _RAIter __pivot_pos = __begin + __rng(__n); - - // Swap __pivot_pos value to end. - if (__pivot_pos != (__end - 1)) - std::swap(*__pivot_pos, *(__end - 1)); - __pivot_pos = __end - 1; - - __gnu_parallel::binder2nd - <_Compare, _ValueType, _ValueType, bool> - __pred(__comp, *__pivot_pos); - - // Divide, leave pivot unchanged in last place. - _RAIter __split_pos1, __split_pos2; - __split_pos1 = - __gnu_sequential::partition(__begin, __end - 1, __pred); - - // Left side: < __pivot_pos; __right side: >= __pivot_pos. + for (;;) + { + // Invariant: __current must be a valid (maybe empty) range. + _RAIter __begin = __current.first, __end = __current.second; + _DifferenceType __n = __end - __begin; + + if (__n > __base_case_n) + { + // Divide. + _RAIter __pivot_pos = __begin + __rng(__n); + + // Swap __pivot_pos value to end. + if (__pivot_pos != (__end - 1)) + std::swap(*__pivot_pos, *(__end - 1)); + __pivot_pos = __end - 1; + + __gnu_parallel::binder2nd + <_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); + + // Divide, leave pivot unchanged in last place. + _RAIter __split_pos1, __split_pos2; + __split_pos1 = __gnu_sequential::partition(__begin, __end - 1, + __pred); + + // Left side: < __pivot_pos; __right side: >= __pivot_pos. 
#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 - && __split_pos1 < __end); + _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 + && __split_pos1 < __end); #endif - // Swap pivot back to middle. - if (__split_pos1 != __pivot_pos) - std::swap(*__split_pos1, *__pivot_pos); - __pivot_pos = __split_pos1; - - // In case all elements are equal, __split_pos1 == 0. - if ((__split_pos1 + 1 - __begin) < (__n >> 7) - || (__end - __split_pos1) < (__n >> 7)) - { - // Very unequal split, one part smaller than one 128th - // elements not strictly larger than the pivot. - __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st - <_Compare, _ValueType, _ValueType, bool>, _ValueType> - __pred(__gnu_parallel::__binder1st - <_Compare, _ValueType, _ValueType, bool>( - __comp, *__pivot_pos)); - - // Find other end of pivot-equal range. - __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, - __end, __pred); - } - else - // Only skip the pivot. - __split_pos2 = __split_pos1 + 1; - - // Elements equal to pivot are done. - __elements_done += (__split_pos2 - __split_pos1); + // Swap pivot back to middle. + if (__split_pos1 != __pivot_pos) + std::swap(*__split_pos1, *__pivot_pos); + __pivot_pos = __split_pos1; + + // In case all elements are equal, __split_pos1 == 0. + if ((__split_pos1 + 1 - __begin) < (__n >> 7) + || (__end - __split_pos1) < (__n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st + <_Compare, _ValueType, _ValueType, bool>, _ValueType> + __pred(__gnu_parallel::__binder1st + <_Compare, _ValueType, _ValueType, bool> + (__comp, *__pivot_pos)); + + // Find other end of pivot-equal range. + __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, + __end, __pred); + } + else + // Only skip the pivot. + __split_pos2 = __split_pos1 + 1; + + // Elements equal to pivot are done. + __elements_done += (__split_pos2 - __split_pos1); #if _GLIBCXX_ASSERTIONS - __total_elements_done += (__split_pos2 - __split_pos1); + __total_elements_done += (__split_pos2 - __split_pos1); #endif - // Always push larger part onto stack. - if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) - { - // Right side larger. - if ((__split_pos2) != __end) - __tl._M_leftover_parts.push_front( - std::make_pair(__split_pos2, __end)); - - //__current.first = __begin; //already set anyway - __current.second = __split_pos1; - continue; - } - else - { - // Left side larger. - if (__begin != __split_pos1) - __tl._M_leftover_parts.push_front(std::make_pair(__begin, - __split_pos1)); - - __current.first = __split_pos2; - //__current.second = __end; //already set anyway - continue; - } - } - else - { - __gnu_sequential::sort(__begin, __end, __comp); - __elements_done += __n; + // Always push larger part onto stack. + if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) + { + // Right side larger. + if ((__split_pos2) != __end) + __tl._M_leftover_parts.push_front + (std::make_pair(__split_pos2, __end)); + + //__current.first = __begin; //already set anyway + __current.second = __split_pos1; + continue; + } + else + { + // Left side larger. 
+ if (__begin != __split_pos1) + __tl._M_leftover_parts.push_front(std::make_pair + (__begin, __split_pos1)); + + __current.first = __split_pos2; + //__current.second = __end; //already set anyway + continue; + } + } + else + { + __gnu_sequential::sort(__begin, __end, __comp); + __elements_done += __n; #if _GLIBCXX_ASSERTIONS - __total_elements_done += __n; + __total_elements_done += __n; #endif - // Prefer own stack, small pieces. - if (__tl._M_leftover_parts.pop_front(__current)) - continue; + // Prefer own stack, small pieces. + if (__tl._M_leftover_parts.pop_front(__current)) + continue; -# pragma omp atomic - *__tl._M_elements_leftover -= __elements_done; +# pragma omp atomic + *__tl._M_elements_leftover -= __elements_done; - __elements_done = 0; + __elements_done = 0; #if _GLIBCXX_ASSERTIONS - double __search_start = omp_get_wtime(); + double __search_start = omp_get_wtime(); #endif - // Look for new work. - bool __successfully_stolen = false; - while (__wait && *__tl._M_elements_leftover > 0 - && !__successfully_stolen + // Look for new work. + bool __successfully_stolen = false; + while (__wait && *__tl._M_elements_leftover > 0 + && !__successfully_stolen #if _GLIBCXX_ASSERTIONS - // Possible dead-lock. - && (omp_get_wtime() < (__search_start + 1.0)) + // Possible dead-lock. + && (omp_get_wtime() < (__search_start + 1.0)) #endif - ) - { - _ThreadIndex __victim; - __victim = __rng(__num_threads); - - // Large pieces. - __successfully_stolen = (__victim != __iam) - && __tls[__victim]->_M_leftover_parts.pop_back(__current); - if (!__successfully_stolen) - __yield(); + ) + { + _ThreadIndex __victim; + __victim = __rng(__num_threads); + + // Large pieces. + __successfully_stolen = (__victim != __iam) + && __tls[__victim]->_M_leftover_parts.pop_back(__current); + if (!__successfully_stolen) + __yield(); #if !defined(__ICC) && !defined(__ECC) -# pragma omp flush +# pragma omp flush #endif - } + } #if _GLIBCXX_ASSERTIONS - if (omp_get_wtime() >= (__search_start + 1.0)) - { - sleep(1); - _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() - < (__search_start + 1.0)); - } + if (omp_get_wtime() >= (__search_start + 1.0)) + { + sleep(1); + _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() + < (__search_start + 1.0)); + } #endif - if (!__successfully_stolen) - { + if (!__successfully_stolen) + { #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); + _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); #endif - return; - } - } - } - } - -/** @brief Top-level quicksort routine. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __comp Comparator. - * @param __num_threads Number of threads that are allowed to work on - * this part. - */ -template<typename _RAIter, typename _Compare> - void - __parallel_sort_qsb(_RAIter __begin, _RAIter __end, - _Compare __comp, - _ThreadIndex __num_threads) - { - _GLIBCXX_CALL(__end - __begin) - - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef std::pair<_RAIter, _RAIter> _Piece; - - typedef _QSBThreadLocal<_RAIter> _TLSType; - - _DifferenceType __n = __end - __begin; - - if (__n <= 1) - return; - - // At least one element per processor. 
- if (__num_threads > __n) - __num_threads = static_cast<_ThreadIndex>(__n); - - // Initialize thread local storage - _TLSType** __tls = new _TLSType*[__num_threads]; - _DifferenceType __queue_size = - __num_threads * (_ThreadIndex)(log2(__n) + 1); - for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) - __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); - - // There can never be more than ceil(log2(__n)) ranges on the stack, - // because - // 1. Only one processor pushes onto the stack - // 2. The largest range has at most length __n - // 3. Each range is larger than half of the range remaining - volatile _DifferenceType _M_elements_leftover = __n; - for (int __i = 0; __i < __num_threads; ++__i) - { - __tls[__i]->_M_elements_leftover = &_M_elements_leftover; - __tls[__i]->_M_num_threads = __num_threads; - __tls[__i]->_M_global = std::make_pair(__begin, __end); - - // Just in case nothing is left to assign. - __tls[__i]->_M_initial = std::make_pair(__end, __end); - } + return; + } + } + } + } - // Main recursion call. - __qsb_conquer( - __tls, __begin, __begin + __n, __comp, 0, __num_threads, true); + /** @brief Top-level quicksort routine. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. + * @param __num_threads Number of threads that are allowed to work on + * this part. + */ + template<typename _RAIter, typename _Compare> + void + __parallel_sort_qsb(_RAIter __begin, _RAIter __end, + _Compare __comp, _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::pair<_RAIter, _RAIter> _Piece; + + typedef _QSBThreadLocal<_RAIter> _TLSType; + + _DifferenceType __n = __end - __begin; + + if (__n <= 1) + return; + + // At least one element per processor. + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + // Initialize thread local storage + _TLSType** __tls = new _TLSType*[__num_threads]; + _DifferenceType __queue_size = (__num_threads + * (_ThreadIndex)(__rd_log2(__n) + 1)); + for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) + __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); + + // There can never be more than ceil(__rd_log2(__n)) ranges on the + // stack, because + // 1. Only one processor pushes onto the stack + // 2. The largest range has at most length __n + // 3. Each range is larger than half of the range remaining + volatile _DifferenceType __elements_leftover = __n; + for (int __i = 0; __i < __num_threads; ++__i) + { + __tls[__i]->_M_elements_leftover = &__elements_leftover; + __tls[__i]->_M_num_threads = __num_threads; + __tls[__i]->_M_global = std::make_pair(__begin, __end); + + // Just in case nothing is left to assign. + __tls[__i]->_M_initial = std::make_pair(__end, __end); + } + + // Main recursion call. + __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, + __num_threads, true); #if _GLIBCXX_ASSERTIONS - // All stack must be empty. - _Piece __dummy; - for (int __i = 1; __i < __num_threads; ++__i) - _GLIBCXX_PARALLEL_ASSERT( - !__tls[__i]->_M_leftover_parts.pop_back(__dummy)); + // All stack must be empty. 
+ _Piece __dummy; + for (int __i = 1; __i < __num_threads; ++__i) + _GLIBCXX_PARALLEL_ASSERT( + !__tls[__i]->_M_leftover_parts.pop_back(__dummy)); #endif - for (int __i = 0; __i < __num_threads; ++__i) - delete __tls[__i]; - delete[] __tls; - } + for (int __i = 0; __i < __num_threads; ++__i) + delete __tls[__i]; + delete[] __tls; + } } // namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h index 6bdcedc206a..eee88bd2ce1 100644 --- a/libstdc++-v3/include/parallel/base.h +++ b/libstdc++-v3/include/parallel/base.h @@ -93,13 +93,13 @@ namespace __gnu_parallel __is_parallel(const _Parallelism __p) { return __p != sequential; } -/** @brief Calculates the rounded-down logarithm of @__c __n for base 2. - * @param __n Argument. - * @return Returns 0 for any argument <1. - */ -template<typename _Size> - inline _Size - __rd_log2(_Size __n) + /** @brief Calculates the rounded-down logarithm of @__c __n for base 2. + * @param __n Argument. + * @return Returns 0 for any argument <1. + */ + template<typename _Size> + inline _Size + __rd_log2(_Size __n) { _Size __k; for (__k = 0; __n > 1; __n >>= 1) @@ -107,356 +107,352 @@ template<typename _Size> return __k; } -/** @brief Encode two integers into one gnu_parallel::_CASable. - * @param __a First integer, to be encoded in the most-significant @__c - * _CASable_bits/2 bits. - * @param __b Second integer, to be encoded in the least-significant - * @__c _CASable_bits/2 bits. - * @return value encoding @__c __a and @__c __b. - * @see decode2 - */ -inline _CASable -__encode2(int __a, int __b) //must all be non-negative, actually -{ - return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); -} - -/** @brief Decode two integers from one gnu_parallel::_CASable. - * @param __x __gnu_parallel::_CASable to decode integers from. - * @param __a First integer, to be decoded from the most-significant - * @__c _CASable_bits/2 bits of @__c __x. - * @param __b Second integer, to be encoded in the least-significant - * @__c _CASable_bits/2 bits of @__c __x. - * @see __encode2 - */ -inline void -decode2(_CASable __x, int& __a, int& __b) -{ - __a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask); - __b = (int)((__x >> 0 ) & _CASable_mask); -} - -//needed for parallel "numeric", even if "algorithm" not included - -/** @brief Equivalent to std::min. */ -template<typename _Tp> - const _Tp& - min(const _Tp& __a, const _Tp& __b) - { return (__a < __b) ? __a : __b; } - -/** @brief Equivalent to std::max. */ -template<typename _Tp> - const _Tp& - max(const _Tp& __a, const _Tp& __b) - { return (__a > __b) ? __a : __b; } - -/** @brief Constructs predicate for equality from strict weak - * ordering predicate - */ -template<typename _T1, typename _T2, typename _Compare> - class _EqualFromLess : public std::binary_function<_T1, _T2, bool> + /** @brief Encode two integers into one gnu_parallel::_CASable. + * @param __a First integer, to be encoded in the most-significant @__c + * _CASable_bits/2 bits. + * @param __b Second integer, to be encoded in the least-significant + * @__c _CASable_bits/2 bits. + * @return value encoding @__c __a and @__c __b. 
+ * @see decode2 + */ + inline _CASable + __encode2(int __a, int __b) //must all be non-negative, actually { - private: - _Compare& _M_comp; + return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); + } - public: - _EqualFromLess(_Compare& __comp) : _M_comp(__comp) { } + /** @brief Decode two integers from one gnu_parallel::_CASable. + * @param __x __gnu_parallel::_CASable to decode integers from. + * @param __a First integer, to be decoded from the most-significant + * @__c _CASable_bits/2 bits of @__c __x. + * @param __b Second integer, to be encoded in the least-significant + * @__c _CASable_bits/2 bits of @__c __x. + * @see __encode2 + */ + inline void + decode2(_CASable __x, int& __a, int& __b) + { + __a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask); + __b = (int)((__x >> 0 ) & _CASable_mask); + } - bool operator()(const _T1& __a, const _T2& __b) - { - return !_M_comp(__a, __b) && !_M_comp(__b, __a); - } - }; + //needed for parallel "numeric", even if "algorithm" not included + /** @brief Equivalent to std::min. */ + template<typename _Tp> + const _Tp& + min(const _Tp& __a, const _Tp& __b) + { return (__a < __b) ? __a : __b; } -/** @brief Similar to std::binder1st, - * but giving the argument types explicitly. */ -template<typename _Predicate, typename argument_type> - class __unary_negate - : public std::unary_function<argument_type, bool> - { - protected: - _Predicate _M_pred; - - public: - explicit - __unary_negate(const _Predicate& __x) : _M_pred(__x) { } - - bool - operator()(const argument_type& __x) - { return !_M_pred(__x); } - }; - -/** @brief Similar to std::binder1st, - * but giving the argument types explicitly. */ -template<typename _Operation, typename _FirstArgumentType, - typename _SecondArgumentType, typename _ResultType> - class __binder1st - : public std::unary_function<_SecondArgumentType, _ResultType> - { - protected: - _Operation _M_op; - _FirstArgumentType _M_value; - - public: - __binder1st(const _Operation& __x, - const _FirstArgumentType& __y) - : _M_op(__x), _M_value(__y) { } - - _ResultType - operator()(const _SecondArgumentType& __x) - { return _M_op(_M_value, __x); } - - // _GLIBCXX_RESOLVE_LIB_DEFECTS - // 109. Missing binders for non-const sequence elements - _ResultType - operator()(_SecondArgumentType& __x) const - { return _M_op(_M_value, __x); } - }; + /** @brief Equivalent to std::max. */ + template<typename _Tp> + const _Tp& + max(const _Tp& __a, const _Tp& __b) + { return (__a > __b) ? __a : __b; } + + /** @brief Constructs predicate for equality from strict weak + * ordering predicate + */ + template<typename _T1, typename _T2, typename _Compare> + class _EqualFromLess : public std::binary_function<_T1, _T2, bool> + { + private: + _Compare& _M_comp; -/** - * @brief Similar to std::binder2nd, but giving the argument types - * explicitly. - */ -template<typename _Operation, typename _FirstArgumentType, - typename _SecondArgumentType, typename _ResultType> - class binder2nd - : public std::unary_function<_FirstArgumentType, _ResultType> - { - protected: - _Operation _M_op; - _SecondArgumentType _M_value; - - public: - binder2nd(const _Operation& __x, - const _SecondArgumentType& __y) - : _M_op(__x), _M_value(__y) { } - - _ResultType - operator()(const _FirstArgumentType& __x) const - { return _M_op(__x, _M_value); } - - // _GLIBCXX_RESOLVE_LIB_DEFECTS - // 109. 
Missing binders for non-const sequence elements - _ResultType - operator()(_FirstArgumentType& __x) - { return _M_op(__x, _M_value); } - }; - -/** @brief Similar to std::equal_to, but allows two different types. */ -template<typename _T1, typename _T2> - struct _EqualTo : std::binary_function<_T1, _T2, bool> - { - bool operator()(const _T1& __t1, const _T2& __t2) const - { return __t1 == __t2; } - }; + public: + _EqualFromLess(_Compare& __comp) : _M_comp(__comp) { } -/** @brief Similar to std::less, but allows two different types. */ -template<typename _T1, typename _T2> - struct _Less : std::binary_function<_T1, _T2, bool> - { - bool - operator()(const _T1& __t1, const _T2& __t2) const - { return __t1 < __t2; } - - bool - operator()(const _T2& __t2, const _T1& __t1) const - { return __t2 < __t1; } - }; - -// Partial specialization for one type. Same as std::less. -template<typename _Tp> -struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> - { - bool - operator()(const _Tp& __x, const _Tp& __y) const - { return __x < __y; } - }; + bool operator()(const _T1& __a, const _T2& __b) + { return !_M_comp(__a, __b) && !_M_comp(__b, __a); } + }; - /** @brief Similar to std::plus, but allows two different types. */ -template<typename _Tp1, typename _Tp2> - struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1> - { - typedef __typeof__(*static_cast<_Tp1*>(NULL) - + *static_cast<_Tp2*>(NULL)) __result; + /** @brief Similar to std::binder1st, + * but giving the argument types explicitly. */ + template<typename _Predicate, typename argument_type> + class __unary_negate + : public std::unary_function<argument_type, bool> + { + protected: + _Predicate _M_pred; + + public: + explicit + __unary_negate(const _Predicate& __x) : _M_pred(__x) { } + + bool + operator()(const argument_type& __x) + { return !_M_pred(__x); } + }; + + /** @brief Similar to std::binder1st, + * but giving the argument types explicitly. */ + template<typename _Operation, typename _FirstArgumentType, + typename _SecondArgumentType, typename _ResultType> + class __binder1st + : public std::unary_function<_SecondArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _FirstArgumentType _M_value; + + public: + __binder1st(const _Operation& __x, const _FirstArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + _ResultType + operator()(const _SecondArgumentType& __x) + { return _M_op(_M_value, __x); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_SecondArgumentType& __x) const + { return _M_op(_M_value, __x); } + }; + + /** + * @brief Similar to std::binder2nd, but giving the argument types + * explicitly. + */ + template<typename _Operation, typename _FirstArgumentType, + typename _SecondArgumentType, typename _ResultType> + class binder2nd + : public std::unary_function<_FirstArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _SecondArgumentType _M_value; + + public: + binder2nd(const _Operation& __x, const _SecondArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + _ResultType + operator()(const _FirstArgumentType& __x) const + { return _M_op(__x, _M_value); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_FirstArgumentType& __x) + { return _M_op(__x, _M_value); } + }; + + /** @brief Similar to std::equal_to, but allows two different types. 
*/ + template<typename _T1, typename _T2> + struct _EqualTo : std::binary_function<_T1, _T2, bool> + { + bool operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 == __t2; } + }; - __result - operator()(const _Tp1& __x, const _Tp2& __y) const - { return __x + __y; } - }; + /** @brief Similar to std::less, but allows two different types. */ + template<typename _T1, typename _T2> + struct _Less : std::binary_function<_T1, _T2, bool> + { + bool + operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 < __t2; } + + bool + operator()(const _T2& __t2, const _T1& __t1) const + { return __t2 < __t1; } + }; + + // Partial specialization for one type. Same as std::less. + template<typename _Tp> + struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> + { + bool + operator()(const _Tp& __x, const _Tp& __y) const + { return __x < __y; } + }; -// Partial specialization for one type. Same as std::plus. -template<typename _Tp> - struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> - { - typedef __typeof__(*static_cast<_Tp*>(NULL) - + *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp& __x, const _Tp& __y) const - { return __x + __y; } - }; + /** @brief Similar to std::plus, but allows two different types. */ + template<typename _Tp1, typename _Tp2> + struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + + *static_cast<_Tp2*>(NULL)) __result; + __result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x + __y; } + }; -/** @brief Similar to std::multiplies, but allows two different types. */ -template<typename _Tp1, typename _Tp2> - struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> - { - typedef __typeof__(*static_cast<_Tp1*>(NULL) - * *static_cast<_Tp2*>(NULL)) __result; + // Partial specialization for one type. Same as std::plus. + template<typename _Tp> + struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + + *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp1& __x, const _Tp2& __y) const - { return __x * __y; } - }; + __result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x + __y; } + }; -// Partial specialization for one type. Same as std::multiplies. -template<typename _Tp> - struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> - { - typedef __typeof__(*static_cast<_Tp*>(NULL) - * *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp& __x, const _Tp& __y) const - { return __x * __y; } - }; + /** @brief Similar to std::multiplies, but allows two different types. */ + template<typename _Tp1, typename _Tp2> + struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + * *static_cast<_Tp2*>(NULL)) __result; + __result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x * __y; } + }; -template<typename _Tp, typename _DifferenceTp> - class _PseudoSequence; + // Partial specialization for one type. Same as std::multiplies. + template<typename _Tp> + struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + * *static_cast<_Tp*>(NULL)) __result; -/** @brief _Iterator associated with __gnu_parallel::_PseudoSequence. - * If features the usual random-access iterator functionality. - * @param _Tp Sequence _M_value type. 
- * @param _DifferenceType Sequence difference type. - */ -template<typename _Tp, typename _DifferenceTp> - class _PseudoSequenceIterator - { - public: - typedef _DifferenceTp _DifferenceType; + __result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x * __y; } + }; - private: - const _Tp& _M_val; - _DifferenceType _M_pos; - public: - _PseudoSequenceIterator(const _Tp& _M_val, _DifferenceType _M_pos) - : _M_val(_M_val), _M_pos(_M_pos) { } + template<typename _Tp, typename _DifferenceTp> + class _PseudoSequence; - // Pre-increment operator. - _PseudoSequenceIterator& - operator++() + /** @brief _Iterator associated with __gnu_parallel::_PseudoSequence. + * If features the usual random-access iterator functionality. + * @param _Tp Sequence _M_value type. + * @param _DifferenceType Sequence difference type. + */ + template<typename _Tp, typename _DifferenceTp> + class _PseudoSequenceIterator { - ++_M_pos; - return *this; - } + public: + typedef _DifferenceTp _DifferenceType; - // Post-increment operator. - const _PseudoSequenceIterator - operator++(int) - { return _PseudoSequenceIterator(_M_pos++); } + private: + const _Tp& _M_val; + _DifferenceType _M_pos; - const _Tp& - operator*() const - { return _M_val; } + public: + _PseudoSequenceIterator(const _Tp& __val, _DifferenceType __pos) + : _M_val(__val), _M_pos(__pos) { } - const _Tp& - operator[](_DifferenceType) const - { return _M_val; } - - bool - operator==(const _PseudoSequenceIterator& __i2) - { return _M_pos == __i2._M_pos; } - - _DifferenceType - operator!=(const _PseudoSequenceIterator& __i2) - { return _M_pos != __i2._M_pos; } - - _DifferenceType - operator-(const _PseudoSequenceIterator& __i2) - { return _M_pos - __i2._M_pos; } - }; - -/** @brief Sequence that conceptually consists of multiple copies of - the same element. - * The copies are not stored explicitly, of course. - * @param _Tp Sequence _M_value type. - * @param _DifferenceType Sequence difference type. - */ -template<typename _Tp, typename _DifferenceTp> - class _PseudoSequence - { - public: - typedef _DifferenceTp _DifferenceType; - - // Better cast down to uint64_t, than up to _DifferenceTp. - typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator; + // Pre-increment operator. + _PseudoSequenceIterator& + operator++() + { + ++_M_pos; + return *this; + } - /** @brief Constructor. - * @param _M_val Element of the sequence. - * @param __count Number of (virtual) copies. + // Post-increment operator. + const _PseudoSequenceIterator + operator++(int) + { return _PseudoSequenceIterator(_M_pos++); } + + const _Tp& + operator*() const + { return _M_val; } + + const _Tp& + operator[](_DifferenceType) const + { return _M_val; } + + bool + operator==(const _PseudoSequenceIterator& __i2) + { return _M_pos == __i2._M_pos; } + + _DifferenceType + operator!=(const _PseudoSequenceIterator& __i2) + { return _M_pos != __i2._M_pos; } + + _DifferenceType + operator-(const _PseudoSequenceIterator& __i2) + { return _M_pos - __i2._M_pos; } + }; + + /** @brief Sequence that conceptually consists of multiple copies of + the same element. + * The copies are not stored explicitly, of course. + * @param _Tp Sequence _M_value type. + * @param _DifferenceType Sequence difference type. */ - _PseudoSequence(const _Tp& _M_val, _DifferenceType __count) - : _M_val(_M_val), __count(__count) { } - - /** @brief Begin iterator. */ - iterator - begin() const - { return iterator(_M_val, 0); } - - /** @brief End iterator. 
*/ - iterator - end() const - { return iterator(_M_val, __count); } - - private: - const _Tp& _M_val; - _DifferenceType __count; - }; - -/** @brief Functor that does nothing */ -template<typename _ValueTp> - class _VoidFunctor - { - inline void - operator()(const _ValueTp& __v) const { } - }; - -/** @brief Compute the median of three referenced elements, - according to @__c __comp. - * @param __a First iterator. - * @param __b Second iterator. - * @param __c Third iterator. - * @param __comp Comparator. - */ -template<typename _RAIter, typename _Compare> - _RAIter - __median_of_three_iterators(_RAIter __a, _RAIter __b, - _RAIter __c, _Compare& __comp) - { - if (__comp(*__a, *__b)) - if (__comp(*__b, *__c)) - return __b; + template<typename _Tp, typename _DifferenceTp> + class _PseudoSequence + { + public: + typedef _DifferenceTp _DifferenceType; + + // Better cast down to uint64_t, than up to _DifferenceTp. + typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator; + + /** @brief Constructor. + * @param _M_val Element of the sequence. + * @param __count Number of (virtual) copies. + */ + _PseudoSequence(const _Tp& __val, _DifferenceType __count) + : _M_val(__val), _M_count(__count) { } + + /** @brief Begin iterator. */ + iterator + begin() const + { return iterator(_M_val, 0); } + + /** @brief End iterator. */ + iterator + end() const + { return iterator(_M_val, _M_count); } + + private: + const _Tp& _M_val; + _DifferenceType _M_count; + }; + + /** @brief Functor that does nothing */ + template<typename _ValueTp> + class _VoidFunctor + { + inline void + operator()(const _ValueTp& __v) const { } + }; + + /** @brief Compute the median of three referenced elements, + according to @__c __comp. + * @param __a First iterator. + * @param __b Second iterator. + * @param __c Third iterator. + * @param __comp Comparator. + */ + template<typename _RAIter, typename _Compare> + _RAIter + __median_of_three_iterators(_RAIter __a, _RAIter __b, + _RAIter __c, _Compare& __comp) + { + if (__comp(*__a, *__b)) + if (__comp(*__b, *__c)) + return __b; + else + if (__comp(*__a, *__c)) + return __c; + else + return __a; else - if (__comp(*__a, *__c)) - return __c; - else - return __a; - else - { - // Just swap __a and __b. - if (__comp(*__a, *__c)) - return __a; - else - if (__comp(*__b, *__c)) - return __c; - else - return __b; - } - } + { + // Just swap __a and __b. + if (__comp(*__a, *__c)) + return __a; + else + if (__comp(*__b, *__c)) + return __c; + else + return __b; + } + } #define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition) diff --git a/libstdc++-v3/include/parallel/checkers.h b/libstdc++-v3/include/parallel/checkers.h index 70cc949e3fe..8abfc991e67 100644 --- a/libstdc++-v3/include/parallel/checkers.h +++ b/libstdc++-v3/include/parallel/checkers.h @@ -68,5 +68,6 @@ namespace __gnu_parallel return true; } +} #endif /* _GLIBCXX_PARALLEL_CHECKERS_H */ diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h index 31ed87d16f3..307be0b52f0 100644 --- a/libstdc++-v3/include/parallel/equally_split.h +++ b/libstdc++-v3/include/parallel/equally_split.h @@ -1,6 +1,6 @@ // -*- C++ -*- -// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. 
This library is free
// software; you can redistribute it and/or modify it under the terms
@@ -33,57 +33,56 @@
 namespace __gnu_parallel
 {
-/** @brief function to split a sequence into parts of almost equal size.
- *
- * The resulting sequence __s of length __num_threads+1 contains the splitting
- * positions when splitting the range [0,__n) into parts of almost
- * equal size (plus minus 1). The first entry is 0, the last one
-* n. There may result empty parts.
- * @param __n Number of elements
- * @param __num_threads Number of parts
- * @param __s Splitters
- * @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */
-template<typename _DifferenceType, typename _OutputIterator>
-  _OutputIterator
-  equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
-                _OutputIterator __s)
-  {
-    _DifferenceType __chunk_length = __n / __num_threads;
-    _DifferenceType __num_longer_chunks = __n % __num_threads;
-    _DifferenceType __pos = 0;
-    for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
-      {
-        *__s++ = __pos;
-        __pos += (__i < __num_longer_chunks) ?
-            (__chunk_length + 1) : __chunk_length;
-      }
-    *__s++ = __n;
-    return __s;
-  }
+  /** @brief Function to split a sequence into parts of almost equal size.
+   *
+   *  The resulting sequence __s of length __num_threads+1 contains the
+   *  splitting positions when splitting the range [0,__n) into parts of
+   *  almost equal size (plus or minus 1).  The first entry is 0, the last
+   *  one n.  Some parts may be empty.
+   *  @param __n Number of elements
+   *  @param __num_threads Number of parts
+   *  @param __s Splitters
+   *  @returns End of __splitter sequence, i.e. @c __s+__num_threads+1 */
+  template<typename _DifferenceType, typename _OutputIterator>
+    _OutputIterator
+    equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
+                  _OutputIterator __s)
+    {
+      _DifferenceType __chunk_length = __n / __num_threads;
+      _DifferenceType __num_longer_chunks = __n % __num_threads;
+      _DifferenceType __pos = 0;
+      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
+        {
+          *__s++ = __pos;
+          __pos += ((__i < __num_longer_chunks)
+                    ? (__chunk_length + 1) : __chunk_length);
+        }
+      *__s++ = __n;
+      return __s;
+    }
-
-/** @brief function to split a sequence into parts of almost equal size.
- *
- * Returns the position of the splitting point between
- * thread number __thread_no (included) and
- * thread number __thread_no+1 (excluded).
- * @param __n Number of elements
- * @param __num_threads Number of parts
- * @returns splitting point */
-template<typename _DifferenceType>
-  _DifferenceType
-  equally_split_point(_DifferenceType __n,
-                      _ThreadIndex __num_threads,
-                      _ThreadIndex __thread_no)
-  {
-    _DifferenceType __chunk_length = __n / __num_threads;
-    _DifferenceType __num_longer_chunks = __n % __num_threads;
-    if (__thread_no < __num_longer_chunks)
-      return __thread_no * (__chunk_length + 1);
-    else
-      return __num_longer_chunks * (__chunk_length + 1)
+  /** @brief Function to compute a single splitting position when
+   *  splitting a sequence into parts of almost equal size.
+   *
+   *  Returns the position of the splitting point between
+   *  thread number __thread_no (included) and
+   *  thread number __thread_no+1 (excluded).
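+   *  (Worked example, editorial: for __n == 10 and __num_threads == 4,
+   *   __chunk_length == 2 and __num_longer_chunks == 2, so equally_split
+   *   above writes the splitters 0, 3, 6, 8, 10, i.e. parts of size
+   *   3, 3, 2, 2, and equally_split_point(10, 4, 2) returns 6.)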
+ * @param __n Number of elements + * @param __num_threads Number of parts + * @returns splitting point */ + template<typename _DifferenceType> + _DifferenceType + equally_split_point(_DifferenceType __n, + _ThreadIndex __num_threads, + _ThreadIndex __thread_no) + { + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __num_longer_chunks = __n % __num_threads; + if (__thread_no < __num_longer_chunks) + return __thread_no * (__chunk_length + 1); + else + return __num_longer_chunks * (__chunk_length + 1) + (__thread_no - __num_longer_chunks) * __chunk_length; - } + } } #endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */ diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h index 050a80d8af0..b4e581488ae 100644 --- a/libstdc++-v3/include/parallel/find.h +++ b/libstdc++-v3/include/parallel/find.h @@ -42,360 +42,363 @@ namespace __gnu_parallel { -/** - * @brief Parallel std::find, switch for different algorithms. - * @param __begin1 Begin iterator of first sequence. - * @param __end1 End iterator of first sequence. - * @param __begin2 Begin iterator of second sequence. Must have same - * length as first sequence. - * @param __pred Find predicate. - * @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) - * @return Place of finding in both sequences. - */ -template<typename _RAIter1, - typename _RAIter2, - typename _Pred, - typename _Selector> - inline std::pair<_RAIter1, _RAIter2> - __find_template(_RAIter1 __begin1, _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred, _Selector __selector) - { - switch (_Settings::get().find_algorithm) - { - case GROWING_BLOCKS: - return __find_template(__begin1, __end1, __begin2, __pred, __selector, - growing_blocks_tag()); - case CONSTANT_SIZE_BLOCKS: - return __find_template(__begin1, __end1, __begin2, __pred, __selector, - constant_size_blocks_tag()); - case EQUAL_SPLIT: - return __find_template(__begin1, __end1, __begin2, __pred, __selector, - equal_split_tag()); - default: - _GLIBCXX_PARALLEL_ASSERT(false); - return std::make_pair(__begin1, __begin2); - } - } + /** + * @brief Parallel std::find, switch for different algorithms. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Must have same + * length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + inline std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector) + { + switch (_Settings::get().find_algorithm) + { + case GROWING_BLOCKS: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, growing_blocks_tag()); + case CONSTANT_SIZE_BLOCKS: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, constant_size_blocks_tag()); + case EQUAL_SPLIT: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, equal_split_tag()); + default: + _GLIBCXX_PARALLEL_ASSERT(false); + return std::make_pair(__begin1, __begin2); + } + } #if _GLIBCXX_FIND_EQUAL_SPLIT -/** - * @brief Parallel std::find, equal splitting variant. - * @param __begin1 Begin iterator of first sequence. - * @param __end1 End iterator of first sequence. - * @param __begin2 Begin iterator of second sequence. 
Second __sequence - * must have same length as first sequence. - * @param __pred Find predicate. - * @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) - * @return Place of finding in both sequences. - */ -template<typename _RAIter1, - typename _RAIter2, - typename _Pred, - typename _Selector> - std::pair<_RAIter1, _RAIter2> - __find_template(_RAIter1 __begin1, - _RAIter1 __end1, - _RAIter2 __begin2, - _Pred __pred, - _Selector __selector, - equal_split_tag) - { - _GLIBCXX_CALL(__end1 - __begin1) - - typedef std::iterator_traits<_RAIter1> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename _TraitsType::value_type _ValueType; - - _DifferenceType __length = __end1 - __begin1; - _DifferenceType __result = __length; - _DifferenceType* __borders; - - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); - - _ThreadIndex __num_threads = __get_max_threads(); -# pragma omp parallel num_threads(__num_threads) + /** + * @brief Parallel std::find, equal splitting variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, + _Selector __selector, equal_split_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + _DifferenceType __length = __end1 - __begin1; + _DifferenceType __result = __length; + _DifferenceType* __borders; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel num_threads(__num_threads) { -# pragma omp single - { - __num_threads = omp_get_num_threads(); - __borders = new _DifferenceType[__num_threads + 1]; - equally_split(__length, __num_threads, __borders); - } //single - - _ThreadIndex __iam = omp_get_thread_num(); - _DifferenceType __start = __borders[__iam], - __stop = __borders[__iam + 1]; - - _RAIter1 __i1 = __begin1 + __start; - _RAIter2 __i2 = __begin2 + __start; - for (_DifferenceType __pos = __start; __pos < __stop; ++__pos) - { - #pragma omp flush(__result) - // Result has been set to something lower. - if (__result < __pos) - break; - - if (__selector(__i1, __i2, __pred)) - { - omp_set_lock(&__result_lock); - if (__pos < __result) - __result = __pos; - omp_unset_lock(&__result_lock); - break; - } - ++__i1; - ++__i2; - } +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __borders = new _DifferenceType[__num_threads + 1]; + equally_split(__length, __num_threads, __borders); + } //single + + _ThreadIndex __iam = omp_get_thread_num(); + _DifferenceType __start = __borders[__iam], + __stop = __borders[__iam + 1]; + + _RAIter1 __i1 = __begin1 + __start; + _RAIter2 __i2 = __begin2 + __start; + for (_DifferenceType __pos = __start; __pos < __stop; ++__pos) + { +# pragma omp flush(__result) + // Result has been set to something lower. 
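+               // (Editorial note: the flush only makes this read a
+               // cheap, unsynchronized hint, so a thread may scan a
+               // little past a better position; the authoritative
+               // minimum is always updated under __result_lock below,
+               // hence the returned __result is exact.)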
+ if (__result < __pos) + break; + + if (__selector(__i1, __i2, __pred)) + { + omp_set_lock(&__result_lock); + if (__pos < __result) + __result = __pos; + omp_unset_lock(&__result_lock); + break; + } + ++__i1; + ++__i2; + } } //parallel - omp_destroy_lock(&__result_lock); - delete[] __borders; + omp_destroy_lock(&__result_lock); + delete[] __borders; - return - std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); - } + return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } #endif #if _GLIBCXX_FIND_GROWING_BLOCKS -/** - * @brief Parallel std::find, growing block size variant. - * @param __begin1 Begin iterator of first sequence. - * @param __end1 End iterator of first sequence. - * @param __begin2 Begin iterator of second sequence. Second __sequence - * must have same length as first sequence. - * @param __pred Find predicate. - * @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) - * @return Place of finding in both sequences. - * @see __gnu_parallel::_Settings::find_sequential_search_size - * @see __gnu_parallel::_Settings::find_initial_block_size - * @see __gnu_parallel::_Settings::find_maximum_block_size - * @see __gnu_parallel::_Settings::find_increasing_factor - * - * There are two main differences between the growing blocks and - * the constant-size blocks variants. - * 1. For GB, the block size grows; for CSB, the block size is fixed. - * 2. For GB, the blocks are allocated dynamically; - * for CSB, the blocks are allocated in a predetermined manner, - * namely spacial round-robin. - */ -template<typename _RAIter1, - typename _RAIter2, - typename _Pred, - typename _Selector> - std::pair<_RAIter1, _RAIter2> - __find_template(_RAIter1 __begin1, _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred, _Selector __selector, - growing_blocks_tag) - { - _GLIBCXX_CALL(__end1 - __begin1) - - typedef std::iterator_traits<_RAIter1> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename _TraitsType::value_type _ValueType; - - const _Settings& __s = _Settings::get(); - - _DifferenceType __length = __end1 - __begin1; - - _DifferenceType __sequential_search_size = - std::min<_DifferenceType>(__length, __s.find_sequential_search_size); - - // Try it sequentially first. - std::pair<_RAIter1, _RAIter2> __find_seq_result = - __selector._M_sequential_algorithm( - __begin1, __begin1 + __sequential_search_size, __begin2, __pred); - - if (__find_seq_result.first != (__begin1 + __sequential_search_size)) - return __find_seq_result; - - // Index of beginning of next free block (after sequential find). - _DifferenceType __next_block_start = __sequential_search_size; - _DifferenceType __result = __length; - - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); - - _ThreadIndex __num_threads = __get_max_threads(); -# pragma omp parallel shared(__result) num_threads(__num_threads) + /** + * @brief Parallel std::find, growing block size variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. 
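+   * (Editorial example: with settings of, say, find_initial_block_size
+   *  == 256 and find_increasing_factor == 2, threads claim blocks of
+   *  256, 512, 1024, ... elements from the shared __next_block_start
+   *  counter, capped at find_maximum_block_size; the exact defaults
+   *  live in parallel/settings.h.)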
+ * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_initial_block_size + * @see __gnu_parallel::_Settings::find_maximum_block_size + * @see __gnu_parallel::_Settings::find_increasing_factor + * + * There are two main differences between the growing blocks and + * the constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + * 2. For GB, the blocks are allocated dynamically; + * for CSB, the blocks are allocated in a predetermined manner, + * namely spacial round-robin. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector, + growing_blocks_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __length = __end1 - __begin1; + + _DifferenceType + __sequential_search_size = std::min<_DifferenceType> + (__length, __s.find_sequential_search_size); + + // Try it sequentially first. + std::pair<_RAIter1, _RAIter2> + __find_seq_result = __selector._M_sequential_algorithm + (__begin1, __begin1 + __sequential_search_size, + __begin2, __pred); + + if (__find_seq_result.first != (__begin1 + __sequential_search_size)) + return __find_seq_result; + + // Index of beginning of next free block (after sequential find). + _DifferenceType __next_block_start = __sequential_search_size; + _DifferenceType __result = __length; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel shared(__result) num_threads(__num_threads) { # pragma omp single - __num_threads = omp_get_num_threads(); + __num_threads = omp_get_num_threads(); - // Not within first __k elements -> start parallel. - _ThreadIndex __iam = omp_get_thread_num(); + // Not within first __k elements -> start parallel. + _ThreadIndex __iam = omp_get_thread_num(); - _DifferenceType __block_size = __s.find_initial_block_size; - _DifferenceType __start = - __fetch_and_add<_DifferenceType>(&__next_block_start, __block_size); + _DifferenceType __block_size = __s.find_initial_block_size; + _DifferenceType __start = __fetch_and_add<_DifferenceType> + (&__next_block_start, __block_size); - // Get new block, update pointer to next block. - _DifferenceType __stop = - std::min<_DifferenceType>(__length, __start + __block_size); + // Get new block, update pointer to next block. + _DifferenceType __stop = + std::min<_DifferenceType>(__length, __start + __block_size); - std::pair<_RAIter1, _RAIter2> __local_result; + std::pair<_RAIter1, _RAIter2> __local_result; - while (__start < __length) - { + while (__start < __length) + { # pragma omp flush(__result) - // Get new value of result. - if (__result < __start) - { - // No chance to find first element. - break; - } - - __local_result = __selector._M_sequential_algorithm( - __begin1 + __start, __begin1 + __stop, - __begin2 + __start, __pred); - if (__local_result.first != (__begin1 + __stop)) - { - omp_set_lock(&__result_lock); - if ((__local_result.first - __begin1) < __result) - { - __result = __local_result.first - __begin1; - - // Result cannot be in future blocks, stop algorithm. 
- __fetch_and_add<_DifferenceType>( - &__next_block_start, __length); - } - omp_unset_lock(&__result_lock); - } - - __block_size = std::min<_DifferenceType>( - __block_size * __s.find_increasing_factor, - __s.find_maximum_block_size); - - // Get new block, update pointer to next block. - __start = - __fetch_and_add<_DifferenceType>( - &__next_block_start, __block_size); - __stop = ((__length < (__start + __block_size)) - ? __length : (__start + __block_size)); - } + // Get new value of result. + if (__result < __start) + { + // No chance to find first element. + break; + } + + __local_result = __selector._M_sequential_algorithm + (__begin1 + __start, __begin1 + __stop, + __begin2 + __start, __pred); + + if (__local_result.first != (__begin1 + __stop)) + { + omp_set_lock(&__result_lock); + if ((__local_result.first - __begin1) < __result) + { + __result = __local_result.first - __begin1; + + // Result cannot be in future blocks, stop algorithm. + __fetch_and_add<_DifferenceType>(&__next_block_start, + __length); + } + omp_unset_lock(&__result_lock); + } + + __block_size = std::min<_DifferenceType> + (__block_size * __s.find_increasing_factor, + __s.find_maximum_block_size); + + // Get new block, update pointer to next block. + __start = __fetch_and_add<_DifferenceType>(&__next_block_start, + __block_size); + __stop = (__length < (__start + __block_size) + ? __length : (__start + __block_size)); + } } //parallel - omp_destroy_lock(&__result_lock); + omp_destroy_lock(&__result_lock); - // Return iterator on found element. - return - std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); - } + // Return iterator on found element. + return + std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } #endif #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS -/** - * @brief Parallel std::find, constant block size variant. - * @param __begin1 Begin iterator of first sequence. - * @param __end1 End iterator of first sequence. - * @param __begin2 Begin iterator of second sequence. Second __sequence - * must have same length as first sequence. - * @param __pred Find predicate. - * @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) - * @return Place of finding in both sequences. - * @see __gnu_parallel::_Settings::find_sequential_search_size - * @see __gnu_parallel::_Settings::find_block_size - * There are two main differences between the growing blocks and the - * constant-size blocks variants. - * 1. For GB, the block size grows; for CSB, the block size is fixed. - * 2. For GB, the blocks are allocated dynamically; for CSB, the - * blocks are allocated in a predetermined manner, namely spacial - * round-robin. - */ -template<typename _RAIter1, - typename _RAIter2, - typename _Pred, - typename _Selector> - std::pair<_RAIter1, _RAIter2> - __find_template(_RAIter1 __begin1, _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred, _Selector __selector, - constant_size_blocks_tag) - { - _GLIBCXX_CALL(__end1 - __begin1) - typedef std::iterator_traits<_RAIter1> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename _TraitsType::value_type _ValueType; - - const _Settings& __s = _Settings::get(); - - _DifferenceType __length = __end1 - __begin1; - - _DifferenceType __sequential_search_size = std::min<_DifferenceType>( - __length, __s.find_sequential_search_size); - - // Try it sequentially first. 
- std::pair<_RAIter1, _RAIter2> __find_seq_result = - __selector._M_sequential_algorithm( - __begin1, __begin1 + __sequential_search_size, __begin2, __pred); - - if (__find_seq_result.first != (__begin1 + __sequential_search_size)) - return __find_seq_result; - - _DifferenceType __result = __length; - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); - - // Not within first __sequential_search_size elements -> start parallel. - - _ThreadIndex __num_threads = __get_max_threads(); -# pragma omp parallel shared(__result) num_threads(__num_threads) + /** + * @brief Parallel std::find, constant block size variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. + * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_block_size + * There are two main differences between the growing blocks and the + * constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + * 2. For GB, the blocks are allocated dynamically; for CSB, the + * blocks are allocated in a predetermined manner, namely spacial + * round-robin. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector, + constant_size_blocks_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __length = __end1 - __begin1; + + _DifferenceType __sequential_search_size = std::min<_DifferenceType> + (__length, __s.find_sequential_search_size); + + // Try it sequentially first. + std::pair<_RAIter1, _RAIter2> + __find_seq_result = __selector._M_sequential_algorithm + (__begin1, __begin1 + __sequential_search_size, __begin2, __pred); + + if (__find_seq_result.first != (__begin1 + __sequential_search_size)) + return __find_seq_result; + + _DifferenceType __result = __length; + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + // Not within first __sequential_search_size elements -> start parallel. + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel shared(__result) num_threads(__num_threads) { # pragma omp single - __num_threads = omp_get_num_threads(); + __num_threads = omp_get_num_threads(); - _ThreadIndex __iam = omp_get_thread_num(); - _DifferenceType __block_size = __s.find_initial_block_size; + _ThreadIndex __iam = omp_get_thread_num(); + _DifferenceType __block_size = __s.find_initial_block_size; - // First element of thread's current iteration. - _DifferenceType __iteration_start = __sequential_search_size; + // First element of thread's current iteration. + _DifferenceType __iteration_start = __sequential_search_size; - // Where to work (initialization). - _DifferenceType __start = __iteration_start + __iam * __block_size; - _DifferenceType __stop = - std::min<_DifferenceType>(__length, __start + __block_size); + // Where to work (initialization). 
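+         // (Editorial note: unlike the growing-blocks variant, the
+         // block size stays at find_initial_block_size here; thread
+         // __iam statically owns blocks __iam, __iam + __num_threads,
+         // __iam + 2 * __num_threads, ... past the sequential prefix,
+         // which is the predetermined "spatial round-robin" allocation
+         // announced in the comment above.)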
+ _DifferenceType __start = __iteration_start + __iam * __block_size; + _DifferenceType __stop = std::min<_DifferenceType>(__length, + __start + + __block_size); - std::pair<_RAIter1, _RAIter2> __local_result; + std::pair<_RAIter1, _RAIter2> __local_result; - while (__start < __length) - { - // Get new value of result. + while (__start < __length) + { + // Get new value of result. # pragma omp flush(__result) - // No chance to find first element. - if (__result < __start) - break; - __local_result = __selector._M_sequential_algorithm( - __begin1 + __start, __begin1 + __stop, - __begin2 + __start, __pred); - if (__local_result.first != (__begin1 + __stop)) - { - omp_set_lock(&__result_lock); - if ((__local_result.first - __begin1) < __result) - __result = __local_result.first - __begin1; - omp_unset_lock(&__result_lock); - // Will not find better value in its interval. - break; - } - - __iteration_start += __num_threads * __block_size; - - // Where to work. - __start = __iteration_start + __iam * __block_size; - __stop = std::min<_DifferenceType>( - __length, __start + __block_size); - } + // No chance to find first element. + if (__result < __start) + break; + + __local_result = __selector._M_sequential_algorithm + (__begin1 + __start, __begin1 + __stop, + __begin2 + __start, __pred); + + if (__local_result.first != (__begin1 + __stop)) + { + omp_set_lock(&__result_lock); + if ((__local_result.first - __begin1) < __result) + __result = __local_result.first - __begin1; + omp_unset_lock(&__result_lock); + // Will not find better value in its interval. + break; + } + + __iteration_start += __num_threads * __block_size; + + // Where to work. + __start = __iteration_start + __iam * __block_size; + __stop = std::min<_DifferenceType>(__length, + __start + __block_size); + } } //parallel - omp_destroy_lock(&__result_lock); + omp_destroy_lock(&__result_lock); - // Return iterator on found element. - return - std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); - } + // Return iterator on found element. + return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } #endif } // end namespace diff --git a/libstdc++-v3/include/parallel/find_selectors.h b/libstdc++-v3/include/parallel/find_selectors.h index 0d385bc6c94..df77978a9dd 100644 --- a/libstdc++-v3/include/parallel/find_selectors.h +++ b/libstdc++-v3/include/parallel/find_selectors.h @@ -103,12 +103,12 @@ namespace __gnu_parallel typename _Pred> std::pair<_RAIter1, _RAIter2> _M_sequential_algorithm(_RAIter1 __begin1, - _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred) + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) { // Passed end iterator is one short. _RAIter1 __spot = adjacent_find(__begin1, __end1 + 1, - __pred, sequential_tag()); + __pred, sequential_tag()); if (__spot == (__end1 + 1)) __spot = __end1; return std::make_pair(__spot, __begin2); @@ -141,56 +141,57 @@ namespace __gnu_parallel typename _Pred> std::pair<_RAIter1, _RAIter2> _M_sequential_algorithm(_RAIter1 __begin1, - _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred) - { return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); - } + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { return mismatch(__begin1, __end1, __begin2, + __pred, sequential_tag()); } }; /** @brief Test predicate on several elements. 
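  * (Editorial note: this selector drives the parallel find_first_of;
  *  each position test in its operator() scans the whole candidate
  *  range [_M_begin, _M_end), so the per-element cost is linear in
  *  the number of candidates.)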
*/ template<typename _FIterator> - struct __find_first_of_selector : public __generic_find_selector - { - _FIterator _M_begin; - _FIterator _M_end; - - explicit __find_first_of_selector(_FIterator __begin, _FIterator __end) - : _M_begin(__begin), _M_end(__end) { } - - /** @brief Test on one position. - * @param __i1 _Iterator on first sequence. - * @param __i2 _Iterator on second sequence (unused). - * @param __pred Find predicate. */ - template<typename _RAIter1, typename _RAIter2, - typename _Pred> - bool - operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) - { - for (_FIterator __pos_in_candidates = _M_begin; - __pos_in_candidates != _M_end; ++__pos_in_candidates) - if (__pred(*__i1, *__pos_in_candidates)) - return true; - return false; - } - - /** @brief Corresponding sequential algorithm on a sequence. - * @param __begin1 Begin iterator of first sequence. - * @param __end1 End iterator of first sequence. - * @param __begin2 Begin iterator of second sequence. - * @param __pred Find predicate. */ - template<typename _RAIter1, typename _RAIter2, - typename _Pred> - std::pair<_RAIter1, _RAIter2> - _M_sequential_algorithm(_RAIter1 __begin1, - _RAIter1 __end1, - _RAIter2 __begin2, _Pred __pred) - { - return std::make_pair( - find_first_of(__begin1, __end1, _M_begin, _M_end, __pred, - sequential_tag()), __begin2); - } - }; + struct __find_first_of_selector : public __generic_find_selector + { + _FIterator _M_begin; + _FIterator _M_end; + + explicit __find_first_of_selector(_FIterator __begin, + _FIterator __end) + : _M_begin(__begin), _M_end(__end) { } + + /** @brief Test on one position. + * @param __i1 _Iterator on first sequence. + * @param __i2 _Iterator on second sequence (unused). + * @param __pred Find predicate. */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + bool + operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) + { + for (_FIterator __pos_in_candidates = _M_begin; + __pos_in_candidates != _M_end; ++__pos_in_candidates) + if (__pred(*__i1, *__pos_in_candidates)) + return true; + return false; + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. + * @param __pred Find predicate. 
*/ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + std::pair<_RAIter1, _RAIter2> + _M_sequential_algorithm(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { + return std::make_pair(find_first_of(__begin1, __end1, + _M_begin, _M_end, __pred, + sequential_tag()), __begin2); + } + }; } #endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */ diff --git a/libstdc++-v3/include/parallel/for_each.h b/libstdc++-v3/include/parallel/for_each.h index b585999f514..f758b7ab78a 100644 --- a/libstdc++-v3/include/parallel/for_each.h +++ b/libstdc++-v3/include/parallel/for_each.h @@ -69,31 +69,21 @@ namespace __gnu_parallel _Parallelism __parallelism_tag) { if (__parallelism_tag == parallel_unbalanced) - return __for_each_template_random_access_ed(__begin, __end, __user_op, - __functionality, __reduction, - __reduction_start, - __output, __bound); + return __for_each_template_random_access_ed + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); else if (__parallelism_tag == parallel_omp_loop) - return __for_each_template_random_access_omp_loop( - __begin, __end, __user_op, - __functionality, - __reduction, - __reduction_start, - __output, __bound); + return __for_each_template_random_access_omp_loop + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); else if (__parallelism_tag == parallel_omp_loop_static) - return __for_each_template_random_access_omp_loop( - __begin, __end, __user_op, - __functionality, - __reduction, - __reduction_start, - __output, __bound); + return __for_each_template_random_access_omp_loop + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); else //e. g. parallel_balanced - return __for_each_template_random_access_workstealing(__begin, __end, - __user_op, - __functionality, - __reduction, - __reduction_start, - __output, __bound); + return __for_each_template_random_access_workstealing + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); } } diff --git a/libstdc++-v3/include/parallel/list_partition.h b/libstdc++-v3/include/parallel/list_partition.h index 9c4f48187f0..573085aa872 100644 --- a/libstdc++-v3/include/parallel/list_partition.h +++ b/libstdc++-v3/include/parallel/list_partition.h @@ -48,11 +48,11 @@ namespace __gnu_parallel template<typename _IIter> void __shrink_and_double(std::vector<_IIter>& __os_starts, - size_t& __count_to_two, size_t& __range_length, - const bool __make_twice) + size_t& __count_to_two, size_t& __range_length, + const bool __make_twice) { ++__count_to_two; - if (not __make_twice or __count_to_two < 2) + if (!__make_twice || __count_to_two < 2) __shrink(__os_starts, __count_to_two, __range_length); else { @@ -68,7 +68,7 @@ namespace __gnu_parallel template<typename _IIter> void __shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two, - size_t& __range_length) + size_t& __range_length) { for (typename std::vector<_IIter>::size_type __i = 0; __i <= (__os_starts.size() / 2); ++__i) @@ -112,8 +112,8 @@ namespace __gnu_parallel std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1); - __os_starts[0]= __begin; - _IIter __prev = __begin, __it = __begin; + __os_starts[0] = __begin; + _IIter __prev = __begin, __it = __begin; size_t __dist_limit = 0, __dist = 0; size_t __cur = 1, __next = 1; size_t __range_length = 1; diff --git a/libstdc++-v3/include/parallel/losertree.h 
b/libstdc++-v3/include/parallel/losertree.h index 0da84abe4ab..425cf6d4b6f 100644 --- a/libstdc++-v3/include/parallel/losertree.h +++ b/libstdc++-v3/include/parallel/losertree.h @@ -40,992 +40,1000 @@ namespace __gnu_parallel { - -/** - * @brief Guarded loser/tournament tree. - * - * The smallest element is at the top. - * - * Guarding is done explicitly through one flag _M_sup per element, - * inf is not needed due to a better initialization routine. This - * is a well-performing variant. - * - * @param _Tp the element type - * @param _Compare the comparator to use, defaults to std::less<_Tp> - */ -template<typename _Tp, typename _Compare> -class _LoserTreeBase -{ -protected: - /** @brief Internal representation of a _LoserTree element. */ - struct _Loser - { - /** @brief flag, true iff this is a "maximum" __sentinel. */ - bool _M_sup; - /** @brief __index of the __source __sequence. */ - int _M_source; - /** @brief _M_key of the element in the _LoserTree. */ - _Tp _M_key; - }; - - unsigned int _M_ik, _M_k, _M_offset; - - /** log_2{_M_k} */ - unsigned int _M_log_k; - - /** @brief _LoserTree __elements. */ - _Loser* _M_losers; - - /** @brief _Compare to use. */ - _Compare _M_comp; - /** - * @brief State flag that determines whether the _LoserTree is empty. + * @brief Guarded loser/tournament tree. * - * Only used for building the _LoserTree. - */ - bool _M_first_insert; - -public: - /** - * @brief The constructor. + * The smallest element is at the top. + * + * Guarding is done explicitly through one flag _M_sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. * - * @param __k The number of sequences to merge. - * @param __comp The comparator to use. + * @param _Tp the element type + * @param _Compare the comparator to use, defaults to std::less<_Tp> */ - _LoserTreeBase(unsigned int __k, _Compare __comp) - : _M_comp(__comp) - { - _M_ik = __k; - - // Compute log_2{_M_k} for the _Loser Tree - _M_log_k = __rd_log2(_M_ik - 1) + 1; - - // Next greater power of 2. - _M_k = 1 << _M_log_k; - _M_offset = _M_k; - - // Avoid default-constructing _M_losers[]._M_key - _M_losers - = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser))); - for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i) - _M_losers[__i + _M_k]._M_sup = true; + template<typename _Tp, typename _Compare> + class _LoserTreeBase + { + protected: + /** @brief Internal representation of a _LoserTree element. */ + struct _Loser + { + /** @brief flag, true iff this is a "maximum" __sentinel. */ + bool _M_sup; + /** @brief __index of the __source __sequence. */ + int _M_source; + /** @brief _M_key of the element in the _LoserTree. */ + _Tp _M_key; + }; + + unsigned int _M_ik, _M_k, _M_offset; + + /** log_2{_M_k} */ + unsigned int _M_log_k; + + /** @brief _LoserTree __elements. */ + _Loser* _M_losers; + + /** @brief _Compare to use. */ + _Compare _M_comp; + + /** + * @brief State flag that determines whether the _LoserTree is empty. + * + * Only used for building the _LoserTree. + */ + bool _M_first_insert; + + public: + /** + * @brief The constructor. + * + * @param __k The number of sequences to merge. + * @param __comp The comparator to use. + */ + _LoserTreeBase(unsigned int __k, _Compare __comp) + : _M_comp(__comp) + { + _M_ik = __k; - _M_first_insert = true; - } + // Compute log_2{_M_k} for the _Loser Tree + _M_log_k = __rd_log2(_M_ik - 1) + 1; - /** - * @brief The destructor. 
- */ - ~_LoserTreeBase() - { ::operator delete(_M_losers); } + // Next greater power of 2. + _M_k = 1 << _M_log_k; + _M_offset = _M_k; - /** - * @brief Initializes the sequence "_M_source" with the element "_M_key". - * - * @param _M_key the element to insert - * @param _M_source __index of the __source __sequence - * @param _M_sup flag that determines whether the value to insert is an - * explicit __supremum. - */ - inline void - __insert_start(const _Tp& _M_key, int _M_source, bool _M_sup) - { - unsigned int __pos = _M_k + _M_source; + // Avoid default-constructing _M_losers[]._M_key + _M_losers = static_cast<_Loser*>(::operator new(2 * _M_k + * sizeof(_Loser))); + for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i) + _M_losers[__i + _M_k]._M_sup = true; - if(_M_first_insert) - { - // Construct all keys, so we can easily deconstruct them. - for (unsigned int __i = 0; __i < (2 * _M_k); ++__i) - new(&(_M_losers[__i]._M_key)) _Tp(_M_key); - _M_first_insert = false; + _M_first_insert = true; } - else - new(&(_M_losers[__pos]._M_key)) _Tp(_M_key); - - _M_losers[__pos]._M_sup = _M_sup; - _M_losers[__pos]._M_source = _M_source; - } - /** - * @return the index of the sequence with the smallest element. - */ - int __get_min_source() - { return _M_losers[0]._M_source; } -}; - -/** - * @brief Stable _LoserTree variant. - * - * Provides the stable implementations of insert_start, __init_winner, - * __init and __delete_min_insert. - * - * Unstable variant is done using partial specialisation below. - */ -template<bool __stable/* default == true */, typename _Tp, typename _Compare> -class _LoserTree : public _LoserTreeBase<_Tp, _Compare> -{ - typedef _LoserTreeBase<_Tp, _Compare> Base; - using Base::_M_k; - using Base::_M_losers; - using Base::_M_first_insert; - -public: - _LoserTree(unsigned int __k, _Compare __comp) - : Base::_LoserTreeBase(__k, __comp) - {} - - unsigned int - __init_winner(unsigned int __root) - { - if (__root >= _M_k) + /** + * @brief The destructor. + */ + ~_LoserTreeBase() + { ::operator delete(_M_losers); } + + /** + * @brief Initializes the sequence "_M_source" with the element "__key". + * + * @param __key the element to insert + * @param __source __index of the __source __sequence + * @param __sup flag that determines whether the value to insert is an + * explicit __supremum. + */ + void + __insert_start(const _Tp& __key, int __source, bool __sup) { - return __root; + unsigned int __pos = _M_k + __source; + + if(_M_first_insert) + { + // Construct all keys, so we can easily deconstruct them. + for (unsigned int __i = 0; __i < (2 * _M_k); ++__i) + new(&(_M_losers[__i]._M_key)) _Tp(__key); + _M_first_insert = false; + } + else + new(&(_M_losers[__pos]._M_key)) _Tp(__key); + + _M_losers[__pos]._M_sup = __sup; + _M_losers[__pos]._M_source = __source; } - else + + /** + * @return the index of the sequence with the smallest element. + */ + int __get_min_source() + { return _M_losers[0]._M_source; } + }; + + /** + * @brief Stable _LoserTree variant. + * + * Provides the stable implementations of insert_start, __init_winner, + * __init and __delete_min_insert. + * + * Unstable variant is done using partial specialisation below. 
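+   *
+   * (Editorial note: the _M_k leaves _M_losers[_M_k + __source] hold
+   *  the current heads of the sequences; every inner node records the
+   *  loser of the game between its two subtrees, and node 0 holds the
+   *  overall winner.  Replacing the minimum therefore costs a single
+   *  leaf-to-root replay of log_2{_M_k} comparisons.)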
+ */ + template<bool __stable/* default == true */, typename _Tp, + typename _Compare> + class _LoserTree + : public _LoserTreeBase<_Tp, _Compare> + { + typedef _LoserTreeBase<_Tp, _Compare> _Base; + using _Base::_M_k; + using _Base::_M_losers; + using _Base::_M_first_insert; + + public: + _LoserTree(unsigned int __k, _Compare __comp) + : _Base::_LoserTreeBase(__k, __comp) + { } + + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); - if (_M_losers[__right]._M_sup - || (!_M_losers[__left]._M_sup - && !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. - _M_losers[__root] = _M_losers[__left]; - return __right; - } + if (__root >= _M_k) + return __root; + else + { + unsigned int __left = __init_winner(2 * __root); + unsigned int __right = __init_winner(2 * __root + 1); + if (_M_losers[__right]._M_sup + || (!_M_losers[__left]._M_sup + && !_M_comp(_M_losers[__right]._M_key, + _M_losers[__left]._M_key))) + { + // Left one is less or equal. + _M_losers[__root] = _M_losers[__right]; + return __left; + } + else + { + // Right one is less. + _M_losers[__root] = _M_losers[__left]; + return __right; + } + } } - } - - void __init() - { _M_losers[0] = _M_losers[__init_winner(1)]; } - /** - * @brief Delete the smallest element and insert a new element from - * the previously smallest element's sequence. - * - * This implementation is stable. - */ - // Do not pass a const reference since _M_key will be used as local variable. - void __delete_min_insert(_Tp _M_key, bool _M_sup) - { + void __init() + { _M_losers[0] = _M_losers[__init_winner(1)]; } + + /** + * @brief Delete the smallest element and insert a new element from + * the previously smallest element's sequence. + * + * This implementation is stable. + */ + // Do not pass a const reference since __key will be used as + // local variable. + void + __delete_min_insert(_Tp __key, bool __sup) + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - int _M_source = _M_losers[0]._M_source; - for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) + int __source = _M_losers[0]._M_source; + for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0; + __pos /= 2) + { + // The smaller one gets promoted, ties are broken by _M_source. + if ((__sup && (!_M_losers[__pos]._M_sup + || _M_losers[__pos]._M_source < __source)) + || (!__sup && !_M_losers[__pos]._M_sup + && ((_M_comp(_M_losers[__pos]._M_key, __key)) + || (!_M_comp(__key, _M_losers[__pos]._M_key) + && _M_losers[__pos]._M_source < __source)))) + { + // The other one is smaller. + std::swap(_M_losers[__pos]._M_sup, __sup); + std::swap(_M_losers[__pos]._M_source, __source); + std::swap(_M_losers[__pos]._M_key, __key); + } + } + + _M_losers[0]._M_sup = __sup; + _M_losers[0]._M_source = __source; + _M_losers[0]._M_key = __key; + } + }; + + /** + * @brief Unstable _LoserTree variant. + * + * Stability (non-stable here) is selected with partial specialization. 
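+   * (Editorial note: the essential difference from the stable variant
+   *  above is that ties are no longer broken by _M_source, so elements
+   *  that compare equal may be delivered in any source order.)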
+ */
+ template<typename _Tp, typename _Compare>
+ class _LoserTree</* __stable == */false, _Tp, _Compare>
+ : public _LoserTreeBase<_Tp, _Compare>
+ {
+ typedef _LoserTreeBase<_Tp, _Compare> _Base;
+ using _Base::_M_log_k;
+ using _Base::_M_k;
+ using _Base::_M_losers;
+ using _Base::_M_first_insert;
+
+ public:
+ _LoserTree(unsigned int __k, _Compare __comp)
+ : _Base::_LoserTreeBase(__k, __comp)
+ { }
+
+ /**
+ * Computes the winner of the competition at position "__root".
+ *
+ * Called recursively (starting at the root) to build the initial tree.
+ *
+ * @param __root index of the "game" to start.
+ */
+ unsigned int
+ __init_winner(unsigned int __root)
 {
- // The smaller one gets promoted, ties are broken by _M_source.
- if ((_M_sup && (!_M_losers[__pos]._M_sup
- || _M_losers[__pos]._M_source < _M_source))
- || (!_M_sup && !_M_losers[__pos]._M_sup
- && ((_M_comp(_M_losers[__pos]._M_key, _M_key))
- || (!_M_comp(_M_key, _M_losers[__pos]._M_key)
- && _M_losers[__pos]._M_source < _M_source))))
- {
- // The other one is smaller.
- std::swap(_M_losers[__pos]._M_sup, _M_sup);
- std::swap(_M_losers[__pos]._M_source, _M_source);
- std::swap(_M_losers[__pos]._M_key, _M_key);
- }
+ if (__root >= _M_k)
+ return __root;
+ else
+ {
+ unsigned int __left = __init_winner(2 * __root);
+ unsigned int __right = __init_winner(2 * __root + 1);
+ if (_M_losers[__right]._M_sup
+ || (!_M_losers[__left]._M_sup
+ && !_M_comp(_M_losers[__right]._M_key,
+ _M_losers[__left]._M_key)))
+ {
+ // Left one is less or equal.
+ _M_losers[__root] = _M_losers[__right];
+ return __left;
+ }
+ else
+ {
+ // Right one is less.
+ _M_losers[__root] = _M_losers[__left];
+ return __right;
+ }
+ }
 }
- _M_losers[0]._M_sup = _M_sup;
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_key = _M_key;
- }
-};
-
-/**
- * @brief Unstable _LoserTree variant.
- *
- * Stability (non-stable here) is selected with partial specialization.
- */
-template<typename _Tp, typename _Compare>
-class _LoserTree</* __stable == */false, _Tp, _Compare> :
- public _LoserTreeBase<_Tp, _Compare>
-{
- typedef _LoserTreeBase<_Tp, _Compare> Base;
- using Base::_M_log_k;
- using Base::_M_k;
- using Base::_M_losers;
- using Base::_M_first_insert;
+ void
+ __init()
+ { _M_losers[0] = _M_losers[__init_winner(1)]; }
+
+ /**
+ * Delete the smallest element and insert the element __key
+ * instead.
+ *
+ * @param __key the key to insert
+ * @param __sup true iff __key is an explicitly marked supremum
+ */
+ // Do not pass a const reference since __key will be used as a local
+ // variable.
+ void
+ __delete_min_insert(_Tp __key, bool __sup)
+ {
+#if _GLIBCXX_ASSERTIONS
+ // no dummy sequence can ever be at the top!
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
-public:
- _LoserTree(unsigned int __k, _Compare __comp)
- : Base::_LoserTreeBase(__k, __comp)
- {}
+ int __source = _M_losers[0]._M_source;
+ for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+ __pos /= 2)
+ {
+ // The smaller one gets promoted.
+ if (__sup || (!_M_losers[__pos]._M_sup
+ && _M_comp(_M_losers[__pos]._M_key, __key)))
+ {
+ // The other one is smaller.
+ std::swap(_M_losers[__pos]._M_sup, __sup);
+ std::swap(_M_losers[__pos]._M_source, __source);
+ std::swap(_M_losers[__pos]._M_key, __key);
+ }
+ }
+
+ _M_losers[0]._M_sup = __sup;
+ _M_losers[0]._M_source = __source;
+ _M_losers[0]._M_key = __key;
+ }
+ };

 /**
- * Computes the winner of the competition at position "__root".
- *
- * Called recursively (starting at 0) to build the initial tree.
- *
- * @param __root __index of the "game" to start.
+ * @brief Base class of _LoserTree implementation using pointers.
 */
- unsigned int
- __init_winner (unsigned int __root)
- {
- if (__root >= _M_k)
+ template<typename _Tp, typename _Compare>
+ class _LoserTreePointerBase
+ {
+ protected:
+ /** @brief Internal representation of _LoserTree elements. */
+ struct _Loser
 {
- return __root;
- }
- else
+ bool _M_sup;
+ int _M_source;
+ const _Tp* _M_keyp;
+ };
+
+ unsigned int _M_ik, _M_k, _M_offset;
+ _Loser* _M_losers;
+ _Compare _M_comp;
+
+ public:
+ _LoserTreePointerBase(unsigned int __k,
+ _Compare __comp = std::less<_Tp>())
+ : _M_comp(__comp)
 {
- unsigned int __left = __init_winner (2 * __root);
- unsigned int __right = __init_winner (2 * __root + 1);
- if (_M_losers[__right]._M_sup
- || (!_M_losers[__left]._M_sup
- && !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
- {
- // Left one is less or equal.
- _M_losers[__root] = _M_losers[__right];
- return __left;
- }
- else
- {
- // Right one is less.
- _M_losers[__root] = _M_losers[__left];
- return __right;
- }
+ _M_ik = __k;
+
+ // Next greater power of 2.
+ _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
+ _M_offset = _M_k;
+ _M_losers = new _Loser[_M_k * 2];
+ for (unsigned int __i = _M_ik - 1; __i < _M_k; __i++)
+ _M_losers[__i + _M_k]._M_sup = true;
 }
- }
- inline void
- __init()
- { _M_losers[0] = _M_losers[__init_winner(1)]; }
+ ~_LoserTreePointerBase()
+ { delete[] _M_losers; }
- /**
- * Delete the _M_key smallest element and insert the element _M_key instead.
- *
- * @param _M_key the _M_key to insert
- * @param _M_sup true iff _M_key is an explicitly marked supremum
- */
- // Do not pass a const reference since _M_key will be used as local variable.
- inline void
- __delete_min_insert(_Tp _M_key, bool _M_sup)
- {
-#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top!
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
-#endif
+ int __get_min_source()
+ { return _M_losers[0]._M_source; }
- int _M_source = _M_losers[0]._M_source;
- for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
- {
- // The smaller one gets promoted.
- if (_M_sup || (!_M_losers[__pos]._M_sup
- && _M_comp(_M_losers[__pos]._M_key, _M_key)))
+ void __insert_start(const _Tp& __key, int __source, bool __sup)
 {
- // The other one is smaller.
- std::swap(_M_losers[__pos]._M_sup, _M_sup);
- std::swap(_M_losers[__pos]._M_source, _M_source);
- std::swap(_M_losers[__pos]._M_key, _M_key);
+ unsigned int __pos = _M_k + __source;
+
+ _M_losers[__pos]._M_sup = __sup;
+ _M_losers[__pos]._M_source = __source;
+ _M_losers[__pos]._M_keyp = &__key;
 }
- }
+ };
- _M_losers[0]._M_sup = _M_sup;
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_key = _M_key;
- }
-};
+ /**
+ * @brief Stable _LoserTree implementation.
+ *
+ * The unstable variant is implemented using partial specialization below.
+ */
+ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
+ class _LoserTreePointer
+ : public _LoserTreePointerBase<_Tp, _Compare>
+ {
+ typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
+ using _Base::_M_k;
+ using _Base::_M_losers;
+ public:
+ _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
+ : _Base::_LoserTreePointerBase(__k, __comp)
+ { }
-/**
- * @brief Base class of _Loser Tree implementation using pointers.
- */
-template<typename _Tp, typename _Compare>
-class _LoserTreePointerBase
-{
-protected:
- /** @brief Internal representation of _LoserTree __elements.
*/ - struct _Loser - { - bool _M_sup; - int _M_source; - const _Tp* _M_keyp; - }; - - unsigned int _M_ik, _M_k, _M_offset; - _Loser* _M_losers; - _Compare _M_comp; - -public: - _LoserTreePointerBase(unsigned int __k, _Compare __comp = std::less<_Tp>()) - : _M_comp(__comp) - { - _M_ik = __k; - - // Next greater power of 2. - _M_k = 1 << (__rd_log2(_M_ik - 1) + 1); - _M_offset = _M_k; - _M_losers = new _Loser[_M_k * 2]; - for (unsigned int __i = _M_ik - 1; __i < _M_k; __i++) - _M_losers[__i + _M_k]._M_sup = true; - } - - ~_LoserTreePointerBase() - { ::operator delete[](_M_losers); } - - int __get_min_source() - { return _M_losers[0]._M_source; } - - void __insert_start(const _Tp& _M_key, int _M_source, bool _M_sup) - { - unsigned int __pos = _M_k + _M_source; - - _M_losers[__pos]._M_sup = _M_sup; - _M_losers[__pos]._M_source = _M_source; - _M_losers[__pos]._M_keyp = &_M_key; - } -}; - -/** - * @brief Stable _LoserTree implementation. - * - * The unstable variant is implemented using partial instantiation below. - */ -template<bool __stable/* default == true */, typename _Tp, typename _Compare> -class _LoserTreePointer : public _LoserTreePointerBase<_Tp, _Compare> -{ - typedef _LoserTreePointerBase<_Tp, _Compare> Base; - using Base::_M_k; - using Base::_M_losers; - -public: - _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>()) - : Base::_LoserTreePointerBase(__k, __comp) - {} - - unsigned int - __init_winner(unsigned int __root) - { - if (__root >= _M_k) - { - return __root; - } - else + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); - if (_M_losers[__right]._M_sup - || (!_M_losers[__left]._M_sup - && !_M_comp(*_M_losers[__right]._M_keyp, - *_M_losers[__left]._M_keyp))) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. - _M_losers[__root] = _M_losers[__left]; - return __right; - } + if (__root >= _M_k) + return __root; + else + { + unsigned int __left = __init_winner(2 * __root); + unsigned int __right = __init_winner(2 * __root + 1); + if (_M_losers[__right]._M_sup + || (!_M_losers[__left]._M_sup + && !_M_comp(*_M_losers[__right]._M_keyp, + *_M_losers[__left]._M_keyp))) + { + // Left one is less or equal. + _M_losers[__root] = _M_losers[__right]; + return __left; + } + else + { + // Right one is less. + _M_losers[__root] = _M_losers[__left]; + return __right; + } + } } - } - void __init() - { _M_losers[0] = _M_losers[__init_winner(1)]; } + void __init() + { _M_losers[0] = _M_losers[__init_winner(1)]; } - void __delete_min_insert(const _Tp& _M_key, bool _M_sup) - { + void __delete_min_insert(const _Tp& __key, bool __sup) + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - const _Tp* _M_keyp = &_M_key; - int _M_source = _M_losers[0]._M_source; - for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) - { - // The smaller one gets promoted, ties are broken by _M_source. 
- if ((_M_sup && (!_M_losers[__pos]._M_sup || - _M_losers[__pos]._M_source < _M_source)) || - (!_M_sup && !_M_losers[__pos]._M_sup && - ((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) || - (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) - && _M_losers[__pos]._M_source < _M_source)))) - { - // The other one is smaller. - std::swap(_M_losers[__pos]._M_sup, _M_sup); - std::swap(_M_losers[__pos]._M_source, _M_source); - std::swap(_M_losers[__pos]._M_keyp, _M_keyp); - } + const _Tp* __keyp = &__key; + int __source = _M_losers[0]._M_source; + for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0; + __pos /= 2) + { + // The smaller one gets promoted, ties are broken by __source. + if ((__sup && (!_M_losers[__pos]._M_sup + || _M_losers[__pos]._M_source < __source)) + || (!__sup && !_M_losers[__pos]._M_sup && + ((_M_comp(*_M_losers[__pos]._M_keyp, *__keyp)) + || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp) + && _M_losers[__pos]._M_source < __source)))) + { + // The other one is smaller. + std::swap(_M_losers[__pos]._M_sup, __sup); + std::swap(_M_losers[__pos]._M_source, __source); + std::swap(_M_losers[__pos]._M_keyp, __keyp); + } + } + + _M_losers[0]._M_sup = __sup; + _M_losers[0]._M_source = __source; + _M_losers[0]._M_keyp = __keyp; } + }; - _M_losers[0]._M_sup = _M_sup; - _M_losers[0]._M_source = _M_source; - _M_losers[0]._M_keyp = _M_keyp; - } -}; - -/** - * @brief Unstable _LoserTree implementation. - * - * The stable variant is above. - */ -template<typename _Tp, typename _Compare> -class _LoserTreePointer</* __stable == */false, _Tp, _Compare> : - public _LoserTreePointerBase<_Tp, _Compare> -{ - typedef _LoserTreePointerBase<_Tp, _Compare> Base; - using Base::_M_k; - using Base::_M_losers; - -public: - _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>()) - : Base::_LoserTreePointerBase(__k, __comp) - {} - - unsigned int - __init_winner(unsigned int __root) - { - if (__root >= _M_k) - { - return __root; - } - else + /** + * @brief Unstable _LoserTree implementation. + * + * The stable variant is above. + */ + template<typename _Tp, typename _Compare> + class _LoserTreePointer</* __stable == */false, _Tp, _Compare> + : public _LoserTreePointerBase<_Tp, _Compare> + { + typedef _LoserTreePointerBase<_Tp, _Compare> _Base; + using _Base::_M_k; + using _Base::_M_losers; + + public: + _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>()) + : _Base::_LoserTreePointerBase(__k, __comp) + { } + + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); - if (_M_losers[__right]._M_sup - || (!_M_losers[__left]._M_sup - && !_M_comp(*_M_losers[__right]._M_keyp, - *_M_losers[__left]._M_keyp))) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. - _M_losers[__root] = _M_losers[__left]; - return __right; - } + if (__root >= _M_k) + return __root; + else + { + unsigned int __left = __init_winner(2 * __root); + unsigned int __right = __init_winner(2 * __root + 1); + if (_M_losers[__right]._M_sup + || (!_M_losers[__left]._M_sup + && !_M_comp(*_M_losers[__right]._M_keyp, + *_M_losers[__left]._M_keyp))) + { + // Left one is less or equal. + _M_losers[__root] = _M_losers[__right]; + return __left; + } + else + { + // Right one is less. 
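+ // (Invariant used throughout: each inner node stores the loser of
+ // its game, while the winner's index is passed up to the parent.)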
+ _M_losers[__root] = _M_losers[__left];
+ return __right;
+ }
+ }
 }
- }
- void __init()
- { _M_losers[0] = _M_losers[__init_winner(1)]; }
+ void __init()
+ { _M_losers[0] = _M_losers[__init_winner(1)]; }
- void __delete_min_insert(const _Tp& _M_key, bool _M_sup)
- {
+ void __delete_min_insert(const _Tp& __key, bool __sup)
+ {
#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top!
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+ // no dummy sequence can ever be at the top!
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
- const _Tp* _M_keyp = &_M_key;
- int _M_source = _M_losers[0]._M_source;
- for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
- {
- // The smaller one gets promoted.
- if (_M_sup || (!_M_losers[__pos]._M_sup
- && _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)))
- {
- // The other one is smaller.
- std::swap(_M_losers[__pos]._M_sup, _M_sup);
- std::swap(_M_losers[__pos]._M_source, _M_source);
- std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
- }
+ const _Tp* __keyp = &__key;
+ int __source = _M_losers[0]._M_source;
+ for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+ __pos /= 2)
+ {
+ // The smaller one gets promoted.
+ if (__sup || (!_M_losers[__pos]._M_sup
+ && _M_comp(*_M_losers[__pos]._M_keyp, *__keyp)))
+ {
+ // The other one is smaller.
+ std::swap(_M_losers[__pos]._M_sup, __sup);
+ std::swap(_M_losers[__pos]._M_source, __source);
+ std::swap(_M_losers[__pos]._M_keyp, __keyp);
+ }
+ }
+
+ _M_losers[0]._M_sup = __sup;
+ _M_losers[0]._M_source = __source;
+ _M_losers[0]._M_keyp = __keyp;
 }
+ };
- _M_losers[0]._M_sup = _M_sup;
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_keyp = _M_keyp;
- }
-};
-
-/** @brief Base class for unguarded _LoserTree implementation.
- *
- * The whole element is copied into the tree structure.
- *
- * No guarding is done, therefore not a single input sequence must
- * run empty. Unused __sequence heads are marked with a sentinel which
- * is > all elements that are to be merged.
- *
- * This is a very fast variant.
- */
-template<typename _Tp, typename _Compare>
-class _LoserTreeUnguardedBase
-{
-protected:
- struct _Loser
- {
- int _M_source;
- _Tp _M_key;
- };
-
- unsigned int _M_ik, _M_k, _M_offset;
- _Loser* _M_losers;
- _Compare _M_comp;
-
-public:
- inline
- _LoserTreeUnguardedBase(unsigned int __k, const _Tp _sentinel,
- _Compare __comp = std::less<_Tp>())
- : _M_comp(__comp)
- {
- _M_ik = __k;
-
- // Next greater power of 2.
- _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
- _M_offset = _M_k;
- // Avoid default-constructing _M_losers[]._M_key
- _M_losers
- = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
-
- for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
+ /** @brief Base class for unguarded _LoserTree implementation.
+ *
+ * The whole element is copied into the tree structure.
+ *
+ * No guarding is done, therefore no input sequence may ever
+ * run empty. Unused sequence heads are marked with a sentinel which
+ * is greater than all elements that are to be merged.
+ *
+ * This is a very fast variant.
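+ *
+ * For illustration only (a hypothetical instantiation; assumes int
+ * keys, std::less and <limits>): the caller must supply a sentinel
+ * that no real key exceeds, e.g.
+ * @code
+ *   _LoserTreeUnguarded<true, int, std::less<int> >
+ *     __lt(__k, std::numeric_limits<int>::max());
+ * @endcode
+ * so that an exhausted sequence can never win a comparison.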
+ */ + template<typename _Tp, typename _Compare> + class _LoserTreeUnguardedBase + { + protected: + struct _Loser + { + int _M_source; + _Tp _M_key; + }; + + unsigned int _M_ik, _M_k, _M_offset; + _Loser* _M_losers; + _Compare _M_comp; + + public: + _LoserTreeUnguardedBase(unsigned int __k, const _Tp __sentinel, + _Compare __comp = std::less<_Tp>()) + : _M_comp(__comp) { - _M_losers[__i]._M_key = _sentinel; - _M_losers[__i]._M_source = -1; + _M_ik = __k; + + // Next greater power of 2. + _M_k = 1 << (__rd_log2(_M_ik - 1) + 1); + _M_offset = _M_k; + // Avoid default-constructing _M_losers[]._M_key + _M_losers = static_cast<_Loser*>(::operator new(2 * _M_k + * sizeof(_Loser))); + + for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i) + { + _M_losers[__i]._M_key = __sentinel; + _M_losers[__i]._M_source = -1; + } } - } - inline ~_LoserTreeUnguardedBase() - { ::operator delete(_M_losers); } + ~_LoserTreeUnguardedBase() + { ::operator delete(_M_losers); } - inline int - __get_min_source() - { + int + __get_min_source() + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - return _M_losers[0]._M_source; - } - - inline void - __insert_start(const _Tp& _M_key, int _M_source, bool) - { - unsigned int __pos = _M_k + _M_source; - - new(&(_M_losers[__pos]._M_key)) _Tp(_M_key); - _M_losers[__pos]._M_source = _M_source; - } -}; - -/** - * @brief Stable implementation of unguarded _LoserTree. - * - * Unstable variant is selected below with partial specialization. - */ -template<bool __stable/* default == true */, typename _Tp, typename _Compare> -class _LoserTreeUnguarded : public _LoserTreeUnguardedBase<_Tp, _Compare> -{ - typedef _LoserTreeUnguardedBase<_Tp, _Compare> Base; - using Base::_M_k; - using Base::_M_losers; - -public: - _LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel, - _Compare __comp = std::less<_Tp>()) - : Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp) - {} - - unsigned int - __init_winner(unsigned int __root) - { - if (__root >= _M_k) + return _M_losers[0]._M_source; + } + + void + __insert_start(const _Tp& __key, int __source, bool) { - return __root; + unsigned int __pos = _M_k + __source; + + new(&(_M_losers[__pos]._M_key)) _Tp(__key); + _M_losers[__pos]._M_source = __source; } - else + }; + + /** + * @brief Stable implementation of unguarded _LoserTree. + * + * Unstable variant is selected below with partial specialization. + */ + template<bool __stable/* default == true */, typename _Tp, typename _Compare> + class _LoserTreeUnguarded + : public _LoserTreeUnguardedBase<_Tp, _Compare> + { + typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base; + using _Base::_M_k; + using _Base::_M_losers; + + public: + _LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel, + _Compare __comp = std::less<_Tp>()) + : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp) + { } + + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); - if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. 
- _M_losers[__root] = _M_losers[__left];
- return __right;
- }
+ if (__root >= _M_k)
+ return __root;
+ else
+ {
+ unsigned int __left = __init_winner(2 * __root);
+ unsigned int __right = __init_winner(2 * __root + 1);
+ if (!_M_comp(_M_losers[__right]._M_key,
+ _M_losers[__left]._M_key))
+ {
+ // Left one is less or equal.
+ _M_losers[__root] = _M_losers[__right];
+ return __left;
+ }
+ else
+ {
+ // Right one is less.
+ _M_losers[__root] = _M_losers[__left];
+ return __right;
+ }
+ }
 }
- }
- inline void
- __init()
- {
- _M_losers[0] = _M_losers[__init_winner(1)];
+ void
+ __init()
+ {
+ _M_losers[0] = _M_losers[__init_winner(1)];
#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top at the beginning (0 sequences!)
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+ // no dummy sequence can ever be at the top at the beginning
+ // (0 sequences!)
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
- }
+ }
- // Do not pass a const reference since _M_key will be used as local variable.
- inline void
- __delete_min_insert(_Tp _M_key, bool)
- {
+ // Do not pass a const reference since __key will be used as
+ // a local variable.
+ void
+ __delete_min_insert(_Tp __key, bool)
+ {
#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top!
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+ // no dummy sequence can ever be at the top!
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
- int _M_source = _M_losers[0]._M_source;
- for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by _M_source.
- if (_M_comp(_M_losers[__pos]._M_key, _M_key)
- || (!_M_comp(_M_key, _M_losers[__pos]._M_key)
- && _M_losers[__pos]._M_source < _M_source))
- {
- // The other one is smaller.
- std::swap(_M_losers[__pos]._M_source, _M_source);
- std::swap(_M_losers[__pos]._M_key, _M_key);
- }
+ int __source = _M_losers[0]._M_source;
+ for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+ __pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by __source.
+ if (_M_comp(_M_losers[__pos]._M_key, __key)
+ || (!_M_comp(__key, _M_losers[__pos]._M_key)
+ && _M_losers[__pos]._M_source < __source))
+ {
+ // The other one is smaller.
+ std::swap(_M_losers[__pos]._M_source, __source);
+ std::swap(_M_losers[__pos]._M_key, __key);
+ }
+ }
+
+ _M_losers[0]._M_source = __source;
+ _M_losers[0]._M_key = __key;
 }
+ };
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_key = _M_key;
- }
-};
-
-/**
- * @brief Non-Stable implementation of unguarded _LoserTree.
- *
- * Stable implementation is above.
- */
-template<typename _Tp, typename _Compare>
-class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare> :
- public _LoserTreeUnguardedBase<_Tp, _Compare>
-{
- typedef _LoserTreeUnguardedBase<_Tp, _Compare> Base;
- using Base::_M_k;
- using Base::_M_losers;
-
-public:
- _LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel,
- _Compare __comp = std::less<_Tp>())
- : Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp)
- {}
-
- unsigned int
- __init_winner (unsigned int __root)
- {
- if (__root >= _M_k)
- {
- return __root;
- }
- else
+ /**
+ * @brief Non-stable implementation of unguarded _LoserTree.
+ *
+ * Stable implementation is above.
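+ *
+ * Without stability, the promotion test in __delete_min_insert shrinks
+ * to a single comparison per tree level (sketch of the loop body
+ * defined below):
+ * @code
+ *   if (_M_comp(_M_losers[__pos]._M_key, __key))
+ *     {
+ *       std::swap(_M_losers[__pos]._M_source, __source);
+ *       std::swap(_M_losers[__pos]._M_key, __key);
+ *     }
+ * @endcode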
+ */ + template<typename _Tp, typename _Compare> + class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare> + : public _LoserTreeUnguardedBase<_Tp, _Compare> + { + typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base; + using _Base::_M_k; + using _Base::_M_losers; + + public: + _LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel, + _Compare __comp = std::less<_Tp>()) + : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp) + { } + + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); + if (__root >= _M_k) + return __root; + else + { + unsigned int __left = __init_winner(2 * __root); + unsigned int __right = __init_winner(2 * __root + 1); #if _GLIBCXX_ASSERTIONS - // If __left one is sentinel then __right one must be, too. - if (_M_losers[__left]._M_source == -1) - _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1); + // If __left one is sentinel then __right one must be, too. + if (_M_losers[__left]._M_source == -1) + _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1); #endif - if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. - _M_losers[__root] = _M_losers[__left]; - return __right; - } + if (!_M_comp(_M_losers[__right]._M_key, + _M_losers[__left]._M_key)) + { + // Left one is less or equal. + _M_losers[__root] = _M_losers[__right]; + return __left; + } + else + { + // Right one is less. + _M_losers[__root] = _M_losers[__left]; + return __right; + } + } } - } - inline void - __init() - { - _M_losers[0] = _M_losers[__init_winner(1)]; + void + __init() + { + _M_losers[0] = _M_losers[__init_winner(1)]; #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top at the beginning (0 sequences!) - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top at the beginning + // (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - } + } - // Do not pass a const reference since _M_key will be used as local variable. - inline void - __delete_min_insert(_Tp _M_key, bool) - { + // Do not pass a const reference since __key will be used as + // local variable. + void + __delete_min_insert(_Tp __key, bool) + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - int _M_source = _M_losers[0]._M_source; - for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) - { - // The smaller one gets promoted. - if (_M_comp(_M_losers[__pos]._M_key, _M_key)) - { - // The other one is smaller. - std::swap(_M_losers[__pos]._M_source, _M_source); - std::swap(_M_losers[__pos]._M_key, _M_key); - } + int __source = _M_losers[0]._M_source; + for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0; + __pos /= 2) + { + // The smaller one gets promoted. + if (_M_comp(_M_losers[__pos]._M_key, __key)) + { + // The other one is smaller. + std::swap(_M_losers[__pos]._M_source, __source); + std::swap(_M_losers[__pos]._M_key, __key); + } + } + + _M_losers[0]._M_source = __source; + _M_losers[0]._M_key = __key; } + }; + + /** @brief Unguarded loser tree, keeping only pointers to the + * elements in the tree structure. 
+ *
+ * No guarding is done, therefore no input sequence may ever
+ * run empty. This is a very fast variant.
+ */
+ template<typename _Tp, typename _Compare>
+ class _LoserTreePointerUnguardedBase
+ {
+ protected:
+ struct _Loser
+ {
+ int _M_source;
+ const _Tp* _M_keyp;
+ };
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_key = _M_key;
- }
-};
+ unsigned int _M_ik, _M_k, _M_offset;
+ _Loser* _M_losers;
+ _Compare _M_comp;
-/** @brief Unguarded loser tree, keeping only pointers to the
-* elements in the tree structure.
-*
-* No guarding is done, therefore not a single input sequence must
-* run empty. This is a very fast variant.
-*/
-template<typename _Tp, typename _Compare>
-class LoserTreePointerUnguardedBase
-{
-protected:
- struct _Loser
- {
- int _M_source;
- const _Tp* _M_keyp;
- };
-
- unsigned int _M_ik, _M_k, _M_offset;
- _Loser* _M_losers;
- _Compare _M_comp;
-
-public:
-
- inline
- LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel,
- _Compare __comp = std::less<_Tp>())
- : _M_comp(__comp)
- {
- _M_ik = __k;
-
- // Next greater power of 2.
- _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
- _M_offset = _M_k;
- // Avoid default-constructing _M_losers[]._M_key
- _M_losers = new _Loser[2 * _M_k];
-
- for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
+ public:
+
+ _LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& __sentinel,
+ _Compare __comp = std::less<_Tp>())
+ : _M_comp(__comp)
 {
- _M_losers[__i]._M_keyp = &_sentinel;
- _M_losers[__i]._M_source = -1;
+ _M_ik = __k;
+
+ // Next greater power of 2.
+ _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
+ _M_offset = _M_k;
+ // Only pointers are stored, so plain new[] is fine here.
+ _M_losers = new _Loser[2 * _M_k];
+
+ for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
+ {
+ _M_losers[__i]._M_keyp = &__sentinel;
+ _M_losers[__i]._M_source = -1;
+ }
 }
- }
- inline ~LoserTreePointerUnguardedBase()
- { delete[] _M_losers; }
+ ~_LoserTreePointerUnguardedBase()
+ { delete[] _M_losers; }
- inline int
- __get_min_source()
- {
+ int
+ __get_min_source()
+ {
#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top!
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+ // no dummy sequence can ever be at the top!
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
- return _M_losers[0]._M_source;
- }
-
- inline void
- __insert_start(const _Tp& _M_key, int _M_source, bool)
- {
- unsigned int __pos = _M_k + _M_source;
-
- _M_losers[__pos]._M_keyp = &_M_key;
- _M_losers[__pos]._M_source = _M_source;
- }
-};
-
-/**
- * @brief Stable unguarded _LoserTree variant storing pointers.
- *
- * Unstable variant is implemented below using partial specialization.
- */ -template<bool __stable/* default == true */, typename _Tp, typename _Compare> -class _LoserTreePointerUnguarded : - public LoserTreePointerUnguardedBase<_Tp, _Compare> -{ - typedef LoserTreePointerUnguardedBase<_Tp, _Compare> Base; - using Base::_M_k; - using Base::_M_losers; - -public: - _LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel, - _Compare __comp = std::less<_Tp>()) - : Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp) - {} - - unsigned int - __init_winner(unsigned int __root) - { - if (__root >= _M_k) + return _M_losers[0]._M_source; + } + + void + __insert_start(const _Tp& __key, int __source, bool) { - return __root; + unsigned int __pos = _M_k + __source; + + _M_losers[__pos]._M_keyp = &__key; + _M_losers[__pos]._M_source = __source; } - else + }; + + /** + * @brief Stable unguarded _LoserTree variant storing pointers. + * + * Unstable variant is implemented below using partial specialization. + */ + template<bool __stable/* default == true */, typename _Tp, typename _Compare> + class _LoserTreePointerUnguarded + : public _LoserTreePointerUnguardedBase<_Tp, _Compare> + { + typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base; + using _Base::_M_k; + using _Base::_M_losers; + + public: + _LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel, + _Compare __comp = std::less<_Tp>()) + : _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp) + { } + + unsigned int + __init_winner(unsigned int __root) { - unsigned int __left = __init_winner (2 * __root); - unsigned int __right = __init_winner (2 * __root + 1); - if (!_M_comp(*_M_losers[__right]._M_keyp, *_M_losers[__left]._M_keyp)) - { - // Left one is less or equal. - _M_losers[__root] = _M_losers[__right]; - return __left; - } - else - { - // Right one is less. - _M_losers[__root] = _M_losers[__left]; - return __right; - } + if (__root >= _M_k) + return __root; + else + { + unsigned int __left = __init_winner(2 * __root); + unsigned int __right = __init_winner(2 * __root + 1); + if (!_M_comp(*_M_losers[__right]._M_keyp, + *_M_losers[__left]._M_keyp)) + { + // Left one is less or equal. + _M_losers[__root] = _M_losers[__right]; + return __left; + } + else + { + // Right one is less. + _M_losers[__root] = _M_losers[__left]; + return __right; + } + } } - } - inline void - __init() - { - _M_losers[0] = _M_losers[__init_winner(1)]; + void + __init() + { + _M_losers[0] = _M_losers[__init_winner(1)]; #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top at the beginning (0 sequences!) - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top at the beginning + // (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - } + } - inline void - __delete_min_insert(const _Tp& _M_key, bool _M_sup) - { + void + __delete_min_insert(const _Tp& __key, bool __sup) + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - const _Tp* _M_keyp = &_M_key; - int _M_source = _M_losers[0]._M_source; - for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) - { - // The smaller one gets promoted, ties are broken by _M_source. - if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp) - || (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) - && _M_losers[__pos]._M_source < _M_source)) - { - // The other one is smaller. 
- std::swap(_M_losers[__pos]._M_source, _M_source);
- std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
- }
+ const _Tp* __keyp = &__key;
+ int __source = _M_losers[0]._M_source;
+ for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+ __pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by __source.
+ if (_M_comp(*_M_losers[__pos]._M_keyp, *__keyp)
+ || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
+ && _M_losers[__pos]._M_source < __source))
+ {
+ // The other one is smaller.
+ std::swap(_M_losers[__pos]._M_source, __source);
+ std::swap(_M_losers[__pos]._M_keyp, __keyp);
+ }
+ }
+
+ _M_losers[0]._M_source = __source;
+ _M_losers[0]._M_keyp = __keyp;
 }
+ };
- _M_losers[0]._M_source = _M_source;
- _M_losers[0]._M_keyp = _M_keyp;
- }
-};
-
-/**
- * @brief Unstable unguarded _LoserTree variant storing pointers.
- *
- * Stable variant is above.
- */
-template<typename _Tp, typename _Compare>
-class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare> :
- public LoserTreePointerUnguardedBase<_Tp, _Compare>
-{
- typedef LoserTreePointerUnguardedBase<_Tp, _Compare> Base;
- using Base::_M_k;
- using Base::_M_losers;
-
-public:
- _LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
- _Compare __comp = std::less<_Tp>())
- : Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
- {}
-
- unsigned int
- __init_winner(unsigned int __root)
- {
- if (__root >= _M_k)
- {
- return __root;
- }
- else
+ /**
+ * @brief Unstable unguarded _LoserTree variant storing pointers.
+ *
+ * Stable variant is above.
+ */
+ template<typename _Tp, typename _Compare>
+ class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare>
+ : public _LoserTreePointerUnguardedBase<_Tp, _Compare>
+ {
+ typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
+ using _Base::_M_k;
+ using _Base::_M_losers;
+
+ public:
+ _LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
+ _Compare __comp = std::less<_Tp>())
+ : _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
+ { }
+
+ unsigned int
+ __init_winner(unsigned int __root)
 {
- unsigned int __left = __init_winner (2 * __root);
- unsigned int __right = __init_winner (2 * __root + 1);
+ if (__root >= _M_k)
+ return __root;
+ else
+ {
+ unsigned int __left = __init_winner(2 * __root);
+ unsigned int __right = __init_winner(2 * __root + 1);
#if _GLIBCXX_ASSERTIONS
- // If __left one is sentinel then __right one must be, too.
- if (_M_losers[__left]._M_source == -1)
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
+ // If __left one is sentinel then __right one must be, too.
+ if (_M_losers[__left]._M_source == -1)
+ _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
#endif
- if (!_M_comp(*_M_losers[__right]._M_keyp, *_M_losers[__left]._M_keyp))
- {
- // Left one is less or equal.
- _M_losers[__root] = _M_losers[__right];
- return __left;
- }
- else
- {
- // Right one is less.
- _M_losers[__root] = _M_losers[__left];
- return __right;
- }
+ if (!_M_comp(*_M_losers[__right]._M_keyp,
+ *_M_losers[__left]._M_keyp))
+ {
+ // Left one is less or equal.
+ _M_losers[__root] = _M_losers[__right];
+ return __left;
+ }
+ else
+ {
+ // Right one is less.
+ _M_losers[__root] = _M_losers[__left];
+ return __right;
+ }
+ }
 }
- }
- inline void
- __init()
- {
- _M_losers[0] = _M_losers[__init_winner(1)];
+ void
+ __init()
+ {
+ _M_losers[0] = _M_losers[__init_winner(1)];
#if _GLIBCXX_ASSERTIONS
- // no dummy sequence can ever be at the top at the beginning (0 sequences!)
- _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top at the beginning + // (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - } + } - inline void - __delete_min_insert(const _Tp& _M_key, bool _M_sup) - { + void + __delete_min_insert(const _Tp& __key, bool __sup) + { #if _GLIBCXX_ASSERTIONS - // no dummy sequence can ever be at the top! - _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); #endif - const _Tp* _M_keyp = &_M_key; - int _M_source = _M_losers[0]._M_source; - for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) - { - // The smaller one gets promoted. - if (_M_comp(*(_M_losers[__pos]._M_keyp), *_M_keyp)) - { - // The other one is smaller. - std::swap(_M_losers[__pos]._M_source, _M_source); - std::swap(_M_losers[__pos]._M_keyp, _M_keyp); - } + const _Tp* __keyp = &__key; + int __source = _M_losers[0]._M_source; + for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0; + __pos /= 2) + { + // The smaller one gets promoted. + if (_M_comp(*(_M_losers[__pos]._M_keyp), *__keyp)) + { + // The other one is smaller. + std::swap(_M_losers[__pos]._M_source, __source); + std::swap(_M_losers[__pos]._M_keyp, __keyp); + } + } + + _M_losers[0]._M_source = __source; + _M_losers[0]._M_keyp = __keyp; } - - _M_losers[0]._M_source = _M_source; - _M_losers[0]._M_keyp = _M_keyp; - } -}; - + }; } // namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_LOSERTREE_H */ diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h index c323c629893..2343d7326e8 100644 --- a/libstdc++-v3/include/parallel/merge.h +++ b/libstdc++-v3/include/parallel/merge.h @@ -54,11 +54,10 @@ namespace __gnu_parallel typename _OutputIterator, typename _DifferenceTp, typename _Compare> _OutputIterator - __merge_advance_usual(_RAIter1& __begin1, - _RAIter1 __end1, - _RAIter2& __begin2, - _RAIter2 __end2, _OutputIterator __target, - _DifferenceTp __max_length, _Compare __comp) + __merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1, + _RAIter2& __begin2, _RAIter2 __end2, + _OutputIterator __target, + _DifferenceTp __max_length, _Compare __comp) { typedef _DifferenceTp _DifferenceType; while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0) @@ -103,12 +102,10 @@ namespace __gnu_parallel typename _OutputIterator, typename _DifferenceTp, typename _Compare> _OutputIterator - __merge_advance_movc(_RAIter1& __begin1, - _RAIter1 __end1, - _RAIter2& __begin2, - _RAIter2 __end2, - _OutputIterator __target, - _DifferenceTp __max_length, _Compare __comp) + __merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1, + _RAIter2& __begin2, _RAIter2 __end2, + _OutputIterator __target, + _DifferenceTp __max_length, _Compare __comp) { typedef _DifferenceTp _DifferenceType; typedef typename std::iterator_traits<_RAIter1>::value_type @@ -172,14 +169,14 @@ namespace __gnu_parallel typename _Compare> inline _OutputIterator __merge_advance(_RAIter1& __begin1, _RAIter1 __end1, - _RAIter2& __begin2, _RAIter2 __end2, - _OutputIterator __target, _DifferenceTp __max_length, - _Compare __comp) + _RAIter2& __begin2, _RAIter2 __end2, + _OutputIterator __target, _DifferenceTp __max_length, + _Compare __comp) { _GLIBCXX_CALL(__max_length) - return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target, - __max_length, __comp); + return __merge_advance_movc(__begin1, __end1, __begin2, __end2, + 
__target, __max_length, __comp); } /** @brief Merge routine fallback to sequential in case the @@ -195,17 +192,15 @@ namespace __gnu_parallel template<typename _RAIter1, typename _RAIter2, typename _RAIter3, typename _Compare> inline _RAIter3 - __parallel_merge_advance(_RAIter1& __begin1, - _RAIter1 __end1, - _RAIter2& __begin2, - // different iterators, parallel implementation - // not available - _RAIter2 __end2, - _RAIter3 __target, typename - std::iterator_traits<_RAIter1>:: - difference_type __max_length, _Compare __comp) + __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1, + _RAIter2& __begin2, + // different iterators, parallel implementation + // not available + _RAIter2 __end2, _RAIter3 __target, typename + std::iterator_traits<_RAIter1>:: + difference_type __max_length, _Compare __comp) { return __merge_advance(__begin1, __end1, __begin2, __end2, __target, - __max_length, __comp); } + __max_length, __comp); } /** @brief Parallel merge routine being able to merge only the @__c * __max_length smallest elements. @@ -225,13 +220,11 @@ namespace __gnu_parallel template<typename _RAIter1, typename _RAIter3, typename _Compare> inline _RAIter3 - __parallel_merge_advance(_RAIter1& __begin1, - _RAIter1 __end1, - _RAIter1& __begin2, - _RAIter1 __end2, - _RAIter3 __target, typename - std::iterator_traits<_RAIter1>:: - difference_type __max_length, _Compare __comp) + __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1, + _RAIter1& __begin2, _RAIter1 __end2, + _RAIter3 __target, typename + std::iterator_traits<_RAIter1>:: + difference_type __max_length, _Compare __comp) { typedef typename std::iterator_traits<_RAIter1>::value_type _ValueType; @@ -242,17 +235,14 @@ namespace __gnu_parallel typedef typename std::pair<_RAIter1, _RAIter1> _IteratorPair; - _IteratorPair - seqs[2] = { std::make_pair(__begin1, __end1), - std::make_pair(__begin2, __end2) }; - _RAIter3 - __target_end = parallel_multiway_merge - < /* __stable = */ true, /* __sentinels = */ false>( - seqs, seqs + 2, __target, - multiway_merge_exact_splitting - < /* __stable = */ true, _IteratorPair*, - _Compare, _DifferenceType1>, - __max_length, __comp, omp_get_max_threads()); + _IteratorPair __seqs[2] = { std::make_pair(__begin1, __end1), + std::make_pair(__begin2, __end2) }; + _RAIter3 __target_end = parallel_multiway_merge + < /* __stable = */ true, /* __sentinels = */ false> + (__seqs, __seqs + 2, __target, multiway_merge_exact_splitting + < /* __stable = */ true, _IteratorPair*, + _Compare, _DifferenceType1>, __max_length, __comp, + omp_get_max_threads()); return __target_end; } diff --git a/libstdc++-v3/include/parallel/multiseq_selection.h b/libstdc++-v3/include/parallel/multiseq_selection.h index ac06385b44b..e434eee624c 100644 --- a/libstdc++-v3/include/parallel/multiseq_selection.h +++ b/libstdc++-v3/include/parallel/multiseq_selection.h @@ -53,8 +53,8 @@ namespace __gnu_parallel /** @brief Compare __a pair of types lexicographically, ascending. */ template<typename _T1, typename _T2, typename _Compare> class _Lexicographic - : public std::binary_function< - std::pair<_T1, _T2>, std::pair<_T1, _T2>, bool> + : public std::binary_function<std::pair<_T1, _T2>, + std::pair<_T1, _T2>, bool> { private: _Compare& _M_comp; @@ -142,19 +142,19 @@ namespace __gnu_parallel // Number of sequences, number of elements in total (possibly // including padding). 
- _DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0, + _DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0, __nmax, __n, __r; for (int __i = 0; __i < __m; __i++) { - __N += std::distance(__begin_seqs[__i].first, + __nn += std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second); _GLIBCXX_PARALLEL_ASSERT( std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second) > 0); } - if (__rank == __N) + if (__rank == __nn) { for (int __i = 0; __i < __m; __i++) __begin_offsets[__i] = __begin_seqs[__i].second; // Very end. @@ -163,9 +163,9 @@ namespace __gnu_parallel } _GLIBCXX_PARALLEL_ASSERT(__m != 0); - _GLIBCXX_PARALLEL_ASSERT(__N != 0); + _GLIBCXX_PARALLEL_ASSERT(__nn != 0); _GLIBCXX_PARALLEL_ASSERT(__rank >= 0); - _GLIBCXX_PARALLEL_ASSERT(__rank < __N); + _GLIBCXX_PARALLEL_ASSERT(__rank < __nn); _DifferenceType* __ns = new _DifferenceType[__m]; _DifferenceType* __a = new _DifferenceType[__m]; @@ -401,14 +401,14 @@ namespace __gnu_parallel // Number of sequences, number of elements in total (possibly // including padding). _DifferenceType __m = std::distance(__begin_seqs, __end_seqs); - _DifferenceType __N = 0; + _DifferenceType __nn = 0; _DifferenceType __nmax, __n, __r; for (int __i = 0; __i < __m; __i++) - __N += std::distance(__begin_seqs[__i].first, - __begin_seqs[__i].second); + __nn += std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second); - if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N) + if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn) { // result undefined if there is no data or __rank is outside bounds throw std::exception(); @@ -433,7 +433,7 @@ namespace __gnu_parallel // Pad all lists to this length, at least as long as any ns[__i], // equality iff __nmax = 2^__k - 1 - __l = pow2(__r) - 1; + __l = __round_up_to_pow2(__r) - 1; for (int __i = 0; __i < __m; ++__i) { diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index 310a07a766c..4238a1c6923 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -54,2113 +54,1998 @@ namespace __gnu_parallel { + /** @brief _Iterator wrapper supporting an implicit supremum at the end + * of the sequence, dominating all comparisons. + * + * The implicit supremum comes with a performance cost. + * + * Deriving from _RAIter is not possible since + * _RAIter need not be a class. + */ + template<typename _RAIter, typename _Compare> + class _GuardedIterator + { + private: + /** @brief Current iterator __position. */ + _RAIter _M_current; + + /** @brief End iterator of the sequence. */ + _RAIter _M_end; + + /** @brief _Compare. */ + _Compare& __comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator provided for associated overloaded + * compare operators. */ + _GuardedIterator(_RAIter __begin, _RAIter __end, _Compare& __comp) + : _M_current(__begin), _M_end(__end), __comp(__comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + _GuardedIterator<_RAIter, _Compare>& + operator++() + { + ++_M_current; + return *this; + } -// Announce guarded and unguarded iterator. - -template<typename _RAIter, typename _Compare> - class _GuardedIterator; - -// Making the arguments const references seems to dangerous, -// the user-defined comparator might not be const. 
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2);
-
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2);
-
-/** @brief _Iterator wrapper supporting an implicit supremum at the end
- * of the sequence, dominating all comparisons.
- *
- * The implicit supremum comes with a performance cost.
- *
- * Deriving from _RAIter is not possible since
- * _RAIter need not be a class.
- */
-template<typename _RAIter, typename _Compare>
- class _GuardedIterator
- {
- private:
- /** @brief Current iterator __position. */
- _RAIter _M_current;
-
- /** @brief End iterator of the sequence. */
- _RAIter _M_end;
-
- /** @brief _Compare. */
- _Compare& __comp;
-
- public:
- /** @brief Constructor. Sets iterator to beginning of sequence.
- * @param __begin Begin iterator of sequence.
- * @param _M_end End iterator of sequence.
- * @param __comp Comparator provided for associated overloaded
- * compare operators. */
- _GuardedIterator(_RAIter __begin,
- _RAIter _M_end, _Compare& __comp)
- : _M_current(__begin), _M_end(_M_end), __comp(__comp)
- { }
-
- /** @brief Pre-increment operator.
- * @return This. */
- _GuardedIterator<_RAIter, _Compare>&
- operator++()
+ /** @brief Dereference operator.
+ * @return Referenced element. */
+ typename std::iterator_traits<_RAIter>::value_type&
+ operator*()
+ { return *_M_current; }
+
+ /** @brief Convert to wrapped iterator.
+ * @return Wrapped iterator. */
+ operator _RAIter()
+ { return _M_current; }
+
+ /** @brief Compare two elements referenced by guarded iterators.
+ * @param __bi1 First iterator.
+ * @param __bi2 Second iterator.
+ * @return @c true if less. */
+ friend bool
+ operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
+ _GuardedIterator<_RAIter, _Compare>& __bi2)
+ {
+ if (__bi1._M_current == __bi1._M_end)    // __bi1 is sup
+ return __bi2._M_current == __bi2._M_end; // less only if __bi2 is sup, too
+ if (__bi2._M_current == __bi2._M_end)    // __bi2 is sup
+ return true;
+ return (__bi1.__comp)(*__bi1, *__bi2);   // normal compare
+ }
+
+ /** @brief Compare two elements referenced by guarded iterators.
+ * @param __bi1 First iterator.
+ * @param __bi2 Second iterator.
+ * @return @c true if less or equal. */
+ friend bool
+ operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
+ _GuardedIterator<_RAIter, _Compare>& __bi2)
+ {
+ if (__bi2._M_current == __bi2._M_end)    // __bi2 is sup
+ return __bi1._M_current != __bi1._M_end; // less-equal unless __bi1 is sup, too
+ if (__bi1._M_current == __bi1._M_end)    // __bi1 is sup
+ return false;
+ return !(__bi1.__comp)(*__bi2, *__bi1);  // normal compare
+ }
+ };
+
+ template<typename _RAIter, typename _Compare>
+ class _UnguardedIterator
 {
- ++_M_current;
- return *this;
- }
+ private:
+ /** @brief Current iterator __position. */
+ _RAIter _M_current;
+ /** @brief _Compare. */
+ mutable _Compare& __comp;
+
+ public:
+ /** @brief Constructor. Sets iterator to beginning of sequence.
+ * @param __begin Begin iterator of sequence.
+ * @param __end Unused, only for compatibility.
+ * @param __comp Unused, only for compatibility. */
+ _UnguardedIterator(_RAIter __begin,
+ _RAIter /* __end */, _Compare& __comp)
+ : _M_current(__begin), __comp(__comp)
+ { }
+
+ /** @brief Pre-increment operator.
+ * @return This.
*/
+ _UnguardedIterator<_RAIter, _Compare>&
+ operator++()
+ {
+ ++_M_current;
+ return *this;
+ }
- /** @brief Dereference operator.
- * @return Referenced element. */
- typename std::iterator_traits<_RAIter>::value_type&
- operator*()
- { return *_M_current; }
-
- /** @brief Convert to wrapped iterator.
- * @return Wrapped iterator. */
- operator _RAIter()
- { return _M_current; }
-
- friend bool
- operator< <_RAIter, _Compare>(
- _GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2);
-
- friend bool
- operator<= <_RAIter, _Compare>(
- _GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2);
- };
-
-/** @brief Compare two elements referenced by guarded iterators.
- * @param __bi1 First iterator.
- * @param __bi2 Second iterator.
- * @return @__c true if less. */
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2)
- {
- if (__bi1._M_current == __bi1._M_end) //__bi1 is sup
- return __bi2._M_current == __bi2._M_end; //__bi2 is not sup
- if (__bi2._M_current == __bi2._M_end) //__bi2 is sup
- return true;
- return (__bi1.__comp)(*__bi1, *__bi2); //normal compare
- }
-
-/** @brief Compare two elements referenced by guarded iterators.
- * @param __bi1 First iterator.
- * @param __bi2 Second iterator.
- * @return @__c True if less equal. */
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
- _GuardedIterator<_RAIter, _Compare>& __bi2)
- {
- if (__bi2._M_current == __bi2._M_end) //__bi1 is sup
- return __bi1._M_current != __bi1._M_end; //__bi2 is not sup
- if (__bi1._M_current == __bi1._M_end) //__bi2 is sup
- return false;
- return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare
- }
-
-template<typename _RAIter, typename _Compare>
- class _UnguardedIterator;
-
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2);
-
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2);
-
-template<typename _RAIter, typename _Compare>
- class _UnguardedIterator
- {
- private:
- /** @brief Current iterator __position. */
- _RAIter _M_current;
- /** @brief _Compare. */
- mutable _Compare& __comp;
-
- public:
- /** @brief Constructor. Sets iterator to beginning of sequence.
- * @param __begin Begin iterator of sequence.
- * @param _M_end Unused, only for compatibility.
- * @param __comp Unused, only for compatibility. */
- _UnguardedIterator(_RAIter __begin,
- _RAIter _M_end, _Compare& __comp)
- : _M_current(__begin), __comp(__comp)
- { }
-
- /** @brief Pre-increment operator.
- * @return This. */
- _UnguardedIterator<_RAIter, _Compare>&
- operator++()
+ /** @brief Dereference operator.
+ * @return Referenced element. */
+ typename std::iterator_traits<_RAIter>::value_type&
+ operator*()
+ { return *_M_current; }
+
+ /** @brief Convert to wrapped iterator.
+ * @return Wrapped iterator. */
+ operator _RAIter()
+ { return _M_current; }
+
+ /** @brief Compare two elements referenced by unguarded iterators.
+ * @param __bi1 First iterator.
+ * @param __bi2 Second iterator.
+ * @return @c true if less. */
+ friend bool
+ operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
+ _UnguardedIterator<_RAIter, _Compare>& __bi2)
+ {
+ // Normal compare.
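+ // No end-of-sequence test is needed here: with unguarded
+ // iterators the caller guarantees, via sentinels, that both
+ // operands can always be dereferenced.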
+ return (__bi1.__comp)(*__bi1, *__bi2);
+ }
+
+ /** @brief Compare two elements referenced by unguarded iterators.
+ * @param __bi1 First iterator.
+ * @param __bi2 Second iterator.
+ * @return @c true if less or equal. */
+ friend bool
+ operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
+ _UnguardedIterator<_RAIter, _Compare>& __bi2)
+ {
+ // Normal compare.
+ return !(__bi1.__comp)(*__bi2, *__bi1);
+ }
+ };
+
+ /** @brief Highly efficient 3-way merging procedure.
+ *
+ * Merging is done with the algorithm implementation described by Peter
+ * Sanders. Basically, the idea is to minimize the number of necessary
+ * comparisons after merging an element. The implementation trick
+ * that makes this fast is that the order of the sequences is stored
+ * in the instruction pointer (translated into labels in C++).
+ *
+ * This works well for merging up to 4 sequences.
+ *
+ * Note that making the merging stable does <em>not</em> come at a
+ * performance cost.
+ *
+ * Whether the merging is done guarded or unguarded is selected by the
+ * used iterator class.
+ *
+ * @param __seqs_begin Begin iterator of iterator pair input sequence.
+ * @param __seqs_end End iterator of iterator pair input sequence.
+ * @param __target Begin iterator of output sequence.
+ * @param __comp Comparator.
+ * @param __length Maximum length to merge, at most the
+ * total number of elements available.
+ *
+ * @return End iterator of output sequence.
+ */
+ template<template<typename RAI, typename C> class iterator,
+ typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ _RAIter3
+ multiway_merge_3_variant(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ _DifferenceTp __length, _Compare __comp)
 {
- ++_M_current;
- return *this;
- }
+ _GLIBCXX_CALL(__length);
- /** @brief Dereference operator.
- * @return Referenced element. */
- typename std::iterator_traits<_RAIter>::value_type&
- operator*()
- { return *_M_current; }
-
- /** @brief Convert to wrapped iterator.
- * @return Wrapped iterator. */
- operator _RAIter()
- { return _M_current; }
-
- friend bool
- operator< <_RAIter, _Compare>(
- _UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2);
-
- friend bool
- operator<= <_RAIter, _Compare>(
- _UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2);
- };
-
-/** @brief Compare two elements referenced by unguarded iterators.
- * @param __bi1 First iterator.
- * @param __bi2 Second iterator.
- * @return @__c true if less. */
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2)
- {
- // Normal compare.
- return (__bi1.__comp)(*__bi1, *__bi2);
- }
-
-/** @brief Compare two elements referenced by unguarded iterators.
- * @param __bi1 First iterator.
- * @param __bi2 Second iterator.
- * @return @__c True if less equal. */
-template<typename _RAIter, typename _Compare>
- inline bool
- operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
- _UnguardedIterator<_RAIter, _Compare>& __bi2)
- {
- // Normal compare.
- return !(__bi1.__comp)(*__bi2, *__bi1);
- }
-
-/** @brief Highly efficient 3-way merging procedure.
- *
- * Merging is done with the algorithm implementation described by Peter
- * Sanders. Basically, the idea is to minimize the number of necessary
- * comparison after merging an element.
The implementation trick - * that makes this fast is that the order of the sequences is stored - * in the instruction pointer (translated into labels in C++). - * - * This works well for merging up to 4 sequences. - * - * Note that making the merging stable does <em>not</em> come at a - * performance hit. - * - * Whether the merging is done guarded or unguarded is selected by the - * used iterator class. - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, less equal than the - * total number of elements available. - * - * @return End iterator of output sequence. - */ -template<template<typename RAI, typename C> class iterator, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> - _RAIter3 - multiway_merge_3_variant( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) - { - _GLIBCXX_CALL(__length); - - typedef _DifferenceTp _DifferenceType; - - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - if (__length == 0) - return __target; + typedef _DifferenceTp _DifferenceType; + + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + if (__length == 0) + return __target; #if _GLIBCXX_ASSERTIONS - _DifferenceTp orig_length = __length; + _DifferenceTp __orig_length = __length; #endif - iterator<_RAIter1, _Compare> - __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp), - __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp), - __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp); + iterator<_RAIter1, _Compare> + __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp), + __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp), + __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp); - if (__seq0 <= __seq1) - { - if (__seq1 <= __seq2) - goto __s012; - else - if (__seq2 < __seq0) - goto __s201; + if (__seq0 <= __seq1) + { + if (__seq1 <= __seq2) + goto __s012; else - goto __s021; - } - else - { - if (__seq1 <= __seq2) - { - if (__seq0 <= __seq2) - goto __s102; + if (__seq2 < __seq0) + goto __s201; else - goto __s120; - } - else - goto __s210; - } -#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a,__b,__c,c0,c1) \ - __s ## __a ## __b ## __c : \ - *__target = *__seq ## __a; \ - ++__target; \ - --__length; \ - ++__seq ## __a; \ - if (__length == 0) goto finish; \ - if (__seq ## __a c0 __seq ## __b) goto __s ## __a ## __b ## __c; \ - if (__seq ## __a c1 __seq ## __c) goto __s ## __b ## __a ## __c; \ - goto __s ## __b ## __c ## __a; - - _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); - _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); - _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < ); - _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=); - _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=); - _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < ); + goto __s021; + } + else + { + if (__seq1 <= __seq2) + { + if (__seq0 <= __seq2) + goto __s102; + else + goto __s120; + } + else + goto __s210; + } +#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a, __b, __c, __c0, __c1) \ + __s ## __a ## __b ## __c : \ + *__target = 
*__seq ## __a; \ + ++__target; \ + --__length; \ + ++__seq ## __a; \ + if (__length == 0) goto __finish; \ + if (__seq ## __a __c0 __seq ## __b) goto __s ## __a ## __b ## __c; \ + if (__seq ## __a __c1 __seq ## __c) goto __s ## __b ## __a ## __c; \ + goto __s ## __b ## __c ## __a; + + _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); + _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < ); + _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < ); #undef _GLIBCXX_PARALLEL_MERGE_3_CASE - finish: - ; + __finish: + ; #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT( - ((_RAIter1)__seq0 - __seqs_begin[0].first) + - ((_RAIter1)__seq1 - __seqs_begin[1].first) + - ((_RAIter1)__seq2 - __seqs_begin[2].first) - == orig_length); + _GLIBCXX_PARALLEL_ASSERT( + ((_RAIter1)__seq0 - __seqs_begin[0].first) + + ((_RAIter1)__seq1 - __seqs_begin[1].first) + + ((_RAIter1)__seq2 - __seqs_begin[2].first) + == __orig_length); #endif - __seqs_begin[0].first = __seq0; - __seqs_begin[1].first = __seq1; - __seqs_begin[2].first = __seq2; - - return __target; - } - -/** - * @brief Highly efficient 4-way merging procedure. - * - * Merging is done with the algorithm implementation described by Peter - * Sanders. Basically, the idea is to minimize the number of necessary - * comparison after merging an element. The implementation trick - * that makes this fast is that the order of the sequences is stored - * in the instruction pointer (translated into goto labels in C++). - * - * This works well for merging up to 4 sequences. - * - * Note that making the merging stable does <em>not</em> come at a - * performance hit. - * - * Whether the merging is done guarded or unguarded is selected by the - * used iterator class. - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, less equal than the - * total number of elements available. - * - * @return End iterator of output sequence. 
- */ -template<template<typename RAI, typename C> class iterator, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> - _RAIter3 - multiway_merge_4_variant(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) - { - _GLIBCXX_CALL(__length); - typedef _DifferenceTp _DifferenceType; - - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - iterator<_RAIter1, _Compare> - __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp), - __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp), - __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp), - __seq3(__seqs_begin[3].first, __seqs_begin[3].second, __comp); - -#define _GLIBCXX_PARALLEL_DECISION(__a,__b,__c,d) { \ - if (__seq ## d < __seq ## __a) goto __s ## d ## __a ## __b ## __c; \ - if (__seq ## d < __seq ## __b) goto __s ## __a ## d ## __b ## __c; \ - if (__seq ## d < __seq ## __c) goto __s ## __a ## __b ## d ## __c; \ - goto __s ## __a ## __b ## __c ## d; } - - if (__seq0 <= __seq1) - { - if (__seq1 <= __seq2) - _GLIBCXX_PARALLEL_DECISION(0,1,2,3) - else - if (__seq2 < __seq0) - _GLIBCXX_PARALLEL_DECISION(2,0,1,3) - else - _GLIBCXX_PARALLEL_DECISION(0,2,1,3) - } - else - { - if (__seq1 <= __seq2) - { - if (__seq0 <= __seq2) - _GLIBCXX_PARALLEL_DECISION(1,0,2,3) - else - _GLIBCXX_PARALLEL_DECISION(1,2,0,3) - } - else - _GLIBCXX_PARALLEL_DECISION(2,1,0,3) - } + __seqs_begin[0].first = __seq0; + __seqs_begin[1].first = __seq1; + __seqs_begin[2].first = __seq2; -#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a,__b,__c,d,c0,c1,c2) \ - __s ## __a ## __b ## __c ## d: \ - if (__length == 0) goto finish; \ - *__target = *__seq ## __a; \ - ++__target; \ - --__length; \ - ++__seq ## __a; \ - if (__seq ## __a c0 __seq ## __b) goto __s ## __a ## __b ## __c ## d; \ - if (__seq ## __a c1 __seq ## __c) goto __s ## __b ## __a ## __c ## d; \ - if (__seq ## __a c2 __seq ## d) goto __s ## __b ## __c ## __a ## d; \ - goto __s ## __b ## __c ## d ## __a; - - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < ); - _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < ); 
+ return __target;
+ }
+
+ /**
+ * @brief Highly efficient 4-way merging procedure.
+ *
+ * Merging is done with the algorithm implementation described by Peter
+ * Sanders. Basically, the idea is to minimize the number of necessary
+ * comparisons after merging an element. The implementation trick
+ * that makes this fast is that the order of the sequences is stored
+ * in the instruction pointer (translated into goto labels in C++).
+ *
+ * This works well for merging up to 4 sequences.
+ *
+ * Note that making the merging stable does <em>not</em> incur a
+ * performance hit.
+ *
+ * Whether the merging is done guarded or unguarded is selected by the
+ * used iterator class.
+ *
+ * @param __seqs_begin Begin iterator of iterator pair input sequence.
+ * @param __seqs_end End iterator of iterator pair input sequence.
+ * @param __target Begin iterator of output sequence.
+ * @param __comp Comparator.
+ * @param __length Maximum length to merge, less than or equal to the
+ * total number of elements available.
+ *
+ * @return End iterator of output sequence.
+ */
+ template<template<typename RAI, typename C> class iterator,
+ typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ _RAIter3
+ multiway_merge_4_variant(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ _DifferenceTp __length, _Compare __comp)
+ {
+ _GLIBCXX_CALL(__length);
+ typedef _DifferenceTp _DifferenceType;
+
+ typedef typename std::iterator_traits<_RAIterIterator>
+ ::value_type::first_type
+ _RAIter1;
+ typedef typename std::iterator_traits<_RAIter1>::value_type
+ _ValueType;
+
+ iterator<_RAIter1, _Compare>
+ __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp),
+ __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp),
+ __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp),
+ __seq3(__seqs_begin[3].first, __seqs_begin[3].second, __comp);
+
+#define _GLIBCXX_PARALLEL_DECISION(__a, __b, __c, __d) { \
+ if (__seq ## __d < __seq ## __a) \
+ goto __s ## __d ## __a ## __b ## __c; \
+ if (__seq ## __d < __seq ## __b) \
+ goto __s ## __a ## __d ## __b ## __c; \
+ if (__seq ## __d < __seq ## __c) \
+ goto __s ## __a ## __b ## __d ## __c; \
+ goto __s ## __a ## __b ## __c ## __d; }
+
+ if (__seq0 <= __seq1)
+ {
+ if (__seq1 <= __seq2)
+ _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
+ else
+ if (__seq2 < __seq0)
+ _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
+ else
+ _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
+ }
+ else
+ {
+ if (__seq1 <= __seq2)
+ {
+ if (__seq0 <= __seq2)
+ _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
+ else
+ _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
+ }
+ else
+ _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
+ }
+
+#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a, __b, __c, __d, \
+ __c0, __c1, __c2) \
+ __s ## __a ## __b ## __c ## __d: \
+ if (__length == 0) goto __finish; \
+ *__target = *__seq ## __a; \
+ ++__target; \
+ --__length; \
+ ++__seq ## __a; \
+ if (__seq ## __a __c0 __seq ## __b) \
+ goto __s ## __a ## __b ## __c ## __d; \
+ if (__seq ## __a __c1 __seq ## __c) \
+ goto __s ## __b ## __a ## __c ## __d; \
+ if (__seq ## __a __c2 __seq ## __d) \
+ goto __s ## __b ## __c ## __a ## __d; \
+ goto __s ## __b ## __c ## __d ## __a;
+
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
+ _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=,
<=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < ); #undef _GLIBCXX_PARALLEL_MERGE_4_CASE #undef _GLIBCXX_PARALLEL_DECISION - finish: - ; - - __seqs_begin[0].first = __seq0; - __seqs_begin[1].first = __seq1; - __seqs_begin[2].first = __seq2; - __seqs_begin[3].first = __seq3; - - return __target; - } - -/** @brief Multi-way merging procedure for a high branching factor, - * guarded case. - * - * This merging variant uses a LoserTree class as selected by <tt>LT</tt>. - * - * Stability is selected through the used LoserTree class <tt>LT</tt>. - * - * At least one non-empty sequence is required. - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, less equal than the - * total number of elements available. - * - * @return End iterator of output sequence. - */ -template<typename LT, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> - _RAIter3 - multiway_merge_loser_tree(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) - { - _GLIBCXX_CALL(__length) + __finish: + ; - typedef _DifferenceTp _DifferenceType; - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; + __seqs_begin[0].first = __seq0; + __seqs_begin[1].first = __seq1; + __seqs_begin[2].first = __seq2; + __seqs_begin[3].first = __seq3; - int __k = static_cast<int>(__seqs_end - __seqs_begin); + return __target; + } - LT __lt(__k, __comp); + /** @brief Multi-way merging procedure for a high branching factor, + * guarded case. + * + * This merging variant uses a LoserTree class as selected by <tt>_LT</tt>. + * + * Stability is selected through the used LoserTree class <tt>_LT</tt>. + * + * At least one non-empty sequence is required. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. 
+ */ + template<typename _LT, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_loser_tree(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { + _GLIBCXX_CALL(__length) - // Default value for potentially non-default-constructible types. - _ValueType* __arbitrary_element = NULL; + typedef _DifferenceTp _DifferenceType; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; - for (int __t = 0; __t < __k; ++__t) - { - if(__arbitrary_element == NULL - && _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__t]) > 0) - __arbitrary_element = &(*__seqs_begin[__t].first); - } + int __k = static_cast<int>(__seqs_end - __seqs_begin); - for (int __t = 0; __t < __k; ++__t) - { - if (__seqs_begin[__t].first == __seqs_begin[__t].second) - __lt.__insert_start(*__arbitrary_element, __t, true); - else - __lt.__insert_start(*__seqs_begin[__t].first, __t, false); - } + _LT __lt(__k, __comp); - __lt.__init(); + // Default value for potentially non-default-constructible types. + _ValueType* __arbitrary_element = NULL; - int __source; + for (int __t = 0; __t < __k; ++__t) + { + if(__arbitrary_element == NULL + && _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__t]) > 0) + __arbitrary_element = &(*__seqs_begin[__t].first); + } - for (_DifferenceType __i = 0; __i < __length; ++__i) - { - //take out - __source = __lt.__get_min_source(); + for (int __t = 0; __t < __k; ++__t) + { + if (__seqs_begin[__t].first == __seqs_begin[__t].second) + __lt.__insert_start(*__arbitrary_element, __t, true); + else + __lt.__insert_start(*__seqs_begin[__t].first, __t, false); + } - *(__target++) = *(__seqs_begin[__source].first++); + __lt.__init(); - // Feed. - if (__seqs_begin[__source].first == __seqs_begin[__source].second) - __lt.__delete_min_insert(*__arbitrary_element, true); - else - // Replace from same __source. - __lt.__delete_min_insert(*__seqs_begin[__source].first, false); - } + int __source; - return __target; - } - -/** @brief Multi-way merging procedure for a high branching factor, - * unguarded case. - * - * Merging is done using the LoserTree class <tt>LT</tt>. - * - * Stability is selected by the used LoserTrees. - * - * @pre No input will run out of elements during the merge. - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, less equal than the - * total number of elements available. - * - * @return End iterator of output sequence. 
- */ -template<typename LT, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, typename _Compare> - _RAIter3 - multiway_merge_loser_tree_unguarded( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits<typename std::iterator_traits< - _RAIterIterator>::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, - _Compare __comp) - { - _GLIBCXX_CALL(__length) - typedef _DifferenceTp _DifferenceType; + for (_DifferenceType __i = 0; __i < __length; ++__i) + { + //take out + __source = __lt.__get_min_source(); - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; + *(__target++) = *(__seqs_begin[__source].first++); - int __k = __seqs_end - __seqs_begin; + // Feed. + if (__seqs_begin[__source].first == __seqs_begin[__source].second) + __lt.__delete_min_insert(*__arbitrary_element, true); + else + // Replace from same __source. + __lt.__delete_min_insert(*__seqs_begin[__source].first, false); + } - LT __lt(__k, __sentinel, __comp); + return __target; + } - for (int __t = 0; __t < __k; ++__t) - { -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__seqs_begin[__t].first - != __seqs_begin[__t].second); -#endif - __lt.__insert_start(*__seqs_begin[__t].first, __t, false); - } + /** @brief Multi-way merging procedure for a high branching factor, + * unguarded case. + * + * Merging is done using the LoserTree class <tt>_LT</tt>. + * + * Stability is selected by the used LoserTrees. + * + * @pre No input will run out of elements during the merge. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. + */ + template<typename _LT, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, typename _Compare> + _RAIter3 + multiway_merge_loser_tree_unguarded(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, + _Compare __comp) + { + _GLIBCXX_CALL(__length) + typedef _DifferenceTp _DifferenceType; - __lt.__init(); + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + int __k = __seqs_end - __seqs_begin; - int __source; + _LT __lt(__k, __sentinel, __comp); + for (int __t = 0; __t < __k; ++__t) + { #if _GLIBCXX_ASSERTIONS - _DifferenceType __i = 0; + _GLIBCXX_PARALLEL_ASSERT(__seqs_begin[__t].first + != __seqs_begin[__t].second); #endif + __lt.__insert_start(*__seqs_begin[__t].first, __t, false); + } - _RAIter3 __target_end = __target + __length; - while (__target < __target_end) - { - // Take out. - __source = __lt.__get_min_source(); + __lt.__init(); + + int __source; #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(0 <= __source && __source < __k); - _GLIBCXX_PARALLEL_ASSERT(__i == 0 - || !__comp(*(__seqs_begin[__source].first), *(__target - 1))); + _DifferenceType __i = 0; #endif - // Feed. 
- *(__target++) = *(__seqs_begin[__source].first++); + _RAIter3 __target_end = __target + __length; + while (__target < __target_end) + { + // Take out. + __source = __lt.__get_min_source(); #if _GLIBCXX_ASSERTIONS - ++__i; + _GLIBCXX_PARALLEL_ASSERT(0 <= __source && __source < __k); + _GLIBCXX_PARALLEL_ASSERT(__i == 0 + || !__comp(*(__seqs_begin[__source].first), *(__target - 1))); #endif - // Replace from same __source. - __lt.__delete_min_insert(*__seqs_begin[__source].first, false); - } - return __target; - } - - -/** @brief Multi-way merging procedure for a high branching factor, - * requiring sentinels to exist. - * - * @param __stable The value must the same as for the used LoserTrees. - * @param UnguardedLoserTree _Loser Tree variant to use for the unguarded - * merging. - * @param GuardedLoserTree _Loser Tree variant to use for the guarded - * merging. - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, less equal than the - * total number of elements available. - * - * @return End iterator of output sequence. - */ -template< - typename UnguardedLoserTree, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> - _RAIter3 - multiway_merge_loser_tree_sentinel( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits<typename std::iterator_traits< - _RAIterIterator>::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, - _Compare __comp) - { - _GLIBCXX_CALL(__length) - - typedef _DifferenceTp _DifferenceType; - typedef std::iterator_traits<_RAIterIterator> _TraitsType; - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - _RAIter3 __target_end; - - for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) - // Move the sequence ends to the sentinel. This has the - // effect that the sentinel appears to be within the sequence. Then, - // we can use the unguarded variant if we merge out as many - // non-sentinel elements as we have. - ++((*__s).second); - - __target_end = multiway_merge_loser_tree_unguarded - <UnguardedLoserTree> - (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); + // Feed. + *(__target++) = *(__seqs_begin[__source].first++); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__target_end == __target + __length); - _GLIBCXX_PARALLEL_ASSERT(__is_sorted(__target, __target_end, __comp)); + ++__i; #endif + // Replace from same __source. + __lt.__delete_min_insert(*__seqs_begin[__source].first, false); + } - // Restore the sequence ends so the sentinels are not contained in the - // sequence any more (see comment in loop above). - for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) - --((*__s).second); - - return __target_end; - } - -/** - * @brief Traits for determining whether the loser tree should - * use pointers or copies. - * - * The field "_M_use_pointer" is used to determine whether to use pointers in - * the loser trees or whether to copy the values into the loser tree. - * - * The default behavior is to use pointers if the data type is 4 times as - * big as the pointer to it. - * - * Specialize for your data type to customize the behavior. 
- *
- * Example:
- *
- * template<>
- * struct _LoserTreeTraits<int>
- * { static const bool _M_use_pointer = false; };
- *
- * template<>
- * struct _LoserTreeTraits<heavyweight_type>
- * { static const bool _M_use_pointer = true; };
- *
- * @param _Tp type to give the loser tree traits for.
- */
-template <typename _Tp>
-struct _LoserTreeTraits
-{
- /**
- * @brief True iff to use pointers instead of values in loser trees.
+ return __target;
+ }
+
+
+ /** @brief Multi-way merging procedure for a high branching factor,
+ * requiring sentinels to exist.
*
- * The default behavior is to use pointers if the data type is four
- * times as big as the pointer to it.
+ * @param __stable The value must be the same as for the used LoserTrees.
+ * @param UnguardedLoserTree Loser Tree variant to use for the unguarded
+ * merging.
+ * @param GuardedLoserTree Loser Tree variant to use for the guarded
+ * merging.
+ *
+ * @param __seqs_begin Begin iterator of iterator pair input sequence.
+ * @param __seqs_end End iterator of iterator pair input sequence.
+ * @param __target Begin iterator of output sequence.
+ * @param __comp Comparator.
+ * @param __length Maximum length to merge, less than or equal to the
+ * total number of elements available.
+ *
+ * @return End iterator of output sequence.
*/
- static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
-};
-
-/**
- * @brief Switch for 3-way merging with __sentinels turned off.
- *
- * Note that 3-way merging is always stable!
- */
-template<
- bool __sentinels /*default == false*/,
- typename _RAIterIterator,
- typename _RAIter3,
- typename _DifferenceTp,
- typename _Compare>
-struct __multiway_merge_3_variant_sentinel_switch
-{
- _RAIter3 operator()(
- _RAIterIterator __seqs_begin,
- _RAIterIterator __seqs_end,
- _RAIter3 __target,
- _DifferenceTp __length, _Compare __comp)
- {
- return multiway_merge_3_variant<_GuardedIterator>(
- __seqs_begin, __seqs_end, __target, __length, __comp);
- }
-};
-
-/**
- * @brief Switch for 3-way merging with __sentinels turned on.
- *
- * Note that 3-way merging is always stable!
- */
-template<
- typename _RAIterIterator,
- typename _RAIter3,
- typename _DifferenceTp,
- typename _Compare>
-struct __multiway_merge_3_variant_sentinel_switch
- <true, _RAIterIterator, _RAIter3,
- _DifferenceTp, _Compare>
-{
- _RAIter3 operator()(
- _RAIterIterator __seqs_begin,
- _RAIterIterator __seqs_end,
- _RAIter3 __target,
- _DifferenceTp __length, _Compare __comp)
- {
- return multiway_merge_3_variant<_UnguardedIterator>(
- __seqs_begin, __seqs_end, __target, __length, __comp);
- }
-};
-
-/**
- * @brief Switch for 4-way merging with __sentinels turned off.
- *
- * Note that 4-way merging is always stable!
- */
-template<
- bool __sentinels /*default == false*/,
- typename _RAIterIterator,
- typename _RAIter3,
- typename _DifferenceTp,
- typename _Compare>
-struct __multiway_merge_4_variant_sentinel_switch
-{
- _RAIter3 operator()(
- _RAIterIterator __seqs_begin,
- _RAIterIterator __seqs_end,
- _RAIter3 __target,
- _DifferenceTp __length, _Compare __comp)
- {
- return multiway_merge_4_variant<_GuardedIterator>(
- __seqs_begin, __seqs_end, __target, __length, __comp);
- }
-};
-
-/**
- * @brief Switch for 4-way merging with __sentinels turned on.
- *
- * Note that 4-way merging is always stable!
- */ -template< - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> -struct __multiway_merge_4_variant_sentinel_switch - <true, _RAIterIterator, _RAIter3, - _DifferenceTp, _Compare> -{ - _RAIter3 operator()( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) - { - return multiway_merge_4_variant<_UnguardedIterator>( - __seqs_begin, __seqs_end, __target, __length, __comp); - } -}; - -/** - * @brief Switch for k-way merging with __sentinels turned on. - */ -template< - bool __sentinels, - bool __stable, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> -struct __multiway_merge_k_variant_sentinel_switch -{ - _RAIter3 operator()( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, + template<typename UnguardedLoserTree, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_loser_tree_sentinel(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, const typename std::iterator_traits<typename std::iterator_traits< - _RAIterIterator>::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) - { - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - return multiway_merge_loser_tree_sentinel< - typename __gnu_cxx::__conditional_type< - _LoserTreeTraits<_ValueType>::_M_use_pointer - , _LoserTreePointerUnguarded<__stable, _ValueType, _Compare> - , _LoserTreeUnguarded<__stable, _ValueType, _Compare> - >::__type>( - __seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); - } -}; - -/** - * @brief Switch for k-way merging with __sentinels turned off. - */ -template< - bool __stable, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> -struct __multiway_merge_k_variant_sentinel_switch - <false, __stable, _RAIterIterator, _RAIter3, - _DifferenceTp, _Compare> -{ - _RAIter3 operator()( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits<typename std::iterator_traits< - _RAIterIterator>::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) - { - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - return multiway_merge_loser_tree< - typename __gnu_cxx::__conditional_type< - _LoserTreeTraits<_ValueType>::_M_use_pointer - , _LoserTreePointer<__stable, _ValueType, _Compare> - , _LoserTree<__stable, _ValueType, _Compare> - >::__type >(__seqs_begin, __seqs_end, __target, __length, __comp); - } -}; - -/** @brief Sequential multi-way merging switch. - * - * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and - * runtime settings. - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. - * @param __length Maximum length to merge, possibly larger than the - * number of elements available. - * @param __stable Stable merging incurs a performance penalty. 
- * @param __sentinel The sequences have __a __sentinel element. - * @return End iterator of output sequence. */ -template< - bool __stable, - bool __sentinels, - typename _RAIterIterator, - typename _RAIter3, - typename _DifferenceTp, - typename _Compare> - _RAIter3 - __sequential_multiway_merge( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits<typename std::iterator_traits< - _RAIterIterator>::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) - { - _GLIBCXX_CALL(__length) - - typedef _DifferenceTp _DifferenceType; - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - -#if _GLIBCXX_ASSERTIONS - for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) - { - _GLIBCXX_PARALLEL_ASSERT( - __is_sorted((*__s).first, (*__s).second, __comp)); - } -#endif + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, + _Compare __comp) + { + _GLIBCXX_CALL(__length) - _DifferenceTp __total_length = 0; - for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) - __total_length += _GLIBCXX_PARALLEL_LENGTH(*__s); + typedef _DifferenceTp _DifferenceType; + typedef std::iterator_traits<_RAIterIterator> _TraitsType; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; - __length = std::min<_DifferenceTp>(__length, __total_length); + _RAIter3 __target_end; - if(__length == 0) - return __target; + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + // Move the sequence ends to the sentinel. This has the + // effect that the sentinel appears to be within the sequence. Then, + // we can use the unguarded variant if we merge out as many + // non-sentinel elements as we have. 
+ ++((*__s).second); - _RAIter3 __return_target = __target; - int __k = static_cast<int>(__seqs_end - __seqs_begin); + __target_end = multiway_merge_loser_tree_unguarded<UnguardedLoserTree> + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); - switch (__k) - { - case 0: - break; - case 1: - __return_target = std::copy(__seqs_begin[0].first, - __seqs_begin[0].first + __length, - __target); - __seqs_begin[0].first += __length; - break; - case 2: - __return_target = __merge_advance(__seqs_begin[0].first, - __seqs_begin[0].second, - __seqs_begin[1].first, - __seqs_begin[1].second, - __target, __length, __comp); - break; - case 3: - __return_target = __multiway_merge_3_variant_sentinel_switch< - __sentinels - , _RAIterIterator - , _RAIter3 - , _DifferenceTp - , _Compare>()(__seqs_begin, __seqs_end, __target, __length, __comp); - break; - case 4: - __return_target = __multiway_merge_4_variant_sentinel_switch< - __sentinels - , _RAIterIterator - , _RAIter3 - , _DifferenceTp - , _Compare>()(__seqs_begin, __seqs_end, __target, __length, __comp); - break; - default: - __return_target = __multiway_merge_k_variant_sentinel_switch< - __sentinels - , __stable - , _RAIterIterator - , _RAIter3 - , _DifferenceTp - , _Compare>()(__seqs_begin, __seqs_end, __target, __sentinel, - __length, __comp); - break; - } #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT( - __is_sorted(__target, __target + __length, __comp)); + _GLIBCXX_PARALLEL_ASSERT(__target_end == __target + __length); + _GLIBCXX_PARALLEL_ASSERT(__is_sorted(__target, __target_end, __comp)); #endif - return __return_target; - } + // Restore the sequence ends so the sentinels are not contained in the + // sequence any more (see comment in loop above). + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + --((*__s).second); -/** - * @brief Stable sorting functor. - * - * Used to reduce code instanciation in multiway_merge_sampling_splitting. - */ -template<bool __stable, class _RAIter, class _StrictWeakOrdering> -struct _SamplingSorter -{ - void operator()(_RAIter __first, _RAIter __last, - _StrictWeakOrdering __comp) - { __gnu_sequential::stable_sort(__first, __last, __comp); } -}; - -/** - * @brief Non-__stable sorting functor. - * - * Used to reduce code instantiation in multiway_merge_sampling_splitting. - */ -template<class _RAIter, class _StrictWeakOrdering> -struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering> -{ - void operator()(_RAIter __first, _RAIter __last, - _StrictWeakOrdering __comp) - { __gnu_sequential::sort(__first, __last, __comp); } -}; - -/** - * @brief Sampling based splitting for parallel multiway-merge routine. - */ -template< - bool __stable - , typename _RAIterIterator - , typename _Compare - , typename _DifferenceType> -void multiway_merge_sampling_splitting( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _DifferenceType __length, _DifferenceType __total_length, _Compare __comp, - std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces) -{ - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; + return __target_end; + } - // __k sequences. - int __k = static_cast<int>(__seqs_end - __seqs_begin); + /** + * @brief Traits for determining whether the loser tree should + * use pointers or copies. 
+ *
+ * The field "_M_use_pointer" is used to determine whether to use pointers
+ * in the loser trees or whether to copy the values into the loser tree.
+ *
+ * The default behavior is to use pointers if the data type is 4 times as
+ * big as the pointer to it.
+ *
+ * Specialize for your data type to customize the behavior.
+ *
+ * Example:
+ *
+ * template<>
+ * struct _LoserTreeTraits<int>
+ * { static const bool _M_use_pointer = false; };
+ *
+ * template<>
+ * struct _LoserTreeTraits<heavyweight_type>
+ * { static const bool _M_use_pointer = true; };
+ *
+ * @param _Tp type to give the loser tree traits for.
+ */
+ template <typename _Tp>
+ struct _LoserTreeTraits
+ {
+ /**
+ * @brief True iff pointers are used instead of values in loser trees.
+ *
+ * The default behavior is to use pointers if the data type is four
+ * times as big as the pointer to it.
+ */
+ static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
+ };
- int __num_threads = omp_get_num_threads();
+ /**
+ * @brief Switch for 3-way merging with __sentinels turned off.
+ *
+ * Note that 3-way merging is always stable!
+ */
+ template<bool __sentinels /*default == false*/,
+ typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ struct __multiway_merge_3_variant_sentinel_switch
+ {
+ _RAIter3
+ operator()(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ _DifferenceTp __length, _Compare __comp)
+ { return multiway_merge_3_variant<_GuardedIterator>
+ (__seqs_begin, __seqs_end, __target, __length, __comp); }
+ };
+
+ /**
+ * @brief Switch for 3-way merging with __sentinels turned on.
+ *
+ * Note that 3-way merging is always stable!
+ */
+ template<typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ struct __multiway_merge_3_variant_sentinel_switch<true, _RAIterIterator,
+ _RAIter3, _DifferenceTp,
+ _Compare>
+ {
+ _RAIter3
+ operator()(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ _DifferenceTp __length, _Compare __comp)
+ { return multiway_merge_3_variant<_UnguardedIterator>
+ (__seqs_begin, __seqs_end, __target, __length, __comp); }
+ };
+
+ /**
+ * @brief Switch for 4-way merging with __sentinels turned off.
+ *
+ * Note that 4-way merging is always stable!
+ */
+ template<bool __sentinels /*default == false*/,
+ typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ struct __multiway_merge_4_variant_sentinel_switch
+ {
+ _RAIter3
+ operator()(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ _DifferenceTp __length, _Compare __comp)
+ { return multiway_merge_4_variant<_GuardedIterator>
+ (__seqs_begin, __seqs_end, __target, __length, __comp); }
+ };
- _DifferenceType __num_samples =
- __gnu_parallel::_Settings::get().merge_oversampling * __num_threads;
+ /**
+ * @brief Switch for 4-way merging with __sentinels turned on.
+ *
+ * Note that 4-way merging is always stable!
+ */ + template<typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_4_variant_sentinel_switch<true, _RAIterIterator, + _RAIter3, _DifferenceTp, + _Compare> + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { return multiway_merge_4_variant<_UnguardedIterator> + (__seqs_begin, __seqs_end, __target, __length, __comp); } + }; - _ValueType* __samples = static_cast<_ValueType*>( - ::operator new(sizeof(_ValueType) * __k * __num_samples)); - // Sample. - for (int __s = 0; __s < __k; ++__s) - for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + /** + * @brief Switch for k-way merging with __sentinels turned on. + */ + template<bool __sentinels, + bool __stable, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_k_variant_sentinel_switch + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) { - _DifferenceType sample_index = - static_cast<_DifferenceType>( - _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__s]) - * (double(__i + 1) / (__num_samples + 1)) - * (double(__length) / __total_length)); - new(&(__samples[__s * __num_samples + __i])) - _ValueType(__seqs_begin[__s].first[sample_index]); + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + _LoserTreeTraits<_ValueType>::_M_use_pointer, + _LoserTreePointerUnguarded<__stable, _ValueType, _Compare>, + _LoserTreeUnguarded<__stable, _ValueType, _Compare> + >::__type> + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); } + }; - // Sort stable or non-stable, depending on value of template parameter - // "__stable". - _SamplingSorter<__stable, _ValueType*, _Compare>()( - __samples, __samples + (__num_samples * __k), __comp); - - for (int __slab = 0; __slab < __num_threads; ++__slab) - // For each slab / processor. - for (int __seq = 0; __seq < __k; ++__seq) + /** + * @brief Switch for k-way merging with __sentinels turned off. + */ + template<bool __stable, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_k_variant_sentinel_switch<false, __stable, + _RAIterIterator, _RAIter3, + _DifferenceTp, _Compare> + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) { - // For each sequence. - if (__slab > 0) - __pieces[__slab][__seq].first = - std::upper_bound( - __seqs_begin[__seq].first, - __seqs_begin[__seq].second, - __samples[__num_samples * __k * __slab / __num_threads], - __comp) - - __seqs_begin[__seq].first; - else - // Absolute beginning. 
- __pieces[__slab][__seq].first = 0;
- if ((__slab + 1) < __num_threads)
- __pieces[__slab][__seq].second =
- std::upper_bound(
- __seqs_begin[__seq].first,
- __seqs_begin[__seq].second,
- __samples[__num_samples * __k * (__slab + 1) /
- __num_threads], __comp)
- - __seqs_begin[__seq].first;
- else
- // Absolute end.
- __pieces[__slab][__seq].second
- = _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
+ typedef typename std::iterator_traits<_RAIterIterator>
+ ::value_type::first_type
+ _RAIter1;
+ typedef typename std::iterator_traits<_RAIter1>::value_type
+ _ValueType;
+
+ return multiway_merge_loser_tree<
+ typename __gnu_cxx::__conditional_type<
+ _LoserTreeTraits<_ValueType>::_M_use_pointer,
+ _LoserTreePointer<__stable, _ValueType, _Compare>,
+ _LoserTree<__stable, _ValueType, _Compare>
+ >::__type >(__seqs_begin, __seqs_end, __target, __length, __comp);
}
- ::operator delete(__samples);
-}
-
-/**
- * @brief Exact splitting for parallel multiway-merge routine.
- *
- * None of the passed sequences may be empty.
- */
-template<
- bool __stable
- , typename _RAIterIterator
- , typename _Compare
- , typename _DifferenceType>
-void multiway_merge_exact_splitting(
- _RAIterIterator __seqs_begin,
- _RAIterIterator __seqs_end,
- _DifferenceType __length, _DifferenceType __total_length, _Compare __comp,
- std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
-{
- typedef typename std::iterator_traits<_RAIterIterator>
- ::value_type::first_type
- _RAIter1;
+ };
+
+ /** @brief Sequential multi-way merging switch.
+ *
+ * The decision is based on the branching factor and
+ * runtime settings.
+ * @param __seqs_begin Begin iterator of iterator pair input sequence.
+ * @param __seqs_end End iterator of iterator pair input sequence.
+ * @param __target Begin iterator of output sequence.
+ * @param __comp Comparator.
+ * @param __length Maximum length to merge, possibly larger than the
+ * number of elements available.
+ * @param __stable Stable merging incurs a performance penalty.
+ * @param __sentinel The sequences have a __sentinel element.
+ * @return End iterator of output sequence. */
+ template<bool __stable,
+ bool __sentinels,
+ typename _RAIterIterator,
+ typename _RAIter3,
+ typename _DifferenceTp,
+ typename _Compare>
+ _RAIter3
+ __sequential_multiway_merge(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _RAIter3 __target,
+ const typename std::iterator_traits<typename std::iterator_traits<
+ _RAIterIterator>::value_type::first_type>::value_type&
+ __sentinel,
+ _DifferenceTp __length, _Compare __comp)
+ {
+ _GLIBCXX_CALL(__length)
+
+ typedef _DifferenceTp _DifferenceType;
+ typedef typename std::iterator_traits<_RAIterIterator>
+ ::value_type::first_type
+ _RAIter1;
+ typedef typename std::iterator_traits<_RAIter1>::value_type
+ _ValueType;
+
+#if _GLIBCXX_ASSERTIONS
+ for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+ {
+ _GLIBCXX_PARALLEL_ASSERT(__is_sorted((*__s).first,
+ (*__s).second, __comp));
+ }
+#endif
- const bool __tight = (__total_length == __length);
+ _DifferenceTp __total_length = 0;
+ for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+ __total_length += _GLIBCXX_PARALLEL_LENGTH(*__s);
- // __k sequences.
- const int __k = static_cast<int>(__seqs_end - __seqs_begin);
+ __length = std::min<_DifferenceTp>(__length, __total_length);
- const int __num_threads = omp_get_num_threads();
+ if(__length == 0)
+ return __target;
- // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT).
- std::vector<_RAIter1>* __offsets =
- new std::vector<_RAIter1>[__num_threads];
- std::vector<
- std::pair<_RAIter1, _RAIter1>
- > __se(__k);
+ _RAIter3 __return_target = __target;
+ int __k = static_cast<int>(__seqs_end - __seqs_begin);
- copy(__seqs_begin, __seqs_end, __se.begin());
+ switch (__k)
+ {
+ case 0:
+ break;
+ case 1:
+ __return_target = std::copy(__seqs_begin[0].first,
+ __seqs_begin[0].first + __length,
+ __target);
+ __seqs_begin[0].first += __length;
+ break;
+ case 2:
+ __return_target = __merge_advance(__seqs_begin[0].first,
+ __seqs_begin[0].second,
+ __seqs_begin[1].first,
+ __seqs_begin[1].second,
+ __target, __length, __comp);
+ break;
+ case 3:
+ __return_target = __multiway_merge_3_variant_sentinel_switch
+ <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
+ (__seqs_begin, __seqs_end, __target, __length, __comp);
+ break;
+ case 4:
+ __return_target = __multiway_merge_4_variant_sentinel_switch
+ <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
+ (__seqs_begin, __seqs_end, __target, __length, __comp);
+ break;
+ default:
+ __return_target = __multiway_merge_k_variant_sentinel_switch
+ <__sentinels, __stable, _RAIterIterator, _RAIter3, _DifferenceTp,
+ _Compare>()
+ (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
+ break;
+ }
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(
+ __is_sorted(__target, __target + __length, __comp));
+#endif
- _DifferenceType* __borders =
- new _DifferenceType[__num_threads + 1];
- equally_split(__length, __num_threads, __borders);
+ return __return_target;
+ }
- for (int __s = 0; __s < (__num_threads - 1); ++__s)
+ /**
+ * @brief Stable sorting functor.
+ *
+ * Used to reduce code instantiation in multiway_merge_sampling_splitting.
+ */
+ template<bool __stable, class _RAIter, class _StrictWeakOrdering>
+ struct _SamplingSorter
{
- __offsets[__s].resize(__k);
- multiseq_partition(
- __se.begin(), __se.end(), __borders[__s + 1],
- __offsets[__s].begin(), __comp);
-
- // Last one also needed and available.
- if (!__tight)
- {
- __offsets[__num_threads - 1].resize(__k);
- multiseq_partition(__se.begin(), __se.end(),
- _DifferenceType(__length),
- __offsets[__num_threads - 1].begin(), __comp);
- }
- }
- delete[] __borders;
+ void
+ operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp)
+ { __gnu_sequential::stable_sort(__first, __last, __comp); }
+ };
- for (int __slab = 0; __slab < __num_threads; ++__slab)
+ /**
+ * @brief Non-stable sorting functor.
+ *
+ * Used to reduce code instantiation in multiway_merge_sampling_splitting.
+ */
+ template<class _RAIter, class _StrictWeakOrdering>
+ struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering>
{
- // For each slab / processor.
- for (int __seq = 0; __seq < __k; ++__seq)
- {
- // For each sequence.
- if (__slab == 0)
- {
- // Absolute beginning.
- __pieces[__slab][__seq].first = 0; - } - else - __pieces[__slab][__seq].first = - __pieces[__slab - 1][__seq].second; - if (!__tight || __slab < (__num_threads - 1)) - __pieces[__slab][__seq].second = - __offsets[__slab][__seq] - __seqs_begin[__seq].first; - else - { - // __slab == __num_threads - 1 - __pieces[__slab][__seq].second = - _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]); - } - } + void + operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp) + { __gnu_sequential::sort(__first, __last, __comp); } + }; + + /** + * @brief Sampling based splitting for parallel multiway-merge routine. + */ + template<bool __stable, + typename _RAIterIterator, + typename _Compare, + typename _DifferenceType> + void + multiway_merge_sampling_splitting(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _DifferenceType __length, + _DifferenceType __total_length, + _Compare __comp, + std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + // __k sequences. + int __k = static_cast<int>(__seqs_end - __seqs_begin); + + int __num_threads = omp_get_num_threads(); + + _DifferenceType __num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * __num_threads; + + _ValueType* __samples = static_cast<_ValueType*> + (::operator new(sizeof(_ValueType) * __k * __num_samples)); + // Sample. + for (int __s = 0; __s < __k; ++__s) + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + { + _DifferenceType sample_index = static_cast<_DifferenceType> + (_GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__s]) + * (double(__i + 1) / (__num_samples + 1)) + * (double(__length) / __total_length)); + new(&(__samples[__s * __num_samples + __i])) + _ValueType(__seqs_begin[__s].first[sample_index]); + } + + // Sort stable or non-stable, depending on value of template parameter + // "__stable". + _SamplingSorter<__stable, _ValueType*, _Compare>() + (__samples, __samples + (__num_samples * __k), __comp); + + for (int __slab = 0; __slab < __num_threads; ++__slab) + // For each slab / processor. + for (int __seq = 0; __seq < __k; ++__seq) + { + // For each sequence. + if (__slab > 0) + __pieces[__slab][__seq].first = std::upper_bound + (__seqs_begin[__seq].first, __seqs_begin[__seq].second, + __samples[__num_samples * __k * __slab / __num_threads], + __comp) + - __seqs_begin[__seq].first; + else + // Absolute beginning. + __pieces[__slab][__seq].first = 0; + if ((__slab + 1) < __num_threads) + __pieces[__slab][__seq].second = std::upper_bound + (__seqs_begin[__seq].first, __seqs_begin[__seq].second, + __samples[__num_samples * __k * (__slab + 1) / __num_threads], + __comp) + - __seqs_begin[__seq].first; + else + // Absolute end. + __pieces[__slab][__seq].second = + _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]); + } + ::operator delete(__samples); } - delete[] __offsets; -} - -/** @brief Parallel multi-way merge routine. - * - * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor - * and runtime settings. - * - * Must not be called if the number of sequences is 1. - * - * @param _Splitter functor to split input (either __exact or sampling based) - * - * @param __seqs_begin Begin iterator of iterator pair input sequence. - * @param __seqs_end End iterator of iterator pair input sequence. - * @param __target Begin iterator of output sequence. - * @param __comp Comparator. 
- * @param __length Maximum length to merge, possibly larger than the
- * number of elements available.
- * @param __stable Stable merging incurs a performance penalty.
- * @param __sentinel Ignored.
- * @return End iterator of output sequence.
- */
-template<
- bool __stable,
- bool __sentinels,
- typename _RAIterIterator,
- typename _RAIter3,
- typename _DifferenceTp,
- typename _Splitter,
- typename _Compare
- >
- _RAIter3
- parallel_multiway_merge(_RAIterIterator __seqs_begin,
- _RAIterIterator __seqs_end,
- _RAIter3 __target,
- _Splitter __splitter,
- _DifferenceTp __length,
- _Compare __comp,
- _ThreadIndex __num_threads)
+
+ /**
+ * @brief Exact splitting for parallel multiway-merge routine.
+ *
+ * None of the passed sequences may be empty.
+ */
+ template<bool __stable,
+ typename _RAIterIterator,
+ typename _Compare,
+ typename _DifferenceType>
+ void
+ multiway_merge_exact_splitting(_RAIterIterator __seqs_begin,
+ _RAIterIterator __seqs_end,
+ _DifferenceType __length,
+ _DifferenceType __total_length,
+ _Compare __comp,
+ std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
{
+ typedef typename std::iterator_traits<_RAIterIterator>
+ ::value_type::first_type
+ _RAIter1;
+
+ const bool __tight = (__total_length == __length);
+
+ // __k sequences.
+ const int __k = static_cast<int>(__seqs_end - __seqs_begin);
+
+ const int __num_threads = omp_get_num_threads();
+
+ // (Settings::multiway_merge_splitting
+ // == __gnu_parallel::_Settings::EXACT).
+ std::vector<_RAIter1>* __offsets =
+ new std::vector<_RAIter1>[__num_threads];
+ std::vector<std::pair<_RAIter1, _RAIter1> > __se(__k);
+
+ copy(__seqs_begin, __seqs_end, __se.begin());
+
+ _DifferenceType* __borders =
+ new _DifferenceType[__num_threads + 1];
+ equally_split(__length, __num_threads, __borders);
+
+ for (int __s = 0; __s < (__num_threads - 1); ++__s)
+ {
+ __offsets[__s].resize(__k);
+ multiseq_partition(__se.begin(), __se.end(), __borders[__s + 1],
+ __offsets[__s].begin(), __comp);
+
+ // Last one also needed and available.
+ if (!__tight)
+ {
+ __offsets[__num_threads - 1].resize(__k);
+ multiseq_partition(__se.begin(), __se.end(),
+ _DifferenceType(__length),
+ __offsets[__num_threads - 1].begin(),
+ __comp);
+ }
+ }
+ delete[] __borders;
+
+ for (int __slab = 0; __slab < __num_threads; ++__slab)
+ {
+ // For each slab / processor.
+ for (int __seq = 0; __seq < __k; ++__seq)
+ {
+ // For each sequence.
+ if (__slab == 0)
+ {
+ // Absolute beginning.
+ __pieces[__slab][__seq].first = 0;
+ }
+ else
+ __pieces[__slab][__seq].first =
+ __pieces[__slab - 1][__seq].second;
+ if (!__tight || __slab < (__num_threads - 1))
+ __pieces[__slab][__seq].second =
+ __offsets[__slab][__seq] - __seqs_begin[__seq].first;
+ else
+ {
+ // __slab == __num_threads - 1
+ __pieces[__slab][__seq].second =
+ _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
+ }
+ }
+ }
+ delete[] __offsets;
+ }
+
+ /** @brief Parallel multi-way merge routine.
+ *
+ * The decision is based on the branching factor
+ * and runtime settings.
+ *
+ * Must not be called if the number of sequences is 1.
+ *
+ * @param _Splitter functor to split input (either exact or sampling based)
+ *
+ * @param __seqs_begin Begin iterator of iterator pair input sequence.
+ * @param __seqs_end End iterator of iterator pair input sequence.
+ * @param __target Begin iterator of output sequence.
+ * @param __comp Comparator.
+ * @param __length Maximum length to merge, possibly larger than the
+ * number of elements available.
+ * @param __stable Stable merging incurs a performance penalty. + * @param __sentinel Ignored. + * @return End iterator of output sequence. + */ + template<bool __stable, + bool __sentinels, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Splitter, + typename _Compare> + _RAIter3 + parallel_multiway_merge(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _Splitter __splitter, + _DifferenceTp __length, + _Compare __comp, + _ThreadIndex __num_threads) + { #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__seqs_end - __seqs_begin > 1); + _GLIBCXX_PARALLEL_ASSERT(__seqs_end - __seqs_begin > 1); #endif - _GLIBCXX_CALL(__length) - - typedef _DifferenceTp _DifferenceType; - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename - std::iterator_traits<_RAIter1>::value_type _ValueType; - - // Leave only non-empty sequences. - typedef std::pair<_RAIter1, _RAIter1> seq_type; - seq_type* __ne_seqs = new seq_type[__seqs_end - __seqs_begin]; - int __k = 0; - _DifferenceType __total_length = 0; - for (_RAIterIterator __raii = __seqs_begin; - __raii != __seqs_end; ++__raii) - { - _DifferenceTp __seq_length = _GLIBCXX_PARALLEL_LENGTH(*__raii); - if(__seq_length > 0) - { - __total_length += __seq_length; - __ne_seqs[__k++] = *__raii; - } - } + _GLIBCXX_CALL(__length) + + typedef _DifferenceTp _DifferenceType; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename + std::iterator_traits<_RAIter1>::value_type _ValueType; + + // Leave only non-empty sequences. + typedef std::pair<_RAIter1, _RAIter1> seq_type; + seq_type* __ne_seqs = new seq_type[__seqs_end - __seqs_begin]; + int __k = 0; + _DifferenceType __total_length = 0; + for (_RAIterIterator __raii = __seqs_begin; + __raii != __seqs_end; ++__raii) + { + _DifferenceTp __seq_length = _GLIBCXX_PARALLEL_LENGTH(*__raii); + if(__seq_length > 0) + { + __total_length += __seq_length; + __ne_seqs[__k++] = *__raii; + } + } - _GLIBCXX_CALL(__total_length) + _GLIBCXX_CALL(__total_length) - __length = std::min<_DifferenceTp>(__length, __total_length); + __length = std::min<_DifferenceTp>(__length, __total_length); - if (__total_length == 0 || __k == 0) - { - delete[] __ne_seqs; - return __target; - } + if (__total_length == 0 || __k == 0) + { + delete[] __ne_seqs; + return __target; + } - std::vector<std::pair<_DifferenceType, _DifferenceType> >* __pieces; + std::vector<std::pair<_DifferenceType, _DifferenceType> >* __pieces; - __num_threads = static_cast<_ThreadIndex> - (std::min<_DifferenceType>(__num_threads, __total_length)); + __num_threads = static_cast<_ThreadIndex> + (std::min<_DifferenceType>(__num_threads, __total_length)); -# pragma omp parallel num_threads (__num_threads) - { +# pragma omp parallel num_threads (__num_threads) + { # pragma omp single - { - __num_threads = omp_get_num_threads(); - // Thread __t will have to merge pieces[__iam][0..__k - 1] - __pieces = new std::vector< - std::pair<_DifferenceType, _DifferenceType> >[__num_threads]; - for (int __s = 0; __s < __num_threads; ++__s) - __pieces[__s].resize(__k); + { + __num_threads = omp_get_num_threads(); + // Thread __t will have to merge pieces[__iam][0..__k - 1] + __pieces = new std::vector< + std::pair<_DifferenceType, _DifferenceType> >[__num_threads]; + for (int __s = 0; __s < __num_threads; ++__s) + __pieces[__s].resize(__k); - _DifferenceType __num_samples = - 
__gnu_parallel::_Settings::get().merge_oversampling * - __num_threads; + _DifferenceType __num_samples = + __gnu_parallel::_Settings::get().merge_oversampling + * __num_threads; - __splitter(__ne_seqs, __ne_seqs + __k, __length, __total_length, - __comp, __pieces); - } //single + __splitter(__ne_seqs, __ne_seqs + __k, __length, __total_length, + __comp, __pieces); + } //single - _ThreadIndex __iam = omp_get_thread_num(); + _ThreadIndex __iam = omp_get_thread_num(); - _DifferenceType __target_position = 0; + _DifferenceType __target_position = 0; - for (int __c = 0; __c < __k; ++__c) - __target_position += __pieces[__iam][__c].first; + for (int __c = 0; __c < __k; ++__c) + __target_position += __pieces[__iam][__c].first; - seq_type* __chunks = new seq_type[__k]; + seq_type* __chunks = new seq_type[__k]; - for (int __s = 0; __s < __k; ++__s) - { - __chunks[__s] = std::make_pair( - __ne_seqs[__s].first + __pieces[__iam][__s].first, - __ne_seqs[__s].first + __pieces[__iam][__s].second); - } + for (int __s = 0; __s < __k; ++__s) + __chunks[__s] = std::make_pair(__ne_seqs[__s].first + + __pieces[__iam][__s].first, + __ne_seqs[__s].first + + __pieces[__iam][__s].second); - if(__length > __target_position) - __sequential_multiway_merge<__stable, __sentinels>( - __chunks, __chunks + __k, __target + __target_position, - *(__seqs_begin->second), __length - __target_position, __comp); + if(__length > __target_position) + __sequential_multiway_merge<__stable, __sentinels> + (__chunks, __chunks + __k, __target + __target_position, + *(__seqs_begin->second), __length - __target_position, __comp); - delete[] __chunks; - } // parallel + delete[] __chunks; + } // parallel #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT( - __is_sorted(__target, __target + __length, __comp)); + _GLIBCXX_PARALLEL_ASSERT( + __is_sorted(__target, __target + __length, __comp)); #endif - __k = 0; - // Update ends of sequences. - for (_RAIterIterator __raii = __seqs_begin; - __raii != __seqs_end; ++__raii) - { - _DifferenceTp __length = _GLIBCXX_PARALLEL_LENGTH(*__raii); - if(__length > 0) - (*__raii).first += __pieces[__num_threads - 1][__k++].second; - } + __k = 0; + // Update ends of sequences. + for (_RAIterIterator __raii = __seqs_begin; + __raii != __seqs_end; ++__raii) + { + _DifferenceTp __length = _GLIBCXX_PARALLEL_LENGTH(*__raii); + if(__length > 0) + (*__raii).first += __pieces[__num_threads - 1][__k++].second; + } - delete[] __pieces; - delete[] __ne_seqs; + delete[] __pieces; + delete[] __ne_seqs; - return __target + __length; - } + return __target + __length; + } -/** - * @brief Multiway Merge Frontend. - * - * Merge the sequences specified by seqs_begin and __seqs_end into - * __target. __seqs_begin and __seqs_end must point to a sequence of - * pairs. These pairs must contain an iterator to the beginning - * of a sequence in their first entry and an iterator the _M_end of - * the same sequence in their second entry. - * - * Ties are broken arbitrarily. See stable_multiway_merge for a variant - * that breaks ties by sequence number but is slower. - * - * The first entries of the pairs (i.e. the begin iterators) will be moved - * forward. - * - * The output sequence has to provide enough space for all elements - * that are written to it. 
- * - * This function will merge the input sequences: - * - * - not stable - * - parallel, depending on the input size and Settings - * - using sampling for splitting - * - not using sentinels - * - * Example: - * - * <pre> - * int sequences[10][10]; - * for (int __i = 0; __i < 10; ++__i) - * for (int __j = 0; __i < 10; ++__j) - * sequences[__i][__j] = __j; - * - * int __out[33]; - * std::vector<std::pair<int*> > seqs; - * for (int __i = 0; __i < 10; ++__i) - * { seqs.push(std::make_pair<int*>(sequences[__i], sequences[__i] + 10)) } - * - * multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33); - * </pre> - * - * @see stable_multiway_merge - * - * @pre All input sequences must be sorted. - * @pre Target must provide enough space to merge out length elements or - * the number of elements in all sequences, whichever is smaller. - * - * @post [__target, return __value) contains merged __elements from the - * input sequences. - * @post return __value - __target = min(__length, number of elements in all - * sequences). - * - * @param _RAIterPairIterator iterator over sequence - * of pairs of iterators - * @param _RAIterOut iterator over target sequence - * @param _DifferenceTp difference type for the sequence - * @param _Compare strict weak ordering type to compare elements - * in sequences - * - * @param __seqs_begin __begin of sequence __sequence - * @param __seqs_end _M_end of sequence __sequence - * @param __target target sequence to merge to. - * @param __comp strict weak ordering to use for element comparison. - * @param __length Maximum length to merge, possibly larger than the - * number of elements available. - * - * @return _M_end iterator of output sequence - */ -// multiway_merge -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::sequential_tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) - - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; - - // Execute multiway merge *sequentially*. - return __sequential_multiway_merge - </* __stable = */ false, /* __sentinels = */ false> - (__seqs_begin, __seqs_end, __target, - *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::exact_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + /** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and __seqs_end into + * __target. __seqs_begin and __seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator the _M_end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. 
+ * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - not using sentinels + * + * Example: + * + * <pre> + * int sequences[10][10]; + * for (int __i = 0; __i < 10; ++__i) + * for (int __j = 0; __i < 10; ++__j) + * sequences[__i][__j] = __j; + * + * int __out[33]; + * std::vector<std::pair<int*> > seqs; + * for (int __i = 0; __i < 10; ++__i) + * { seqs.push(std::make_pair<int*>(sequences[__i], + * sequences[__i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33); + * </pre> + * + * @see stable_multiway_merge + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * + * @post [__target, return __value) contains merged __elements from the + * input sequences. + * @post return __value - __target = min(__length, number of elements in all + * sequences). + * + * @param _RAIterPairIterator iterator over sequence + * of pairs of iterators + * @param _RAIterOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param _Compare strict weak ordering type to compare elements + * in sequences + * + * @param __seqs_begin __begin of sequence __sequence + * @param __seqs_end _M_end of sequence __sequence + * @param __target target sequence to merge to. + * @param __comp strict weak ordering to use for element comparison. + * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * + * @return _M_end iterator of output sequence + */ + // multiway_merge + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. - if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ false, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, __target, - multiway_merge_exact_splitting</* __stable = */ false, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else + // Execute multiway merge *sequentially*. 
return __sequential_multiway_merge - </* __stable = */ false, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, __target, *(__seqs_begin->second), - __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::sampling_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. 
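As an aside, the tag-dispatch frontends above can be exercised directly from user code. The following is a minimal sketch, not part of the patch: the data and names are made up, and it assumes the header is reachable as <parallel/multiway_merge.h> and that the translation unit is built with -fopenmp.

    #include <functional>
    #include <utility>
    #include <vector>
    #include <parallel/multiway_merge.h>

    int main()
    {
      // Three sorted runs (illustrative data only).
      int a[] = { 1, 4, 7 }, b[] = { 2, 5, 8 }, c[] = { 3, 6, 9 };
      std::vector<std::pair<int*, int*> > seqs;
      seqs.push_back(std::make_pair(a, a + 3));
      seqs.push_back(std::make_pair(b, b + 3));
      seqs.push_back(std::make_pair(c, c + 3));

      int out[9];
      // exact_tag(0) requests exact splitting with the default thread
      // count (mirroring the parallel_tag(0) default above); the call
      // still falls back to the sequential merge when the Settings
      // thresholds are not met.
      __gnu_parallel::multiway_merge(seqs.begin(), seqs.end(), out, 9,
                                     std::less<int>(),
                                     __gnu_parallel::exact_tag(0));
      return 0;
    }
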
- if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ false, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_exact_splitting</* __stable = */ false, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge - </* __stable = */ false, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , parallel_tag __tag = parallel_tag(0)) -{ - return multiway_merge(__seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , default_parallel_tag __tag) -{ - return multiway_merge(__seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// stable_multiway_merge -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::sequential_tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
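For intuition on the sampling splitter introduced earlier: it draws merge_oversampling * num_threads samples per run, pools and sorts them, then turns evenly spaced sample ranks into per-run cut positions with upper_bound (the code handles the first and last slab specially). A stripped-down sketch of the generic step over plain int runs follows; cut_position is a hypothetical helper, not the library routine.

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Cut one sorted run at the pivot for thread boundary `slab` out of
    // `p` threads, given the pooled, sorted sample array. Mirrors the
    // upper_bound step in multiway_merge_sampling_splitting above.
    long cut_position(const std::pair<int*, int*>& run,
                      const std::vector<int>& sorted_samples,
                      int slab, int p)
    {
      // Pivot rank: total samples * slab / p, as in the code above.
      int pivot = sorted_samples[sorted_samples.size() * slab / p];
      return std::upper_bound(run.first, run.second, pivot) - run.first;
    }
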
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { return multiway_merge(__seqs_begin, __seqs_end, __target, __length, + __comp, exact_tag(__tag.__get_num_threads())); } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { return multiway_merge(__seqs_begin, __seqs_end, __target, __length, + __comp, exact_tag(__tag.__get_num_threads())); } + + // stable_multiway_merge + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) - // Execute multiway merge *sequentially*. - return __sequential_multiway_merge - </* __stable = */ true, /* __sentinels = */ false> - (__seqs_begin, __seqs_end, __target, *(__seqs_begin->second), __length, - __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::exact_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // Execute multiway merge *sequentially*. + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. 
- if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ true, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_exact_splitting</* __stable = */ true, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge</* __stable = */ true, - /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , sampling_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
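Regarding the stable variants: the documentation above says ties are broken by sequence number rather than arbitrarily. One way to picture that guarantee (purely illustrative, not how the merge is actually implemented) is to imagine each element tagged with the index of the run it came from and compared lexicographically:

    #include <utility>

    // An element tagged with the index of the run it came from.
    typedef std::pair<int, int> tagged; // (value, run index)

    // Stable merging behaves as if equal values were ordered by run
    // index, so ties resolve in favor of the earlier input sequence.
    struct tagged_less
    {
      bool operator()(const tagged& a, const tagged& b) const
      {
        return a.first < b.first
               || (a.first == b.first && a.second < b.second);
      }
    };
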
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. - if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ true, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_sampling_splitting</* __stable = */ true, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge - </* __stable = */ true, /* __sentinels = */ false>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , parallel_tag __tag = parallel_tag(0)) -{ - return stable_multiway_merge( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , default_parallel_tag __tag) -{ - return stable_multiway_merge( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -/** - * @brief Multiway Merge Frontend. - * - * Merge the sequences specified by seqs_begin and __seqs_end into - * __target. __seqs_begin and __seqs_end must point to a sequence of - * pairs. These pairs must contain an iterator to the beginning - * of a sequence in their first entry and an iterator the _M_end of - * the same sequence in their second entry. - * - * Ties are broken arbitrarily. See stable_multiway_merge for a variant - * that breaks ties by sequence number but is slower. - * - * The first entries of the pairs (i.e. the begin iterators) will be moved - * forward accordingly. - * - * The output sequence has to provide enough space for all elements - * that are written to it. 
- * - * This function will merge the input sequences: - * - * - not stable - * - parallel, depending on the input size and Settings - * - using sampling for splitting - * - using sentinels - * - * You have to take care that the element the _M_end iterator points to is - * readable and contains a value that is greater than any other non-sentinel - * value in all sequences. - * - * Example: - * - * <pre> - * int sequences[10][11]; - * for (int __i = 0; __i < 10; ++__i) - * for (int __j = 0; __i < 11; ++__j) - * sequences[__i][__j] = __j; // __last one is sentinel! - * - * int __out[33]; - * std::vector<std::pair<int*> > seqs; - * for (int __i = 0; __i < 10; ++__i) - * { seqs.push(std::make_pair<int*>(sequences[__i], sequences[__i] + 10)) } - * - * multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33); - * </pre> - * - * @pre All input sequences must be sorted. - * @pre Target must provide enough space to merge out length elements or - * the number of elements in all sequences, whichever is smaller. - * @pre For each @__c __i, @__c __seqs_begin[__i].second must be the end - * marker of the sequence, but also reference the one more __sentinel - * element. - * - * @post [__target, return __value) contains merged __elements from the - * input sequences. - * @post return __value - __target = min(__length, number of elements in all - * sequences). - * - * @see stable_multiway_merge_sentinels - * - * @param _RAIterPairIterator iterator over sequence - * of pairs of iterators - * @param _RAIterOut iterator over target sequence - * @param _DifferenceTp difference type for the sequence - * @param _Compare strict weak ordering type to compare elements - * in sequences - * - * @param __seqs_begin __begin of sequence __sequence - * @param __seqs_end _M_end of sequence __sequence - * @param __target target sequence to merge to. - * @param __comp strict weak ordering to use for element comparison. - * @param __length Maximum length to merge, possibly larger than the - * number of elements available. 
- * - * @return _M_end iterator of output sequence - */ -// multiway_merge_sentinels -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::sequential_tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return stable_multiway_merge + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return stable_multiway_merge + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } - // Execute multiway merge *sequentially*. - return __sequential_multiway_merge - </* __stable = */ false, /* __sentinels = */ true> - (__seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::exact_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + /** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and __seqs_end into + * __target. __seqs_begin and __seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator the _M_end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward accordingly. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - using sentinels + * + * You have to take care that the element the _M_end iterator points to is + * readable and contains a value that is greater than any other non-sentinel + * value in all sequences. + * + * Example: + * + * <pre> + * int sequences[10][11]; + * for (int __i = 0; __i < 10; ++__i) + * for (int __j = 0; __i < 11; ++__j) + * sequences[__i][__j] = __j; // __last one is sentinel! 
+ * + * int __out[33]; + * std::vector<std::pair<int*> > seqs; + * for (int __i = 0; __i < 10; ++__i) + * { seqs.push(std::make_pair<int*>(sequences[__i], + * sequences[__i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33); + * </pre> + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * @pre For each @__c __i, @__c __seqs_begin[__i].second must be the end + * marker of the sequence, but also reference the one more __sentinel + * element. + * + * @post [__target, return __value) contains merged __elements from the + * input sequences. + * @post return __value - __target = min(__length, number of elements in all + * sequences). + * + * @see stable_multiway_merge_sentinels + * + * @param _RAIterPairIterator iterator over sequence + * of pairs of iterators + * @param _RAIterOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param _Compare strict weak ordering type to compare elements + * in sequences + * + * @param __seqs_begin __begin of sequence __sequence + * @param __seqs_end _M_end of sequence __sequence + * @param __target target sequence to merge to. + * @param __comp strict weak ordering to use for element comparison. + * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * + * @return _M_end iterator of output sequence + */ + // multiway_merge_sentinels + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. - if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ false, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_exact_splitting</* __stable = */ false, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else + // Execute multiway merge *sequentially*. 
return __sequential_multiway_merge - </* __stable = */ false, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , sampling_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, + __target, *(__seqs_begin->second), __length, __comp); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. 
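On the sentinel variants: as the description above stresses, each run must be followed by one readable element that compares greater than every real value, which lets the inner merge loop drop end-of-run tests. A minimal setup sketch (made-up data; assumes int runs with INT_MAX as the sentinel and the same include/-fopenmp caveats as before):

    #include <climits>
    #include <functional>
    #include <utility>
    #include <vector>
    #include <parallel/multiway_merge.h>

    int main()
    {
      // Each run holds 3 real elements plus one trailing sentinel.
      int a[4] = { 1, 4, 7, INT_MAX };
      int b[4] = { 2, 5, 8, INT_MAX };
      std::vector<std::pair<int*, int*> > seqs;
      // Per the precondition quoted above, the end iterator marks the
      // end of the real data and dereferences to the sentinel.
      seqs.push_back(std::make_pair(a, a + 3));
      seqs.push_back(std::make_pair(b, b + 3));

      int out[6];
      __gnu_parallel::multiway_merge_sentinels
        (seqs.begin(), seqs.end(), out, 6, std::less<int>(),
         __gnu_parallel::exact_tag(0));
      return 0;
    }
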
- if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ false, /* __sentinels = */ true> - (__seqs_begin, __seqs_end, __target, - multiway_merge_sampling_splitting</* __stable = */ false, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge - </* __stable = */false, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , parallel_tag __tag = parallel_tag(0)) -{ - return multiway_merge_sentinels( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , default_parallel_tag __tag) -{ - return multiway_merge_sentinels( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// stable_multiway_merge_sentinels -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::sequential_tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */false, /* __sentinels = */ true>( + __seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } - // Execute multiway merge *sequentially*. - return __sequential_multiway_merge - </* __stable = */ true, /* __sentinels = */ true> - (__seqs_begin, __seqs_end, __target, *(__seqs_begin->second), __length, - __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , __gnu_parallel::exact_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // stable_multiway_merge_sentinels + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. 
- if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= - __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ true, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_exact_splitting</* __stable = */ true, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge - </* __stable = */ true, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, __target, *(__seqs_begin->second), - __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , sampling_tag __tag) -{ - typedef _DifferenceTp _DifferenceType; - _GLIBCXX_CALL(__seqs_end - __seqs_begin) + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; - // catch special case: no sequences - if (__seqs_begin == __seqs_end) - return __target; + // Execute multiway merge *sequentially*. + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } - // Execute merge; maybe parallel, depending on the number of merged - // elements and the number of sequences and global thresholds in - // Settings. - if ((__seqs_end - __seqs_begin > 1) && - _GLIBCXX_PARALLEL_CONDITION( - ((__seqs_end - __seqs_begin) >= + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= __gnu_parallel::_Settings::get().multiway_merge_minimal_k) - && ((_SequenceIndex)__length >= + && ((_SequenceIndex)__length >= __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) - return parallel_multiway_merge - </* __stable = */ true, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, - multiway_merge_sampling_splitting</* __stable = */ true, - typename std::iterator_traits<_RAIterPairIterator> - ::value_type*, _Compare, _DifferenceTp>, - static_cast<_DifferenceType>(__length), __comp, - __tag.__get_num_threads()); - else - return __sequential_multiway_merge - </* __stable = */ true, /* __sentinels = */ true>( - __seqs_begin, __seqs_end, - __target, *(__seqs_begin->second), __length, __comp); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , parallel_tag __tag = parallel_tag(0)) -{ - return stable_multiway_merge_sentinels( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} - -// public interface -template< - typename _RAIterPairIterator - , typename _RAIterOut - , typename _DifferenceTp - , typename _Compare> -_RAIterOut -stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin - , _RAIterPairIterator __seqs_end - , _RAIterOut __target - , _DifferenceTp __length, _Compare __comp - , default_parallel_tag __tag) -{ - return stable_multiway_merge_sentinels( - __seqs_begin, __seqs_end, __target, __length, __comp, - exact_tag(__tag.__get_num_threads())); -} + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, + _Compare __comp, + sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
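The parallel/sequential decision repeated in each of these frontends compares the run count and the merge length against two Settings fields. Those thresholds can be adjusted at run time; a short sketch follows (field names are taken from the condition above, the chosen values are arbitrary, and the header name is assumed to be <parallel/settings.h>):

    #include <parallel/settings.h>

    void lower_merge_thresholds()
    {
      // Copy, tweak, and publish the global settings.
      __gnu_parallel::_Settings s = __gnu_parallel::_Settings::get();
      s.multiway_merge_minimal_k = 2;    // parallelize even few runs...
      s.multiway_merge_minimal_n = 1000; // ...but only above 1000 elements
      __gnu_parallel::_Settings::set(s);
    }
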
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, + _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return stable_multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return stable_multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } }; // namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H */ diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index c7f10ae7511..1f2c43db61f 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -41,451 +41,434 @@ namespace __gnu_parallel { + /** @brief Subsequence description. */ + template<typename _DifferenceTp> + struct _Piece + { + typedef _DifferenceTp _DifferenceType; -/** @brief Subsequence description. */ -template<typename _DifferenceTp> - struct _Piece - { - typedef _DifferenceTp _DifferenceType; + /** @brief Begin of subsequence. */ + _DifferenceType _M_begin; - /** @brief Begin of subsequence. */ - _DifferenceType _M_begin; + /** @brief End of subsequence. */ + _DifferenceType _M_end; + }; - /** @brief End of subsequence. */ - _DifferenceType _M_end; - }; + /** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ + template<typename _RAIter> + struct _PMWMSSortingData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; -/** @brief Data accessed by all threads. - * - * PMWMS = parallel multiway mergesort */ -template<typename _RAIter> - struct _PMWMSSortingData - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - /** @brief Number of threads involved. */ - _ThreadIndex _M_num_threads; - - /** @brief Input __begin. 
*/ - _RAIter _M_source; - - /** @brief Start indices, per thread. */ - _DifferenceType* _M_starts; - - /** @brief Storage in which to sort. */ - _ValueType** _M_temporary; - - /** @brief Samples. */ - _ValueType* _M_samples; - - /** @brief Offsets to add to the found positions. */ - _DifferenceType* _M_offsets; - - /** @brief Pieces of data to merge @__c [thread][__sequence] */ - std::vector<_Piece<_DifferenceType> >* _M_pieces; -}; - -/** - * @brief Select _M_samples from a sequence. - * @param __sd Pointer to algorithm data. _Result will be placed in - * @__c __sd->_M_samples. - * @param __num_samples Number of _M_samples to select. - */ -template<typename _RAIter, typename _DifferenceTp> - void - __determine_samples(_PMWMSSortingData<_RAIter>* __sd, - _DifferenceTp __num_samples) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef _DifferenceTp _DifferenceType; - - _ThreadIndex __iam = omp_get_thread_num(); - - _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; - - equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], - __num_samples + 1, __es); - - for (_DifferenceType __i = 0; __i < __num_samples; ++__i) - ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) - _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]); - - delete[] __es; - } - -/** @brief Split consistently. */ -template<bool __exact, typename _RAIter, - typename _Compare, typename _SortingPlacesIterator> - struct _SplitConsistently - { - }; + /** @brief Number of threads involved. */ + _ThreadIndex _M_num_threads; -/** @brief Split by exact splitting. */ -template<typename _RAIter, typename _Compare, - typename _SortingPlacesIterator> - struct _SplitConsistently - <true, _RAIter, _Compare, _SortingPlacesIterator> - { - void operator()( - const _ThreadIndex __iam, - _PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp, - const typename - std::iterator_traits<_RAIter>::difference_type - __num_samples) - const - { -# pragma omp barrier - - std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > - seqs(__sd->_M_num_threads); - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) - seqs[__s] = std::make_pair(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - - __sd->_M_starts[__s])); - - std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads); - - // if not last thread - if (__iam < __sd->_M_num_threads - 1) - multiseq_partition(seqs.begin(), seqs.end(), - __sd->_M_starts[__iam + 1], _M_offsets.begin(), - __comp); - - for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) - { - // for each sequence - if (__iam < (__sd->_M_num_threads - 1)) - __sd->_M_pieces[__iam][__seq]._M_end - = _M_offsets[__seq] - seqs[__seq].first; - else - // very end of this sequence - __sd->_M_pieces[__iam][__seq]._M_end = - __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; - } + /** @brief Input __begin. */ + _RAIter _M_source; -# pragma omp barrier + /** @brief Start indices, per thread. */ + _DifferenceType* _M_starts; - for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) - { - // For each sequence. - if (__iam > 0) - __sd->_M_pieces[__iam][__seq]._M_begin = - __sd->_M_pieces[__iam - 1][__seq]._M_end; - else - // Absolute beginning. - __sd->_M_pieces[__iam][__seq]._M_begin = 0; - } - } + /** @brief Storage in which to sort. */ + _ValueType** _M_temporary; + + /** @brief Samples. 
*/ + _ValueType* _M_samples; + + /** @brief Offsets to add to the found positions. */ + _DifferenceType* _M_offsets; + + /** @brief Pieces of data to merge @__c [thread][__sequence] */ + std::vector<_Piece<_DifferenceType> >* _M_pieces; }; -/** @brief Split by sampling. */ -template<typename _RAIter, typename _Compare, - typename _SortingPlacesIterator> - struct _SplitConsistently<false, _RAIter, _Compare, - _SortingPlacesIterator> - { - void operator()( - const _ThreadIndex __iam, - _PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp, - const typename - std::iterator_traits<_RAIter>::difference_type - __num_samples) - const + /** + * @brief Select _M_samples from a sequence. + * @param __sd Pointer to algorithm data. _Result will be placed in + * @__c __sd->_M_samples. + * @param __num_samples Number of _M_samples to select. + */ + template<typename _RAIter, typename _DifferenceTp> + void + __determine_samples(_PMWMSSortingData<_RAIter>* __sd, + _DifferenceTp __num_samples) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; + typedef _DifferenceTp _DifferenceType; - __determine_samples(__sd, __num_samples); + _ThreadIndex __iam = omp_get_thread_num(); -# pragma omp barrier + _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; -# pragma omp single - __gnu_sequential::sort(__sd->_M_samples, - __sd->_M_samples - + (__num_samples * __sd->_M_num_threads), - __comp); + equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], + __num_samples + 1, __es); -# pragma omp barrier + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) + _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + + __es[__i + 1]]); + + delete[] __es; + } - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) - { - // For each sequence. - if (__num_samples * __iam > 0) - __sd->_M_pieces[__iam][__s]._M_begin = + /** @brief Split consistently. */ + template<bool __exact, typename _RAIter, + typename _Compare, typename _SortingPlacesIterator> + struct _SplitConsistently + { }; + + /** @brief Split by exact splitting. 
*/ + template<typename _RAIter, typename _Compare, + typename _SortingPlacesIterator> + struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator> + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { +# pragma omp barrier + + std::vector<std::pair<_SortingPlacesIterator, + _SortingPlacesIterator> > + __seqs(__sd->_M_num_threads); + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s])); + + std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); + + // if not last thread + if (__iam < __sd->_M_num_threads - 1) + multiseq_partition(__seqs.begin(), __seqs.end(), + __sd->_M_starts[__iam + 1], __offsets.begin(), + __comp); + + for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // for each sequence + if (__iam < (__sd->_M_num_threads - 1)) + __sd->_M_pieces[__iam][__seq]._M_end + = __offsets[__seq] - __seqs[__seq].first; + else + // very end of this sequence + __sd->_M_pieces[__iam][__seq]._M_end = + __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; + } + +# pragma omp barrier + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // For each sequence. + if (__iam > 0) + __sd->_M_pieces[__iam][__seq]._M_begin = + __sd->_M_pieces[__iam - 1][__seq]._M_end; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__seq]._M_begin = 0; + } + } + }; + + /** @brief Split by sampling. */ + template<typename _RAIter, typename _Compare, + typename _SortingPlacesIterator> + struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator> + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + __determine_samples(__sd, __num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(__sd->_M_samples, + __sd->_M_samples + + (__num_samples * __sd->_M_num_threads), + __comp); + +# pragma omp barrier + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + // For each sequence. + if (__num_samples * __iam > 0) + __sd->_M_pieces[__iam][__s]._M_begin = std::lower_bound(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), - __sd->_M_samples[__num_samples * __iam], - __comp) + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * __iam], + __comp) - __sd->_M_temporary[__s]; - else - // Absolute beginning. - __sd->_M_pieces[__iam][__s]._M_begin = 0; + else + // Absolute beginning. 
+ __sd->_M_pieces[__iam][__s]._M_begin = 0; - if ((__num_samples * (__iam + 1)) < - (__num_samples * __sd->_M_num_threads)) - __sd->_M_pieces[__iam][__s]._M_end = + if ((__num_samples * (__iam + 1)) < + (__num_samples * __sd->_M_num_threads)) + __sd->_M_pieces[__iam][__s]._M_end = std::lower_bound(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), - __sd->_M_samples[__num_samples * (__iam + 1)], - __comp) + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * (__iam + 1)], + __comp) - __sd->_M_temporary[__s]; - else - // Absolute end. - __sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1] - - __sd->_M_starts[__s]; - } - } + else + // Absolute end. + __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]); + } + } }; -template<bool __stable, typename _RAIter, typename _Compare> - struct __possibly_stable_sort - { - }; + template<bool __stable, typename _RAIter, typename _Compare> + struct __possibly_stable_sort + { }; -template<typename _RAIter, typename _Compare> - struct __possibly_stable_sort<true, _RAIter, _Compare> - { - void operator()(const _RAIter& __begin, - const _RAIter& __end, _Compare& __comp) const + template<typename _RAIter, typename _Compare> + struct __possibly_stable_sort<true, _RAIter, _Compare> { - __gnu_sequential::stable_sort(__begin, __end, __comp); - } - }; + void operator()(const _RAIter& __begin, + const _RAIter& __end, _Compare& __comp) const + { __gnu_sequential::stable_sort(__begin, __end, __comp); } + }; -template<typename _RAIter, typename _Compare> - struct __possibly_stable_sort<false, _RAIter, _Compare> - { - void operator()(const _RAIter __begin, - const _RAIter __end, _Compare& __comp) const + template<typename _RAIter, typename _Compare> + struct __possibly_stable_sort<false, _RAIter, _Compare> { - __gnu_sequential::sort(__begin, __end, __comp); - } - }; - -template<bool __stable, typename Seq_RAIter, - typename _RAIter, typename _Compare, - typename DiffType> - struct __possibly_stable_multiway_merge - { - }; - -template<typename Seq_RAIter, typename _RAIter, - typename _Compare, typename DiffType> - struct __possibly_stable_multiway_merge - <true, Seq_RAIter, _RAIter, _Compare, - DiffType> - { - void operator()(const Seq_RAIter& __seqs_begin, - const Seq_RAIter& __seqs_end, - const _RAIter& __target, - _Compare& __comp, - DiffType __length_am) const + void operator()(const _RAIter __begin, + const _RAIter __end, _Compare& __comp) const + { __gnu_sequential::sort(__begin, __end, __comp); } + }; + + template<bool __stable, typename Seq_RAIter, + typename _RAIter, typename _Compare, + typename DiffType> + struct __possibly_stable_multiway_merge + { }; + + template<typename Seq_RAIter, typename _RAIter, + typename _Compare, typename _DiffType> + struct __possibly_stable_multiway_merge<true, Seq_RAIter, + _RAIter, _Compare, _DiffType> { - stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, - __comp, sequential_tag()); - } - }; - -template<typename Seq_RAIter, typename _RAIter, - typename _Compare, typename DiffType> - struct __possibly_stable_multiway_merge - <false, Seq_RAIter, _RAIter, _Compare, - DiffType> - { - void operator()(const Seq_RAIter& __seqs_begin, + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { stable_multiway_merge(__seqs_begin, __seqs_end, __target, 
+ __length_am, __comp, sequential_tag()); } + }; + + template<typename Seq_RAIter, typename _RAIter, + typename _Compare, typename _DiffType> + struct __possibly_stable_multiway_merge<false, Seq_RAIter, + _RAIter, _Compare, _DiffType> + { + void operator()(const Seq_RAIter& __seqs_begin, const Seq_RAIter& __seqs_end, const _RAIter& __target, _Compare& __comp, - DiffType __length_am) const + _DiffType __length_am) const + { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, + __comp, sequential_tag()); } + }; + + /** @brief PMWMS code executed by each thread. + * @param __sd Pointer to algorithm data. + * @param __comp Comparator. + */ + template<bool __stable, bool __exact, typename _RAIter, + typename _Compare> + void + parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp) { - multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, - sequential_tag()); - } - }; + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + _DifferenceType __length_local = + __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; -/** @brief PMWMS code executed by each thread. - * @param __sd Pointer to algorithm data. - * @param __comp Comparator. - */ -template<bool __stable, bool __exact, typename _RAIter, - typename _Compare> - void - parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _ThreadIndex __iam = omp_get_thread_num(); - - // Length of this thread's chunk, before merging. - _DifferenceType __length_local - = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; - - // Sort in temporary storage, leave space for sentinel. - - typedef _ValueType* _SortingPlacesIterator; - - __sd->_M_temporary[__iam] = - static_cast<_ValueType*>( - ::operator new(sizeof(_ValueType) * (__length_local + 1))); - - // Copy there. - std::uninitialized_copy( - __sd->_M_source + __sd->_M_starts[__iam], - __sd->_M_source + __sd->_M_starts[__iam] + __length_local, - __sd->_M_temporary[__iam]); - - __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() + // Sort in temporary storage, leave space for sentinel. + + typedef _ValueType* _SortingPlacesIterator; + + __sd->_M_temporary[__iam] = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + * (__length_local + 1))); + + // Copy there. + std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], + __sd->_M_source + __sd->_M_starts[__iam] + + __length_local, + __sd->_M_temporary[__iam]); + + __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() (__sd->_M_temporary[__iam], - __sd->_M_temporary[__iam] + __length_local, + __sd->_M_temporary[__iam] + __length_local, __comp); - // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], - // __sd->_M_temporary[__iam] + __length_local. + // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], + // __sd->_M_temporary[__iam] + __length_local. - // No barrier here: Synchronization is done by the splitting routine. + // No barrier here: Synchronization is done by the splitting routine. 
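
For orientation: the hunks above and below only reindent the core of parallel multiway mergesort (PMWMS), which runs in four phases per thread: copy the thread's own chunk into temporary storage (one extra slot is reserved for a sentinel), sort it locally, split all sorted runs consistently, and multiway-merge the thread's own pieces back into the source range. The subtle phase is the consistent split; in the non-stable case (_SplitConsistently<false, ...>) it works by sampling. Below is a minimal serial sketch of just that splitting step; the container, sizes and names are illustrative, and the OpenMP orchestration and placement-new into raw storage are omitted.

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  int main()
  {
    const std::size_t p = 4;            // number of threads / sorted runs
    const std::size_t oversampling = 2; // cf. _Settings::sort_mwms_oversampling

    // Phase 1 result: p locally sorted runs (dummy data here).
    std::vector<std::vector<int> > runs(p);
    for (std::size_t i = 0; i < p; ++i)
      {
        for (int j = 0; j < 100; ++j)
          runs[i].push_back(int((j * (i + 1)) % 97));
        std::sort(runs[i].begin(), runs[i].end());
      }

    // Phase 2a: every run contributes num_samples equally spaced samples
    // (cf. __determine_samples); the pooled samples are sorted once.
    const std::size_t num_samples = oversampling * p - 1;
    std::vector<int> samples;
    for (std::size_t i = 0; i < p; ++i)
      for (std::size_t s = 0; s < num_samples; ++s)
        samples.push_back(runs[i][(s + 1) * runs[i].size()
                                  / (num_samples + 1)]);
    std::sort(samples.begin(), samples.end());

    // Phase 2b: thread iam takes, from *every* run, the subrange delimited
    // by the global samples num_samples*iam and num_samples*(iam+1).
    // Because all threads binary-search the same sorted sample array, the
    // cuts are consistent across runs and the per-thread merges can then
    // proceed independently.
    for (std::size_t iam = 0; iam < p; ++iam)
      for (std::size_t s = 0; s < p; ++s)
        {
          std::size_t b = (iam == 0) ? 0
            : std::size_t(std::lower_bound(runs[s].begin(), runs[s].end(),
                                           samples[num_samples * iam])
                          - runs[s].begin());
          std::size_t e = (iam == p - 1) ? runs[s].size()
            : std::size_t(std::lower_bound(runs[s].begin(), runs[s].end(),
                                           samples[num_samples * (iam + 1)])
                          - runs[s].begin());
          // Piece (iam, s) is runs[s][b, e); _M_pieces stores exactly
          // these begin/end offsets in the real code.
          (void) b; (void) e;
        }
    return 0;
  }

The hunk resumes with the matching per-thread sample count, sort_mwms_oversampling * num_threads - 1:
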
- _DifferenceType __num_samples = + _DifferenceType __num_samples = _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; - _SplitConsistently - <__exact, _RAIter, _Compare, _SortingPlacesIterator>() + _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() (__iam, __sd, __comp, __num_samples); - // Offset from __target __begin, __length after merging. - _DifferenceType __offset = 0, __length_am = 0; - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) - { - __length_am += __sd->_M_pieces[__iam][__s]._M_end - - __sd->_M_pieces[__iam][__s]._M_begin; - __offset += __sd->_M_pieces[__iam][__s]._M_begin; - } - - typedef std::vector< - std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > + // Offset from __target __begin, __length after merging. + _DifferenceType __offset = 0, __length_am = 0; + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + { + __length_am += (__sd->_M_pieces[__iam][__s]._M_end + - __sd->_M_pieces[__iam][__s]._M_begin); + __offset += __sd->_M_pieces[__iam][__s]._M_begin; + } + + typedef std::vector< + std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > _SeqVector; - _SeqVector seqs(__sd->_M_num_threads); + _SeqVector __seqs(__sd->_M_num_threads); + + for (int __s = 0; __s < __sd->_M_num_threads; ++__s) + { + __seqs[__s] = + std::make_pair(__sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_begin, + __sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_end); + } + + __possibly_stable_multiway_merge< + __stable, typename _SeqVector::iterator, + _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), + __sd->_M_source + __offset, __comp, + __length_am); - for (int __s = 0; __s < __sd->_M_num_threads; ++__s) - { - seqs[__s] = - std::make_pair( - __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, - __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); - } +# pragma omp barrier + + ::operator delete(__sd->_M_temporary[__iam]); + } - __possibly_stable_multiway_merge< - __stable, - typename _SeqVector::iterator, - _RAIter, - _Compare, _DifferenceType>() - (seqs.begin(), seqs.end(), - __sd->_M_source + __offset, __comp, - __length_am); - -# pragma omp barrier - - ::operator delete(__sd->_M_temporary[__iam]); - } - -/** @brief PMWMS main call. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __comp Comparator. - * @param __n Length of sequence. - * @param __num_threads Number of threads to use. - */ -template<bool __stable, bool __exact, typename _RAIter, + /** @brief PMWMS main call. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. 
+ */ + template<bool __stable, bool __exact, typename _RAIter, typename _Compare> - void - parallel_sort_mwms(_RAIter __begin, _RAIter __end, - _Compare __comp, - _ThreadIndex __num_threads) - { - _GLIBCXX_CALL(__end - __begin) + void + parallel_sort_mwms(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - _DifferenceType __n = __end - __begin; + _DifferenceType __n = __end - __begin; - if (__n <= 1) - return; + if (__n <= 1) + return; - // at least one element per thread - if (__num_threads > __n) - __num_threads = static_cast<_ThreadIndex>(__n); + // at least one element per thread + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); - // shared variables - _PMWMSSortingData<_RAIter> __sd; - _DifferenceType* _M_starts; + // shared variables + _PMWMSSortingData<_RAIter> __sd; + _DifferenceType* __starts; -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { __num_threads = omp_get_num_threads(); //no more threads than requested # pragma omp single - { - __sd._M_num_threads = __num_threads; - __sd._M_source = __begin; - - __sd._M_temporary = new _ValueType*[__num_threads]; - - if (!__exact) - { - _DifferenceType __size = - (_Settings::get().sort_mwms_oversampling * __num_threads - 1) - * __num_threads; - __sd._M_samples = static_cast<_ValueType*>( - ::operator new(__size * sizeof(_ValueType))); - } - else - __sd._M_samples = NULL; - - __sd._M_offsets = new _DifferenceType[__num_threads - 1]; - __sd._M_pieces - = new std::vector<_Piece<_DifferenceType> >[__num_threads]; - for (int __s = 0; __s < __num_threads; ++__s) - __sd._M_pieces[__s].resize(__num_threads); - _M_starts = __sd._M_starts - = new _DifferenceType[__num_threads + 1]; - - _DifferenceType __chunk_length = __n / __num_threads; - _DifferenceType __split = __n % __num_threads; - _DifferenceType __pos = 0; - for (int __i = 0; __i < __num_threads; ++__i) - { - _M_starts[__i] = __pos; - __pos += (__i < __split) - ? (__chunk_length + 1) : __chunk_length; - } - _M_starts[__num_threads] = __pos; - } //single + { + __sd._M_num_threads = __num_threads; + __sd._M_source = __begin; + + __sd._M_temporary = new _ValueType*[__num_threads]; + + if (!__exact) + { + _DifferenceType __size = + (_Settings::get().sort_mwms_oversampling * __num_threads - 1) + * __num_threads; + __sd._M_samples = static_cast<_ValueType*> + (::operator new(__size * sizeof(_ValueType))); + } + else + __sd._M_samples = NULL; + + __sd._M_offsets = new _DifferenceType[__num_threads - 1]; + __sd._M_pieces + = new std::vector<_Piece<_DifferenceType> >[__num_threads]; + for (int __s = 0; __s < __num_threads; ++__s) + __sd._M_pieces[__s].resize(__num_threads); + __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; + + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __split = __n % __num_threads; + _DifferenceType __pos = 0; + for (int __i = 0; __i < __num_threads; ++__i) + { + __starts[__i] = __pos; + __pos += ((__i < __split) + ? (__chunk_length + 1) : __chunk_length); + } + __starts[__num_threads] = __pos; + } //single // Now sort in parallel. 
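
The single block above lays __n elements out over __num_threads chunks whose sizes differ by at most one: the first __n % __num_threads chunks receive one extra element. A standalone sketch of that layout, with illustrative names:

  #include <cstddef>
  #include <vector>

  // make_starts(10, 4) yields {0, 3, 6, 8, 10}: chunk sizes 3, 3, 2, 2.
  std::vector<std::size_t>
  make_starts(std::size_t n, std::size_t p)
  {
    std::vector<std::size_t> starts(p + 1);
    std::size_t chunk = n / p, split = n % p, pos = 0;
    for (std::size_t i = 0; i < p; ++i)
      {
        starts[i] = pos;
        pos += (i < split) ? chunk + 1 : chunk;
      }
    starts[p] = pos;  // always equals n
    return starts;
  }

With __sd fully populated, the call below fans out into the per-thread routine on every thread of the team:
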
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); } //parallel - delete[] _M_starts; - delete[] __sd._M_temporary; + delete[] __starts; + delete[] __sd._M_temporary; - if (!__exact) - ::operator delete(__sd._M_samples); + if (!__exact) + ::operator delete(__sd._M_samples); + + delete[] __sd._M_offsets; + delete[] __sd._M_pieces; + } - delete[] __sd._M_offsets; - delete[] __sd._M_pieces; - } } //namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ diff --git a/libstdc++-v3/include/parallel/numeric b/libstdc++-v3/include/parallel/numeric index dc4c96761c8..6c5607cedc5 100644 --- a/libstdc++-v3/include/parallel/numeric +++ b/libstdc++-v3/include/parallel/numeric @@ -69,7 +69,7 @@ namespace __parallel __accumulate_switch(_IIter __begin, _IIter __end, _Tp __init, _IteratorTag) { return accumulate(__begin, __end, __init, -__gnu_parallel::sequential_tag()); } + __gnu_parallel::sequential_tag()); } template<typename _IIter, typename _Tp, typename _BinaryOperation, typename _IteratorTag> diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h index 2424bfbdde8..d07fd7a9307 100644 --- a/libstdc++-v3/include/parallel/omp_loop.h +++ b/libstdc++-v3/include/parallel/omp_loop.h @@ -41,74 +41,74 @@ namespace __gnu_parallel { -/** @brief Embarrassingly parallel algorithm for random access - * iterators, using an OpenMP for loop. - * - * @param __begin Begin iterator of element sequence. - * @param __end End iterator of element sequence. - * @param __o User-supplied functor (comparator, predicate, adding - * functor, etc.). - * @param __f Functor to "process" an element with __op (depends on - * desired functionality, e. g. for std::for_each(), ...). - * @param __r Functor to "add" a single __result to the already - * processed elements (depends on functionality). - * @param __base Base value for reduction. - * @param __output Pointer to position where final result is written to - * @param __bound Maximum number of elements processed (e. g. for - * std::count_n()). - * @return User-supplied functor (that may contain a part of the result). - */ -template<typename _RAIter, - typename _Op, - typename _Fu, - typename _Red, - typename _Result> - _Op - __for_each_template_random_access_omp_loop( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef typename - std::iterator_traits<_RAIter>::difference_type + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, etc.). + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). 
+ */ + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_omp_loop(_RAIter __begin, _RAIter __end, + _Op __o, _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef typename std::iterator_traits<_RAIter>::difference_type _DifferenceType; - _DifferenceType __length = __end - __begin; - _ThreadIndex __num_threads = - __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); + _DifferenceType __length = __end - __begin; + _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType> + (__get_max_threads(), __length); - _Result *__thread_results; + _Result *__thread_results; -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = new _Result[__num_threads]; + { + __num_threads = omp_get_num_threads(); + __thread_results = new _Result[__num_threads]; - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - __thread_results[__i] = _Result(); - } + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __thread_results[__i] = _Result(); + } _ThreadIndex __iam = omp_get_thread_num(); #pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) for (_DifferenceType __pos = 0; __pos < __length; ++__pos) - __thread_results[__iam] = - __r(__thread_results[__iam], __f(__o, __begin+__pos)); + __thread_results[__iam] = __r(__thread_results[__iam], + __f(__o, __begin+__pos)); } //parallel - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) __output = __r(__output, __thread_results[__i]); - delete [] __thread_results; + delete [] __thread_results; - // Points to last element processed (needed as return value for - // some algorithms like transform). - __f._M_finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f._M_finish_iterator = __begin + __length; - return __o; - } + return __o; + } } // end namespace diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h index 3d9ed841ac6..f43726e1973 100644 --- a/libstdc++-v3/include/parallel/omp_loop_static.h +++ b/libstdc++-v3/include/parallel/omp_loop_static.h @@ -40,7 +40,6 @@ namespace __gnu_parallel { - /** @brief Embarrassingly parallel algorithm for random access * iterators, using an OpenMP for loop with static scheduling. * @@ -58,37 +57,38 @@ namespace __gnu_parallel * std::count_n()). * @return User-supplied functor (that may contain a part of the result). 
*/ -template<typename _RAIter, - typename _Op, - typename _Fu, - typename _Red, - typename _Result> - _Op - __for_each_template_random_access_omp_loop_static( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef typename - std::iterator_traits<_RAIter>::difference_type - _DifferenceType; - - _DifferenceType __length = __end - __begin; - _ThreadIndex __num_threads = - std::min<_DifferenceType>(__get_max_threads(), __length); - - _Result *__thread_results; - -# pragma omp parallel num_threads(__num_threads) + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_omp_loop_static(_RAIter __begin, + _RAIter __end, _Op __o, + _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef typename std::iterator_traits<_RAIter>::difference_type + _DifferenceType; + + _DifferenceType __length = __end - __begin; + _ThreadIndex __num_threads = std::min<_DifferenceType> + (__get_max_threads(), __length); + + _Result *__thread_results; + +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = new _Result[__num_threads]; + { + __num_threads = omp_get_num_threads(); + __thread_results = new _Result[__num_threads]; - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - __thread_results[__i] = _Result(); - } + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __thread_results[__i] = _Result(); + } _ThreadIndex __iam = omp_get_thread_num(); @@ -98,17 +98,17 @@ template<typename _RAIter, __f(__o, __begin+__pos)); } //parallel - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - __output = __r(__output, __thread_results[__i]); + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __output = __r(__output, __thread_results[__i]); - delete [] __thread_results; + delete [] __thread_results; - // Points to last element processed (needed as return value for - // some algorithms like transform). - __f.finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f.finish_iterator = __begin + __length; - return __o; - } + return __o; + } } // end namespace diff --git a/libstdc++-v3/include/parallel/par_loop.h b/libstdc++-v3/include/parallel/par_loop.h index c842364a6fd..1e21d3ad042 100644 --- a/libstdc++-v3/include/parallel/par_loop.h +++ b/libstdc++-v3/include/parallel/par_loop.h @@ -40,94 +40,92 @@ namespace __gnu_parallel { - -/** @brief Embarrassingly parallel algorithm for random access - * iterators, using hand-crafted parallelization by equal splitting - * the work. - * - * @param __begin Begin iterator of element sequence. - * @param __end End iterator of element sequence. - * @param __o User-supplied functor (comparator, predicate, adding - * functor, ...) - * @param __f Functor to "process" an element with __op (depends on - * desired functionality, e. g. for std::for_each(), ...). - * @param __r Functor to "add" a single __result to the already - * processed elements (depends on functionality). - * @param __base Base value for reduction. - * @param __output Pointer to position where final result is written to - * @param __bound Maximum number of elements processed (e. g. for - * std::count_n()). 
- * @return User-supplied functor (that may contain a part of the result). - */ -template<typename _RAIter, - typename _Op, - typename _Fu, - typename _Red, - typename _Result> - _Op - __for_each_template_random_access_ed( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - const _DifferenceType __length = __end - __begin; - _Result *__thread_results; - bool* __constructed; - - _ThreadIndex __num_threads = - __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); - -# pragma omp parallel num_threads(__num_threads) + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using hand-crafted parallelization by equal splitting + * the work. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, ...) + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_ed(_RAIter __begin, _RAIter __end, + _Op __o, _Fu& __f, _Red __r, + _Result __base, _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + const _DifferenceType __length = __end - __begin; + _Result *__thread_results; + bool* __constructed; + + _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType> + (__get_max_threads(), __length); + +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = - static_cast<_Result*>( - ::operator new(__num_threads * sizeof(_Result))); - __constructed = new bool[__num_threads]; - } - - _ThreadIndex __iam = omp_get_thread_num(); - - // Neutral element. - _Result* __reduct = - static_cast<_Result*>(::operator new(sizeof(_Result))); - - _DifferenceType - __start = equally_split_point(__length, __num_threads, __iam), - __stop = equally_split_point(__length, __num_threads, __iam + 1); - - if (__start < __stop) - { - new(__reduct) _Result(__f(__o, __begin + __start)); - ++__start; - __constructed[__iam] = true; - } - else - __constructed[__iam] = false; - - for (; __start < __stop; ++__start) - *__reduct = __r(*__reduct, __f(__o, __begin + __start)); - - __thread_results[__iam] = *__reduct; + { + __num_threads = omp_get_num_threads(); + __thread_results = static_cast<_Result*> + (::operator new(__num_threads * sizeof(_Result))); + __constructed = new bool[__num_threads]; + } + + _ThreadIndex __iam = omp_get_thread_num(); + + // Neutral element. 
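
All three of these helpers (omp_loop.h, omp_loop_static.h and this par_loop.h) implement the same hand-rolled reduction: each thread accumulates into a private slot via __r, and the slots are combined serially at the end. OpenMP's reduction() clause is not usable here because _Result is an arbitrary user type, and user-defined reductions only arrived later, with OpenMP 4.0's declare reduction. Reduced to counting even numbers, the pattern looks like this (a minimal sketch, not library code):

  #include <omp.h>
  #include <cstdio>
  #include <vector>

  int main()
  {
    std::vector<int> v(1000);
    for (int i = 0; i < 1000; ++i)
      v[i] = i;

    int num_threads = 1;
    std::vector<long> partial;           // one slot per thread

  #pragma omp parallel
    {
  #pragma omp single
      {
        num_threads = omp_get_num_threads();
        partial.assign(num_threads, 0);  // neutral element per thread
      }                                  // implicit barrier here

      int iam = omp_get_thread_num();
  #pragma omp for
      for (int i = 0; i < (int)v.size(); ++i)
        if (v[i] % 2 == 0)
          partial[iam] += 1;             // __r(__thread_results[__iam], ...)
    }

    long total = 0;                      // plays the role of __output
    for (int i = 0; i < num_threads; ++i)
      total += partial[i];
    std::printf("%ld\n", total);         // prints 500
  }

par_loop.h goes one step further and delays constructing the per-thread result until the first element has been processed (tracked by the __constructed flags), which is what the "neutral element" allocation below prepares:
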
+ _Result* __reduct = static_cast<_Result*> + (::operator new(sizeof(_Result))); + + _DifferenceType + __start = equally_split_point(__length, __num_threads, __iam), + __stop = equally_split_point(__length, __num_threads, __iam + 1); + + if (__start < __stop) + { + new(__reduct) _Result(__f(__o, __begin + __start)); + ++__start; + __constructed[__iam] = true; + } + else + __constructed[__iam] = false; + + for (; __start < __stop; ++__start) + *__reduct = __r(*__reduct, __f(__o, __begin + __start)); + + __thread_results[__iam] = *__reduct; } //parallel - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - if (__constructed[__i]) - __output = __r(__output, __thread_results[__i]); + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + if (__constructed[__i]) + __output = __r(__output, __thread_results[__i]); - // Points to last element processed (needed as return value for - // some algorithms like transform). - __f._M_finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f._M_finish_iterator = __begin + __length; - delete[] __thread_results; - delete[] __constructed; + delete[] __thread_results; + delete[] __constructed; - return __o; - } + return __o; + } } // end namespace diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h index b121e1ff8c7..02404203f4e 100644 --- a/libstdc++-v3/include/parallel/partial_sum.h +++ b/libstdc++-v3/include/parallel/partial_sum.h @@ -43,114 +43,116 @@ namespace __gnu_parallel { // Problem: there is no 0-element given. -/** @brief Base case prefix sum routine. - * @param __begin Begin iterator of input sequence. - * @param __end End iterator of input sequence. - * @param __result Begin iterator of output sequence. - * @param __bin_op Associative binary function. - * @param __value Start value. Must be passed since the neutral - * element is unknown in general. - * @return End iterator of output sequence. */ -template<typename _IIter, - typename _OutputIterator, - typename _BinaryOperation> - _OutputIterator - __parallel_partial_sum_basecase( - _IIter __begin, _IIter __end, _OutputIterator __result, - _BinaryOperation __bin_op, - typename std::iterator_traits <_IIter>::value_type __value) - { - if (__begin == __end) + /** @brief Base case prefix sum routine. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __value Start value. Must be passed since the neutral + * element is unknown in general. + * @return End iterator of output sequence. */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum_basecase(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits <_IIter>::value_type __value) + { + if (__begin == __end) + return __result; + + while (__begin != __end) + { + __value = __bin_op(__value, *__begin); + *__result = __value; + ++__result; + ++__begin; + } return __result; - - while (__begin != __end) - { - __value = __bin_op(__value, *__begin); - *__result = __value; - ++__result; - ++__begin; - } - return __result; - } - -/** @brief Parallel partial sum implementation, two-phase approach, - no recursion. - * @param __begin Begin iterator of input sequence. - * @param __end End iterator of input sequence. 
- * @param __result Begin iterator of output sequence. - * @param __bin_op Associative binary function. - * @param __n Length of sequence. - * @param __num_threads Number of threads to use. - * @return End iterator of output sequence. - */ -template<typename _IIter, - typename _OutputIterator, - typename _BinaryOperation> - _OutputIterator - __parallel_partial_sum_linear( - _IIter __begin, _IIter __end, _OutputIterator __result, - _BinaryOperation __bin_op, - typename std::iterator_traits<_IIter>::difference_type __n) - { - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - if (__begin == __end) - return __result; - - _ThreadIndex __num_threads = + } + + /** @brief Parallel partial sum implementation, two-phase approach, + no recursion. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. + * @return End iterator of output sequence. + */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum_linear(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits<_IIter>::difference_type __n) + { + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + if (__begin == __end) + return __result; + + _ThreadIndex __num_threads = std::min<_DifferenceType>(__get_max_threads(), __n - 1); - if (__num_threads < 2) - { - *__result = *__begin; - return __parallel_partial_sum_basecase( - __begin + 1, __end, __result + 1, __bin_op, *__begin); - } + if (__num_threads < 2) + { + *__result = *__begin; + return __parallel_partial_sum_basecase(__begin + 1, __end, + __result + 1, __bin_op, + *__begin); + } - _DifferenceType* __borders; - _ValueType* __sums; + _DifferenceType* __borders; + _ValueType* __sums; - const _Settings& __s = _Settings::get(); + const _Settings& __s = _Settings::get(); -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - - __borders = new _DifferenceType[__num_threads + 2]; - - if (__s.partial_sum_dilation == 1.0f) - equally_split(__n, __num_threads + 1, __borders); - else - { - _DifferenceType __chunk_length = - ((double)__n - / ((double)__num_threads + __s.partial_sum_dilation)), - __borderstart = __n - __num_threads * __chunk_length; - __borders[0] = 0; - for (int __i = 1; __i < (__num_threads + 1); ++__i) - { - __borders[__i] = __borderstart; - __borderstart += __chunk_length; - } - __borders[__num_threads + 1] = __n; - } - - __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + { + __num_threads = omp_get_num_threads(); + + __borders = new _DifferenceType[__num_threads + 2]; + + if (__s.partial_sum_dilation == 1.0f) + equally_split(__n, __num_threads + 1, __borders); + else + { + _DifferenceType __chunk_length = + ((double)__n + / ((double)__num_threads + __s.partial_sum_dilation)), + __borderstart = __n - __num_threads * __chunk_length; + __borders[0] = 0; + for (int __i = 1; __i < (__num_threads + 1); ++__i) + { + __borders[__i] = __borderstart; + 
__borderstart += __chunk_length; + } + __borders[__num_threads + 1] = __n; + } + + __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __num_threads)); - _OutputIterator __target_end; - } //single + _OutputIterator __target_end; + } //single _ThreadIndex __iam = omp_get_thread_num(); if (__iam == 0) { *__result = *__begin; - __parallel_partial_sum_basecase( - __begin + 1, __begin + __borders[1], __result + 1, - __bin_op, *__begin); + __parallel_partial_sum_basecase(__begin + 1, + __begin + __borders[1], + __result + 1, + __bin_op, *__begin); ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); } else @@ -166,58 +168,57 @@ template<typename _IIter, # pragma omp barrier # pragma omp single - __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, - __sums + 1, __bin_op, __sums[0]); + __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, + __sums + 1, __bin_op, __sums[0]); # pragma omp barrier - // Still same team. - __parallel_partial_sum_basecase( - __begin + __borders[__iam + 1], - __begin + __borders[__iam + 2], - __result + __borders[__iam + 1], - __bin_op, __sums[__iam]); + // Still same team. + __parallel_partial_sum_basecase(__begin + __borders[__iam + 1], + __begin + __borders[__iam + 2], + __result + __borders[__iam + 1], + __bin_op, __sums[__iam]); } //parallel - ::operator delete(__sums); - delete[] __borders; - - return __result + __n; - } - -/** @brief Parallel partial sum front-__end. - * @param __begin Begin iterator of input sequence. - * @param __end End iterator of input sequence. - * @param __result Begin iterator of output sequence. - * @param __bin_op Associative binary function. - * @return End iterator of output sequence. */ -template<typename _IIter, - typename _OutputIterator, - typename _BinaryOperation> - _OutputIterator - __parallel_partial_sum(_IIter __begin, _IIter __end, - _OutputIterator __result, _BinaryOperation __bin_op) - { - _GLIBCXX_CALL(__begin - __end) - - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _DifferenceType __n = __end - __begin; - - switch (_Settings::get().partial_sum_algorithm) - { - case LINEAR: - // Need an initial offset. - return __parallel_partial_sum_linear( - __begin, __end, __result, __bin_op, __n); - default: - // Partial_sum algorithm not implemented. - _GLIBCXX_PARALLEL_ASSERT(0); - return __result + __n; - } - } + ::operator delete(__sums); + delete[] __borders; + + return __result + __n; + } + + /** @brief Parallel partial sum front-__end. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @return End iterator of output sequence. */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op) + { + _GLIBCXX_CALL(__begin - __end) + + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + switch (_Settings::get().partial_sum_algorithm) + { + case LINEAR: + // Need an initial offset. 
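
The linear variant above is the classic two-pass prefix sum: pass one computes per-chunk totals in parallel, a short serial step over only __num_threads values turns those totals into carried-in offsets, and pass two rescans every chunk with its offset. The partial_sum_dilation setting skews the split between the first chunk, which thread 0 writes directly in pass one, and the remaining chunks, which are touched twice. A serial sketch with + as the operation (the real code threads an arbitrary __bin_op and a start value through instead):

  #include <numeric>
  #include <vector>

  // Assumes out.size() == in.size(); names are illustrative.
  void prefix_sum_two_phase(const std::vector<double>& in,
                            std::vector<double>& out, int p)
  {
    int n = (int)in.size();
    std::vector<int> borders(p + 1);
    for (int i = 0; i <= p; ++i)
      borders[i] = (int)((long long)n * i / p);

    // Pass 1: each "thread" i totals its chunk (parallel in the real code).
    std::vector<double> sums(p);
    for (int i = 0; i < p; ++i)
      sums[i] = std::accumulate(in.begin() + borders[i],
                                in.begin() + borders[i + 1], 0.0);

    // Serial step over p values: exclusive prefix over the chunk totals.
    double carry = 0.0;
    for (int i = 0; i < p; ++i)
      {
        double s = sums[i];
        sums[i] = carry;
        carry += s;
      }

    // Pass 2: rescan every chunk with its carried-in offset (again
    // parallel in the real code).
    for (int i = 0; i < p; ++i)
      {
        double acc = sums[i];
        for (int j = borders[i]; j < borders[i + 1]; ++j)
          {
            acc += in[j];
            out[j] = acc;
          }
      }
  }

Back in the front end, the LINEAR case dispatches accordingly:
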
+ return __parallel_partial_sum_linear(__begin, __end, __result, + __bin_op, __n); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return __result + __n; + } + } } #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h index 11362939fc8..86723c51270 100644 --- a/libstdc++-v3/include/parallel/partition.h +++ b/libstdc++-v3/include/parallel/partition.h @@ -44,387 +44,391 @@ namespace __gnu_parallel { -/** @brief Parallel implementation of std::partition. - * @param __begin Begin iterator of input sequence to split. - * @param __end End iterator of input sequence to split. - * @param __pred Partition predicate, possibly including some kind of pivot. - * @param __num_threads Maximum number of threads to use for this task. - * @return Number of elements not fulfilling the predicate. */ -template<typename _RAIter, typename _Predicate> - typename std::iterator_traits<_RAIter>::difference_type - __parallel_partition(_RAIter __begin, _RAIter __end, - _Predicate __pred, _ThreadIndex __num_threads) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _DifferenceType __n = __end - __begin; - - _GLIBCXX_CALL(__n) - - const _Settings& __s = _Settings::get(); - - // Shared. - _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1; - _GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right; - _GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew; - - bool* __reserved_left = NULL, * __reserved_right = NULL; - - _DifferenceType __chunk_size; - - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); - - //at least two chunks per thread - if(__right - __left + 1 >= 2 * __num_threads * __chunk_size) -# pragma omp parallel num_threads(__num_threads) - { -# pragma omp single - { - __num_threads = omp_get_num_threads(); - __reserved_left = new bool[__num_threads]; - __reserved_right = new bool[__num_threads]; - - if (__s.partition_chunk_share > 0.0) - __chunk_size = std::max<_DifferenceType>( - __s.partition_chunk_size, - (double)__n * __s.partition_chunk_share / - (double)__num_threads); - else - __chunk_size = __s.partition_chunk_size; - } - - while (__right - __left + 1 >= 2 * __num_threads * __chunk_size) - { -# pragma omp single - { - _DifferenceType __num_chunks - = (__right - __left + 1) / __chunk_size; - - for (int __r = 0; __r < __num_threads; ++__r) - { - __reserved_left[__r] = false; - __reserved_right[__r] = false; - } - __leftover_left = 0; - __leftover_right = 0; - } //implicit barrier - - // Private. - _DifferenceType __thread_left, __thread_left_border, - __thread_right, __thread_right_border; - __thread_left = __left + 1; - - // Just to satisfy the condition below. 
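
__parallel_partition keeps two shared cursors, __left and __right. Under __result_lock each thread claims an unprocessed chunk from the left end (where predicate-true elements belong) and one from the right end, then runs the usual Hoare-style swap kernel over the pair until one of its chunks is exhausted and a fresh one is fetched. Chunks still held when the cursors meet become the "leftover" chunks handled further down, and the same kernel finishes the remaining middle gap sequentially. Extracted on its own, with illustrative names, the kernel is:

  #include <algorithm>

  // Returns the first position not satisfying pred; on return,
  // [first0, result) all satisfy pred and [result, last0) all do not.
  template<typename It, typename Pred>
  It hoare_partition(It first, It last, Pred pred)
  {
    while (true)
      {
        while (first != last && pred(*first))
          ++first;                       // skip true elements from the left
        if (first == last)
          return first;
        --last;
        while (first != last && !pred(*last))
          --last;                        // skip false elements from the right
        if (first == last)
          return first;
        std::iter_swap(first, last);     // misplaced pair: swap and advance
        ++first;
      }
  }

One oddity worth noting in this revision, on both the old and the new side: __chunk_size is declared without an initializer, yet the guard "if (__right - __left + 1 >= 2 * __num_threads * __chunk_size)" reads it before the omp single block assigns it, which appears to be a read of an indeterminate value. In the per-thread state that follows, the borders are deliberately initialized out of range so that the very first loop iteration fetches fresh chunks.
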
- __thread_left_border = __thread_left - 1; - __thread_right = __n - 1; - __thread_right_border = __thread_right + 1; - - bool __iam_finished = false; - while (!__iam_finished) - { - if (__thread_left > __thread_left_border) - { - omp_set_lock(&__result_lock); - if (__left + (__chunk_size - 1) > __right) - __iam_finished = true; - else - { - __thread_left = __left; - __thread_left_border = __left + (__chunk_size - 1); - __left += __chunk_size; - } - omp_unset_lock(&__result_lock); - } - - if (__thread_right < __thread_right_border) - { - omp_set_lock(&__result_lock); - if (__left > __right - (__chunk_size - 1)) - __iam_finished = true; - else - { - __thread_right = __right; - __thread_right_border = __right - (__chunk_size - 1); - __right -= __chunk_size; - } - omp_unset_lock(&__result_lock); - } - - if (__iam_finished) - break; - - // Swap as usual. - while (__thread_left < __thread_right) - { - while (__pred(__begin[__thread_left]) - && __thread_left <= __thread_left_border) - ++__thread_left; - while (!__pred(__begin[__thread_right]) - && __thread_right >= __thread_right_border) - --__thread_right; - - if (__thread_left > __thread_left_border - || __thread_right < __thread_right_border) - // Fetch new chunk(__s). - break; - - std::swap(__begin[__thread_left], __begin[__thread_right]); - ++__thread_left; - --__thread_right; - } - } - - // Now swap the leftover chunks to the right places. - if (__thread_left <= __thread_left_border) -# pragma omp atomic - ++__leftover_left; - if (__thread_right >= __thread_right_border) -# pragma omp atomic - ++__leftover_right; - -# pragma omp barrier - -# pragma omp single - { - __leftnew = __left - __leftover_left * __chunk_size; - __rightnew = __right + __leftover_right * __chunk_size; - } - -# pragma omp barrier - - // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew - if (__thread_left <= __thread_left_border - && __thread_left_border >= __leftnew) - { - // Chunk already in place, reserve spot. - __reserved_left - [(__left - (__thread_left_border + 1)) / __chunk_size] - = true; - } - - // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew - if (__thread_right >= __thread_right_border - && __thread_right_border <= __rightnew) - { - // Chunk already in place, reserve spot. - __reserved_right[((__thread_right_border - 1) - __right) - / __chunk_size] = true; - } - -# pragma omp barrier - - if (__thread_left <= __thread_left_border - && __thread_left_border < __leftnew) - { - // Find spot and swap. - _DifferenceType __swapstart = -1; - omp_set_lock(&__result_lock); - for (int __r = 0; __r < __leftover_left; ++__r) - if (!__reserved_left[__r]) - { - __reserved_left[__r] = true; - __swapstart = __left - (__r + 1) * __chunk_size; - break; - } - omp_unset_lock(&__result_lock); + /** @brief Parallel implementation of std::partition. + * @param __begin Begin iterator of input sequence to split. + * @param __end End iterator of input sequence to split. + * @param __pred Partition predicate, possibly including some kind + * of pivot. + * @param __num_threads Maximum number of threads to use for this task. + * @return Number of elements not fulfilling the predicate. 
*/ + template<typename _RAIter, typename _Predicate> + typename std::iterator_traits<_RAIter>::difference_type + __parallel_partition(_RAIter __begin, _RAIter __end, + _Predicate __pred, _ThreadIndex __num_threads) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + _GLIBCXX_CALL(__n) + + const _Settings& __s = _Settings::get(); + + // Shared. + _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1; + _GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right; + _GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew; + + bool* __reserved_left = NULL, * __reserved_right = NULL; + + _DifferenceType __chunk_size; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + //at least two chunks per thread + if (__right - __left + 1 >= 2 * __num_threads * __chunk_size) +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __reserved_left = new bool[__num_threads]; + __reserved_right = new bool[__num_threads]; + + if (__s.partition_chunk_share > 0.0) + __chunk_size = std::max<_DifferenceType> + (__s.partition_chunk_size, (double)__n + * __s.partition_chunk_share / (double)__num_threads); + else + __chunk_size = __s.partition_chunk_size; + } + + while (__right - __left + 1 >= 2 * __num_threads * __chunk_size) + { +# pragma omp single + { + _DifferenceType __num_chunks = ((__right - __left + 1) + / __chunk_size); + + for (int __r = 0; __r < __num_threads; ++__r) + { + __reserved_left[__r] = false; + __reserved_right[__r] = false; + } + __leftover_left = 0; + __leftover_right = 0; + } //implicit barrier + + // Private. + _DifferenceType __thread_left, __thread_left_border, + __thread_right, __thread_right_border; + __thread_left = __left + 1; + + // Just to satisfy the condition below. + __thread_left_border = __thread_left - 1; + __thread_right = __n - 1; + __thread_right_border = __thread_right + 1; + + bool __iam_finished = false; + while (!__iam_finished) + { + if (__thread_left > __thread_left_border) + { + omp_set_lock(&__result_lock); + if (__left + (__chunk_size - 1) > __right) + __iam_finished = true; + else + { + __thread_left = __left; + __thread_left_border = __left + (__chunk_size - 1); + __left += __chunk_size; + } + omp_unset_lock(&__result_lock); + } + + if (__thread_right < __thread_right_border) + { + omp_set_lock(&__result_lock); + if (__left > __right - (__chunk_size - 1)) + __iam_finished = true; + else + { + __thread_right = __right; + __thread_right_border = __right - (__chunk_size - 1); + __right -= __chunk_size; + } + omp_unset_lock(&__result_lock); + } + + if (__iam_finished) + break; + + // Swap as usual. + while (__thread_left < __thread_right) + { + while (__pred(__begin[__thread_left]) + && __thread_left <= __thread_left_border) + ++__thread_left; + while (!__pred(__begin[__thread_right]) + && __thread_right >= __thread_right_border) + --__thread_right; + + if (__thread_left > __thread_left_border + || __thread_right < __thread_right_border) + // Fetch new chunk(__s). + break; + + std::swap(__begin[__thread_left], + __begin[__thread_right]); + ++__thread_left; + --__thread_right; + } + } + + // Now swap the leftover chunks to the right places. 
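
The bookkeeping that follows works like this, with illustrative numbers: suppose __chunk_size is 10 and the cursors meet with __left == 50 while three threads still hold unfinished left chunks, so __leftover_left == 3. The predicate-true prefix must then end at __leftnew = 50 - 3 * 10 = 20, and the region [20, 50) has to be filled with exactly those three leftover chunks. A leftover chunk that already lies inside [20, 50) merely marks its slot in __reserved_left; one lying outside swaps itself, under the lock, into the first unreserved slot via std::swap_ranges. The mirrored accounting applies to __rightnew and __reserved_right on the other end.
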
+ if (__thread_left <= __thread_left_border) +# pragma omp atomic + ++__leftover_left; + if (__thread_right >= __thread_right_border) +# pragma omp atomic + ++__leftover_right; + +# pragma omp barrier + +# pragma omp single + { + __leftnew = __left - __leftover_left * __chunk_size; + __rightnew = __right + __leftover_right * __chunk_size; + } + +# pragma omp barrier + + // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew + if (__thread_left <= __thread_left_border + && __thread_left_border >= __leftnew) + { + // Chunk already in place, reserve spot. + __reserved_left[(__left - (__thread_left_border + 1)) + / __chunk_size] = true; + } + + // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew + if (__thread_right >= __thread_right_border + && __thread_right_border <= __rightnew) + { + // Chunk already in place, reserve spot. + __reserved_right[((__thread_right_border - 1) - __right) + / __chunk_size] = true; + } + +# pragma omp barrier + + if (__thread_left <= __thread_left_border + && __thread_left_border < __leftnew) + { + // Find spot and swap. + _DifferenceType __swapstart = -1; + omp_set_lock(&__result_lock); + for (int __r = 0; __r < __leftover_left; ++__r) + if (!__reserved_left[__r]) + { + __reserved_left[__r] = true; + __swapstart = __left - (__r + 1) * __chunk_size; + break; + } + omp_unset_lock(&__result_lock); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); + _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); #endif - std::swap_ranges(__begin + __thread_left_border - - (__chunk_size - 1), - __begin + __thread_left_border + 1, - __begin + __swapstart); - } - - if (__thread_right >= __thread_right_border - && __thread_right_border > __rightnew) - { - // Find spot and swap - _DifferenceType __swapstart = -1; - omp_set_lock(&__result_lock); - for (int __r = 0; __r < __leftover_right; ++__r) - if (!__reserved_right[__r]) - { - __reserved_right[__r] = true; - __swapstart = __right + __r * __chunk_size + 1; - break; - } - omp_unset_lock(&__result_lock); + std::swap_ranges(__begin + __thread_left_border + - (__chunk_size - 1), + __begin + __thread_left_border + 1, + __begin + __swapstart); + } + + if (__thread_right >= __thread_right_border + && __thread_right_border > __rightnew) + { + // Find spot and swap + _DifferenceType __swapstart = -1; + omp_set_lock(&__result_lock); + for (int __r = 0; __r < __leftover_right; ++__r) + if (!__reserved_right[__r]) + { + __reserved_right[__r] = true; + __swapstart = __right + __r * __chunk_size + 1; + break; + } + omp_unset_lock(&__result_lock); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); + _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); #endif - std::swap_ranges( - __begin + __thread_right_border, - __begin + __thread_right_border + __chunk_size, - __begin + __swapstart); - } + std::swap_ranges(__begin + __thread_right_border, + __begin + __thread_right_border + + __chunk_size, __begin + __swapstart); + } #if _GLIBCXX_ASSERTIONS # pragma omp barrier # pragma omp single - { - for (int __r = 0; __r < __leftover_left; ++__r) - _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]); - for (int __r = 0; __r < __leftover_right; ++__r) - _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]); - } + { + for (int __r = 0; __r < __leftover_left; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]); + for (int __r = 0; __r < __leftover_right; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]); + } # pragma omp barrier #endif # pragma omp barrier - __left = __leftnew; - __right = __rightnew; - } -# pragma 
omp flush(__left, __right) - } // end "recursion" //parallel - - _DifferenceType __final_left = __left, __final_right = __right; - - while (__final_left < __final_right) - { - // Go right until key is geq than pivot. - while (__pred(__begin[__final_left]) && __final_left < __final_right) - ++__final_left; - - // Go left until key is less than pivot. - while (!__pred(__begin[__final_right]) && __final_left < __final_right) - --__final_right; - - if (__final_left == __final_right) - break; - std::swap(__begin[__final_left], __begin[__final_right]); - ++__final_left; - --__final_right; - } - - // All elements on the left side are < piv, all elements on the - // right are >= piv - delete[] __reserved_left; - delete[] __reserved_right; - - omp_destroy_lock(&__result_lock); - - // Element "between" __final_left and __final_right might not have - // been regarded yet - if (__final_left < __n && !__pred(__begin[__final_left])) - // Really swapped. - return __final_left; - else - return __final_left + 1; - } - -/** - * @brief Parallel implementation of std::nth_element(). + __left = __leftnew; + __right = __rightnew; + } + +# pragma omp flush(__left, __right) + } // end "recursion" //parallel + + _DifferenceType __final_left = __left, __final_right = __right; + + while (__final_left < __final_right) + { + // Go right until key is geq than pivot. + while (__pred(__begin[__final_left]) + && __final_left < __final_right) + ++__final_left; + + // Go left until key is less than pivot. + while (!__pred(__begin[__final_right]) + && __final_left < __final_right) + --__final_right; + + if (__final_left == __final_right) + break; + std::swap(__begin[__final_left], __begin[__final_right]); + ++__final_left; + --__final_right; + } + + // All elements on the left side are < piv, all elements on the + // right are >= piv + delete[] __reserved_left; + delete[] __reserved_right; + + omp_destroy_lock(&__result_lock); + + // Element "between" __final_left and __final_right might not have + // been regarded yet + if (__final_left < __n && !__pred(__begin[__final_left])) + // Really swapped. + return __final_left; + else + return __final_left + 1; + } + + /** + * @brief Parallel implementation of std::nth_element(). + * @param __begin Begin iterator of input sequence. + * @param __nth _Iterator of element that must be in position afterwards. + * @param __end End iterator of input sequence. + * @param __comp Comparator. + */ + template<typename _RAIter, typename _Compare> + void + __parallel_nth_element(_RAIter __begin, _RAIter __nth, + _RAIter __end, _Compare __comp) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _GLIBCXX_CALL(__end - __begin) + + _RAIter __split; + _RandomNumber __rng; + + _DifferenceType __minimum_length = + std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n); + + // Break if input range to small. + while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) + { + _DifferenceType __n = __end - __begin; + + _RAIter __pivot_pos = __begin + __rng(__n); + + // Swap __pivot_pos value to end. 
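
__parallel_nth_element is quickselect wrapped around the parallel partition: pick a random pivot, park it at the end, partition the rest, move the pivot to its final position, and iterate on whichever side contains __nth, falling back to a sequential sort once the range drops below partition_minimal_n. A serial sketch with std::partition standing in for __parallel_partition; the names and the base-case threshold are illustrative, and nth is assumed to be a valid index:

  #include <algorithm>
  #include <cstddef>
  #include <random>
  #include <utility>
  #include <vector>

  template<typename T, typename Compare>
  void nth_element_sketch(std::vector<T>& v, std::size_t nth, Compare comp)
  {
    std::mt19937 rng(42);
    std::size_t begin = 0, end = v.size();
    const std::size_t minimal = 16;      // cf. _Settings::partition_minimal_n
    while (end - begin >= minimal)
      {
        std::size_t n = end - begin;
        std::size_t pivot = begin + rng() % n;
        std::swap(v[pivot], v[end - 1]); // park the pivot at the end
        const T& pv = v[end - 1];        // stays put during partition
        auto mid = std::partition(v.begin() + begin, v.begin() + (end - 1),
                                  [&](const T& x) { return comp(x, pv); });
        std::size_t split = std::size_t(mid - v.begin());
        std::swap(v[split], v[end - 1]); // pivot to its final place
        if (split < nth)
          begin = split + 1;             // nth lies in the right part
        else if (nth < split)
          end = split;                   // nth lies in the left part
        else
          return;                        // the pivot *is* the nth element
      }
    std::sort(v.begin() + begin, v.begin() + end, comp); // small base case
  }

The pivot-parking step is the one resuming in the hunk below:
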
+ if (__pivot_pos != (__end - 1)) + std::swap(*__pivot_pos, *(__end - 1)); + __pivot_pos = __end - 1; + + // _Compare must have first_value_type, second_value_type, + // result_type + // _Compare == + // __gnu_parallel::_Lexicographic<S, int, + // __gnu_parallel::_Less<S, S> > + // __pivot_pos == std::pair<S, int>* + __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); + + // Divide, leave pivot unchanged in last place. + _RAIter __split_pos1, __split_pos2; + __split_pos1 = __begin + __parallel_partition(__begin, __end - 1, + __pred, + __get_max_threads()); + + // Left side: < __pivot_pos; __right side: >= __pivot_pos + + // Swap pivot back to middle. + if (__split_pos1 != __pivot_pos) + std::swap(*__split_pos1, *__pivot_pos); + __pivot_pos = __split_pos1; + + // In case all elements are equal, __split_pos1 == 0 + if ((__split_pos1 + 1 - __begin) < (__n >> 7) + || (__end - __split_pos1) < (__n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::__unary_negate<__gnu_parallel:: + __binder1st<_Compare, _ValueType, + _ValueType, bool>, _ValueType> + __pred(__gnu_parallel::__binder1st<_Compare, _ValueType, + _ValueType, bool>(__comp, *__pivot_pos)); + + // Find other end of pivot-equal range. + __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, + __end, __pred); + } + else + // Only skip the pivot. + __split_pos2 = __split_pos1 + 1; + + // Compare iterators. + if (__split_pos2 <= __nth) + __begin = __split_pos2; + else if (__nth < __split_pos1) + __end = __split_pos1; + else + break; + } + + // Only at most _Settings::partition_minimal_n __elements __left. + __gnu_sequential::sort(__begin, __end, __comp); + } + + /** @brief Parallel implementation of std::partial_sort(). * @param __begin Begin iterator of input sequence. - * @param __nth _Iterator of element that must be in position afterwards. + * @param __middle Sort until this position. * @param __end End iterator of input sequence. - * @param __comp Comparator. - */ -template<typename _RAIter, typename _Compare> - void - __parallel_nth_element(_RAIter __begin, _RAIter __nth, - _RAIter __end, _Compare __comp) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _GLIBCXX_CALL(__end - __begin) - - _RAIter __split; - _RandomNumber __rng; - - _DifferenceType __minimum_length = - std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n); - - // Break if input range to small. - while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) - { - _DifferenceType __n = __end - __begin; - - _RAIter __pivot_pos = __begin + __rng(__n); - - // Swap __pivot_pos value to end. - if (__pivot_pos != (__end - 1)) - std::swap(*__pivot_pos, *(__end - 1)); - __pivot_pos = __end - 1; - - // _Compare must have first_value_type, second_value_type, - // result_type - // _Compare == - // __gnu_parallel::_Lexicographic<S, int, __gnu_parallel::_Less<S, S> > - // __pivot_pos == std::pair<S, int>* - __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> - __pred(__comp, *__pivot_pos); - - // Divide, leave pivot unchanged in last place. - _RAIter __split_pos1, __split_pos2; - __split_pos1 = __begin - + __parallel_partition(__begin, __end - 1, __pred, - __get_max_threads()); - - // Left side: < __pivot_pos; __right side: >= __pivot_pos - - // Swap pivot back to middle. 
- if (__split_pos1 != __pivot_pos) - std::swap(*__split_pos1, *__pivot_pos); - __pivot_pos = __split_pos1; - - // In case all elements are equal, __split_pos1 == 0 - if ((__split_pos1 + 1 - __begin) < (__n >> 7) - || (__end - __split_pos1) < (__n >> 7)) - { - // Very unequal split, one part smaller than one 128th - // elements not strictly larger than the pivot. - __gnu_parallel::__unary_negate<__gnu_parallel:: - __binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType> - __pred(__gnu_parallel::__binder1st<_Compare, _ValueType, - _ValueType, bool>(__comp, *__pivot_pos)); - - // Find other end of pivot-equal range. - __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, - __end, __pred); - } - else - // Only skip the pivot. - __split_pos2 = __split_pos1 + 1; - - // Compare iterators. - if (__split_pos2 <= __nth) - __begin = __split_pos2; - else if (__nth < __split_pos1) - __end = __split_pos1; - else - break; - } - - // Only at most _Settings::partition_minimal_n __elements __left. - __gnu_sequential::sort(__begin, __end, __comp); - } - -/** @brief Parallel implementation of std::partial_sort(). -* @param __begin Begin iterator of input sequence. -* @param __middle Sort until this position. -* @param __end End iterator of input sequence. -* @param __comp Comparator. */ -template<typename _RAIter, typename _Compare> - void - __parallel_partial_sort(_RAIter __begin, - _RAIter __middle, - _RAIter __end, _Compare __comp) - { - __parallel_nth_element(__begin, __middle, __end, __comp); - std::sort(__begin, __middle, __comp); - } + * @param __comp Comparator. */ + template<typename _RAIter, typename _Compare> + void + __parallel_partial_sort(_RAIter __begin, + _RAIter __middle, + _RAIter __end, _Compare __comp) + { + __parallel_nth_element(__begin, __middle, __end, __comp); + std::sort(__begin, __middle, __comp); + } } //namespace __gnu_parallel diff --git a/libstdc++-v3/include/parallel/queue.h b/libstdc++-v3/include/parallel/queue.h index 348bb1ac900..ff5deb09383 100644 --- a/libstdc++-v3/include/parallel/queue.h +++ b/libstdc++-v3/include/parallel/queue.h @@ -65,10 +65,10 @@ namespace __gnu_parallel public: /** @brief Constructor. Not to be called concurrent, of course. * @param _M_max_size Maximal number of elements to be contained. */ - _RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size) + _RestrictedBoundedConcurrentQueue(_SequenceIndex __max_size) { - this->_M_max_size = _M_max_size; - _M_base = new _Tp[_M_max_size]; + _M_max_size = __max_size; + _M_base = new _Tp[__max_size]; _M_borders = __encode2(0, 0); #pragma omp flush } @@ -105,12 +105,12 @@ namespace __gnu_parallel while (__former_front > __former_back) { // Chance. - _CASable - __former_borders = __encode2(__former_front, __former_back); - _CASable - __new_borders = __encode2(__former_front - 1, __former_back); - if (__compare_and_swap( - &_M_borders, __former_borders, __new_borders)) + _CASable __former_borders = __encode2(__former_front, + __former_back); + _CASable __new_borders = __encode2(__former_front - 1, + __former_back); + if (__compare_and_swap(&_M_borders, __former_borders, + __new_borders)) { __t = *(_M_base + (__former_front - 1) % _M_max_size); return true; @@ -132,12 +132,12 @@ namespace __gnu_parallel while (__former_front > __former_back) { // Chance. 
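              // Front and back indices travel packed in one _CASable word,
              // so a single compare-and-swap claims an element; if another
              // thread interfered, re-read the borders and retry.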
- _CASable - __former_borders = __encode2(__former_front, __former_back); - _CASable - __new_borders = __encode2(__former_front, __former_back + 1); - if (__compare_and_swap( - &_M_borders, __former_borders, __new_borders)) + _CASable __former_borders = __encode2(__former_front, + __former_back); + _CASable __new_borders = __encode2(__former_front, + __former_back + 1); + if (__compare_and_swap(&_M_borders, __former_borders, + __new_borders)) { __t = *(_M_base + __former_back % _M_max_size); return true; diff --git a/libstdc++-v3/include/parallel/quicksort.h b/libstdc++-v3/include/parallel/quicksort.h index 1ed46b4a77f..508c3c1763c 100644 --- a/libstdc++-v3/include/parallel/quicksort.h +++ b/libstdc++-v3/include/parallel/quicksort.h @@ -48,13 +48,12 @@ namespace __gnu_parallel */ template<typename _RAIter, typename _Compare> typename std::iterator_traits<_RAIter>::difference_type - __parallel_sort_qs_divide(_RAIter __begin, - _RAIter __end, - _Compare __comp, typename std::iterator_traits - <_RAIter>::difference_type __pivot_rank, - typename std::iterator_traits - <_RAIter>::difference_type - __num_samples, _ThreadIndex __num_threads) + __parallel_sort_qs_divide(_RAIter __begin, _RAIter __end, + _Compare __comp, typename std::iterator_traits + <_RAIter>::difference_type __pivot_rank, + typename std::iterator_traits + <_RAIter>::difference_type + __num_samples, _ThreadIndex __num_threads) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; @@ -64,25 +63,24 @@ namespace __gnu_parallel __num_samples = std::min(__num_samples, __n); // Allocate uninitialized, to avoid default constructor. - _ValueType* __samples = - static_cast<_ValueType*>(::operator new(__num_samples - * sizeof(_ValueType))); + _ValueType* __samples = static_cast<_ValueType*> + (::operator new(__num_samples * sizeof(_ValueType))); for (_DifferenceType __s = 0; __s < __num_samples; ++__s) { - const unsigned long long __index - = static_cast<unsigned long long>(__s) * __n / __num_samples; + const unsigned long long __index = static_cast<unsigned long long> + (__s) * __n / __num_samples; ::new(&(__samples[__s])) _ValueType(__begin[__index]); } __gnu_sequential::sort(__samples, __samples + __num_samples, __comp); - _ValueType& pivot = __samples[__pivot_rank * __num_samples / __n]; + _ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n]; __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> - __pred(__comp, pivot); - _DifferenceType __split = - __parallel_partition(__begin, __end, __pred, __num_threads); + __pred(__comp, __pivot); + _DifferenceType __split = __parallel_partition(__begin, __end, + __pred, __num_threads); ::operator delete(__samples); @@ -98,10 +96,9 @@ namespace __gnu_parallel */ template<typename _RAIter, typename _Compare> void - __parallel_sort_qs_conquer(_RAIter __begin, - _RAIter __end, - _Compare __comp, - _ThreadIndex __num_threads) + __parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; @@ -127,24 +124,22 @@ namespace __gnu_parallel __pivot_rank = __n * __num_threads_left / __num_threads; - _DifferenceType __split = - __parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank, - _Settings::get().sort_qs_num_samples_preset, - __num_threads); + _DifferenceType __split = __parallel_sort_qs_divide + (__begin, __end, __comp, __pivot_rank, + 
_Settings::get().sort_qs_num_samples_preset, __num_threads); #pragma omp parallel sections num_threads(2) { #pragma omp section __parallel_sort_qs_conquer(__begin, __begin + __split, - __comp, __num_threads_left); + __comp, __num_threads_left); #pragma omp section __parallel_sort_qs_conquer(__begin + __split, __end, - __comp, __num_threads - __num_threads_left); + __comp, __num_threads - __num_threads_left); } } - /** @brief Unbalanced quicksort main call. * @param __begin Begin iterator of input sequence. * @param __end End iterator input sequence, ignored. @@ -154,10 +149,9 @@ namespace __gnu_parallel */ template<typename _RAIter, typename _Compare> void - __parallel_sort_qs(_RAIter __begin, - _RAIter __end, - _Compare __comp, - _ThreadIndex __num_threads) + __parallel_sort_qs(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) { _GLIBCXX_CALL(__n) diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h index a74b9ac82ac..137414f20f9 100644 --- a/libstdc++-v3/include/parallel/random_shuffle.h +++ b/libstdc++-v3/include/parallel/random_shuffle.h @@ -39,492 +39,484 @@ namespace __gnu_parallel { -/** @brief Type to hold the index of a bin. - * - * Since many variables of this type are allocated, it should be - * chosen as small as possible. - */ -typedef unsigned short _BinIndex; - -/** @brief Data known to every thread participating in - __gnu_parallel::__parallel_random_shuffle(). */ -template<typename _RAIter> - struct _DRandomShufflingGlobalData - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - /** @brief Begin iterator of the __source. */ - _RAIter& _M_source; - - /** @brief Temporary arrays for each thread. */ - _ValueType** _M_temporaries; - - /** @brief Two-dimensional array to hold the thread-bin distribution. - * - * Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */ - _DifferenceType** _M_dist; - - /** @brief Start indexes of the threads' __chunks. */ - _DifferenceType* _M_starts; - - /** @brief Number of the thread that will further process the - corresponding bin. */ - _ThreadIndex* _M_bin_proc; - - /** @brief Number of bins to distribute to. */ - int _M_num_bins; - - /** @brief Number of bits needed to address the bins. */ - int _M_num_bits; - - /** @brief Constructor. */ - _DRandomShufflingGlobalData(_RAIter& __source) - : _M_source(__source) { } - }; - -/** @brief Local data for a thread participating in - __gnu_parallel::__parallel_random_shuffle(). - */ -template<typename _RAIter, typename _RandomNumberGenerator> - struct _DRSSorterPU - { - /** @brief Number of threads participating in total. */ - int _M_num_threads; - - /** @brief Begin index for bins taken care of by this thread. */ - _BinIndex _M_bins_begin; - - /** @brief End index for bins taken care of by this thread. */ - _BinIndex __bins_end; - - /** @brief Random _M_seed for this thread. */ - uint32_t _M_seed; - - /** @brief Pointer to global data. */ - _DRandomShufflingGlobalData<_RAIter>* _M_sd; - }; - -/** @brief Generate a random number in @__c [0,2^logp). - * @param logp Logarithm (basis 2) of the upper range __bound. - * @param __rng Random number generator to use. - */ -template<typename _RandomNumberGenerator> - inline int - __random_number_pow2(int logp, _RandomNumberGenerator& __rng) - { return __rng.__genrand_bits(logp); } - -/** @brief Random shuffle code executed by each thread. 
- * @param __pus Array of thread-local data records. */ -template<typename _RAIter, typename _RandomNumberGenerator> - void - __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter, - _RandomNumberGenerator>* __pus) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _ThreadIndex __iam = omp_get_thread_num(); - _DRSSorterPU<_RAIter, _RandomNumberGenerator>* d = &__pus[__iam]; - _DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd; - - // Indexing: _M_dist[bin][processor] - _DifferenceType __length = _M_sd->_M_starts[__iam + 1] - - _M_sd->_M_starts[__iam]; - _BinIndex* __oracles = new _BinIndex[__length]; - _DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1]; - _BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins]; - _ValueType** _M_temporaries = new _ValueType*[d->_M_num_threads]; - - // Compute oracles and count appearances. - for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b) - _M_dist[__b] = 0; - int _M_num_bits = _M_sd->_M_num_bits; - - _RandomNumber __rng(d->_M_seed); - - // First main loop. - for (_DifferenceType __i = 0; __i < __length; ++__i) - { - _BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng); - __oracles[__i] = __oracle; + /** @brief Type to hold the index of a bin. + * + * Since many variables of this type are allocated, it should be + * chosen as small as possible. + */ + typedef unsigned short _BinIndex; + + /** @brief Data known to every thread participating in + __gnu_parallel::__parallel_random_shuffle(). */ + template<typename _RAIter> + struct _DRandomShufflingGlobalData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - // To allow prefix (partial) sum. - ++(_M_dist[__oracle + 1]); - } + /** @brief Begin iterator of the __source. */ + _RAIter& _M_source; - for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b) - _M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b]; + /** @brief Temporary arrays for each thread. */ + _ValueType** _M_temporaries; -# pragma omp barrier + /** @brief Two-dimensional array to hold the thread-bin distribution. + * + * Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */ + _DifferenceType** _M_dist; -# pragma omp single - { - // Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains - // the total number of items in bin __s - for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s) - __gnu_sequential::partial_sum( - _M_sd->_M_dist[__s + 1], - _M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1, - _M_sd->_M_dist[__s + 1]); - } + /** @brief Start indexes of the threads' __chunks. */ + _DifferenceType* _M_starts; -# pragma omp barrier + /** @brief Number of the thread that will further process the + corresponding bin. */ + _ThreadIndex* _M_bin_proc; - _SequenceIndex __offset = 0, __global_offset = 0; - for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s) - __global_offset += _M_sd->_M_dist[__s + 1][d->_M_num_threads]; + /** @brief Number of bins to distribute to. */ + int _M_num_bins; -# pragma omp barrier + /** @brief Number of bits needed to address the bins. 
*/ + int _M_num_bits; - for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s) - { - for (int __t = 0; __t < d->_M_num_threads + 1; ++__t) - _M_sd->_M_dist[__s + 1][__t] += __offset; - __offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads]; - } + /** @brief Constructor. */ + _DRandomShufflingGlobalData(_RAIter& __source) + : _M_source(__source) { } + }; + + /** @brief Local data for a thread participating in + __gnu_parallel::__parallel_random_shuffle(). + */ + template<typename _RAIter, typename _RandomNumberGenerator> + struct _DRSSorterPU + { + /** @brief Number of threads participating in total. */ + int _M_num_threads; + + /** @brief Begin index for bins taken care of by this thread. */ + _BinIndex _M_bins_begin; + + /** @brief End index for bins taken care of by this thread. */ + _BinIndex __bins_end; + + /** @brief Random _M_seed for this thread. */ + uint32_t _M_seed; + + /** @brief Pointer to global data. */ + _DRandomShufflingGlobalData<_RAIter>* _M_sd; + }; + + /** @brief Generate a random number in @__c [0,2^__logp). + * @param __logp Logarithm (basis 2) of the upper range __bound. + * @param __rng Random number generator to use. + */ + template<typename _RandomNumberGenerator> + inline int + __random_number_pow2(int __logp, _RandomNumberGenerator& __rng) + { return __rng.__genrand_bits(__logp); } + + /** @brief Random shuffle code executed by each thread. + * @param __pus Array of thread-local data records. */ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter, + _RandomNumberGenerator>* __pus) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - _M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>( - ::operator new(sizeof(_ValueType) * __offset)); + _ThreadIndex __iam = omp_get_thread_num(); + _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam]; + _DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd; -# pragma omp barrier + // Indexing: _M_dist[bin][processor] + _DifferenceType __length = (__sd->_M_starts[__iam + 1] + - __sd->_M_starts[__iam]); + _BinIndex* __oracles = new _BinIndex[__length]; + _DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1]; + _BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins]; + _ValueType** __temporaries = new _ValueType*[__d->_M_num_threads]; - // Draw local copies to avoid false sharing. - for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b) - _M_dist[__b] = _M_sd->_M_dist[__b][__iam]; - for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b) - _M_bin_proc[__b] = _M_sd->_M_bin_proc[__b]; - for (_ThreadIndex __t = 0; __t < d->_M_num_threads; ++__t) - _M_temporaries[__t] = _M_sd->_M_temporaries[__t]; + // Compute oracles and count appearances. + for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __dist[__b] = 0; + int __num_bits = __sd->_M_num_bits; - _RAIter _M_source = _M_sd->_M_source; - _DifferenceType __start = _M_sd->_M_starts[__iam]; + _RandomNumber __rng(__d->_M_seed); - // Distribute according to oracles, second main loop. - for (_DifferenceType __i = 0; __i < __length; ++__i) - { - _BinIndex target_bin = __oracles[__i]; - _ThreadIndex target_p = _M_bin_proc[target_bin]; + // First main loop. 
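+        // Pass 1: draw a random bin ("oracle") for every element and
+        // histogram the choices; no element is moved yet.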
+ for (_DifferenceType __i = 0; __i < __length; ++__i) + { + _BinIndex __oracle = __random_number_pow2(__num_bits, __rng); + __oracles[__i] = __oracle; - // Last column [d->_M_num_threads] stays unchanged. - ::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++])) - _ValueType(*(_M_source + __i + __start)); - } + // To allow prefix (partial) sum. + ++(__dist[__oracle + 1]); + } - delete[] __oracles; - delete[] _M_dist; - delete[] _M_bin_proc; - delete[] _M_temporaries; + for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __sd->_M_dist[__b][__iam + 1] = __dist[__b]; -# pragma omp barrier +# pragma omp barrier - // Shuffle bins internally. - for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b) +# pragma omp single { - _ValueType* __begin = - _M_sd->_M_temporaries[__iam] + - ((__b == d->_M_bins_begin) - ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]), - * __end = - _M_sd->_M_temporaries[__iam] + - _M_sd->_M_dist[__b + 1][d->_M_num_threads]; - __sequential_random_shuffle(__begin, __end, __rng); - std::copy(__begin, __end, _M_sd->_M_source + __global_offset + - ((__b == d->_M_bins_begin) - ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads])); + // Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now + // contains the total number of items in bin __s + for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s) + __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1], + __sd->_M_dist[__s + 1] + + __d->_M_num_threads + 1, + __sd->_M_dist[__s + 1]); } - ::operator delete(_M_sd->_M_temporaries[__iam]); - } - -/** @brief Round up to the next greater power of 2. - * @param __x _Integer to round up */ -template<typename _Tp> - _Tp - __round_up_to_pow2(_Tp __x) - { - if (__x <= 1) - return 1; - else - return (_Tp)1 << (__rd_log2(__x - 1) + 1); - } - -/** @brief Main parallel random shuffle step. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __n Length of sequence. - * @param __num_threads Number of threads to use. - * @param __rng Random number generator to use. - */ -template<typename _RAIter, typename _RandomNumberGenerator> - void - __parallel_random_shuffle_drs(_RAIter __begin, - _RAIter __end, - typename std::iterator_traits - <_RAIter>::difference_type __n, - _ThreadIndex __num_threads, - _RandomNumberGenerator& __rng) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _GLIBCXX_CALL(__n) - - const _Settings& __s = _Settings::get(); - - if (__num_threads > __n) - __num_threads = static_cast<_ThreadIndex>(__n); - - _BinIndex _M_num_bins, __num_bins_cache; +# pragma omp barrier + + _SequenceIndex __offset = 0, __global_offset = 0; + for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s) + __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads]; + +# pragma omp barrier + + for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s) + { + for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t) + __sd->_M_dist[__s + 1][__t] += __offset; + __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads]; + } + + __sd->_M_temporaries[__iam] = static_cast<_ValueType*> + (::operator new(sizeof(_ValueType) * __offset)); + +# pragma omp barrier + + // Draw local copies to avoid false sharing. 
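+        // Each thread snapshots the shared counters into private arrays,
+        // so the per-element increments below touch only local memory.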
+ for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __dist[__b] = __sd->_M_dist[__b][__iam]; + for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b) + __bin_proc[__b] = __sd->_M_bin_proc[__b]; + for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t) + __temporaries[__t] = __sd->_M_temporaries[__t]; + + _RAIter __source = __sd->_M_source; + _DifferenceType __start = __sd->_M_starts[__iam]; + + // Distribute according to oracles, second main loop. + for (_DifferenceType __i = 0; __i < __length; ++__i) + { + _BinIndex __target_bin = __oracles[__i]; + _ThreadIndex __target_p = __bin_proc[__target_bin]; + + // Last column [__d->_M_num_threads] stays unchanged. + ::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++])) + _ValueType(*(__source + __i + __start)); + } + + delete[] __oracles; + delete[] __dist; + delete[] __bin_proc; + delete[] __temporaries; + +# pragma omp barrier + + // Shuffle bins internally. + for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b) + { + _ValueType* __begin = + (__sd->_M_temporaries[__iam] + + (__b == __d->_M_bins_begin + ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])), + * __end = (__sd->_M_temporaries[__iam] + + __sd->_M_dist[__b + 1][__d->_M_num_threads]); + + __sequential_random_shuffle(__begin, __end, __rng); + std::copy(__begin, __end, __sd->_M_source + __global_offset + + (__b == __d->_M_bins_begin + ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])); + } + + ::operator delete(__sd->_M_temporaries[__iam]); + } + + /** @brief Round up to the next greater power of 2. + * @param __x _Integer to round up */ + template<typename _Tp> + _Tp + __round_up_to_pow2(_Tp __x) + { + if (__x <= 1) + return 1; + else + return (_Tp)1 << (__rd_log2(__x - 1) + 1); + } + + /** @brief Main parallel random shuffle step. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. + * @param __rng Random number generator to use. + */ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end, + typename std::iterator_traits + <_RAIter>::difference_type __n, + _ThreadIndex __num_threads, + _RandomNumberGenerator& __rng) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _GLIBCXX_CALL(__n) + + const _Settings& __s = _Settings::get(); + + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + _BinIndex __num_bins, __num_bins_cache; #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 - // Try the L1 cache first. + // Try the L1 cache first. - // Must fit into L1. - __num_bins_cache = std::max<_DifferenceType>( - 1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); - __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + // Must fit into L1. + __num_bins_cache = + std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb + / sizeof(_ValueType))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); - // No more buckets than TLB entries, power of 2 - // Power of 2 and at least one element per bin, at most the TLB size. - _M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. 
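+      // A power-of-two bin count lets __random_number_pow2 pick a bin
+      // from raw random bits, avoiding a modulo per element.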
+ __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB - // 2 TLB entries needed per bin. - _M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins); + // 2 TLB entries needed per bin. + __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); #endif - _M_num_bins = __round_up_to_pow2(_M_num_bins); + __num_bins = __round_up_to_pow2(__num_bins); - if (_M_num_bins < __num_bins_cache) - { + if (__num_bins < __num_bins_cache) + { #endif - // Now try the L2 cache - // Must fit into L2 - __num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>( - 1, __n / (__s.L2_cache_size / sizeof(_ValueType)))); - __num_bins_cache = __round_up_to_pow2(__num_bins_cache); - - // No more buckets than TLB entries, power of 2. - _M_num_bins = static_cast<_BinIndex>( - std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); - // Power of 2 and at least one element per bin, at most the TLB size. + // Now try the L2 cache + // Must fit into L2 + __num_bins_cache = static_cast<_BinIndex> + (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size + / sizeof(_ValueType)))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2. + __num_bins = static_cast<_BinIndex> + (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); + // Power of 2 and at least one element per bin, at most the TLB size. #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB - // 2 TLB entries needed per bin. - _M_num_bins = std::min( - static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins); + // 2 TLB entries needed per bin. + __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2), + __num_bins); #endif - _M_num_bins = __round_up_to_pow2(_M_num_bins); + __num_bins = __round_up_to_pow2(__num_bins); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 - } + } #endif - __num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins); + __num_threads = std::min<_BinIndex>(__num_threads, __num_bins); - if (__num_threads <= 1) - return __sequential_random_shuffle(__begin, __end, __rng); + if (__num_threads <= 1) + return __sequential_random_shuffle(__begin, __end, __rng); - _DRandomShufflingGlobalData<_RAIter> _M_sd(__begin); - _DRSSorterPU<_RAIter, _RandomNumber >* __pus; - _DifferenceType* _M_starts; + _DRandomShufflingGlobalData<_RAIter> __sd(__begin); + _DRSSorterPU<_RAIter, _RandomNumber >* __pus; + _DifferenceType* __starts; -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { - _ThreadIndex __num_threads = omp_get_num_threads(); + _ThreadIndex __num_threads = omp_get_num_threads(); # pragma omp single - { - __pus = new _DRSSorterPU<_RAIter, _RandomNumber> - [__num_threads]; - - _M_sd._M_temporaries = new _ValueType*[__num_threads]; - _M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1]; - _M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins]; - for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b) - _M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; - for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b) - { - _M_sd._M_dist[0][0] = 0; - _M_sd._M_dist[__b][0] = 0; - } - _M_starts = _M_sd._M_starts - = new _DifferenceType[__num_threads + 1]; - int bin_cursor = 0; - _M_sd._M_num_bins = _M_num_bins; - _M_sd._M_num_bits = __rd_log2(_M_num_bins); - - _DifferenceType __chunk_length = __n / __num_threads, - __split = __n % __num_threads, __start = 0; - _DifferenceType bin_chunk_length = _M_num_bins / __num_threads, - bin_split = _M_num_bins % 
__num_threads; - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - { - _M_starts[__i] = __start; - __start += (__i < __split) - ? (__chunk_length + 1) : __chunk_length; - int __j = __pus[__i]._M_bins_begin = bin_cursor; - - // Range of bins for this processor. - bin_cursor += (__i < bin_split) ? - (bin_chunk_length + 1) : bin_chunk_length; - __pus[__i].__bins_end = bin_cursor; - for (; __j < bin_cursor; ++__j) - _M_sd._M_bin_proc[__j] = __i; - __pus[__i]._M_num_threads = __num_threads; - __pus[__i]._M_seed = - __rng(std::numeric_limits<uint32_t>::max()); - __pus[__i]._M_sd = &_M_sd; - } - _M_starts[__num_threads] = __start; - } //single - // Now shuffle in parallel. - __parallel_random_shuffle_drs_pu(__pus); + { + __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads]; + + __sd._M_temporaries = new _ValueType*[__num_threads]; + __sd._M_dist = new _DifferenceType*[__num_bins + 1]; + __sd._M_bin_proc = new _ThreadIndex[__num_bins]; + for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b) + __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; + for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b) + { + __sd._M_dist[0][0] = 0; + __sd._M_dist[__b][0] = 0; + } + __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; + int __bin_cursor = 0; + __sd._M_num_bins = __num_bins; + __sd._M_num_bits = __rd_log2(__num_bins); + + _DifferenceType __chunk_length = __n / __num_threads, + __split = __n % __num_threads, + __start = 0; + _DifferenceType __bin_chunk_length = __num_bins / __num_threads, + __bin_split = __num_bins % __num_threads; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + __starts[__i] = __start; + __start += (__i < __split + ? (__chunk_length + 1) : __chunk_length); + int __j = __pus[__i]._M_bins_begin = __bin_cursor; + + // Range of bins for this processor. + __bin_cursor += (__i < __bin_split + ? (__bin_chunk_length + 1) + : __bin_chunk_length); + __pus[__i].__bins_end = __bin_cursor; + for (; __j < __bin_cursor; ++__j) + __sd._M_bin_proc[__j] = __i; + __pus[__i]._M_num_threads = __num_threads; + __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max()); + __pus[__i]._M_sd = &__sd; + } + __starts[__num_threads] = __start; + } //single + // Now shuffle in parallel. + __parallel_random_shuffle_drs_pu(__pus); } // parallel - delete[] _M_starts; - delete[] _M_sd._M_bin_proc; - for (int __s = 0; __s < (_M_num_bins + 1); ++__s) - delete[] _M_sd._M_dist[__s]; - delete[] _M_sd._M_dist; - delete[] _M_sd._M_temporaries; + delete[] __starts; + delete[] __sd._M_bin_proc; + for (int __s = 0; __s < (__num_bins + 1); ++__s) + delete[] __sd._M_dist[__s]; + delete[] __sd._M_dist; + delete[] __sd._M_temporaries; - delete[] __pus; - } + delete[] __pus; + } -/** @brief Sequential cache-efficient random shuffle. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __rng Random number generator to use. - */ -template<typename _RAIter, typename _RandomNumberGenerator> - void - __sequential_random_shuffle(_RAIter __begin, - _RAIter __end, - _RandomNumberGenerator& __rng) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; + /** @brief Sequential cache-efficient random shuffle. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __rng Random number generator to use. 
+ */ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __sequential_random_shuffle(_RAIter __begin, _RAIter __end, + _RandomNumberGenerator& __rng) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - _DifferenceType __n = __end - __begin; - const _Settings& __s = _Settings::get(); + _DifferenceType __n = __end - __begin; + const _Settings& __s = _Settings::get(); - _BinIndex _M_num_bins, __num_bins_cache; + _BinIndex __num_bins, __num_bins_cache; #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 - // Try the L1 cache first, must fit into L1. - __num_bins_cache = - std::max<_DifferenceType> - (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); - __num_bins_cache = __round_up_to_pow2(__num_bins_cache); - - // No more buckets than TLB entries, power of 2 - // Power of 2 and at least one element per bin, at most the TLB size - _M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache); + // Try the L1 cache first, must fit into L1. + __num_bins_cache = std::max<_DifferenceType> + (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size + __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB - // 2 TLB entries needed per bin - _M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins); + // 2 TLB entries needed per bin + __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins); #endif - _M_num_bins = __round_up_to_pow2(_M_num_bins); + __num_bins = __round_up_to_pow2(__num_bins); - if (_M_num_bins < __num_bins_cache) - { + if (__num_bins < __num_bins_cache) + { #endif - // Now try the L2 cache, must fit into L2. - __num_bins_cache = - static_cast<_BinIndex>(std::max<_DifferenceType>( - 1, __n / (__s.L2_cache_size / sizeof(_ValueType)))); - __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + // Now try the L2 cache, must fit into L2. + __num_bins_cache = static_cast<_BinIndex> + (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size + / sizeof(_ValueType)))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); - // No more buckets than TLB entries, power of 2 - // Power of 2 and at least one element per bin, at most the TLB size. - _M_num_bins = static_cast<_BinIndex> - (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. 
+ __num_bins = static_cast<_BinIndex> + (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB - // 2 TLB entries needed per bin - _M_num_bins = - std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins); + // 2 TLB entries needed per bin + __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); #endif - _M_num_bins = __round_up_to_pow2(_M_num_bins); + __num_bins = __round_up_to_pow2(__num_bins); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 - } + } #endif - int _M_num_bits = __rd_log2(_M_num_bins); + int __num_bits = __rd_log2(__num_bins); - if (_M_num_bins > 1) - { - _ValueType* __target = static_cast<_ValueType*>( - ::operator new(sizeof(_ValueType) * __n)); - _BinIndex* __oracles = new _BinIndex[__n]; - _DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1], - * __dist1 = new _DifferenceType[_M_num_bins + 1]; - - for (int __b = 0; __b < _M_num_bins + 1; ++__b) - __dist0[__b] = 0; - - _RandomNumber bitrng(__rng(0xFFFFFFFF)); - - for (_DifferenceType __i = 0; __i < __n; ++__i) - { - _BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng); - __oracles[__i] = __oracle; - - // To allow prefix (partial) sum. - ++(__dist0[__oracle + 1]); - } - - // Sum up bins. - __gnu_sequential:: - partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0); - - for (int __b = 0; __b < _M_num_bins + 1; ++__b) - __dist1[__b] = __dist0[__b]; - - // Distribute according to oracles. - for (_DifferenceType __i = 0; __i < __n; ++__i) - ::new(&(__target[(__dist0[__oracles[__i]])++])) - _ValueType(*(__begin + __i)); - - for (int __b = 0; __b < _M_num_bins; ++__b) - { - __sequential_random_shuffle(__target + __dist1[__b], - __target + __dist1[__b + 1], - __rng); - } - - // Copy elements back. - std::copy(__target, __target + __n, __begin); - - delete[] __dist0; - delete[] __dist1; - delete[] __oracles; - ::operator delete(__target); - } - else - __gnu_sequential::random_shuffle(__begin, __end, __rng); - } - -/** @brief Parallel random public call. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __rng Random number generator to use. - */ -template<typename _RAIter, typename _RandomNumberGenerator> - inline void - __parallel_random_shuffle(_RAIter __begin, - _RAIter __end, - _RandomNumberGenerator __rng = _RandomNumber()) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - _DifferenceType __n = __end - __begin; - __parallel_random_shuffle_drs( - __begin, __end, __n, __get_max_threads(), __rng) ; - } + if (__num_bins > 1) + { + _ValueType* __target = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n)); + _BinIndex* __oracles = new _BinIndex[__n]; + _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1], + * __dist1 = new _DifferenceType[__num_bins + 1]; + + for (int __b = 0; __b < __num_bins + 1; ++__b) + __dist0[__b] = 0; + + _RandomNumber __bitrng(__rng(0xFFFFFFFF)); + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + _BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng); + __oracles[__i] = __oracle; + + // To allow prefix (partial) sum. + ++(__dist0[__oracle + 1]); + } + + // Sum up bins. + __gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1, + __dist0); + + for (int __b = 0; __b < __num_bins + 1; ++__b) + __dist1[__b] = __dist0[__b]; + + // Distribute according to oracles. 
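+          // Pass 2: after the partial sum, __dist0[__b] is the first free
+          // slot of bin __b and advances as elements are placed; __dist1
+          // keeps the original bin starts for the per-bin shuffles below.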
+ for (_DifferenceType __i = 0; __i < __n; ++__i) + ::new(&(__target[(__dist0[__oracles[__i]])++])) + _ValueType(*(__begin + __i)); + + for (int __b = 0; __b < __num_bins; ++__b) + __sequential_random_shuffle(__target + __dist1[__b], + __target + __dist1[__b + 1], __rng); + + // Copy elements back. + std::copy(__target, __target + __n, __begin); + + delete[] __dist0; + delete[] __dist1; + delete[] __oracles; + ::operator delete(__target); + } + else + __gnu_sequential::random_shuffle(__begin, __end, __rng); + } + + /** @brief Parallel random public call. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __rng Random number generator to use. + */ + template<typename _RAIter, typename _RandomNumberGenerator> + inline void + __parallel_random_shuffle(_RAIter __begin, _RAIter __end, + _RandomNumberGenerator __rng = _RandomNumber()) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + _DifferenceType __n = __end - __begin; + __parallel_random_shuffle_drs(__begin, __end, __n, + __get_max_threads(), __rng); + } } #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ diff --git a/libstdc++-v3/include/parallel/search.h b/libstdc++-v3/include/parallel/search.h index 1fc8ceaa5c8..9709925618d 100644 --- a/libstdc++-v3/include/parallel/search.h +++ b/libstdc++-v3/include/parallel/search.h @@ -38,7 +38,6 @@ #include <parallel/parallel.h> #include <parallel/equally_split.h> - namespace __gnu_parallel { /** @@ -47,24 +46,24 @@ namespace __gnu_parallel * @param __length Length of sequence to search for. * @param __advances Returned __offsets. */ -template<typename _RAIter, typename _DifferenceTp> - void - __calc_borders(_RAIter __elements, _DifferenceTp __length, - _DifferenceTp* __off) - { - typedef _DifferenceTp _DifferenceType; - - __off[0] = -1; - if (__length > 1) - __off[1] = 0; - _DifferenceType __k = 0; - for (_DifferenceType __j = 2; __j <= __length; __j++) - { - while ((__k >= 0) && !(__elements[__k] == __elements[__j-1])) - __k = __off[__k]; - __off[__j] = ++__k; - } - } + template<typename _RAIter, typename _DifferenceTp> + void + __calc_borders(_RAIter __elements, _DifferenceTp __length, + _DifferenceTp* __off) + { + typedef _DifferenceTp _DifferenceType; + + __off[0] = -1; + if (__length > 1) + __off[1] = 0; + _DifferenceType __k = 0; + for (_DifferenceType __j = 2; __j <= __length; __j++) + { + while ((__k >= 0) && !(__elements[__k] == __elements[__j-1])) + __k = __off[__k]; + __off[__j] = ++__k; + } + } // Generic parallel find algorithm (requires random access iterator). @@ -75,100 +74,99 @@ template<typename _RAIter, typename _DifferenceTp> * @param __end2 End iterator of second sequence. * @param __pred Find predicate. * @return Place of finding in first sequences. 
*/ -template<typename __RAIter1, - typename __RAIter2, - typename _Pred> - __RAIter1 - __search_template(__RAIter1 __begin1, __RAIter1 __end1, - __RAIter2 __begin2, __RAIter2 __end2, - _Pred __pred) - { - typedef std::iterator_traits<__RAIter1> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; + template<typename __RAIter1, + typename __RAIter2, + typename _Pred> + __RAIter1 + __search_template(__RAIter1 __begin1, __RAIter1 __end1, + __RAIter2 __begin2, __RAIter2 __end2, + _Pred __pred) + { + typedef std::iterator_traits<__RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; - _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)); + _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)); - _DifferenceType __pattern_length = __end2 - __begin2; + _DifferenceType __pattern_length = __end2 - __begin2; - // Pattern too short. - if(__pattern_length <= 0) - return __end1; + // Pattern too short. + if(__pattern_length <= 0) + return __end1; - // Last point to start search. - _DifferenceType __input_length = (__end1 - __begin1) - __pattern_length; + // Last point to start search. + _DifferenceType __input_length = (__end1 - __begin1) - __pattern_length; - // Where is first occurrence of pattern? defaults to end. - _DifferenceType __result = (__end1 - __begin1); - _DifferenceType *__splitters; + // Where is first occurrence of pattern? defaults to end. + _DifferenceType __result = (__end1 - __begin1); + _DifferenceType *__splitters; - // Pattern too long. - if (__input_length < 0) - return __end1; + // Pattern too long. + if (__input_length < 0) + return __end1; - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); - _ThreadIndex __num_threads = - std::max<_DifferenceType>(1, - std::min<_DifferenceType>(__input_length, __get_max_threads())); + _ThreadIndex __num_threads = std::max<_DifferenceType> + (1, std::min<_DifferenceType>(__input_length, + __get_max_threads())); - _DifferenceType __advances[__pattern_length]; - __calc_borders(__begin2, __pattern_length, __advances); + _DifferenceType __advances[__pattern_length]; + __calc_borders(__begin2, __pattern_length, __advances); -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __splitters = new _DifferenceType[__num_threads + 1]; - equally_split(__input_length, __num_threads, __splitters); - } - - _ThreadIndex __iam = omp_get_thread_num(); - - _DifferenceType __start = __splitters[__iam], - __stop = __splitters[__iam + 1]; - - _DifferenceType __pos_in_pattern = 0; - bool __found_pattern = false; - - while (__start <= __stop && !__found_pattern) - { - // Get new value of result. - #pragma omp flush(__result) - // No chance for this thread to find first occurrence. - if (__result < __start) - break; - while (__pred(__begin1[__start + __pos_in_pattern], - __begin2[__pos_in_pattern])) - { - ++__pos_in_pattern; - if (__pos_in_pattern == __pattern_length) - { - // Found new candidate for result. - omp_set_lock(&__result_lock); - __result = std::min(__result, __start); - omp_unset_lock(&__result_lock); - - __found_pattern = true; - break; - } - } - // Make safe jump. - __start += (__pos_in_pattern - __advances[__pos_in_pattern]); - __pos_in_pattern = - (__advances[__pos_in_pattern] < 0) ? 
- 0 : __advances[__pos_in_pattern]; - } + { + __num_threads = omp_get_num_threads(); + __splitters = new _DifferenceType[__num_threads + 1]; + equally_split(__input_length, __num_threads, __splitters); + } + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __start = __splitters[__iam], + __stop = __splitters[__iam + 1]; + + _DifferenceType __pos_in_pattern = 0; + bool __found_pattern = false; + + while (__start <= __stop && !__found_pattern) + { + // Get new value of result. +#pragma omp flush(__result) + // No chance for this thread to find first occurrence. + if (__result < __start) + break; + while (__pred(__begin1[__start + __pos_in_pattern], + __begin2[__pos_in_pattern])) + { + ++__pos_in_pattern; + if (__pos_in_pattern == __pattern_length) + { + // Found new candidate for result. + omp_set_lock(&__result_lock); + __result = std::min(__result, __start); + omp_unset_lock(&__result_lock); + + __found_pattern = true; + break; + } + } + // Make safe jump. + __start += (__pos_in_pattern - __advances[__pos_in_pattern]); + __pos_in_pattern = (__advances[__pos_in_pattern] < 0 + ? 0 : __advances[__pos_in_pattern]); + } } //parallel - omp_destroy_lock(&__result_lock); - - delete[] __splitters; + omp_destroy_lock(&__result_lock); - // Return iterator on found element. - return (__begin1 + __result); - } + delete[] __splitters; + + // Return iterator on found element. + return (__begin1 + __result); + } } // end namespace #endif /* _GLIBCXX_PARALLEL_SEARCH_H */ diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h index ac669c55d5d..6dd63c9b128 100644 --- a/libstdc++-v3/include/parallel/set_operations.h +++ b/libstdc++-v3/include/parallel/set_operations.h @@ -41,490 +41,489 @@ namespace __gnu_parallel { -template<typename _IIter, typename _OutputIterator> - _OutputIterator - copy_tail(std::pair<_IIter, _IIter> __b, - std::pair<_IIter, _IIter> __e, _OutputIterator __r) - { - if (__b.first != __e.first) + template<typename _IIter, typename _OutputIterator> + _OutputIterator + __copy_tail(std::pair<_IIter, _IIter> __b, + std::pair<_IIter, _IIter> __e, _OutputIterator __r) + { + if (__b.first != __e.first) + { + do + { + *__r++ = *__b.first++; + } + while (__b.first != __e.first); + } + else + { + while (__b.second != __e.second) + *__r++ = *__b.second++; + } + return __r; + } + + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + struct __symmetric_difference_func + { + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename std::pair<_IIter, _IIter> _IteratorPair; + + __symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {} + + _Compare _M_comp; + + _OutputIterator + _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d, + _OutputIterator __r) const { - do + while (__a != __b && __c != __d) { - *__r++ = *__b.first++; + if (_M_comp(*__a, *__c)) + { + *__r = *__a; + ++__a; + ++__r; + } + else if (_M_comp(*__c, *__a)) + { + *__r = *__c; + ++__c; + ++__r; + } + else + { + ++__a; + ++__c; + } } - while (__b.first != __e.first); + return std::copy(__c, __d, std::copy(__a, __b, __r)); } - else + + _DifferenceType + __count(_IIter __a, _IIter __b, _IIter __c, _IIter d) const { - while (__b.second != __e.second) - *__r++ = *__b.second++; + _DifferenceType __counter = 0; + + while (__a != __b && __c != d) + { + if (_M_comp(*__a, *__c)) + { + ++__a; + ++__counter; + } + else if (_M_comp(*__c, *__a)) + { + ++__c; + 
++__counter; + } + else + { + ++__a; + ++__c; + } + } + + return __counter + (__b - __a) + (d - __c); } - return __r; - } -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - struct symmetric_difference_func - { - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename std::pair<_IIter, _IIter> _IteratorPair; + _OutputIterator + __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const + { return std::copy(__c, d, __out); } - symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {} + _OutputIterator + __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const + { return std::copy(__a, __b, __out); } + }; - _Compare _M_comp; - _OutputIterator - _M_invoke(_IIter __a, _IIter __b, - _IIter __c, _IIter d, - _OutputIterator __r) const + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + struct __difference_func { - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { - *__r = *__a; - ++__a; - ++__r; - } - else if (_M_comp(*__c, *__a)) - { - *__r = *__c; - ++__c; - ++__r; - } - else - { - ++__a; - ++__c; - } - } - return std::copy(__c, d, std::copy(__a, __b, __r)); - } + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename std::pair<_IIter, _IIter> _IteratorPair; - _DifferenceType - __count(_IIter __a, _IIter __b, - _IIter __c, _IIter d) const - { - _DifferenceType __counter = 0; + __difference_func(_Compare __comp) : _M_comp(__comp) {} - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { - ++__a; - ++__counter; - } - else if (_M_comp(*__c, *__a)) - { - ++__c; - ++__counter; - } - else - { - ++__a; - ++__c; - } - } + _Compare _M_comp; - return __counter + (__b - __a) + (d - __c); - } + _OutputIterator + _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d, + _OutputIterator __r) const + { + while (__a != __b && __c != d) + { + if (_M_comp(*__a, *__c)) + { + *__r = *__a; + ++__a; + ++__r; + } + else if (_M_comp(*__c, *__a)) + { ++__c; } + else + { + ++__a; + ++__c; + } + } + return std::copy(__a, __b, __r); + } - _OutputIterator - __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const - { return std::copy(__c, d, __out); } + _DifferenceType + __count(_IIter __a, _IIter __b, + _IIter __c, _IIter d) const + { + _DifferenceType __counter = 0; - _OutputIterator - __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const - { return std::copy(__a, __b, __out); } - }; + while (__a != __b && __c != d) + { + if (_M_comp(*__a, *__c)) + { + ++__a; + ++__counter; + } + else if (_M_comp(*__c, *__a)) + { ++__c; } + else + { ++__a; ++__c; } + } + return __counter + (__b - __a); + } -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - struct __difference_func - { - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename std::pair<_IIter, _IIter> _IteratorPair; + _OutputIterator + __first_empty(_IIter, _IIter, _OutputIterator __out) const + { return __out; } - __difference_func(_Compare __comp) : _M_comp(__comp) {} + _OutputIterator + __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const + { return std::copy(__a, __b, __out); } + }; - _Compare _M_comp; - _OutputIterator - _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d, - _OutputIterator __r) const + template<typename _IIter, + typename _OutputIterator, + typename 
_Compare> + struct __intersection_func { - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { - *__r = *__a; - ++__a; - ++__r; - } - else if (_M_comp(*__c, *__a)) - { ++__c; } - else - { - ++__a; - ++__c; - } - } - return std::copy(__a, __b, __r); - } + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename std::pair<_IIter, _IIter> _IteratorPair; - _DifferenceType - __count(_IIter __a, _IIter __b, - _IIter __c, _IIter d) const - { - _DifferenceType __counter = 0; + __intersection_func(_Compare __comp) : _M_comp(__comp) {} - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { - ++__a; - ++__counter; - } - else if (_M_comp(*__c, *__a)) - { ++__c; } - else - { ++__a; ++__c; } - } + _Compare _M_comp; - return __counter + (__b - __a); - } + _OutputIterator + _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d, + _OutputIterator __r) const + { + while (__a != __b && __c != __d) + { + if (_M_comp(*__a, *__c)) + { ++__a; } + else if (_M_comp(*__c, *__a)) + { ++__c; } + else + { + *__r = *__a; + ++__a; + ++__c; + ++__r; + } + } - inline _OutputIterator - __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const - { return __out; } + return __r; + } - inline _OutputIterator - __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const - { return std::copy(__a, __b, __out); } - }; + _DifferenceType + __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const + { + _DifferenceType __counter = 0; + while (__a != __b && __c != __d) + { + if (_M_comp(*__a, *__c)) + { ++__a; } + else if (_M_comp(*__c, *__a)) + { ++__c; } + else + { + ++__a; + ++__c; + ++__counter; + } + } -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - struct __intersection_func - { - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename std::pair<_IIter, _IIter> _IteratorPair; + return __counter; + } - __intersection_func(_Compare __comp) : _M_comp(__comp) {} + _OutputIterator + __first_empty(_IIter, _IIter, _OutputIterator __out) const + { return __out; } - _Compare _M_comp; + _OutputIterator + __second_empty(_IIter, _IIter, _OutputIterator __out) const + { return __out; } + }; - _OutputIterator - _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d, - _OutputIterator __r) const + template<class _IIter, class _OutputIterator, class _Compare> + struct __union_func { - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { ++__a; } - else if (_M_comp(*__c, *__a)) - { ++__c; } - else - { - *__r = *__a; - ++__a; - ++__c; - ++__r; - } - } + typedef typename std::iterator_traits<_IIter>::difference_type + _DifferenceType; - return __r; - } + __union_func(_Compare __comp) : _M_comp(__comp) {} - _DifferenceType - __count(_IIter __a, _IIter __b, - _IIter __c, _IIter d) const - { - _DifferenceType __counter = 0; - - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { ++__a; } - else if (_M_comp(*__c, *__a)) - { ++__c; } - else - { - ++__a; - ++__c; - ++__counter; - } - } + _Compare _M_comp; - return __counter; - } + _OutputIterator + _M_invoke(_IIter __a, const _IIter __b, _IIter __c, + const _IIter __d, _OutputIterator __r) const + { + while (__a != __b && __c != __d) + { + if (_M_comp(*__a, *__c)) + { + *__r = *__a; + ++__a; + } + else if (_M_comp(*__c, *__a)) + { + *__r = *__c; + ++__c; + } + else + { + *__r = *__a; + ++__a; + ++__c; + } + ++__r; + } + return std::copy(__c, __d, 
std::copy(__a, __b, __r)); + } - inline _OutputIterator - __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const - { return __out; } + _DifferenceType + __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const + { + _DifferenceType __counter = 0; - inline _OutputIterator - __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const - { return __out; } - }; + while (__a != __b && __c != __d) + { + if (_M_comp(*__a, *__c)) + { ++__a; } + else if (_M_comp(*__c, *__a)) + { ++__c; } + else + { + ++__a; + ++__c; + } + ++__counter; + } -template<class _IIter, class _OutputIterator, class _Compare> - struct __union_func - { - typedef typename std::iterator_traits<_IIter>::difference_type - _DifferenceType; + __counter += (__b - __a); + __counter += (__d - __c); + return __counter; + } - __union_func(_Compare __comp) : _M_comp(__comp) {} + _OutputIterator + __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const + { return std::copy(__c, __d, __out); } - _Compare _M_comp; + _OutputIterator + __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const + { return std::copy(__a, __b, __out); } + }; + template<typename _IIter, + typename _OutputIterator, + typename Operation> _OutputIterator - _M_invoke(_IIter __a, const _IIter __b, _IIter __c, - const _IIter d, _OutputIterator __r) const + __parallel_set_operation(_IIter __begin1, _IIter __end1, + _IIter __begin2, _IIter __end2, + _OutputIterator __result, Operation __op) { - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { - *__r = *__a; - ++__a; - } - else if (_M_comp(*__c, *__a)) - { - *__r = *__c; - ++__c; - } - else - { - *__r = *__a; - ++__a; - ++__c; - } - ++__r; - } - return std::copy(__c, d, std::copy(__a, __b, __r)); - } + _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)) - _DifferenceType - __count(_IIter __a, _IIter __b, - _IIter __c, _IIter d) const - { - _DifferenceType __counter = 0; - - while (__a != __b && __c != d) - { - if (_M_comp(*__a, *__c)) - { ++__a; } - else if (_M_comp(*__c, *__a)) - { ++__c; } - else - { - ++__a; - ++__c; - } - ++__counter; - } + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename std::pair<_IIter, _IIter> _IteratorPair; - __counter += (__b - __a); - __counter += (d - __c); - return __counter; - } + if (__begin1 == __end1) + return __op.__first_empty(__begin2, __end2, __result); - inline _OutputIterator - __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const - { return std::copy(__c, d, __out); } + if (__begin2 == __end2) + return __op.__second_empty(__begin1, __end1, __result); - inline _OutputIterator - __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const - { return std::copy(__a, __b, __out); } - }; - -template<typename _IIter, - typename _OutputIterator, - typename Operation> - _OutputIterator - __parallel_set_operation(_IIter __begin1, _IIter __end1, - _IIter __begin2, _IIter __end2, - _OutputIterator __result, Operation __op) - { - _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)) - - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - typedef typename std::pair<_IIter, _IIter> _IteratorPair; - - if (__begin1 == __end1) - return __op.__first_empty(__begin2, __end2, __result); - - if (__begin2 == __end2) - return __op.__second_empty(__begin1, __end1, __result); - - const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2); - - const _IteratorPair 
__sequence[ 2 ] = - { std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) }; - _OutputIterator return_value = __result; - _DifferenceType *__borders; - _IteratorPair *__block_begins; - _DifferenceType* __lengths; - - _ThreadIndex __num_threads = - std::min<_DifferenceType>(__get_max_threads(), - std::min(__end1 - __begin1, __end2 - __begin2)); - -# pragma omp parallel num_threads(__num_threads) + const _DifferenceType __size = (__end1 - __begin1) + (__end2 - __begin2); + + const _IteratorPair __sequence[2] = { std::make_pair(__begin1, __end1), + std::make_pair(__begin2, __end2) }; + _OutputIterator __return_value = __result; + _DifferenceType *__borders; + _IteratorPair *__block_begins; + _DifferenceType* __lengths; + + _ThreadIndex __num_threads = + std::min<_DifferenceType>(__get_max_threads(), + std::min(__end1 - __begin1, __end2 - __begin2)); + +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - - __borders = new _DifferenceType[__num_threads + 2]; - equally_split(size, __num_threads + 1, __borders); - __block_begins = new _IteratorPair[__num_threads + 1]; - // Very __start. - __block_begins[0] = std::make_pair(__begin1, __begin2); - __lengths = new _DifferenceType[__num_threads]; - } //single - - _ThreadIndex __iam = omp_get_thread_num(); - - // _Result from multiseq_partition. - _IIter __offset[2]; - const _DifferenceType __rank = __borders[__iam + 1]; - - multiseq_partition(__sequence, __sequence + 2, - __rank, __offset, __op._M_comp); - - // allowed to read? - // together - // *(__offset[ 0 ] - 1) == *__offset[ 1 ] - if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2 - && !__op._M_comp(*(__offset[ 0 ] - 1), *__offset[ 1 ]) - && !__op._M_comp(*__offset[ 1 ], *(__offset[ 0 ] - 1))) - { - // Avoid split between globally equal elements: move one to - // front in first sequence. - --__offset[ 0 ]; - } - - _IteratorPair block_end = __block_begins[ __iam + 1 ] = - _IteratorPair(__offset[ 0 ], __offset[ 1 ]); + { + __num_threads = omp_get_num_threads(); + + __borders = new _DifferenceType[__num_threads + 2]; + equally_split(__size, __num_threads + 1, __borders); + __block_begins = new _IteratorPair[__num_threads + 1]; + // Very __start. + __block_begins[0] = std::make_pair(__begin1, __begin2); + __lengths = new _DifferenceType[__num_threads]; + } //single + + _ThreadIndex __iam = omp_get_thread_num(); + + // _Result from multiseq_partition. + _IIter __offset[2]; + const _DifferenceType __rank = __borders[__iam + 1]; + + multiseq_partition(__sequence, __sequence + 2, + __rank, __offset, __op._M_comp); + + // allowed to read? + // together + // *(__offset[ 0 ] - 1) == *__offset[ 1 ] + if (__offset[ 0 ] != __begin1 && __offset[1] != __end2 + && !__op._M_comp(*(__offset[0] - 1), *__offset[1]) + && !__op._M_comp(*__offset[1], *(__offset[0] - 1))) + { + // Avoid split between globally equal elements: move one to + // front in first sequence. + --__offset[0]; + } + + _IteratorPair __block_end = __block_begins[__iam + 1] = + _IteratorPair(__offset[0], __offset[1]); + + // Make sure all threads have their block_begin result written out. +# pragma omp barrier - // Make sure all threads have their block_begin result written out. + _IteratorPair __block_begin = __block_begins[__iam]; + + // Begin working for the first block, while the others except + // the last start to count. + if (__iam == 0) + { + // The first thread can copy already. 
+ __lengths[ __iam ] = + __op._M_invoke(__block_begin.first, __block_end.first, + __block_begin.second, __block_end.second, + __result) - __result; + } + else + { + __lengths[ __iam ] = + __op.__count(__block_begin.first, __block_end.first, + __block_begin.second, __block_end.second); + } + + // Make sure everyone wrote their lengths. # pragma omp barrier - _IteratorPair __block_begin = __block_begins[ __iam ]; + _OutputIterator __r = __result; - // Begin working for the first block, while the others except - // the last start to count. - if (__iam == 0) - { - // The first thread can copy already. - __lengths[ __iam ] = - __op._M_invoke(__block_begin.first, block_end.first, - __block_begin.second, block_end.second, __result) - - __result; - } - else - { - __lengths[ __iam ] = - __op.__count(__block_begin.first, block_end.first, - __block_begin.second, block_end.second); - } + if (__iam == 0) + { + // Do the last block. + for (int __i = 0; __i < __num_threads; ++__i) + __r += __lengths[__i]; - // Make sure everyone wrote their lengths. -# pragma omp barrier + __block_begin = __block_begins[__num_threads]; - _OutputIterator __r = __result; + // Return the result iterator of the last block. + __return_value = + __op._M_invoke(__block_begin.first, __end1, + __block_begin.second, __end2, __r); - if (__iam == 0) - { - // Do the last block. - for (int __i = 0; __i < __num_threads; ++__i) - __r += __lengths[__i]; + } + else + { + for (int __i = 0; __i < __iam; ++__i) + __r += __lengths[ __i ]; - __block_begin = __block_begins[__num_threads]; + // Reset begins for copy pass. + __op._M_invoke(__block_begin.first, __block_end.first, + __block_begin.second, __block_end.second, __r); + } + } + return __return_value; + } - // Return the result iterator of the last block. - return_value = __op._M_invoke( - __block_begin.first, __end1, __block_begin.second, __end2, __r); + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + inline _OutputIterator + __parallel_set_union(_IIter __begin1, _IIter __end1, + _IIter __begin2, _IIter __end2, + _OutputIterator __result, _Compare __comp) + { + return __parallel_set_operation(__begin1, __end1, __begin2, __end2, + __result, + __union_func< _IIter, _OutputIterator, + _Compare>(__comp)); + } - } - else - { - for (int __i = 0; __i < __iam; ++__i) - __r += __lengths[ __i ]; + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + inline _OutputIterator + __parallel_set_intersection(_IIter __begin1, _IIter __end1, + _IIter __begin2, _IIter __end2, + _OutputIterator __result, _Compare __comp) + { + return __parallel_set_operation(__begin1, __end1, __begin2, __end2, + __result, + __intersection_func<_IIter, + _OutputIterator, _Compare>(__comp)); + } - // Reset begins for copy pass. 
- __op._M_invoke(__block_begin.first, block_end.first, - __block_begin.second, block_end.second, __r); - } - } - return return_value; - } - - -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - inline _OutputIterator - __parallel_set_union(_IIter __begin1, _IIter __end1, - _IIter __begin2, _IIter __end2, - _OutputIterator __result, _Compare _M_comp) - { - return __parallel_set_operation(__begin1, __end1, __begin2, __end2, - __result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp)); - } - -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - inline _OutputIterator - __parallel_set_intersection(_IIter __begin1, _IIter __end1, + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + inline _OutputIterator + __parallel_set_difference(_IIter __begin1, _IIter __end1, _IIter __begin2, _IIter __end2, - _OutputIterator __result, _Compare _M_comp) - { - return __parallel_set_operation( - __begin1, __end1, __begin2, __end2, __result, - __intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp)); - } - -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - inline _OutputIterator - __parallel_set_difference(_IIter __begin1, _IIter __end1, - _IIter __begin2, _IIter __end2, - _OutputIterator __result, _Compare _M_comp) - { - return __parallel_set_operation( - __begin1, __end1, __begin2, __end2, __result, - __difference_func<_IIter, _OutputIterator, _Compare>(_M_comp)); - } - -template<typename _IIter, - typename _OutputIterator, - typename _Compare> - inline _OutputIterator - __parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1, - _IIter __begin2, _IIter __end2, - _OutputIterator __result, - _Compare _M_comp) - { - return __parallel_set_operation( - __begin1, __end1, __begin2, __end2, __result, - symmetric_difference_func<_IIter, _OutputIterator, _Compare> - (_M_comp)); - } + _OutputIterator __result, _Compare __comp) + { + return __parallel_set_operation(__begin1, __end1, __begin2, __end2, + __result, + __difference_func<_IIter, + _OutputIterator, _Compare>(__comp)); + } + template<typename _IIter, + typename _OutputIterator, + typename _Compare> + inline _OutputIterator + __parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1, + _IIter __begin2, _IIter __end2, + _OutputIterator __result, + _Compare __comp) + { + return __parallel_set_operation(__begin1, __end1, __begin2, __end2, + __result, + __symmetric_difference_func<_IIter, + _OutputIterator, _Compare>(__comp)); + } } #endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */ diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h index 2d38cad4d96..f1a163c63b6 100644 --- a/libstdc++-v3/include/parallel/sort.h +++ b/libstdc++-v3/include/parallel/sort.h @@ -54,12 +54,12 @@ namespace __gnu_parallel { - //prototype + //prototype template<bool __stable, typename _RAIter, typename _Compare, typename _Parallelism> - void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, _Parallelism __parallelism); + void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, _Parallelism __parallelism); /** * @brief Choose multiway mergesort, splitting variant at run-time, @@ -70,19 +70,19 @@ namespace __gnu_parallel * @callgraph */ template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, multiway_mergesort_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) + inline void + 
__parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, multiway_mergesort_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) - if(_Settings::get().sort_splitting == EXACT) - parallel_sort_mwms<__stable, true> - (__begin, __end, __comp, __parallelism.__get_num_threads()); - else - parallel_sort_mwms<__stable, false> - (__begin, __end, __comp, __parallelism.__get_num_threads()); - } + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms<__stable, true> + (__begin, __end, __comp, __parallelism.__get_num_threads()); + else + parallel_sort_mwms<__stable, false> + (__begin, __end, __comp, __parallelism.__get_num_threads()); + } /** * @brief Choose multiway mergesort with exact splitting, @@ -93,15 +93,16 @@ namespace __gnu_parallel * @callgraph */ template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, multiway_mergesort_exact_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) + inline void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, + multiway_mergesort_exact_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) parallel_sort_mwms<__stable, true> (__begin, __end, __comp, __parallelism.__get_num_threads()); - } + } /** * @brief Choose multiway mergesort with splitting by sampling, @@ -112,15 +113,16 @@ namespace __gnu_parallel * @callgraph */ template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, multiway_mergesort_sampling_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) + inline void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, + multiway_mergesort_sampling_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) - parallel_sort_mwms<__stable, false> + parallel_sort_mwms<__stable, false> (__begin, __end, __comp, __parallelism.__get_num_threads()); - } + } /** * @brief Choose quicksort for parallel sorting. @@ -130,17 +132,17 @@ namespace __gnu_parallel * @callgraph */ template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, quicksort_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) + inline void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, quicksort_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) - _GLIBCXX_PARALLEL_ASSERT(__stable == false); + _GLIBCXX_PARALLEL_ASSERT(__stable == false); - __parallel_sort_qs(__begin, __end, __comp, - __parallelism.__get_num_threads()); - } + __parallel_sort_qs(__begin, __end, __comp, + __parallelism.__get_num_threads()); + } /** * @brief Choose balanced quicksort for parallel sorting. @@ -150,19 +152,18 @@ namespace __gnu_parallel * @param __stable Sort __stable. 
* @callgraph */ - template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, balanced_quicksort_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) - - _GLIBCXX_PARALLEL_ASSERT(__stable == false); + template<bool __stable, typename _RAIter, typename _Compare> + inline void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, balanced_quicksort_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) - __parallel_sort_qsb(__begin, __end, __comp, - __parallelism.__get_num_threads()); - } + _GLIBCXX_PARALLEL_ASSERT(__stable == false); + __parallel_sort_qsb(__begin, __end, __comp, + __parallelism.__get_num_threads()); + } /** * @brief Choose multiway mergesort with exact splitting, @@ -173,17 +174,16 @@ namespace __gnu_parallel * @callgraph */ template<bool __stable, typename _RAIter, typename _Compare> - inline void - __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, default_parallel_tag __parallelism) - { - _GLIBCXX_CALL(__end - __begin) - - __parallel_sort<__stable> - (__begin, __end, __comp, - multiway_mergesort_exact_tag(__parallelism.__get_num_threads())); - } + inline void + __parallel_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, default_parallel_tag __parallelism) + { + _GLIBCXX_CALL(__end - __begin) + __parallel_sort<__stable> + (__begin, __end, __comp, + multiway_mergesort_exact_tag(__parallelism.__get_num_threads())); + } /** * @brief Choose a parallel sorting algorithm. @@ -196,7 +196,7 @@ namespace __gnu_parallel template<bool __stable, typename _RAIter, typename _Compare> inline void __parallel_sort(_RAIter __begin, _RAIter __end, - _Compare __comp, parallel_tag __parallelism) + _Compare __comp, parallel_tag __parallelism) { _GLIBCXX_CALL(__end - __begin) typedef std::iterator_traits<_RAIter> _TraitsType; diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h index 43561d28d5c..bc47b2699b4 100644 --- a/libstdc++-v3/include/parallel/tags.h +++ b/libstdc++-v3/include/parallel/tags.h @@ -51,20 +51,16 @@ namespace __gnu_parallel public: /** @brief Default constructor. Use default number of threads. */ parallel_tag() - { - this->_M_num_threads = 0; - } + { _M_num_threads = 0; } /** @brief Default constructor. Recommend number of threads to use. * @param __num_threads Desired number of threads. */ parallel_tag(_ThreadIndex __num_threads) - { - this->_M_num_threads = __num_threads; - } + { _M_num_threads = __num_threads; } /** @brief Find out desired number of threads. * @return Desired number of threads. */ - inline _ThreadIndex __get_num_threads() + _ThreadIndex __get_num_threads() { if(_M_num_threads == 0) return omp_get_max_threads(); @@ -74,19 +70,17 @@ namespace __gnu_parallel /** @brief Set the desired number of threads. * @param __num_threads Desired number of threads. */ - inline void set_num_threads(_ThreadIndex __num_threads) - { - this->_M_num_threads = __num_threads; - } + void set_num_threads(_ThreadIndex __num_threads) + { _M_num_threads = __num_threads; } }; /** @brief Recommends parallel execution using the default parallel algorithm. 
*/ struct default_parallel_tag : public parallel_tag { - default_parallel_tag() { } - default_parallel_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + default_parallel_tag() { } + default_parallel_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Recommends parallel execution using dynamic @@ -114,18 +108,18 @@ namespace __gnu_parallel * with exact splitting, at compile time. */ struct exact_tag : public parallel_tag { - exact_tag() { } - exact_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + exact_tag() { } + exact_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Forces parallel merging * with exact splitting, at compile time. */ struct sampling_tag : public parallel_tag { - sampling_tag() { } - sampling_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + sampling_tag() { } + sampling_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; @@ -133,45 +127,45 @@ namespace __gnu_parallel * at compile time. */ struct multiway_mergesort_tag : public parallel_tag { - multiway_mergesort_tag() { } - multiway_mergesort_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + multiway_mergesort_tag() { } + multiway_mergesort_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Forces parallel sorting using multiway mergesort * with exact splitting at compile time. */ struct multiway_mergesort_exact_tag : public parallel_tag { - multiway_mergesort_exact_tag() { } - multiway_mergesort_exact_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + multiway_mergesort_exact_tag() { } + multiway_mergesort_exact_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Forces parallel sorting using multiway mergesort * with splitting by sampling at compile time. */ struct multiway_mergesort_sampling_tag : public parallel_tag { - multiway_mergesort_sampling_tag() { } - multiway_mergesort_sampling_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + multiway_mergesort_sampling_tag() { } + multiway_mergesort_sampling_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Forces parallel sorting using unbalanced quicksort * at compile time. */ struct quicksort_tag : public parallel_tag { - quicksort_tag() { } - quicksort_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + quicksort_tag() { } + quicksort_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; /** @brief Forces parallel sorting using balanced quicksort * at compile time. */ struct balanced_quicksort_tag : public parallel_tag { - balanced_quicksort_tag() { } - balanced_quicksort_tag(_ThreadIndex __num_threads) - : parallel_tag(__num_threads) { } + balanced_quicksort_tag() { } + balanced_quicksort_tag(_ThreadIndex __num_threads) + : parallel_tag(__num_threads) { } }; diff --git a/libstdc++-v3/include/parallel/unique_copy.h b/libstdc++-v3/include/parallel/unique_copy.h index 327870e7d86..1ae5a12316d 100644 --- a/libstdc++-v3/include/parallel/unique_copy.h +++ b/libstdc++-v3/include/parallel/unique_copy.h @@ -37,155 +37,160 @@ namespace __gnu_parallel { - -/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate. - * @param __first Begin iterator of input sequence. - * @param __last End iterator of input sequence. - * @param __result Begin iterator of result __sequence. - * @param __binary_pred Equality predicate. 
- * @return End iterator of result __sequence. */ -template<typename _IIter, - class _OutputIterator, - class _BinaryPredicate> - _OutputIterator - __parallel_unique_copy(_IIter __first, _IIter __last, - _OutputIterator __result, _BinaryPredicate __binary_pred) - { - _GLIBCXX_CALL(__last - __first) - - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _DifferenceType size = __last - __first; - - if (size == 0) - return __result; - - // Let the first thread process two parts. - _DifferenceType *__counter; - _DifferenceType *__borders; - - _ThreadIndex __num_threads = __get_max_threads(); - // First part contains at least one element. -# pragma omp parallel num_threads(__num_threads) + /** @brief Parallel std::unique_copy(), w/__o explicit equality predicate. + * @param __first Begin iterator of input sequence. + * @param __last End iterator of input sequence. + * @param __result Begin iterator of result __sequence. + * @param __binary_pred Equality predicate. + * @return End iterator of result __sequence. */ + template<typename _IIter, + class _OutputIterator, + class _BinaryPredicate> + _OutputIterator + __parallel_unique_copy(_IIter __first, _IIter __last, + _OutputIterator __result, + _BinaryPredicate __binary_pred) + { + _GLIBCXX_CALL(__last - __first) + + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __size = __last - __first; + + if (__size == 0) + return __result; + + // Let the first thread process two parts. + _DifferenceType *__counter; + _DifferenceType *__borders; + + _ThreadIndex __num_threads = __get_max_threads(); + // First part contains at least one element. +# pragma omp parallel num_threads(__num_threads) { # pragma omp single + { + __num_threads = omp_get_num_threads(); + __borders = new _DifferenceType[__num_threads + 2]; + equally_split(__size, __num_threads + 1, __borders); + __counter = new _DifferenceType[__num_threads + 1]; + } + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __begin, __end; + + // Check for length without duplicates + // Needed for position in output + _DifferenceType __i = 0; + _OutputIterator __out = __result; + + if (__iam == 0) { - __num_threads = omp_get_num_threads(); - __borders = new _DifferenceType[__num_threads + 2]; - equally_split(size, __num_threads + 1, __borders); - __counter = new _DifferenceType[__num_threads + 1]; + __begin = __borders[0] + 1; // == 1 + __end = __borders[__iam + 1]; + + ++__i; + *__out++ = *__first; + + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + { + ++__i; + *__out++ = *__iter; + } + } } + else + { + __begin = __borders[__iam]; //one part + __end = __borders[__iam + 1]; - _ThreadIndex __iam = omp_get_thread_num(); + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + ++__i; + } + } + __counter[__iam] = __i; - _DifferenceType __begin, __end; + // Last part still untouched. 
+ _DifferenceType __begin_output; - // Check for length without duplicates - // Needed for position in output - _DifferenceType __i = 0; - _OutputIterator __out = __result; +# pragma omp barrier - if (__iam == 0) - { - __begin = __borders[0] + 1; // == 1 - __end = __borders[__iam + 1]; + // Store result in output on calculated positions. + __begin_output = 0; - ++__i; - *__out++ = *__first; + if (__iam == 0) + { + for (int __t = 0; __t < __num_threads; ++__t) + __begin_output += __counter[__t]; - for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) - { - if (!__binary_pred(*iter, *(iter-1))) - { - ++__i; - *__out++ = *iter; - } - } - } - else - { - __begin = __borders[__iam]; //one part - __end = __borders[__iam + 1]; - - for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) - { - if (!__binary_pred(*iter, *(iter - 1))) - ++__i; - } - } - __counter[__iam] = __i; - - // Last part still untouched. - _DifferenceType __begin_output; - -# pragma omp barrier - - // Store result in output on calculated positions. - __begin_output = 0; - - if (__iam == 0) - { - for (int __t = 0; __t < __num_threads; ++__t) - __begin_output += __counter[__t]; - - __i = 0; - - _OutputIterator __iter_out = __result + __begin_output; - - __begin = __borders[__num_threads]; - __end = size; - - for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) - { - if (iter == __first || !__binary_pred(*iter, *(iter - 1))) - { - ++__i; - *__iter_out++ = *iter; - } - } - - __counter[__num_threads] = __i; - } - else - { - for (int __t = 0; __t < __iam; __t++) - __begin_output += __counter[__t]; - - _OutputIterator __iter_out = __result + __begin_output; - for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) - { - if (!__binary_pred(*iter, *(iter-1))) - *__iter_out++ = *iter; - } - } + __i = 0; + + _OutputIterator __iter_out = __result + __begin_output; + + __begin = __borders[__num_threads]; + __end = __size; + + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (__iter == __first + || !__binary_pred(*__iter, *(__iter - 1))) + { + ++__i; + *__iter_out++ = *__iter; + } + } + + __counter[__num_threads] = __i; + } + else + { + for (int __t = 0; __t < __iam; __t++) + __begin_output += __counter[__t]; + + _OutputIterator __iter_out = __result + __begin_output; + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + *__iter_out++ = *__iter; + } + } + } + + _DifferenceType __end_output = 0; + for (int __t = 0; __t < __num_threads + 1; __t++) + __end_output += __counter[__t]; + + delete[] __borders; + + return __result + __end_output; } - _DifferenceType __end_output = 0; - for (int __t = 0; __t < __num_threads + 1; __t++) - __end_output += __counter[__t]; - - delete[] __borders; - - return __result + __end_output; - } - -/** @brief Parallel std::unique_copy(), without explicit equality predicate - * @param __first Begin iterator of input sequence. - * @param __last End iterator of input sequence. - * @param __result Begin iterator of result __sequence. - * @return End iterator of result __sequence. 
*/ -template<typename _IIter, class _OutputIterator> - inline _OutputIterator - __parallel_unique_copy(_IIter __first, _IIter __last, - _OutputIterator __result) - { - typedef typename std::iterator_traits<_IIter>::value_type - _ValueType; - return __parallel_unique_copy(__first, __last, __result, - std::equal_to<_ValueType>()); - } + /** @brief Parallel std::unique_copy(), without explicit equality predicate + * @param __first Begin iterator of input sequence. + * @param __last End iterator of input sequence. + * @param __result Begin iterator of result __sequence. + * @return End iterator of result __sequence. */ + template<typename _IIter, class _OutputIterator> + inline _OutputIterator + __parallel_unique_copy(_IIter __first, _IIter __last, + _OutputIterator __result) + { + typedef typename std::iterator_traits<_IIter>::value_type + _ValueType; + return __parallel_unique_copy(__first, __last, __result, + std::equal_to<_ValueType>()); + } }//namespace __gnu_parallel diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h index 638057ca740..4ac155d923f 100644 --- a/libstdc++-v3/include/parallel/workstealing.h +++ b/libstdc++-v3/include/parallel/workstealing.h @@ -49,261 +49,264 @@ namespace __gnu_parallel #define _GLIBCXX_JOB_VOLATILE volatile -/** @brief One __job for a certain thread. */ -template<typename _DifferenceTp> - struct _Job - { - typedef _DifferenceTp _DifferenceType; - - /** @brief First element. - * - * Changed by owning and stealing thread. By stealing thread, - * always incremented. */ - _GLIBCXX_JOB_VOLATILE _DifferenceType _M_first; - - /** @brief Last element. - * - * Changed by owning thread only. */ - _GLIBCXX_JOB_VOLATILE _DifferenceType _M_last; - - /** @brief Number of elements, i.e. @__c _M_last-_M_first+1. - * - * Changed by owning thread only. */ - _GLIBCXX_JOB_VOLATILE _DifferenceType _M_load; - }; - -/** @brief Work stealing algorithm for random access iterators. - * - * Uses O(1) additional memory. Synchronization at job lists is - * done with atomic operations. - * @param __begin Begin iterator of element sequence. - * @param __end End iterator of element sequence. - * @param __op User-supplied functor (comparator, predicate, adding - * functor, ...). - * @param __f Functor to "process" an element with __op (depends on - * desired functionality, e. g. for std::for_each(), ...). - * @param __r Functor to "add" a single __result to the already - * processed elements (depends on functionality). - * @param __base Base value for reduction. - * @param __output Pointer to position where final result is written to - * @param __bound Maximum number of elements processed (e. g. for - * std::count_n()). - * @return User-supplied functor (that may contain a part of the result). - */ -template<typename _RAIter, - typename _Op, - typename _Fu, - typename _Red, - typename _Result> - _Op - __for_each_template_random_access_workstealing( - _RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - _GLIBCXX_CALL(__end - __begin) - - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - - const _Settings& __s = _Settings::get(); - - _DifferenceType __chunk_size = - static_cast<_DifferenceType>(__s.workstealing_chunk_size); - - // How many jobs? - _DifferenceType __length = (__bound < 0) ? 
(__end - __begin) : __bound; - - // To avoid false sharing in a cache line. - const int __stride = - __s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1; - - // Total number of threads currently working. - _ThreadIndex __busy = 0; - - _Job<_DifferenceType> *__job; - - omp_lock_t __output_lock; - omp_init_lock(&__output_lock); - - // Write base value to output. - __output = __base; - - // No more threads than jobs, at least one thread. - _ThreadIndex __num_threads = - __gnu_parallel::max<_ThreadIndex>(1, - __gnu_parallel::min<_DifferenceType>(__length, __get_max_threads())); - -# pragma omp parallel shared(__busy) num_threads(__num_threads) + /** @brief One __job for a certain thread. */ + template<typename _DifferenceTp> + struct _Job + { + typedef _DifferenceTp _DifferenceType; + + /** @brief First element. + * + * Changed by owning and stealing thread. By stealing thread, + * always incremented. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_first; + + /** @brief Last element. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_last; + + /** @brief Number of elements, i.e. @__c _M_last-_M_first+1. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_load; + }; + + /** @brief Work stealing algorithm for random access iterators. + * + * Uses O(1) additional memory. Synchronization at job lists is + * done with atomic operations. + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __op User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_workstealing(_RAIter __begin, + _RAIter __end, _Op __op, + _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __chunk_size = + static_cast<_DifferenceType>(__s.workstealing_chunk_size); + + // How many jobs? + _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound; + + // To avoid false sharing in a cache line. + const int __stride = (__s.cache_line_size * 10 + / sizeof(_Job<_DifferenceType>) + 1); + + // Total number of threads currently working. + _ThreadIndex __busy = 0; + + _Job<_DifferenceType> *__job; + + omp_lock_t __output_lock; + omp_init_lock(&__output_lock); + + // Write base value to output. + __output = __base; + + // No more threads than jobs, at least one thread. 
+ _ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex> + (1, __gnu_parallel::min<_DifferenceType>(__length, + __get_max_threads())); + +# pragma omp parallel shared(__busy) num_threads(__num_threads) { - # pragma omp single - { - __num_threads = omp_get_num_threads(); + { + __num_threads = omp_get_num_threads(); - // Create job description array. - __job = new _Job<_DifferenceType>[__num_threads * __stride]; - } + // Create job description array. + __job = new _Job<_DifferenceType>[__num_threads * __stride]; + } - // Initialization phase. + // Initialization phase. - // Flags for every thread if it is doing productive work. - bool __iam_working = false; + // Flags for every thread if it is doing productive work. + bool __iam_working = false; - // Thread id. - _ThreadIndex __iam = omp_get_thread_num(); + // Thread id. + _ThreadIndex __iam = omp_get_thread_num(); - // This job. - _Job<_DifferenceType>& __my_job = __job[__iam * __stride]; + // This job. + _Job<_DifferenceType>& __my_job = __job[__iam * __stride]; - // Random number (for work stealing). - _ThreadIndex __victim; + // Random number (for work stealing). + _ThreadIndex __victim; - // Local value for reduction. - _Result __result = _Result(); + // Local value for reduction. + _Result __result = _Result(); - // Number of elements to steal in one attempt. - _DifferenceType __steal; + // Number of elements to steal in one attempt. + _DifferenceType __steal; - // Every thread has its own random number generator - // (modulo __num_threads). - _RandomNumber rand_gen(__iam, __num_threads); + // Every thread has its own random number generator + // (modulo __num_threads). + _RandomNumber __rand_gen(__iam, __num_threads); - // This thread is currently working. + // This thread is currently working. # pragma omp atomic - ++__busy; + ++__busy; - __iam_working = true; + __iam_working = true; - // How many jobs per thread? last thread gets the rest. - __my_job._M_first = - static_cast<_DifferenceType>(__iam * (__length / __num_threads)); + // How many jobs per thread? last thread gets the rest. + __my_job._M_first = static_cast<_DifferenceType> + (__iam * (__length / __num_threads)); - __my_job._M_last = (__iam == (__num_threads - 1)) ? - (__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1); - __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + __my_job._M_last = (__iam == (__num_threads - 1) + ? (__length - 1) + : ((__iam + 1) * (__length / __num_threads) - 1)); + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; - // Init result with _M_first value (to have a base value for reduction) - if (__my_job._M_first <= __my_job._M_last) - { - // Cannot use volatile variable directly. - _DifferenceType __my_first = __my_job._M_first; - __result = __f(__op, __begin + __my_first); - ++__my_job._M_first; - --__my_job._M_load; - } + // Init result with _M_first value (to have a base value for reduction) + if (__my_job._M_first <= __my_job._M_last) + { + // Cannot use volatile variable directly. + _DifferenceType __my_first = __my_job._M_first; + __result = __f(__op, __begin + __my_first); + ++__my_job._M_first; + --__my_job._M_load; + } - _RAIter __current; + _RAIter __current; # pragma omp barrier - // Actual work phase - // Work on own or stolen current start - while (__busy > 0) - { - // Work until no productive thread left. + // Actual work phase + // Work on own or stolen current start + while (__busy > 0) + { + // Work until no productive thread left. 
# pragma omp flush(__busy) - // Thread has own work to do - while (__my_job._M_first <= __my_job._M_last) - { - // fetch-and-add call - // Reserve current job block (size __chunk_size) in my queue. - _DifferenceType __current_job = - __fetch_and_add<_DifferenceType>( - &(__my_job._M_first), __chunk_size); - - // Update _M_load, to make the three values consistent, - // _M_first might have been changed in the meantime - __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; - for (_DifferenceType __job_counter = 0; - __job_counter < __chunk_size - && __current_job <= __my_job._M_last; - ++__job_counter) - { - // Yes: process it! - __current = __begin + __current_job; - ++__current_job; - - // Do actual work. - __result = __r(__result, __f(__op, __current)); - } + // Thread has own work to do + while (__my_job._M_first <= __my_job._M_last) + { + // fetch-and-add call + // Reserve current job block (size __chunk_size) in my queue. + _DifferenceType __current_job = + __fetch_and_add<_DifferenceType>(&(__my_job._M_first), + __chunk_size); + + // Update _M_load, to make the three values consistent, + // _M_first might have been changed in the meantime + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + for (_DifferenceType __job_counter = 0; + __job_counter < __chunk_size + && __current_job <= __my_job._M_last; + ++__job_counter) + { + // Yes: process it! + __current = __begin + __current_job; + ++__current_job; + + // Do actual work. + __result = __r(__result, __f(__op, __current)); + } # pragma omp flush(__busy) - } + } - // After reaching this point, a thread's __job list is empty. - if (__iam_working) - { - // This thread no longer has work. + // After reaching this point, a thread's __job list is empty. + if (__iam_working) + { + // This thread no longer has work. # pragma omp atomic - --__busy; + --__busy; - __iam_working = false; - } + __iam_working = false; + } - _DifferenceType __supposed_first, __supposed_last, __supposed_load; - do - { - // Find random nonempty deque (not own), do consistency check. - __yield(); + _DifferenceType __supposed_first, __supposed_last, + __supposed_load; + do + { + // Find random nonempty deque (not own), do consistency check. + __yield(); # pragma omp flush(__busy) - __victim = rand_gen(); - __supposed_first = __job[__victim * __stride]._M_first; - __supposed_last = __job[__victim * __stride]._M_last; - __supposed_load = __job[__victim * __stride]._M_load; - } - while (__busy > 0 - && ((__supposed_load <= 0) - || ((__supposed_first + __supposed_load - 1) - != __supposed_last))); - - if (__busy == 0) - break; - - if (__supposed_load > 0) - { - // Has work and work to do. - // Number of elements to steal (at least one). - __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2; - - // Push __victim's current start forward. - _DifferenceType __stolen_first = - __fetch_and_add<_DifferenceType>( - &(__job[__victim * __stride]._M_first), __steal); - _DifferenceType __stolen_try = - __stolen_first + __steal - _DifferenceType(1); - - __my_job._M_first = __stolen_first; - __my_job._M_last = - __gnu_parallel::min(__stolen_try, __supposed_last); - __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; - - // Has potential work again. 
+ __victim = __rand_gen(); + __supposed_first = __job[__victim * __stride]._M_first; + __supposed_last = __job[__victim * __stride]._M_last; + __supposed_load = __job[__victim * __stride]._M_load; + } + while (__busy > 0 + && ((__supposed_load <= 0) + || ((__supposed_first + __supposed_load - 1) + != __supposed_last))); + + if (__busy == 0) + break; + + if (__supposed_load > 0) + { + // Has work and work to do. + // Number of elements to steal (at least one). + __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2; + + // Push __victim's current start forward. + _DifferenceType __stolen_first = + __fetch_and_add<_DifferenceType> + (&(__job[__victim * __stride]._M_first), __steal); + _DifferenceType __stolen_try = (__stolen_first + __steal + - _DifferenceType(1)); + + __my_job._M_first = __stolen_first; + __my_job._M_last = __gnu_parallel::min(__stolen_try, + __supposed_last); + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + + // Has potential work again. # pragma omp atomic - ++__busy; - __iam_working = true; + ++__busy; + __iam_working = true; # pragma omp flush(__busy) - } + } # pragma omp flush(__busy) - } // end while __busy > 0 - // Add accumulated result to output. - omp_set_lock(&__output_lock); - __output = __r(__output, __result); - omp_unset_lock(&__output_lock); + } // end while __busy > 0 + // Add accumulated result to output. + omp_set_lock(&__output_lock); + __output = __r(__output, __result); + omp_unset_lock(&__output_lock); } - delete[] __job; + delete[] __job; - // Points to last element processed (needed as return value for - // some algorithms like transform) - __f._M_finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform) + __f._M_finish_iterator = __begin + __length; - omp_destroy_lock(&__output_lock); + omp_destroy_lock(&__output_lock); - return __op; - } + return __op; + } } // end namespace #endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */ diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream index 9fc693cb3c3..d6241ba39a2 100644 --- a/libstdc++-v3/include/std/ostream +++ b/libstdc++-v3/include/std/ostream @@ -533,8 +533,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std) * * This manipulator is often mistakenly used when a simple newline is * desired, leading to poor buffering performance. See - * http://gcc.gnu.org/onlinedocs/libstdc++/27_io/howto.html#2 for more - * on this subject. + * http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt11ch25s02.html + * for more on this subject. */ template<typename _CharT, typename _Traits> inline basic_ostream<_CharT, _Traits>& diff --git a/libstdc++-v3/testsuite/25_algorithms/fill/5.cc b/libstdc++-v3/testsuite/25_algorithms/fill_n/1.cc index edabe1fcc29..edabe1fcc29 100644 --- a/libstdc++-v3/testsuite/25_algorithms/fill/5.cc +++ b/libstdc++-v3/testsuite/25_algorithms/fill_n/1.cc diff --git a/lto-plugin/ChangeLog b/lto-plugin/ChangeLog index 3342979bc6e..27713cad3d9 100644 --- a/lto-plugin/ChangeLog +++ b/lto-plugin/ChangeLog @@ -1,3 +1,24 @@ +2009-11-05 Rafael Avila de Espindola <espindola@google.com> + + * lto-plugin.c (temp_obj_dir_name): Remove. + (arguments_file_name): New. + (free_2): Free arguments_file_name instead of temp_obj_dir_name. + (exec_lto_wrapper): Create arguments file with make_temp_file. + (cleanup_handler): Don't remove the temporary directory. Remove the + arguments file. + (onload): Don't create the temporary directory. 
+
+2009-11-04  Richard Guenther  <rguenther@suse.de>
+	    Rafael Avila de Espindola  <espindola@google.com>
+
+	* lto-plugin.c (plugin_file_info): Remove temp field.
+	(cleanup_handler): Don't delete temporary objects.
+	(claim_file_handler): Don't create temporary objects.
+
+2009-11-04  Rafael Avila de Espindola  <espindola@google.com>
+
+	* lto-plugin.c (cleanup_handler): Don't cleanup if debugging.
+
 2009-10-30  Rafael Avila de Espindola  <espindola@google.com>
 
 	PR41871
diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
index c92ac06cf2d..e8e88cbb3c7 100644
--- a/lto-plugin/lto-plugin.c
+++ b/lto-plugin/lto-plugin.c
@@ -37,7 +37,6 @@ along with this program; see the file COPYING3.  If not see
 #include <stdlib.h>
 #include <stdio.h>
 #include <inttypes.h>
-#include <ar.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -70,11 +69,10 @@ struct plugin_file_info
   char *name;
   void *handle;
   struct plugin_symtab symtab;
-  unsigned char temp;
 };
 
-static char *temp_obj_dir_name;
+static char *arguments_file_name;
 
 static ld_plugin_register_claim_file register_claim_file;
 static ld_plugin_add_symbols add_symbols;
 static ld_plugin_register_all_symbols_read register_all_symbols_read;
@@ -293,8 +291,9 @@ free_2 (void)
   claimed_files = NULL;
   num_claimed_files = 0;
 
-  free (temp_obj_dir_name);
-  temp_obj_dir_name = NULL;
+  if (arguments_file_name)
+    free (arguments_file_name);
+  arguments_file_name = NULL;
 
   if (resolution_file)
     {
@@ -376,7 +375,6 @@ exec_lto_wrapper (char *argv[])
   int t;
   int status;
   char *at_args;
-  char *args_name;
   FILE *args;
   FILE *wrapper_output;
   char *new_argv[3];
@@ -384,11 +382,11 @@ exec_lto_wrapper (char *argv[])
   const char *errmsg;
 
   /* Write argv to a file to avoid a command line that is too long. */
-  t = asprintf (&at_args, "@%s/arguments", temp_obj_dir_name);
-  check (t >= 0, LDPL_FATAL, "asprintf failed");
+  arguments_file_name = make_temp_file ("");
+  check (arguments_file_name, LDPL_FATAL,
+         "Failed to generate a temporary file name");
 
-  args_name = at_args + 1;
-  args = fopen (args_name, "w");
+  args = fopen (arguments_file_name, "w");
   check (args, LDPL_FATAL, "could not open arguments file");
 
   t = writeargv (&argv[1], args);
@@ -396,6 +394,9 @@ exec_lto_wrapper (char *argv[])
   t = fclose (args);
   check (t == 0, LDPL_FATAL, "could not close arguments file");
 
+  at_args = concat ("@", arguments_file_name, NULL);
+  check (at_args, LDPL_FATAL, "could not allocate");
+
   new_argv[0] = argv[0];
   new_argv[1] = at_args;
   new_argv[2] = NULL;
@@ -428,8 +429,6 @@ exec_lto_wrapper (char *argv[])
 
   pex_free (pex);
 
-  t = unlink (args_name);
-  check (t == 0, LDPL_FATAL, "could not unlink arguments file");
   free (at_args);
 }
 
@@ -513,33 +512,15 @@ static enum ld_plugin_status
 cleanup_handler (void)
 {
   int t;
-  unsigned i;
-  char *arguments;
-  struct stat buf;
 
-  for (i = 0; i < num_claimed_files; i++)
-    {
-      struct plugin_file_info *info = &claimed_files[i];
-      if (info->temp)
-	{
-	  t = unlink (info->name);
-	  check (t == 0, LDPL_FATAL, "could not unlink temporary file");
-	}
-    }
+  if (debug)
+    return LDPS_OK;
 
-  /* If we are being called from an error handler, it is possible
-     that the arguments file is still exists. */
-  t = asprintf (&arguments, "%s/arguments", temp_obj_dir_name);
-  check (t >= 0, LDPL_FATAL, "asprintf failed");
-  if (stat(arguments, &buf) == 0)
+  if (arguments_file_name)
     {
-      t = unlink (arguments);
+      t = unlink (arguments_file_name);
       check (t == 0, LDPL_FATAL, "could not unlink arguments file");
     }
-  free (arguments);
-
-  t = rmdir (temp_obj_dir_name);
-  check (t == 0, LDPL_FATAL, "could not remove temporary directory");
 
   free_2 ();
   return LDPS_OK;
@@ -555,49 +536,39 @@ claim_file_handler (const struct ld_plugin_input_file *file, int *claimed)
   Elf *elf;
   struct plugin_file_info lto_file;
   Elf_Data *symtab;
-  int lto_file_fd;
 
   if (file->offset != 0)
     {
-      /* FIXME lto: lto1 should know how to handle archives. */
-      int fd;
-      off_t size = file->filesize;
-      off_t offset;
-
-      static int objnum = 0;
       char *objname;
-      int t = asprintf (&objname, "%s/obj%d.o",
-			temp_obj_dir_name, objnum);
+      Elf *archive;
+      off_t offset;
+      /* We pass the offset of the actual file, not the archive header. */
+      int t = asprintf (&objname, "%s@%" PRId64, file->name,
+			(int64_t) file->offset);
       check (t >= 0, LDPL_FATAL, "asprintf failed");
-      objnum++;
-
-      fd = open (objname, O_RDWR | O_CREAT, 0666);
-      check (fd > 0, LDPL_FATAL, "could not open/create temporary file");
-      offset = lseek (file->fd, file->offset, SEEK_SET);
-      check (offset == file->offset, LDPL_FATAL, "could not seek");
-      while (size > 0)
-	{
-	  ssize_t r, written;
-	  char buf[1000];
-	  off_t s = sizeof (buf) < size ? sizeof (buf) : size;
-	  r = read (file->fd, buf, s);
-	  written = write (fd, buf, r);
-	  check (written == r, LDPL_FATAL, "could not write to temporary file");
-	  size -= r;
-	}
       lto_file.name = objname;
-      lto_file_fd = fd;
-      lto_file.handle = file->handle;
-      lto_file.temp = 1;
+
+      archive = elf_begin (file->fd, ELF_C_READ, NULL);
+      check (elf_kind (archive) == ELF_K_AR, LDPL_FATAL,
+	     "Not an archive and offset not 0");
+
+      /* elf_rand expects the offset to point to the ar header, not the
+	 object itself. Subtract the size of the ar header (60 bytes).
+	 We don't use sizeof (struct ar_hdr) to avoid including ar.h */
+
+      offset = file->offset - 60;
+      check (offset == elf_rand (archive, offset), LDPL_FATAL,
+	     "could not seek in archive");
+      elf = elf_begin (file->fd, ELF_C_READ, archive);
+      check (elf != NULL, LDPL_FATAL, "could not find archive member");
+      elf_end (archive);
     }
   else
     {
       lto_file.name = strdup (file->name);
-      lto_file_fd = file->fd;
-      lto_file.handle = file->handle;
-      lto_file.temp = 0;
+      elf = elf_begin (file->fd, ELF_C_READ, NULL);
     }
-  elf = elf_begin (lto_file_fd, ELF_C_READ, NULL);
+  lto_file.handle = file->handle;
 
   *claimed = 0;
 
@@ -624,20 +595,12 @@ claim_file_handler (const struct ld_plugin_input_file *file, int *claimed)
   goto cleanup;
 
  err:
-  if (file->offset != 0)
-    {
-      int t = unlink (lto_file.name);
-      check (t == 0, LDPL_FATAL, "could not unlink file");
-    }
   free (lto_file.name);
 
  cleanup:
   if (elf)
     elf_end (elf);
 
-  if (file->offset != 0)
-    close (lto_file_fd);
-
   return LDPS_OK;
 }
 
@@ -679,7 +642,6 @@ onload (struct ld_plugin_tv *tv)
 {
   struct ld_plugin_tv *p;
   enum ld_plugin_status status;
-  char *t;
 
   unsigned version = elf_version (EV_CURRENT);
   check (version != EV_NONE, LDPL_FATAL, "invalid ELF version");
@@ -743,8 +705,5 @@ onload (struct ld_plugin_tv *tv)
 	     "could not register the all_symbols_read callback");
     }
 
-  temp_obj_dir_name = strdup ("tmp_objectsXXXXXX");
-  t = mkdtemp (temp_obj_dir_name);
-  assert (t == temp_obj_dir_name);
   return LDPS_OK;
 }
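
For reference, a minimal usage sketch for the parallel-mode algorithms whose sources are reindented above (set_operations.h, sort.h, tags.h). It is not part of this commit: compiling with -fopenmp and calling the __gnu_parallel entry point directly forces the parallel sort, while building the whole translation unit with -D_GLIBCXX_PARALLEL lets plain std:: calls dispatch to __parallel_set_union and friends; the tag-overload call shown in the comment is an assumption based on the tags.h interface quoted above.

    // Sketch only: g++ -fopenmp -O2 demo.cc   (add -D_GLIBCXX_PARALLEL to have
    // plain std::set_union dispatch to __gnu_parallel::__parallel_set_union).
    #include <algorithm>
    #include <cstdio>
    #include <vector>
    #include <parallel/algorithm>   // explicit __gnu_parallel entry points

    int main()
    {
      std::vector<int> a(1 << 20), b(1 << 20);
      for (int i = 0; i < (1 << 20); ++i)
        { a[i] = 2 * i; b[i] = 3 * i; }     // both inputs sorted, as required

      std::vector<int> out(a.size() + b.size());
      std::vector<int>::iterator end =
        std::set_union(a.begin(), a.end(), b.begin(), b.end(), out.begin());
      out.erase(end, out.end());

      // Explicitly force the parallel sort; a tag from tags.h is assumed to be
      // accepted as a third argument to pick a variant, e.g.
      //   __gnu_parallel::sort(out.begin(), out.end(),
      //                        __gnu_parallel::multiway_mergesort_exact_tag(4));
      __gnu_parallel::sort(out.begin(), out.end());

      std::printf("%zu elements in the union\n", out.size());
      return 0;
    }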
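
The unique_copy.h hunk above keeps the algorithm it reformats: each thread counts, for its block, the elements that differ from their predecessor; the per-block counts are summed into output offsets; a second pass copies the survivors. Below is a simplified sketch of that two-pass scheme (OpenMP, int data, hypothetical helper name), not the library code itself:

    // Sketch of the count/prefix-sum/copy scheme behind __parallel_unique_copy;
    // compile with: g++ -fopenmp unique2pass.cc
    #include <cstddef>
    #include <cstdio>
    #include <omp.h>
    #include <vector>

    // Hypothetical helper, not a library function.
    std::size_t
    unique_copy_two_pass(const std::vector<int>& in, std::vector<int>& out)
    {
      const std::size_t n = in.size();
      if (n == 0)
        return 0;
      out.resize(n);

      const int nthreads = omp_get_max_threads();
      std::vector<std::size_t> count(nthreads + 1, 0);

    #pragma omp parallel num_threads(nthreads)
      {
        const int iam = omp_get_thread_num();
        const std::size_t lo = n * iam / nthreads;
        const std::size_t hi = n * (iam + 1) / nthreads;

        // Pass 1: element i survives iff i == 0 or in[i] != in[i - 1].
        std::size_t c = 0;
        for (std::size_t i = lo; i < hi; ++i)
          if (i == 0 || in[i] != in[i - 1])
            ++c;
        count[iam + 1] = c;

    #pragma omp barrier               // all counts written
    #pragma omp single
        for (int t = 1; t <= nthreads; ++t)
          count[t] += count[t - 1];   // prefix sum -> per-block output offsets
        // (implicit barrier at the end of the single)

        // Pass 2: copy survivors to this block's precomputed offset.
        std::size_t pos = count[iam];
        for (std::size_t i = lo; i < hi; ++i)
          if (i == 0 || in[i] != in[i - 1])
            out[pos++] = in[i];
      }

      out.resize(count[nthreads]);
      return count[nthreads];
    }

    int main()
    {
      int raw[] = { 1, 1, 2, 2, 2, 3, 4, 4, 5 };
      std::vector<int> v(raw, raw + 9), r;
      std::printf("%zu unique\n", unique_copy_two_pass(v, r));  // prints: 5 unique
    }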
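
The inner loop of workstealing.h reserves work with a fetch-and-add on the owning job's _M_first, and an idle thread steals half of a random victim's remaining load the same way. The sketch below keeps only the fetch-and-add reservation, collapsed to a single shared range (so it behaves like a dynamic schedule, not real stealing); C++11 atomics stand in for __gnu_parallel::__fetch_and_add:

    // Sketch of fetch-and-add chunk reservation; g++ -std=c++11 -pthread fa.cc
    #include <algorithm>
    #include <atomic>
    #include <cstddef>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main()
    {
      const long long length = 1000000, chunk = 100; // cf. workstealing_chunk_size
      std::atomic<long long> first(0);               // cf. _Job::_M_first
      std::atomic<long long> output(0);              // cf. the __output_lock'ed sum

      auto worker = [&]() {
        long long local = 0;                         // per-thread partial result
        for (;;)
          {
            // Reserve a chunk; cf. __fetch_and_add (&_M_first, __chunk_size).
            long long begin = first.fetch_add(chunk);
            if (begin >= length)
              break;
            long long end = std::min(begin + chunk, length);
            for (long long i = begin; i < end; ++i)
              local += i;                            // "process" element i
          }
        output.fetch_add(local);                     // publish the local reduction
      };

      unsigned nthreads = std::thread::hardware_concurrency();
      if (nthreads == 0)
        nthreads = 4;
      std::vector<std::thread> pool;
      for (unsigned t = 0; t < nthreads; ++t)
        pool.emplace_back(worker);
      for (std::size_t t = 0; t < pool.size(); ++t)
        pool[t].join();

      std::printf("%lld\n", output.load());          // 499999500000
    }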
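
The rewritten claim_file_handler above reads archive members in place instead of copying them out: elf_rand positions the archive at the member's ar header (hence file->offset - 60, the header size), and elf_begin with the archive as the reference argument opens that member. Here is a sketch of the same libelf access pattern, walking every member with the standard elf_next idiom (link with -lelf); the random-access variant the plugin uses is noted in the comment:

    // Sketch: list the members of an ar archive with libelf; g++ lsar.cc -lelf
    #include <cassert>
    #include <cstdio>
    #include <fcntl.h>
    #include <libelf.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
      assert(argc == 2);
      assert(elf_version(EV_CURRENT) != EV_NONE);

      int fd = open(argv[1], O_RDONLY);
      assert(fd >= 0);

      Elf *archive = elf_begin(fd, ELF_C_READ, NULL);
      assert(archive != NULL && elf_kind(archive) == ELF_K_AR);

      // Sequential walk. For random access, as in claim_file_handler above, one
      // would instead call elf_rand(archive, header_offset) once and then a
      // single elf_begin(fd, ELF_C_READ, archive).
      Elf_Cmd cmd = ELF_C_READ;
      Elf *member;
      while ((member = elf_begin(fd, cmd, archive)) != NULL)
        {
          Elf_Arhdr *hdr = elf_getarhdr(member);
          assert(hdr != NULL);
          std::printf("%-16s %s\n", hdr->ar_name,
                      elf_kind(member) == ELF_K_ELF ? "ELF object" : "other");
          cmd = elf_next(member);   // advances the archive to the next member
          elf_end(member);
        }

      elf_end(archive);
      close(fd);
      return 0;
    }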