diff options
Diffstat (limited to 'openmp/runtime/src/kmp_collapse.h')
-rw-r--r-- | openmp/runtime/src/kmp_collapse.h | 240 |
1 file changed, 240 insertions, 0 deletions
diff --git a/openmp/runtime/src/kmp_collapse.h b/openmp/runtime/src/kmp_collapse.h new file mode 100644 index 000000000000..e4870185645d --- /dev/null +++ b/openmp/runtime/src/kmp_collapse.h @@ -0,0 +1,240 @@ +/* + * kmp_collapse.h -- header for loop collapse feature + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef KMP_COLLAPSE_H +#define KMP_COLLAPSE_H + +#include <type_traits> + +// Type of the index into the loop nest structures +// (with values from 0 to less than n from collapse(n)) +typedef kmp_int32 kmp_index_t; + +// Type for combined loop nest space IV: +typedef kmp_uint64 kmp_loop_nest_iv_t; + +// Loop has <, <=, etc. as a comparison: +enum comparison_t : kmp_int32 { + comp_less_or_eq = 0, + comp_greater_or_eq = 1, + comp_not_eq = 2, + comp_less = 3, + comp_greater = 4 +}; + +// Type of loop IV. +// Type of bounds and step, after usual promotions +// are a subset of these types (32 & 64 only): +enum loop_type_t : kmp_int32 { + loop_type_uint8 = 0, + loop_type_int8 = 1, + loop_type_uint16 = 2, + loop_type_int16 = 3, + loop_type_uint32 = 4, + loop_type_int32 = 5, + loop_type_uint64 = 6, + loop_type_int64 = 7 +}; + +/*! + @ingroup WORK_SHARING + * Describes the structure for rectangular nested loops. + */ +template <typename T> struct bounds_infoXX_template { + + // typedef typename traits_t<T>::unsigned_t UT; + typedef typename traits_t<T>::signed_t ST; + + loop_type_t loop_type; // The differentiator + loop_type_t loop_iv_type; + comparison_t comparison; + // outer_iv should be 0 (or any other less then number of dimentions) + // if loop doesn't depend on it (lb1 and ub1 will be 0). 
+ // This way we can do multiplication without a check. + kmp_index_t outer_iv; + + // unions to keep the size constant: + union { + T lb0; + kmp_uint64 lb0_u64; // real type can be signed + }; + + union { + T lb1; + kmp_uint64 lb1_u64; // real type can be signed + }; + + union { + T ub0; + kmp_uint64 ub0_u64; // real type can be signed + }; + + union { + T ub1; + kmp_uint64 ub1_u64; // real type can be signed + }; + + union { + ST step; // signed even if bounds type is unsigned + kmp_int64 step_64; // signed + }; + + kmp_loop_nest_iv_t trip_count; +}; + +/*! + @ingroup WORK_SHARING + * Interface struct for rectangular nested loops. + * Same size as bounds_infoXX_template. + */ +struct bounds_info_t { + + loop_type_t loop_type; // The differentiator + loop_type_t loop_iv_type; + comparison_t comparison; + // outer_iv should be 0 (or any other less then number of dimentions) + // if loop doesn't depend on it (lb1 and ub1 will be 0). + // This way we can do multiplication without a check. + kmp_index_t outer_iv; + + kmp_uint64 lb0_u64; // real type can be signed + kmp_uint64 lb1_u64; // real type can be signed + kmp_uint64 ub0_u64; // real type can be signed + kmp_uint64 ub1_u64; // real type can be signed + kmp_int64 step_64; // signed + + // This is internal, but it's the only internal thing we need + // in rectangular case, so let's expose it here: + kmp_loop_nest_iv_t trip_count; +}; + +//------------------------------------------------------------------------- +// Additional types for internal representation: + +// Array for a point in the loop space, in the original space. +// It's represented in kmp_uint64, but each dimention is calculated in +// that loop IV type. Also dimentions have to be converted to those types +// when used in generated code. +typedef kmp_uint64* kmp_point_t; + +// Array: Number of loop iterations on each nesting level to achieve some point, +// in expanded space or in original space. 
+// OMPTODO: move from using iterations to using offsets (iterations multiplied +// by steps). For those we need to be careful with the types, as step can be +// negative, but it'll remove multiplications and divisions in several places. +typedef kmp_loop_nest_iv_t* kmp_iterations_t; + +// Internal struct with additional info: +template <typename T> struct bounds_info_internalXX_template { + + // OMPTODO: should span have type T or should it better be + // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than + // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to + // use big_span_t, if it can be resolved at compile time. + typedef + typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> + big_span_t; + + // typedef typename big_span_t span_t; + typedef T span_t; + + bounds_infoXX_template<T> b; // possibly adjusted bounds + + // Leaving this as a union in case we'll switch to span_t with different sizes + // (depending on T) + union { + // Smallest possible value of iv (may be smaller than actually possible) + span_t span_smallest; + kmp_uint64 span_smallest_u64; + }; + + // Leaving this as a union in case we'll switch to span_t with different sizes + // (depending on T) + union { + // Biggest possible value of iv (may be bigger than actually possible) + span_t span_biggest; + kmp_uint64 span_biggest_u64; + }; + + // Did we adjust loop bounds (not counting canonicalization)? + bool loop_bounds_adjusted; +}; + +// Internal struct with additional info: +struct bounds_info_internal_t { + + bounds_info_t b; // possibly adjusted bounds + + // Smallest possible value of iv (may be smaller than actually possible) + kmp_uint64 span_smallest_u64; + + // Biggest possible value of iv (may be bigger than actually possible) + kmp_uint64 span_biggest_u64; + + // Did we adjust loop bounds (not counting canonicalization)? 
+ bool loop_bounds_adjusted; +}; + +//----------APIs for rectangular loop nests-------------------------------- + +// Canonicalize loop nest and calculate overall trip count. +// "bounds_nest" has to be allocated per thread. +// API will modify original bounds_nest array to bring it to a canonical form +// (only <= and >=, no !=, <, >). If the original loop nest was already in a +// canonical form there will be no changes to bounds in bounds_nest array +// (only trip counts will be calculated). +// Returns trip count of overall space. +extern "C" kmp_loop_nest_iv_t +__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, + /*in/out*/ bounds_info_t *original_bounds_nest, + kmp_index_t n); + +// Calculate old induction variables corresponding to overall new_iv. +// Note: original IV will be returned as if it had kmp_uint64 type, +// will have to be converted to original type in user code. +// Note: trip counts should be already calculated by +// __kmpc_process_loop_nest_rectang. +// OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline +// that into user code. +extern "C" void +__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, + const bounds_info_t *original_bounds_nest, + /*out*/ kmp_uint64 *original_ivs, + kmp_index_t n); + +//----------Init API for non-rectangular loops-------------------------------- + +// Init API for collapsed loops (static, no chunks defined). +// "bounds_nest" has to be allocated per thread. +// API will modify original bounds_nest array to bring it to a canonical form +// (only <= and >=, no !=, <, >). If the original loop nest was already in a +// canonical form there will be no changes to bounds in bounds_nest array +// (only trip counts will be calculated). 
Internally API will expand the space +// to parallelogram/parallelepiped, calculate total, calculate bounds for the +// chunks in terms of the new IV, re-calc them in terms of old IVs (especially +// important on the left side, to hit the lower bounds and not step over), and +// pick the correct chunk for this thread (so it will calculate chunks up to the +// needed one). It could be optimized to calculate just this chunk, potentially +// a bit less well distributed among threads. It is designed to make sure that +// threads will receive predictable chunks, deterministically (so that next nest +// of loops with similar characteristics will get exactly same chunks on same +// threads). +// Current contract: chunk_bounds_nest has only lb0 and ub0, +// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). +extern "C" kmp_int32 +__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, + /*in/out*/ bounds_info_t *original_bounds_nest, + /*out*/ bounds_info_t *chunk_bounds_nest, + kmp_index_t n, + /*out*/ kmp_int32 *plastiter); + +#endif // KMP_COLLAPSE_H |