Diffstat (limited to 'openmp/runtime/src/kmp_collapse.h')
-rw-r--r--  openmp/runtime/src/kmp_collapse.h  240
1 file changed, 240 insertions, 0 deletions
diff --git a/openmp/runtime/src/kmp_collapse.h b/openmp/runtime/src/kmp_collapse.h
new file mode 100644
index 000000000000..e4870185645d
--- /dev/null
+++ b/openmp/runtime/src/kmp_collapse.h
@@ -0,0 +1,240 @@
+/*
+ * kmp_collapse.h -- header for loop collapse feature
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_COLLAPSE_H
+#define KMP_COLLAPSE_H
+
+#include <type_traits>
+
+// Type of the index into the loop nest structures
+// (with values from 0 up to n-1, where n comes from collapse(n)):
+typedef kmp_int32 kmp_index_t;
+
+// Type for combined loop nest space IV:
+typedef kmp_uint64 kmp_loop_nest_iv_t;
+
+// Comparison used in the loop condition (<, <=, etc.):
+enum comparison_t : kmp_int32 {
+ comp_less_or_eq = 0,
+ comp_greater_or_eq = 1,
+ comp_not_eq = 2,
+ comp_less = 3,
+ comp_greater = 4
+};
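+
+// Note: canonicalization (performed by the APIs below) leaves only
+// comp_less_or_eq and comp_greater_or_eq; e.g. with step 1,
+// "i < 100" (comp_less) becomes "i <= 99" (comp_less_or_eq).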
+
+// Type of the loop IV.
+// The types of bounds and step, after the usual promotions,
+// are a subset of these types (32- and 64-bit only):
+enum loop_type_t : kmp_int32 {
+ loop_type_uint8 = 0,
+ loop_type_int8 = 1,
+ loop_type_uint16 = 2,
+ loop_type_int16 = 3,
+ loop_type_uint32 = 4,
+ loop_type_int32 = 5,
+ loop_type_uint64 = 6,
+ loop_type_int64 = 7
+};
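+
+/*
+  Illustrative sketch (hypothetical helper, not part of this interface):
+  one way a front end could map an IV's C++ type to a loop_type_t tag,
+  using the <type_traits> include above:
+
+    template <typename T> constexpr loop_type_t loop_type_of() {
+      return std::is_signed<T>::value
+                 ? (sizeof(T) == 1   ? loop_type_int8
+                    : sizeof(T) == 2 ? loop_type_int16
+                    : sizeof(T) == 4 ? loop_type_int32
+                                     : loop_type_int64)
+                 : (sizeof(T) == 1   ? loop_type_uint8
+                    : sizeof(T) == 2 ? loop_type_uint16
+                    : sizeof(T) == 4 ? loop_type_uint32
+                                     : loop_type_uint64);
+    }
+    static_assert(loop_type_of<unsigned short>() == loop_type_uint16, "");
+*/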
+
+/*!
+ @ingroup WORK_SHARING
+ * Describes the structure for rectangular nested loops.
+ */
+template <typename T> struct bounds_infoXX_template {
+
+ // typedef typename traits_t<T>::unsigned_t UT;
+ typedef typename traits_t<T>::signed_t ST;
+
+ loop_type_t loop_type; // The differentiator
+ loop_type_t loop_iv_type;
+ comparison_t comparison;
+ // outer_iv should be 0 (or any other value less than the number of
+ // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
+ // This way we can do the multiplication without a check.
+ kmp_index_t outer_iv;
+
+ // unions to keep the size constant:
+ union {
+ T lb0;
+ kmp_uint64 lb0_u64; // real type can be signed
+ };
+
+ union {
+ T lb1;
+ kmp_uint64 lb1_u64; // real type can be signed
+ };
+
+ union {
+ T ub0;
+ kmp_uint64 ub0_u64; // real type can be signed
+ };
+
+ union {
+ T ub1;
+ kmp_uint64 ub1_u64; // real type can be signed
+ };
+
+ union {
+ ST step; // signed even if bounds type is unsigned
+ kmp_int64 step_64; // signed
+ };
+
+ kmp_loop_nest_iv_t trip_count;
+};
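+
+/*
+  Illustrative encoding sketch (hypothetical; assumes, per the outer_iv
+  comment above, that the effective lower bound is computed as
+  lb0 + lb1 * <value of the IV at nesting level outer_iv>, and the upper
+  bound as ub0 + ub1 * <same>). The triangular nest
+
+    for (kmp_int32 i = 0; i < n; ++i)
+      for (kmp_int32 j = i; j < n; ++j)
+
+  could describe its inner loop as:
+
+    bounds_infoXX_template<kmp_int32> inner;
+    inner.loop_type = loop_type_int32;
+    inner.loop_iv_type = loop_type_int32;
+    inner.comparison = comp_less; // j < n
+    inner.outer_iv = 0;           // lower bound depends on loop 0 (i)
+    inner.lb0 = 0;
+    inner.lb1 = 1;                // lb = 0 + 1 * i
+    inner.ub0 = n;                // upper bound doesn't depend on i...
+    inner.ub1 = 0;                // ...so ub1 stays 0
+    inner.step = 1;
+*/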
+
+/*!
+ @ingroup WORK_SHARING
+ * Interface struct for rectangular nested loops.
+ * Same size as bounds_infoXX_template.
+ */
+struct bounds_info_t {
+
+ loop_type_t loop_type; // The differentiator
+ loop_type_t loop_iv_type;
+ comparison_t comparison;
+ // outer_iv should be 0 (or any other value less than the number of
+ // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
+ // This way we can do the multiplication without a check.
+ kmp_index_t outer_iv;
+
+ kmp_uint64 lb0_u64; // real type can be signed
+ kmp_uint64 lb1_u64; // real type can be signed
+ kmp_uint64 ub0_u64; // real type can be signed
+ kmp_uint64 ub1_u64; // real type can be signed
+ kmp_int64 step_64; // signed
+
+ // This is internal, but it's the only internal thing we need
+ // in the rectangular case, so let's expose it here:
+ kmp_loop_nest_iv_t trip_count;
+};
+
+//-------------------------------------------------------------------------
+// Additional types for internal representation:
+
+// Array for a point in the loop space, in the original space.
+// It's represented in kmp_uint64, but each dimension is calculated in
+// that loop's IV type. Also, dimensions have to be converted to those types
+// when used in generated code.
+typedef kmp_uint64* kmp_point_t;
+
+// Array: the number of loop iterations on each nesting level needed to reach
+// some point, in the expanded space or in the original space.
+// OMPTODO: move from using iterations to using offsets (iterations multiplied
+// by steps). For those we need to be careful with the types, as step can be
+// negative, but it'll remove multiplications and divisions in several places.
+typedef kmp_loop_nest_iv_t* kmp_iterations_t;
+
+// Internal struct with additional info:
+template <typename T> struct bounds_info_internalXX_template {
+
+ // OMPTODO: should span have type T, or should it rather be
+ // kmp_uint64/kmp_int64 depending on the sign of T? (If kmp_uint64/kmp_int64,
+ // then the updated bounds should probably also be kmp_uint64/kmp_int64.)
+ // I'd like to use big_span_t, if it can be resolved at compile time.
+ typedef
+ typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
+ big_span_t;
+
+ // typedef typename big_span_t span_t;
+ typedef T span_t;
+
+ bounds_infoXX_template<T> b; // possibly adjusted bounds
+
+ // Leaving this as a union in case we later switch to a span_t with
+ // different sizes (depending on T):
+ union {
+ // Smallest possible value of iv (may be smaller than actually possible)
+ span_t span_smallest;
+ kmp_uint64 span_smallest_u64;
+ };
+
+ // Leaving this as a union in case we later switch to a span_t with
+ // different sizes (depending on T):
+ union {
+ // Biggest possible value of iv (may be bigger than actually possible)
+ span_t span_biggest;
+ kmp_uint64 span_biggest_u64;
+ };
+
+ // Did we adjust loop bounds (not counting canonicalization)?
+ bool loop_bounds_adjusted;
+};
+
+// Internal struct with additional info:
+struct bounds_info_internal_t {
+
+ bounds_info_t b; // possibly adjusted bounds
+
+ // Smallest possible value of iv (may be smaller than actually possible)
+ kmp_uint64 span_smallest_u64;
+
+ // Biggest possible value of iv (may be bigger than actually possible)
+ kmp_uint64 span_biggest_u64;
+
+ // Did we adjust loop bounds (not counting canonicalization)?
+ bool loop_bounds_adjusted;
+};
+
+//----------APIs for rectangular loop nests--------------------------------
+
+// Canonicalize the loop nest and calculate the overall trip count.
+// "bounds_nest" has to be allocated per thread.
+// The API will modify the original bounds_nest array to bring it to a
+// canonical form (only <= and >=, no !=, <, >). If the original loop nest
+// was already in a canonical form, there will be no changes to the bounds
+// in the bounds_nest array (only the trip counts will be calculated).
+// Returns the trip count of the overall space.
+extern "C" kmp_loop_nest_iv_t
+__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
+ /*in/out*/ bounds_info_t *original_bounds_nest,
+ kmp_index_t n);
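+
+/*
+  Illustrative usage sketch (hypothetical; "loc", "gtid" and the concrete
+  numbers are made up for the example). Encoding the rectangular nest
+
+    for (kmp_int32 i = 0; i < 100; ++i)
+      for (kmp_int32 j = 0; j < 50; ++j)
+
+  and canonicalizing it:
+
+    bounds_info_t nest[2];
+    nest[0].loop_type = loop_type_int32;
+    nest[0].loop_iv_type = loop_type_int32;
+    nest[0].comparison = comp_less; // i < 100
+    nest[0].outer_iv = 0;
+    nest[0].lb0_u64 = 0;
+    nest[0].lb1_u64 = 0; // rectangular: no dependence on an outer IV
+    nest[0].ub0_u64 = 100;
+    nest[0].ub1_u64 = 0;
+    nest[0].step_64 = 1;
+    // ... nest[1] filled the same way for "j < 50" ...
+
+    kmp_loop_nest_iv_t total =
+        __kmpc_process_loop_nest_rectang(loc, gtid, nest, 2);
+    // Now total == 5000, both comparisons are canonicalized to
+    // comp_less_or_eq (ub0 becomes 99 and 49 respectively), and each
+    // nest[k].trip_count is filled in (100 and 50).
+*/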
+
+// Calculate the old induction variables corresponding to the overall new_iv.
+// Note: original IVs will be returned as if they had kmp_uint64 type and
+// will have to be converted to the original types in user code.
+// Note: trip counts should already have been calculated by
+// __kmpc_process_loop_nest_rectang.
+// OMPTODO: special-case nests of 2 or 3 loops, if it turns out to be
+// possible to inline that into user code.
+extern "C" void
+__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
+ const bounds_info_t *original_bounds_nest,
+ /*out*/ kmp_uint64 *original_ivs,
+ kmp_index_t n);
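+
+/*
+  Illustrative sketch (hypothetical driver, continuing the example above):
+  walking the combined space and recovering the original IVs on each step:
+
+    kmp_uint64 ivs[2];
+    for (kmp_loop_nest_iv_t iv = 0; iv < total; ++iv) {
+      __kmpc_calc_original_ivs_rectang(loc, iv, nest, ivs, 2); // ivs is out
+      // Returned as kmp_uint64; convert back to the original IV types:
+      kmp_int32 i = (kmp_int32)ivs[0];
+      kmp_int32 j = (kmp_int32)ivs[1];
+      // ... loop body using i and j ...
+    }
+*/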
+
+//----------Init API for non-rectangular loops--------------------------------
+
+// Init API for collapsed loops (static, no chunks defined).
+// "bounds_nest" has to be allocated per thread.
+// The API will modify the original bounds_nest array to bring it to a
+// canonical form (only <= and >=, no !=, <, >). If the original loop nest
+// was already in a canonical form, there will be no changes to the bounds
+// in the bounds_nest array (only the trip counts will be calculated).
+// Internally the API will expand the space to a
+// parallelogram/parallelepiped, calculate the total trip count, calculate
+// the bounds of the chunks in terms of the new IV, and re-calculate them in
+// terms of the old IVs (especially important on the left side, to hit the
+// lower bounds and not step over), then pick the correct chunk for this
+// thread (so it will calculate chunks up to the needed one). It could be
+// optimized to calculate just this chunk, potentially at the cost of a
+// slightly less even distribution among threads. It is designed to make
+// sure that threads receive predictable chunks, deterministically (so that
+// the next nest of loops with similar characteristics will get exactly the
+// same chunks on the same threads).
+// Current contract: chunk_bounds_nest has only lb0 and ub0;
+// lb1 and ub1 are set to 0 and can be ignored. (This may change in the
+// future.)
+extern "C" kmp_int32
+__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
+ /*in/out*/ bounds_info_t *original_bounds_nest,
+ /*out*/ bounds_info_t *chunk_bounds_nest,
+ kmp_index_t n,
+ /*out*/ kmp_int32 *plastiter);
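+
+/*
+  Illustrative codegen sketch for the simplest case, n == 1, given a
+  one-element bounds_info_t nest prepared as in the rectangular example
+  above (hypothetical; assumes a nonzero return value means the calling
+  thread received a chunk, and that *plastiter is set when this thread
+  gets the last iteration):
+
+    bounds_info_t chunk[1];
+    kmp_int32 plastiter = 0;
+    if (__kmpc_for_collapsed_init(loc, gtid, nest, chunk, 1, &plastiter)) {
+      // Per the contract above, only lb0 and ub0 of the chunk are set,
+      // and the bounds are canonical (<=), so for an int32 IV:
+      for (kmp_int32 i = (kmp_int32)chunk[0].lb0_u64;
+           i <= (kmp_int32)chunk[0].ub0_u64; ++i) {
+        // ... loop body ...
+      }
+    }
+*/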
+
+#endif // KMP_COLLAPSE_H