summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpaveldyakov <70019067+paveldyakov@users.noreply.github.com>2021-03-03 10:10:38 +0300
committerGitHub <noreply@github.com>2021-03-03 10:10:38 +0300
commit39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425 (patch)
treef045494b1f7ae1d84022fbb8173e0572033aeb27
parent2e03e11e8d481a2d75fe924acbf16925642f303c (diff)
downloadllvm-39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425.tar.gz
[RNG] Add optimizations (#138)
[RNG] Add optimizations
-rw-r--r--include/oneapi/dpl/internal/random_impl/discard_block_engine.h60
-rw-r--r--include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h40
-rw-r--r--include/oneapi/dpl/internal/random_impl/normal_distribution.h128
-rw-r--r--include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h29
-rw-r--r--include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h32
-rw-r--r--include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h61
-rw-r--r--test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp96
7 files changed, 330 insertions, 116 deletions
diff --git a/include/oneapi/dpl/internal/random_impl/discard_block_engine.h b/include/oneapi/dpl/internal/random_impl/discard_block_engine.h
index eb891a71e5e6..c1b5ba8667ca 100644
--- a/include/oneapi/dpl/internal/random_impl/discard_block_engine.h
+++ b/include/oneapi/dpl/internal/random_impl/discard_block_engine.h
@@ -82,25 +82,42 @@ class discard_block_engine
if (!__num_to_skip)
return;
- for (; __num_to_skip > 0; --__num_to_skip)
- generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>();
+ if (__num_to_skip < (used_block - n_))
+ {
+ n_ += __num_to_skip;
+ engine_.discard(__num_to_skip);
+ }
+ else
+ {
+ unsigned long long __n_skip =
+ __num_to_skip + static_cast<unsigned long long>((__num_to_skip + n_) / used_block) *
+ static_cast<unsigned long long>(block_size - used_block);
+ // Check the overflow case
+ if (__n_skip >= __num_to_skip)
+ {
+ n_ = (__num_to_skip - (used_block - n_)) % used_block;
+ engine_.discard(__n_skip);
+ }
+ else
+ {
+ for (; __num_to_skip > 0; --__num_to_skip)
+ operator()();
+ }
+ }
}
// operator () returns bits of engine recurrence
result_type
operator()()
{
- result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>();
-
- return res;
+ return generate_internal<internal::type_traits_t<result_type>::num_elems>();
}
// operator () overload for result portion generation
result_type
- operator()(unsigned int __randoms_num)
+ operator()(unsigned int __random_nums)
{
- result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num);
- return res;
+ return generate_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums);
}
// Property function
@@ -154,25 +171,36 @@ class discard_block_engine
generate_internal()
{
result_type __res;
- for (int __i = 0; __i < _N; ++__i)
+ if (_N < (used_block - n_))
{
- __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>();
+ __res = engine_();
+ n_ += _N;
+ }
+ else
+ {
+ for (int __i = 0; __i < _N; ++__i)
+ {
+ __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>();
+ }
}
-
return __res;
}
template <int _N>
typename ::std::enable_if<(_N > 0), result_type>::type
- generate_internal(unsigned int __randoms_num)
+ generate_internal(unsigned int __random_nums)
{
- result_type __res;
- for (unsigned int __i = 0; __i < __randoms_num; ++__i)
+ if (__random_nums >= _N)
+ return operator()();
+
+ result_type __part_vec;
+
+ for (unsigned int __i = 0; __i < __random_nums; ++__i)
{
- __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>();
+ __part_vec[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>();
}
- return __res;
+ return __part_vec;
}
_Engine engine_;
diff --git a/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h b/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h
index 96efa8ea94da..d5a7d418fda0 100644
--- a/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h
+++ b/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h
@@ -75,9 +75,9 @@ class linear_congruential_engine
// Skipping sequence
if (__num_to_skip == 0)
return;
- constexpr bool flag = (increment == 0) && (modulus < ::std::numeric_limits<::std::uint32_t>::max()) &&
- (multiplier < ::std::numeric_limits<::std::uint32_t>::max());
- skip_seq<internal::type_traits_t<result_type>::num_elems, flag>(__num_to_skip);
+ constexpr bool __flag = (increment == 0) && (modulus < ::std::numeric_limits<::std::uint32_t>::max()) &&
+ (multiplier < ::std::numeric_limits<::std::uint32_t>::max());
+ skip_seq<internal::type_traits_t<result_type>::num_elems, __flag>(__num_to_skip);
}
// operator () returns bits of engine recurrence
@@ -94,9 +94,9 @@ class linear_congruential_engine
// operator () overload for result portion generation
result_type
- operator()(unsigned int __randoms_num)
+ operator()(unsigned int __random_nums)
{
- return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num);
+ return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums);
}
private:
@@ -143,7 +143,7 @@ class linear_congruential_engine
state_[0] = mod_scalar(state_[0]);
- for (int __i = 1u; __i < _N; __i++)
+ for (int __i = 1u; __i < _N; ++__i)
state_[__i] = mod_scalar(state_[__i - 1u]);
}
@@ -154,9 +154,7 @@ class linear_congruential_engine
::std::uint64_t __a2;
::std::uint64_t __mod = static_cast<::std::uint64_t>(modulus);
::std::uint64_t __a = static_cast<::std::uint64_t>(multiplier);
- scalar_type __r;
-
- __r = 1;
+ scalar_type __r = 1;
do
{
@@ -182,7 +180,7 @@ class linear_congruential_engine
typename ::std::enable_if<(_N == 0) && (_FLAG == false)>::type
skip_seq(unsigned long long __num_to_skip)
{
- for (unsigned long long __i = 0; __i < __num_to_skip; ++__i)
+ for (; __num_to_skip > 0; --__num_to_skip)
state_ = mod_scalar(state_);
}
@@ -190,7 +188,7 @@ class linear_congruential_engine
typename ::std::enable_if<(_N == 1) && (_FLAG == false)>::type
skip_seq(unsigned long long __num_to_skip)
{
- for (unsigned long long __i = 0; __i < __num_to_skip; ++__i)
+ for (; __num_to_skip > 0; --__num_to_skip)
state_[0] = mod_scalar(state_[0]);
}
@@ -198,11 +196,11 @@ class linear_congruential_engine
typename ::std::enable_if<(_N > 1) && (_FLAG == false)>::type
skip_seq(unsigned long long __num_to_skip)
{
- for (unsigned long long __i = 0; __i < __num_to_skip; ++__i)
+ for (; __num_to_skip > 0; --__num_to_skip)
{
- for (int __j = 0; __j < (_N - 1); ++__j)
+ for (int __i = 0; __i < (_N - 1); ++__i)
{
- state_[__j] = state_[__j + 1];
+ state_[__i] = state_[__i + 1];
}
state_[_N - 1] = mod_scalar(state_[_N - 2]);
}
@@ -232,24 +230,24 @@ class linear_congruential_engine
{
::std::uint64_t __mod = modulus, __inc = increment;
::std::uint64_t __mult = pow_mult_n(__num_to_skip);
- for (unsigned int __i = 0; __i < _N; ++__i)
+ for (int __i = 0; __i < _N; ++__i)
state_[__i] = static_cast<scalar_type>(((__mult * static_cast<::std::uint64_t>(state_[__i])) % __mod));
}
// result_portion implementation
template <int _N>
typename ::std::enable_if<(_N > 0), result_type>::type
- result_portion_internal(unsigned int __randoms_num)
+ result_portion_internal(unsigned int __random_nums)
{
result_type __part_vec;
- if (__randoms_num < 1)
- return __part_vec;
- unsigned int __num_to_gen = (__randoms_num > _N) ? _N : __randoms_num;
- for (unsigned int __i = 0; __i < __num_to_gen; ++__i)
+ if (__random_nums >= _N)
+ return operator()();
+
+ for (unsigned int __i = 0; __i < __random_nums; ++__i)
__part_vec[__i] = state_[__i];
- discard(__num_to_gen);
+ discard(__random_nums);
return __part_vec;
}
diff --git a/include/oneapi/dpl/internal/random_impl/normal_distribution.h b/include/oneapi/dpl/internal/random_impl/normal_distribution.h
index 60ef897cfc4c..08615a6c3b95 100644
--- a/include/oneapi/dpl/internal/random_impl/normal_distribution.h
+++ b/include/oneapi/dpl/internal/random_impl/normal_distribution.h
@@ -118,23 +118,16 @@ class normal_distribution
template <class _Engine>
result_type
- operator()(_Engine& __engine, unsigned int __randoms_num)
+ operator()(_Engine& __engine, unsigned int __random_nums)
{
- return operator()<_Engine>(__engine, param_type(mean_, stddev_), __randoms_num);
+ return operator()<_Engine>(__engine, param_type(mean_, stddev_), __random_nums);
}
template <class _Engine>
result_type
- operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num)
+ operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums)
{
- result_type __part_vec;
- if (__randoms_num < 1)
- return __part_vec;
-
- int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num;
-
- __part_vec = result_portion_internal<size_of_type_, _Engine>(__engine, __params, __portion);
- return __part_vec;
+ return result_portion_internal<size_of_type_, _Engine>(__engine, __params, __random_nums);
}
private:
@@ -161,14 +154,14 @@ class normal_distribution
// Callback function
template <typename _Type = float>
- scalar_type
+ inline scalar_type
callback()
{
return ((scalar_type*)(internal::gaussian_sp_table))[1];
}
template <>
- scalar_type
+ inline scalar_type
callback<double>()
{
return ((scalar_type*)(internal::gaussian_dp_table))[1];
@@ -176,14 +169,14 @@ class normal_distribution
// Get 2 * pi function
template <typename _Type = float>
- scalar_type
+ inline scalar_type
pi2()
{
return ((scalar_type*)(internal::gaussian_sp_table))[0];
}
template <>
- scalar_type
+ inline scalar_type
pi2<double>()
{
return ((scalar_type*)(internal::gaussian_dp_table))[0];
@@ -194,9 +187,10 @@ class normal_distribution
typename ::std::enable_if<(_Ndistr != 0), result_type>::type
generate(_Engine& __engine, const param_type __params)
{
- return generate_vec_internal(__engine, __params, _Ndistr);
+ return generate_vec<_Ndistr, _Engine>(__engine, __params);
}
+ // Specialization of the scalar generation
template <int _Ndistr, class _Engine>
typename ::std::enable_if<(_Ndistr == 0), result_type>::type
generate(_Engine& __engine, const param_type __params)
@@ -232,14 +226,86 @@ class normal_distribution
return __res;
}
- // Implementation for the generate vector function
+ // Vector generation overload selected for sizes up to 3 (__N <= 3):
+ // forwards to generate_n_elems, passing __N as the number of elements to produce.
+ template <int __N, class _Engine>
+ typename ::std::enable_if<(__N <= 3), result_type>::type
+ generate_vec(_Engine& __engine, const param_type __params)
+ {
+ return generate_n_elems<_Engine>(__engine, __params, __N);
+ }
+
+ // Vector generation overload selected for sizes above 3 (4, 8, 16).
+ // Requests __N uniform values in one call, uses the even lanes as u1 and the odd lanes as u2,
+ // and evaluates log / sincos / sqrt on half-width vectors instead of element by element.
+ // When flag_ is set, the first output comes from saved_ln_ / saved_u2_, and the last (u1, u2)
+ // pair contributes only its sine term; its log(u1) and u2 are stored for the next call.
+ template <int __N, class _Engine>
+ typename ::std::enable_if<(__N > 3), result_type>::type
+ generate_vec(_Engine& __engine, const param_type __params)
+ {
+ uniform_result_type __u;
+ scalar_type __mean = __params.first, __stddev = __params.second;
+ result_type __res;
+
+ constexpr unsigned int __vec_size = __N / 2;
+ sycl::vec<scalar_type, __vec_size> __sin, __cos;
+ sycl::vec<scalar_type, __vec_size> __u1_transformed;
+
+ __u = uniform_real_distribution_(__engine,
+ param_type(static_cast<scalar_type>(0.0), static_cast<scalar_type>(1.0)), __N);
+
+ sycl::vec<scalar_type, __vec_size> __u1 = __u.even();
+ sycl::vec<scalar_type, __vec_size> __u2 = __u.odd();
+
+ // Calculate sycl::log with callback: lanes where u1 == 0 take the callback value instead of log(0)
+ __u1_transformed =
+ select(sycl::log(__u1), sycl::vec<scalar_type, __vec_size>{callback<scalar_type>()},
+ sycl::isequal(__u1, sycl::vec<scalar_type, __vec_size>{static_cast<scalar_type>(0.0)}));
+
+ // Get sincos
+ __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos);
+
+ if (!flag_)
+ {
+ __u1_transformed = sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed);
+ __res.even() = __u1_transformed * __sin * __stddev + __mean;
+ __res.odd() = __u1_transformed * __cos * __stddev + __mean;
+
+ // Flag is still false as code-branch for 4/8/16 vector sizes
+ }
+ else
+ {
+ // Element 0 is the cosine term of the pair saved by the previous call
+ __res[0] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * saved_ln_) *
+ sycl::cos(pi2<scalar_type>() * saved_u2_));
+
+ // Interior pairs fill __res[1 .. __N - 2]. The bound is __N - 2 so that __res[__i + 2]
+ // never indexes past the end of the vector; the last pair is handled after the loop.
+ for (int __i = 0, __j = 0; __i < __N - 2; __i += 2, ++__j)
+ {
+ __res[__i + 1] =
+ (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__j]) * __sin[__j]) * __stddev +
+ __mean;
+ __res[__i + 2] =
+ (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__j]) * __cos[__j]) * __stddev +
+ __mean;
+ }
+
+ // Last pair: only the sine term fits into this result
+ __res[__N - 1] = (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__vec_size - 1]) *
+ __sin[__vec_size - 1]) *
+ __stddev +
+ __mean;
+
+ // Keep log(u1) and u2 of the last pair so the next call can emit its cosine term
+ saved_ln_ = __u1_transformed[__vec_size - 1];
+ saved_u2_ = __u2[__vec_size - 1];
+
+ // Flag is still true as code-branch for 4/8/16 vector sizes
+ }
+ return __res;
+ }
+
+ // Implementation for the N vector's elements generation
template <class _Engine>
result_type
- generate_vec_internal(_Engine& __engine, const param_type __params, unsigned int __N)
+ generate_n_elems(_Engine& __engine, const param_type __params, unsigned int __N)
{
uniform_result_type __u;
scalar_type __u1, __u2, __ln;
+ scalar_type __sin, __cos;
scalar_type __mean = __params.first, __stddev = __params.second;
result_type __res;
@@ -253,11 +319,12 @@ class normal_distribution
{
__u1 = __u[__i];
__u2 = __u[__i + 1];
+
+ __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos);
+
__ln = (__u1 == static_cast<scalar_type>(0.0)) ? callback<scalar_type>() : sycl::log(__u1);
- __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) *
- sycl::sin(pi2<scalar_type>() * __u2));
- __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) *
- sycl::cos(pi2<scalar_type>() * __u2));
+ __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __sin);
+ __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __cos);
}
if (__tail)
{
@@ -286,13 +353,13 @@ class normal_distribution
for (unsigned int __i = 1; __i < (__N - __tail); __i += 2)
{
__u1 = __u[__i - 1];
__u2 = __u[__i];
+
+ // Evaluate sincos only after __u2 has been loaded for this pair; computing it earlier
+ // would use the value left over from the previous iteration (or from before the loop).
+ __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos);
+
__ln = (__u1 == static_cast<scalar_type>(0.0)) ? callback<scalar_type>() : sycl::log(__u1);
- __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) *
- sycl::sin(pi2<scalar_type>() * __u2));
- __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) *
- sycl::cos(pi2<scalar_type>() * __u2));
+ __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __sin);
+ __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __cos);
}
if (__tail)
{
@@ -315,7 +382,14 @@ class normal_distribution
typename ::std::enable_if<(_Ndistr != 0), result_type>::type
result_portion_internal(_Engine& __engine, const param_type __params, unsigned int __N)
{
- return generate_vec_internal(__engine, __params, __N);
+ // Portion generation: produce the first __N elements of the result vector.
+ // __N == 0 returns the default-constructed vector before any engine call;
+ // __N >= _Ndistr is a full-vector generation, forwarded together with __params so the
+ // caller-supplied parameters are used rather than the ones stored in the distribution.
+ result_type __part_vec;
+ if (__N == 0)
+ return __part_vec;
+ else if (__N >= _Ndistr)
+ return operator()(__engine, __params);
+
+ __part_vec = generate_n_elems(__engine, __params, __N);
+ return __part_vec;
}
};
diff --git a/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h b/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h
index 48831f699881..918e394c6872 100644
--- a/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h
+++ b/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h
@@ -84,18 +84,14 @@ class subtract_with_carry_engine
result_type
operator()()
{
- result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>();
-
- return res;
+ return generate_internal<internal::type_traits_t<result_type>::num_elems>();
}
// operator () overload for result portion generation
result_type
- operator()(unsigned int __randoms_num)
+ operator()(unsigned int __random_nums)
{
- result_type __res = generate_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num);
-
- return __res;
+ return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums);
}
private:
@@ -142,12 +138,14 @@ class subtract_with_carry_engine
}
else
{
- x_[i_] = max() - x_[__id_2] - c_ + x_[__id_1] + 1u;
+ x_[__id] = max() - x_[__id_2] - c_ + x_[__id_1] + 1u;
c_ = 1;
}
+
i_++;
if (i_ >= long_lag)
i_ = 0;
+
return x_[__id];
};
@@ -171,17 +169,22 @@ class subtract_with_carry_engine
return __res;
}
+ // result_portion implementation
template <int _N>
typename ::std::enable_if<(_N > 0), result_type>::type
- generate_internal(unsigned int __randoms_num)
+ result_portion_internal(unsigned int __random_nums)
{
- result_type __res;
- for (unsigned int __i = 0; __i < __randoms_num; ++__i)
+ result_type __part_vec;
+
+ if (__random_nums >= _N)
+ return operator()();
+
+ for (unsigned int __i = 0; __i < __random_nums; ++__i)
{
- __res[__i] = generate_internal_scalar();
+ __part_vec[__i] = generate_internal_scalar();
}
- return __res;
+ return __part_vec;
}
scalar_type x_[long_lag];
diff --git a/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h b/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h
index 3954ce411764..9e4b6530ba03 100644
--- a/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h
+++ b/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h
@@ -104,23 +104,16 @@ class uniform_int_distribution
// Generation by portion
template <class _Engine>
result_type
- operator()(_Engine& __engine, unsigned int __randoms_num)
+ operator()(_Engine& __engine, unsigned int __random_nums)
{
- return operator()<_Engine>(__engine, param_type(a_, b_), __randoms_num);
+ return operator()<_Engine>(__engine, param_type(a_, b_), __random_nums);
}
template <class _Engine>
result_type
- operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num)
+ operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums)
{
- result_type __part_vec;
- if (__randoms_num < 1)
- return __part_vec;
-
- unsigned int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num;
-
- __part_vec = result_portion_internal<size_of_type_, _Engine>(__engine, __params, __portion);
- return __part_vec;
+ return result_portion_internal<size_of_type_, _Engine>(__engine, __params, __random_nums);
}
private:
@@ -150,7 +143,11 @@ class uniform_int_distribution
__engine, ::std::pair<double, double>(static_cast<double>(__params.first),
static_cast<double>(__params.second) + 1.0));
- return __res.template convert<scalar_type, sycl::rounding_mode::rte>();
+ result_type __res_ret;
+ for (int __i = 0; __i < _Ndistr; ++__i)
+ __res_ret[__i] = static_cast<scalar_type>(__res[__i]);
+
+ return __res_ret;
}
template <int _Ndistr, class _Engine>
@@ -169,13 +166,22 @@ class uniform_int_distribution
typename ::std::enable_if<(_Ndistr != 0), result_type>::type
result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N)
{
+ // Portion generation: produce the first __N elements of the result vector.
+ // __N == 0 returns the default-constructed vector before any engine call;
+ // __N >= _Ndistr is a full-vector generation, forwarded together with __params so the
+ // caller-supplied range is used rather than the one stored in the distribution.
+ result_type __part_vec;
+ if (__N == 0)
+ return __part_vec;
+ else if (__N >= _Ndistr)
+ return operator()(__engine, __params);
+
RealType __res =
uniform_real_distribution_(__engine,
::std::pair<double, double>(static_cast<double>(__params.first),
static_cast<double>(__params.second) + 1.0),
__N);
- return __res.template convert<scalar_type, sycl::rounding_mode::rte>();
+ // Convert only the __N generated lanes, element by element
+ for (unsigned int __i = 0; __i < __N; ++__i)
+ __part_vec[__i] = static_cast<scalar_type>(__res[__i]);
+
+ return __part_vec;
}
};
diff --git a/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h b/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h
index 3c4bf1803084..8aa4ea2aa831 100644
--- a/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h
+++ b/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h
@@ -98,33 +98,23 @@ class uniform_real_distribution
result_type
operator()(_Engine& __engine, const param_type& __params)
{
- result_type __res =
- generate<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, _Engine>(
- __engine, __params);
- return __res;
+ return generate<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, _Engine>(
+ __engine, __params);
}
template <class _Engine>
result_type
- operator()(_Engine& __engine, unsigned int __randoms_num)
+ operator()(_Engine& __engine, unsigned int __random_nums)
{
- return operator()<_Engine>(__engine, param_type(a_, b_), __randoms_num);
+ return operator()<_Engine>(__engine, param_type(a_, b_), __random_nums);
}
template <class _Engine>
result_type
- operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num)
+ operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums)
{
- result_type __part_vec;
- if (__randoms_num < 1)
- return __part_vec;
-
- unsigned int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num;
-
- __part_vec =
- result_portion_internal<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems,
- _Engine>(__engine, __params, __portion);
- return __part_vec;
+ return result_portion_internal<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems,
+ _Engine>(__engine, __params, __random_nums);
}
private:
@@ -145,7 +135,11 @@ class uniform_real_distribution
generate(_Engine& __engine, const param_type& __params)
{
auto __engine_output = __engine();
- auto __res = __engine_output.template convert<scalar_type, sycl::rounding_mode::rte>();
+ result_type __res;
+
+ for (int __i = 0; __i < _Ndistr; ++__i)
+ __res[__i] = static_cast<scalar_type>(__engine_output[__i]);
+
__res = ((__res - __engine.min()) / (1 + static_cast<scalar_type>(__engine.max() - __engine.min()))) *
(__params.second - __params.first) +
__params.first;
@@ -200,7 +194,7 @@ class uniform_real_distribution
{
sycl::vec<scalar_type, _Ndistr> __res;
int __i;
- int __tail_size = _Ndistr % _Negnine;
+ constexpr int __tail_size = _Ndistr % _Negnine;
for (__i = 0; __i < _Ndistr; __i += _Negnine)
{
auto __engine_output = __engine();
@@ -248,11 +242,11 @@ class uniform_real_distribution
// Implementation for result_portion function
template <int _Ndistr, int _Negnine, class _Engine>
typename ::std::enable_if<((_Ndistr <= _Negnine) & (_Ndistr != 0)), result_type>::type
- result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N)
+ generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N)
{
auto __engine_output = __engine(__N);
result_type __res;
- for (unsigned int __i = 0; __i < __N; ++__i)
+ for (int __i = 0; __i < __N; ++__i)
{
__res[__i] = static_cast<scalar_type>(__engine_output[__i]);
__res[__i] =
@@ -266,15 +260,15 @@ class uniform_real_distribution
template <int _Ndistr, int _Negnine, class _Engine>
typename ::std::enable_if<((_Ndistr > _Negnine) & (_Negnine != 0)), result_type>::type
- result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N)
+ generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N)
{
result_type __res;
- unsigned int __i;
+ int __i;
if (_Negnine >= __N)
{
auto __engine_output = __engine(__N);
- for (unsigned int __i = 0; __i < __N; ++__i)
+ for (__i = 0; __i < __N; ++__i)
{
__res[__i] = static_cast<scalar_type>(__engine_output[__i]);
__res[__i] =
@@ -321,10 +315,10 @@ class uniform_real_distribution
template <int _Ndistr, int _Negnine, class _Engine>
typename ::std::enable_if<((_Ndistr > _Negnine) & (_Negnine == 0)), result_type>::type
- result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N)
+ generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N)
{
result_type __res;
- for (unsigned int __i = 0; __i < __N; ++__i)
+ for (int __i = 0; __i < __N; ++__i)
{
__res[__i] = static_cast<scalar_type>(__engine());
__res[__i] =
@@ -335,6 +329,21 @@ class uniform_real_distribution
return __res;
}
+
+ // Implementation for result_portion function.
+ // Produces the first __N elements of the result vector:
+ // __N == 0 returns the default-constructed vector before any engine call;
+ // __N >= _Ndistr is a full-vector generation, forwarded together with __params so the
+ // caller-supplied range is used rather than the one stored in the distribution;
+ // anything in between is delegated to generate_n_elems.
+ template <int _Ndistr, int _Negnine, class _Engine>
+ typename ::std::enable_if<(_Ndistr != 0), result_type>::type
+ result_portion_internal(_Engine& __engine, const param_type __params, unsigned int __N)
+ {
+ result_type __part_vec;
+ if (__N == 0)
+ return __part_vec;
+ else if (__N >= _Ndistr)
+ return operator()(__engine, __params);
+
+ __part_vec = generate_n_elems<_Ndistr, _Negnine, _Engine>(__engine, __params, __N);
+ return __part_vec;
+ }
};
} // namespace dpl
diff --git a/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp b/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp
index cf28e6f869f8..4725de219ae2 100644
--- a/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp
+++ b/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp
@@ -215,6 +215,72 @@ int test_portion(oneapi::dpl::internal::element_type_t<RealType> mean, oneapi::d
}
+// Checks generation that follows a one-element portion request: every work-item first asks the
+// distribution for a single element (distr(engine, 1)) and then for a full vector (distr(engine)).
+// The num_elems + 1 values written per work-item are compared against generate_std output and
+// then passed to statistics_check. Returns the number of detected errors (0 on success).
template<class RealType, class UIntType>
+int test_flag(oneapi::dpl::internal::element_type_t<RealType> mean, oneapi::dpl::internal::element_type_t<RealType> stddev, int nsamples) {
+
+ sycl::queue queue(sycl::default_selector{});
+
+ // memory allocation
+ std::vector<oneapi::dpl::internal::element_type_t<RealType>> std_samples(nsamples);
+ std::vector<oneapi::dpl::internal::element_type_t<RealType>> dpstd_samples(nsamples);
+
+ // num_elems of 0 is treated as 1 element per generation
+ constexpr int num_elems = oneapi::dpl::internal::type_traits_t<RealType>::num_elems == 0 ? 1 : oneapi::dpl::internal::type_traits_t<RealType>::num_elems;
+ // Engine offset step per work-item: num_elems + 1 rounded up to an even number
+ // NOTE(review): presumably because the distribution consumes engine values in pairs - confirm
+ constexpr int num_to_skip = (num_elems + 1) % 2 ? (num_elems + 2) : (num_elems + 1);
+
+ // dpstd generation
+ {
+ sycl::buffer<oneapi::dpl::internal::element_type_t<RealType>, 1> dpstd_buffer(dpstd_samples.data(), nsamples);
+
+ queue.submit([&](sycl::handler &cgh) {
+ auto dpstd_acc = dpstd_buffer.template get_access<sycl::access::mode::write>(cgh);
+
+ // One work-item per group of num_elems + 1 samples; samples beyond the last full group are not written
+ cgh.parallel_for<>(sycl::range<1>(nsamples / (num_elems + 1)),
+ [=](sycl::item<1> idx) {
+
+ unsigned long long offset = idx.get_linear_id() * num_to_skip;
+ oneapi::dpl::linear_congruential_engine<UIntType, a, c, m> engine(seed, offset);
+ oneapi::dpl::normal_distribution<RealType> distr(mean, stddev);
+
+ // Generate the first element
+ auto res_1 = distr(engine, 1);
+
+ // Generate the rest elements
+ auto res = distr(engine);
+
+ dpstd_acc[idx * (num_elems + 1)] = res_1[0];
+ for(int i = 0; i < num_elems; ++i)
+ dpstd_acc[idx * (num_elems + 1) + i + 1] = res[i];
+ });
+ });
+ queue.wait();
+ }
+
+ // std generation
+ generate_std<oneapi::dpl::internal::element_type_t<UIntType>, oneapi::dpl::internal::element_type_t<RealType>>
+ (num_elems + 1, nsamples, mean, stddev, std_samples);
+
+ // comparison
+ // NOTE(review): abs is unqualified - confirm it resolves to a floating-point overload here
+ int err = 0;
+ for(int i = 0; i < nsamples; ++i) {
+ if (abs(std_samples[i] - dpstd_samples[i]) > eps) {
+ std::cout << "\nError: std_sample[" << i << "] = " << std_samples[i] << ", dpstd_samples[" << i << "] = " << dpstd_samples[i];
+ err++;
+ }
+ }
+
+ // statistics check
+ err += statistics_check(nsamples, mean, stddev, dpstd_samples);
+
+ if(err) {
+ std::cout << "\tFailed" << std::endl;
+ }
+ else {
+ std::cout << "\tPassed" << std::endl;
+ }
+
+ return err;
+}
+
+template<class RealType, class UIntType>
int tests_set(int nsamples) {
constexpr int nparams = 2;
@@ -254,6 +320,20 @@ int tests_set_portion(std::int32_t nsamples, unsigned int part) {
return 0;
}
+// Runs test_flag once with mean = 0.0 and stddev = 1.0 and prints the test header.
+// Returns 1 if test_flag reported any error, 0 otherwise.
+template<class RealType, class UIntType>
+int tests_set_flag(int nsamples) {
+
+ int err;
+ // Single parameter set: mean = 0.0, stddev = 1.0
+ std::cout << "normal_distribution test<type>, mean = " << 0.0 << ", stddev = " << 1.0 <<
+ ", nsamples = " << nsamples << ", flag = true, vec_size = " << oneapi::dpl::internal::type_traits_t<RealType>::num_elems;
+ err = test_flag<RealType, UIntType>(0.0, 1.0, nsamples);
+ if (err)
+ return 1;
+
+ return 0;
+}
+
#endif // _ONEDPL_BACKEND_SYCL
int main() {
@@ -353,6 +433,22 @@ int main() {
return 1;
}
+
+ // testing flag = true case
+ std::cout << "----------------------------------------------------------------------" << std::endl;
+ std::cout << "Float vector sizes = [1, 2, 3, 4, 8, 16], flag = true" << std::endl;
+ std::cout << "----------------------------------------------------------------------" << std::endl;
+ // Accumulate the results: tests_set_flag returns 0 or 1, and a plain assignment on every
+ // line would let a later passing case hide an earlier failure.
+ err = tests_set_flag<sycl::vec<float, 1>, sycl::vec<std::uint32_t, 1>>(160);
+ err += tests_set_flag<sycl::vec<float, 2>, sycl::vec<std::uint32_t, 2>>(99);
+ err += tests_set_flag<sycl::vec<float, 3>, sycl::vec<std::uint32_t, 3>>(160);
+ err += tests_set_flag<sycl::vec<float, 4>, sycl::vec<std::uint32_t, 4>>(100);
+ err += tests_set_flag<sycl::vec<float, 8>, sycl::vec<std::uint32_t, 8>>(99);
+ err += tests_set_flag<sycl::vec<float, 16>, sycl::vec<std::uint32_t, 16>>(170);
+ if(err) {
+ std::cout << "Test FAILED" << std::endl;
+ return 1;
+ }
+
#else
std::cout << "\tTest is skipped for non-SYCL backend" << std::endl;
#endif // _ONEDPL_BACKEND_SYCL