diff options
Diffstat (limited to 'libs/numeric/odeint/performance/SIMD')
-rw-r--r-- | libs/numeric/odeint/performance/SIMD/Makefile | 33 | ||||
-rwxr-xr-x | libs/numeric/odeint/performance/SIMD/perf_roessler.sh | 22 | ||||
-rw-r--r-- | libs/numeric/odeint/performance/SIMD/roessler.cpp | 125 | ||||
-rw-r--r-- | libs/numeric/odeint/performance/SIMD/roessler_simd.cpp | 149 |
4 files changed, 329 insertions, 0 deletions
diff --git a/libs/numeric/odeint/performance/SIMD/Makefile b/libs/numeric/odeint/performance/SIMD/Makefile new file mode 100644 index 000000000..811acd988 --- /dev/null +++ b/libs/numeric/odeint/performance/SIMD/Makefile @@ -0,0 +1,33 @@ +# Copyright 2014 Mario Mulansky +# +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or +# copy at http://www.boost.org/LICENSE_1_0.txt) + +# make sure BOOST_ROOT is pointing to your boost directory +# otherwise, set it here: +# BOOST_ROOT = /path/to/boost +# you also need NT2's SIMD library available; set the include path here: +# SIMD_INCLUDE = /path/to/simd/include + +INCLUDES = -I$(BOOST_ROOT) -I${SIMD_INCLUDE} + +# INTEL COMPILER +# change this if you want to cross-compile +ARCH = Host +# ARCH = AVX +# ARCH = SSE4.2 + +CXX = icpc +CC = icpc +CXXFLAGS = -O3 -x${ARCH} -std=c++0x -fno-alias -inline-forceinline -DNDEBUG ${INCLUDES} +# -ip + +# GCC COMPILER +# change this if you want to cross-compile +# ARCH = native +# # ARCH = core-avx-i + +# CXX = g++ +# CC = g++ +# CXXFLAGS = -O3 -ffast-math -mtune=${ARCH} -march=${ARCH} -std=c++0x -DNDEBUG ${INCLUDES} diff --git a/libs/numeric/odeint/performance/SIMD/perf_roessler.sh b/libs/numeric/odeint/performance/SIMD/perf_roessler.sh new file mode 100755 index 000000000..a1094f63a --- /dev/null +++ b/libs/numeric/odeint/performance/SIMD/perf_roessler.sh @@ -0,0 +1,22 @@ +#!/bin/bash +echo "Running on ${HOSTNAME}" + +out_dir=perf_${HOSTNAME} +mkdir -p ${out_dir} + +for N in 256 1024 4096 16384 65536 262144 1048576 4194304 16777216 67108864 +do + steps=`expr 4 \* 67108864 / ${N}` + for exe in "roessler" "roessler_simd" + do + rm -f ${out_dir}/${exe}_N${N}.times + for i in {0..4} + do + likwid-pin -cS0:0 ./${exe} ${N} ${steps} >> ${out_dir}/${exe}_N${N}.times + done + for perf_ctr in "FLOPS_DP" "FLOPS_AVX" "L2" "L3" "MEM" + do + likwid-perfctr -CS0:0 -g ${perf_ctr} ./${exe} ${N} ${steps} > ${out_dir}/${exe}_N${N}_${perf_ctr}.perf + done + 
done +done diff --git a/libs/numeric/odeint/performance/SIMD/roessler.cpp b/libs/numeric/odeint/performance/SIMD/roessler.cpp new file mode 100644 index 000000000..4e6cc4229 --- /dev/null +++ b/libs/numeric/odeint/performance/SIMD/roessler.cpp @@ -0,0 +1,125 @@ +/* + * Simulation of an ensemble of Roessler attractors + * + * Copyright 2014 Mario Mulansky + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or + * copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + +#include <iostream> +#include <vector> +#include <random> + +#include <boost/timer.hpp> +#include <boost/array.hpp> + +#include <boost/numeric/odeint.hpp> + +namespace odeint = boost::numeric::odeint; + +typedef boost::timer timer_type; + +typedef double fp_type; +//typedef float fp_type; + +typedef boost::array<fp_type, 3> state_type; +typedef std::vector<state_type> state_vec; + +//--------------------------------------------------------------------------- +struct roessler_system { + const fp_type m_a, m_b, m_c; + + roessler_system(const fp_type a, const fp_type b, const fp_type c) + : m_a(a), m_b(b), m_c(c) + {} + + void operator()(const state_type &x, state_type &dxdt, const fp_type t) const + { + dxdt[0] = -x[1] - x[2]; + dxdt[1] = x[0] + m_a * x[1]; + dxdt[2] = m_b + x[2] * (x[0] - m_c); + } +}; + +//--------------------------------------------------------------------------- +int main(int argc, char *argv[]) { +if(argc<3) +{ + std::cerr << "Expected size and steps as parameter" << std::endl; + exit(1); +} +const size_t n = atoi(argv[1]); +const size_t steps = atoi(argv[2]); +//const size_t steps = 50; + +const fp_type dt = 0.01; + +const fp_type a = 0.2; +const fp_type b = 1.0; +const fp_type c = 9.0; + +// random initial conditions +std::vector<fp_type> x(n), y(n), z(n); +std::default_random_engine generator; +std::uniform_real_distribution<fp_type> distribution_xy(-8.0, 8.0); +std::uniform_real_distribution<fp_type> 
distribution_z(0.0, 20.0); +auto rand_xy = std::bind(distribution_xy, std::ref(generator)); +auto rand_z = std::bind(distribution_z, std::ref(generator)); +std::generate(x.begin(), x.end(), rand_xy); +std::generate(y.begin(), y.end(), rand_xy); +std::generate(z.begin(), z.end(), rand_z); + +state_vec state(n); +for(size_t i=0; i<n; ++i) +{ + state[i][0] = x[i]; + state[i][1] = y[i]; + state[i][2] = z[i]; +} + +std::cout.precision(16); + +std::cout << "# n: " << n << std::endl; + +std::cout << x[0] << std::endl; + + +// Stepper type - use never_resizer for slight performance improvement +odeint::runge_kutta4_classic<state_type, fp_type, state_type, fp_type, + odeint::array_algebra, + odeint::default_operations, + odeint::never_resizer> stepper; + +roessler_system sys(a, b, c); + +timer_type timer; + +fp_type t = 0.0; + +for (int step = 0; step < steps; step++) +{ + for(size_t i=0; i<n; ++i) + { + stepper.do_step(sys, state[i], t, dt); + } + t += dt; +} + +std::cout << "Integration finished, runtime for " << steps << " steps: "; +std::cout << timer.elapsed() << " s" << std::endl; + +// compute some accumulation to make sure all results have been computed +fp_type s = 0.0; +for(size_t i = 0; i < n; ++i) +{ + s += state[i][0]; +} + +std::cout << state[0][0] << std::endl; +std::cout << s/n << std::endl; + +} diff --git a/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp b/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp new file mode 100644 index 000000000..d79af4d8b --- /dev/null +++ b/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp @@ -0,0 +1,149 @@ +/* + * Simulation of an ensemble of Roessler attractors using NT2 SIMD library + * This requires the SIMD library headers. + * + * Copyright 2014 Mario Mulansky + * + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or + * copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + +#include <iostream> +#include <vector> +#include <random> + +#include <boost/timer.hpp> +#include <boost/array.hpp> + +#include <boost/numeric/odeint.hpp> +#include <boost/simd/sdk/simd/pack.hpp> +#include <boost/simd/sdk/simd/io.hpp> +#include <boost/simd/memory/allocator.hpp> +#include <boost/simd/include/functions/splat.hpp> +#include <boost/simd/include/functions/plus.hpp> +#include <boost/simd/include/functions/multiplies.hpp> + + +namespace odeint = boost::numeric::odeint; +namespace simd = boost::simd; + +typedef boost::timer timer_type; + +static const size_t dim = 3; // roessler is 3D + +typedef double fp_type; +//typedef float fp_type; + +typedef simd::pack<fp_type> simd_pack; +typedef boost::array<simd_pack, dim> state_type; +// use the simd allocator to get properly aligned memory +typedef std::vector< state_type, simd::allocator< state_type > > state_vec; + +static const size_t pack_size = simd_pack::static_size; + +//--------------------------------------------------------------------------- +struct roessler_system { + const fp_type m_a, m_b, m_c; + + roessler_system(const fp_type a, const fp_type b, const fp_type c) + : m_a(a), m_b(b), m_c(c) + {} + + void operator()(const state_type &x, state_type &dxdt, const fp_type t) const + { + dxdt[0] = -1.0*x[1] - x[2]; + dxdt[1] = x[0] + m_a * x[1]; + dxdt[2] = m_b + x[2] * (x[0] - m_c); + } +}; + +//--------------------------------------------------------------------------- +int main(int argc, char *argv[]) { +if(argc<3) +{ + std::cerr << "Expected size and steps as parameter" << std::endl; + exit(1); +} +const size_t n = atoi(argv[1]); +const size_t steps = atoi(argv[2]); + +const fp_type dt = 0.01; + +const fp_type a = 0.2; +const fp_type b = 1.0; +const fp_type c = 9.0; + +// random initial conditions +std::vector<fp_type> x(n), y(n), z(n); +std::default_random_engine generator; 
+std::uniform_real_distribution<fp_type> distribution_xy(-8.0, 8.0); +std::uniform_real_distribution<fp_type> distribution_z(0.0, 20.0); +auto rand_xy = std::bind(distribution_xy, std::ref(generator)); +auto rand_z = std::bind(distribution_z, std::ref(generator)); +std::generate(x.begin(), x.end(), rand_xy); +std::generate(y.begin(), y.end(), rand_xy); +std::generate(z.begin(), z.end(), rand_z); + +state_vec state(n/pack_size); +for(size_t i=0; i<n/pack_size; ++i) +{ + for(size_t p=0; p<pack_size; ++p) + { + state[i][0][p] = x[i*pack_size+p]; + state[i][1][p] = y[i*pack_size+p]; + state[i][2][p] = z[i*pack_size+p]; + } +} + +std::cout << "Systems: " << n << std::endl; +std::cout << "Steps: " << steps << std::endl; +std::cout << "SIMD pack size: " << pack_size << std::endl; + +std::cout << state[0][0] << std::endl; + +// Stepper type +odeint::runge_kutta4_classic<state_type, fp_type, state_type, fp_type, + odeint::array_algebra, odeint::default_operations, + odeint::never_resizer> stepper; + +roessler_system sys(a, b, c); + +timer_type timer; + +fp_type t = 0.0; + +for(int step = 0; step < steps; step++) +{ + for(size_t i = 0; i < n/pack_size; ++i) + { + stepper.do_step(sys, state[i], t, dt); + } + t += dt; +} + +std::cout.precision(16); + +std::cout << "Integration finished, runtime for " << steps << " steps: "; +std::cout << timer.elapsed() << " s" << std::endl; + +// compute some accumulation to make sure all results have been computed +simd_pack s_pack = 0.0; +for(size_t i = 0; i < n/pack_size; ++i) +{ + s_pack += state[i][0]; +} + +fp_type s = 0.0; +for(size_t p=0; p<pack_size; ++p) +{ + s += s_pack[p]; +} + + +std::cout << state[0][0] << std::endl; +std::cout << s/n << std::endl; + +} |