Commit 0acf2aab authored by Jens Petit
Browse files

Parallelized for loop of ET

parent a354449a
Pipeline #185586 failed with stages
in 2 minutes and 24 seconds
......@@ -357,6 +357,64 @@ namespace elsa
return const_reverse_iterator(cbegin());
}
/// Benchmark helper: sums all elements of this container with a plain sequential loop.
///
/// @return the sum of all elements (a value-initialized data_t if the container is empty)
template <typename data_t>
data_t DataContainer<data_t>::test() const
{
    // value-initialize the accumulator; adding to an uninitialized variable is undefined behaviour
    data_t result{};

    const index_t size = getSize();
    for (index_t i = 0; i < size; ++i) {
        result += (*this)[i];
    }

    return result;
}
/// Benchmark helper: sums all elements of this container using an OpenMP work-shared loop.
///
/// Every thread accumulates into its own partial sum, and the partial sums are merged one
/// thread at a time. For floating point element types the result can differ from the
/// sequential sum by rounding, because the order of the additions is not fixed.
///
/// @return the sum of all elements (a value-initialized data_t if the container is empty)
template <typename data_t>
data_t DataContainer<data_t>::test_omp() const
{
    data_t result{};

    // `data` is only read inside the parallel region, so it must stay shared: a `private`
    // clause would give every thread its own uninitialized copy of the pointer
    auto data = _dataHandler.get();
    const index_t size = getSize();

#pragma omp parallel
    {
        // thread-local accumulator, so the hot loop never touches shared state
        data_t partial{};

#pragma omp for nowait
        for (index_t i = 0; i < size; ++i) {
            partial += data->operator[](i);
        }

        // merge under mutual exclusion; unlike a `reduction` clause this also works for
        // element types that have no built-in OpenMP reduction
#pragma omp critical
        result += partial;
    }

    return result;
}
/// Benchmark helper: sums the loop indices 0, 1, ..., getSize() - 1 sequentially, without
/// reading any container element.
///
/// @return the index sum converted to data_t
template <typename data_t>
data_t DataContainer<data_t>::test_s() const
{
    // start from zero; the accumulator must not be read uninitialized
    index_t result = 0;

    const index_t size = getSize();
    for (index_t i = 0; i < size; ++i) {
        result += i;
    }

    // the sum is computed as an integer; make the conversion to the element type explicit
    return static_cast<data_t>(result);
}
/// Benchmark helper: sums the loop indices 0, 1, ..., getSize() - 1 with an OpenMP
/// `for simd` loop, without reading any container element.
///
/// @return the index sum converted to data_t
template <typename data_t>
data_t DataContainer<data_t>::test_s_omp() const
{
    index_t result = 0;

    // evaluate the loop bound once, so the work-shared loop has a loop-invariant bound
    const index_t size = getSize();

#pragma omp parallel
    {
        // the reduction gives every thread (and SIMD lane) a private zero-initialized copy of
        // `result` and combines them afterwards; without it the concurrent `+=` is a data race
#pragma omp for simd reduction(+ : result)
        for (index_t i = 0; i < size; ++i) {
            result += i;
        }
    }

    // the sum is computed as an integer; make the conversion to the element type explicit
    return static_cast<data_t>(result);
}
// ------------------------------------------
// explicit template instantiation
template class DataContainer<float>;
......
......@@ -110,13 +110,37 @@ namespace elsa
DataContainer<data_t>& operator=(Source const& source)
{
    // Evaluates `source` element by element into this container: element i of the container
    // is assigned source[i] for every i in [0, getSize()). The iterations are independent,
    // so the loop is distributed over the OpenMP threads.
    // NOTE(review): this assumes that source[i] may be evaluated concurrently for different
    // i -- confirm for every expression type that can be assigned here.
    detach();

    const index_t size = getSize();
    auto data = _dataHandler.get();

    // `data` and `source` are only read by the threads and therefore stay shared: listing
    // them in a `private` clause would give every thread an uninitialized copy. The work per
    // element is uniform, so the default schedule is used rather than dynamic chunking.
#pragma omp parallel for simd
    for (index_t i = 0; i < size; ++i) {
        data->operator[](i) = source[i];
    }

    return *this;
}
/// benchmark helper: accumulate all elements of this container in a sequential loop and return the result
data_t test() const;
/// benchmark helper: same accumulation as test(), with the loop placed inside an OpenMP parallel region
data_t test_omp() const;
/// benchmark helper: accumulate the loop indices 0 .. getSize()-1 sequentially and return the result as data_t
data_t test_s() const;
/// benchmark helper: same index accumulation as test_s(), using an OpenMP "for simd" loop
data_t test_s_omp() const;
/// return the current DataDescriptor
const DataDescriptor& getDataDescriptor() const;
......
......@@ -12,43 +12,57 @@
#include "DataContainer.h"
#include <string>
#include <cstdlib>
#include <ctime>
using namespace elsa;
static const index_t dimension = 16;
static const index_t dimension = 256;
// Benchmarks element-wise vector expressions evaluated directly with Eigen on vectors of
// dimension^3 floats.
// NOTE(review): this hunk appears to contain removed and added diff lines interleaved without
// +/- markers (e.g. randVec, randVec2 and randVec3 are each declared twice, and several
// BENCHMARK bodies contain both a `return ...eval();` and a `result = ...;` statement), so it
// is not compilable as shown -- confirm against the actual file before editing.
TEST_CASE("Expression benchmark using Eigen with n=" + std::to_string(dimension) + "^3")
{
index_t size = dimension * dimension * dimension;
Eigen::VectorXf randVec = Eigen::VectorXf::Random(size);
Eigen::VectorXf randVec2 = Eigen::VectorXf::Random(size);
Eigen::VectorXf randVec3 = Eigen::VectorXf::Random(size);
BENCHMARK("exp = dc - dc2;") { return (randVec - randVec2).eval(); };
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec(size);
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec2(size);
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec3(size);
BENCHMARK("exp = dc - dc2 + dc;") { return (randVec - randVec2 + randVec).eval(); };
// preallocated output vector that the `result = ...` benchmark bodies assign into
Eigen::Matrix<float, Eigen::Dynamic, 1> result(size);
// fill the three operand vectors with pseudo-random values in [0, 100]
for (index_t i = 0; i < size; ++i) {
randVec[i] = static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
randVec2[i] = static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
randVec3[i] = static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
}
BENCHMARK("exp = dc - dc2;") {
result = randVec - randVec2;
};
BENCHMARK("exp = dc - dc2 + dc;") {
result = randVec - randVec2 + randVec;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3;")
{
return (randVec * randVec2 - randVec2 / 2.38).eval();
// .array() selects coefficient-wise multiplication/division, .matrix() converts back
result = (randVec.array() * randVec2.array()).matrix()
- (randVec2.array() / randVec3.array()).matrix();
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc;")
{
return (randVec * randVec2 - randVec2 / 2.38 + randVec).eval();
result = (randVec.array() * randVec2.array()).matrix()
- (randVec2.array() / randVec3.array()).matrix() + randVec;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc * dc3;")
{
return (randVec * randVec2 - randVec2 / 2.38 + randVec * randVec3).eval();
result = (randVec.array() * randVec2.array()).matrix()
- (randVec2.array() / randVec3.array()).matrix()
+ (randVec.array() * randVec3.array()).matrix();
};
}
TEST_CASE("Expression benchmark using expression templates with n=" + std::to_string(dimension)
+ "^3")
{
srand(static_cast<unsigned>(time(nullptr)));
IndexVector_t numCoeff(3);
numCoeff << dimension, dimension, dimension;
DataDescriptor desc(numCoeff);
......@@ -66,39 +80,32 @@ TEST_CASE("Expression benchmark using expression templates with n=" + std::to_st
BENCHMARK("exp = dc - dc2;")
{
result = dc - dc2;
return result;
};
BENCHMARK("exp = dc - dc2 + dc;")
{
result = dc - dc2 + dc;
return result;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3;")
{
result = dc * dc2 - dc2 / dc3;
return result;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc;")
{
result = dc * dc2 - dc2 / dc3 + dc;
return result;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc * dc3;")
{
result = dc * dc2 - dc2 / dc3 + dc * dc3;
return result;
};
}
TEST_CASE("Expression benchmark without expression templates with n=" + std::to_string(dimension)
+ "^3")
{
srand(static_cast<unsigned>(time(nullptr)));
IndexVector_t numCoeff(3);
numCoeff << dimension, dimension, dimension;
DataDescriptor desc(numCoeff);
......@@ -118,26 +125,63 @@ TEST_CASE("Expression benchmark without expression templates with n=" + std::to_
BENCHMARK("exp = dc - dc2;")
{
return dc - dc2;
result = dc - dc2;
};
BENCHMARK("exp = dc - dc2 + dc;")
{
return dc - dc2 + dc;
result = dc - dc2 + dc;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3;")
{
return dc * dc2 - dc2 / dc3;
result = dc * dc2 - dc2 / dc3;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc;")
{
return dc * dc2 - dc2 / dc3 + dc;
result = dc * dc2 - dc2 / dc3 + dc;
};
BENCHMARK("exp = dc * dc2 - dc2 / dc3 + dc * dc3;")
{
return dc * dc2 - dc2 / dc3 + dc * dc3;
result = dc * dc2 - dc2 / dc3 + dc * dc3;
};
}
// Benchmarks the sequential and OpenMP variants of the DataContainer accumulation helpers on a
// container with dimension^3 elements.
TEST_CASE("Expression benchmark openmp n=" + std::to_string(dimension) + "^3")
{
    // three axes with `dimension` coefficients each
    IndexVector_t numCoeff(3);
    numCoeff << dimension, dimension, dimension;
    DataDescriptor desc(numCoeff);
    DataContainer dc(desc);

    // populate the container with pseudo-random values in [0, 100]
    const index_t numElements = dc.getSize();
    for (index_t idx = 0; idx < numElements; ++idx) {
        dc[idx] = static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
    }

    // each benchmark returns the computed value to Catch2
    BENCHMARK("for loop") { return dc.test(); };

    BENCHMARK("for loop with omp") { return dc.test_omp(); };

    BENCHMARK("s for loop") { return dc.test_s(); };

    BENCHMARK("s for loop with omp") { return dc.test_s_omp(); };
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment