Commit 7f24cd6d authored by Jens Petit's avatar Jens Petit

DataHandlerGPU: Core functionality (#21)

parent 825e8c5b
......@@ -53,7 +53,7 @@ namespace elsa
_dataHandler = other._dataHandler->clone();
}
_dataHandlerType = other._dataHandlerType;
// TODO: Check what to do with handler type if CPU copy assign to GPU type
}
return *this;
......@@ -81,7 +81,7 @@ namespace elsa
_dataHandler = std::move(other._dataHandler);
}
_dataHandlerType = std::move(other._dataHandlerType);
// TODO: Check what to do with handler type if CPU move assign to GPU type
// leave other in a valid state
other._dataDescriptor = nullptr;
......@@ -230,6 +230,12 @@ namespace elsa
return std::make_unique<DataHandlerCPU<data_t>>(std::forward<Args>(args)...);
case DataHandlerType::MAP_CPU:
return std::make_unique<DataHandlerCPU<data_t>>(std::forward<Args>(args)...);
#ifdef ELSA_CUDA_VECTOR
case DataHandlerType::GPU:
return std::make_unique<DataHandlerGPU<data_t>>(std::forward<Args>(args)...);
case DataHandlerType::MAP_GPU:
return std::make_unique<DataHandlerGPU<data_t>>(std::forward<Args>(args)...);
#endif
default:
throw std::invalid_argument("DataContainer: unknown handler type");
}
......@@ -277,8 +283,13 @@ namespace elsa
const auto& ithDesc = blockDesc->getDescriptorOfBlock(i);
index_t blockSize = ithDesc.getNumberOfCoefficients();
DataHandlerType newHandlerType = (_dataHandlerType == DataHandlerType::CPU
|| _dataHandlerType == DataHandlerType::MAP_CPU)
? DataHandlerType::MAP_CPU
: DataHandlerType::MAP_GPU;
return DataContainer<data_t>{ithDesc, _dataHandler->getBlock(startIndex, blockSize),
DataHandlerType::MAP_CPU};
newHandlerType};
}
template <typename data_t>
......@@ -295,10 +306,15 @@ namespace elsa
const auto& ithDesc = blockDesc->getDescriptorOfBlock(i);
index_t blockSize = ithDesc.getNumberOfCoefficients();
DataHandlerType newHandlerType = (_dataHandlerType == DataHandlerType::CPU
|| _dataHandlerType == DataHandlerType::MAP_CPU)
? DataHandlerType::MAP_CPU
: DataHandlerType::MAP_GPU;
// getBlock() returns a pointer to non-const DH, but that's fine as it gets wrapped in a
// constant container
return DataContainer<data_t>{ithDesc, _dataHandler->getBlock(startIndex, blockSize),
DataHandlerType::MAP_CPU};
newHandlerType};
}
template <typename data_t>
......@@ -307,8 +323,13 @@ namespace elsa
if (dataDescriptor.getNumberOfCoefficients() != getSize())
throw std::invalid_argument("DataContainer: view must have same size as container");
DataHandlerType newHandlerType = (_dataHandlerType == DataHandlerType::CPU
|| _dataHandlerType == DataHandlerType::MAP_CPU)
? DataHandlerType::MAP_CPU
: DataHandlerType::MAP_GPU;
return DataContainer<data_t>{dataDescriptor, _dataHandler->getBlock(0, getSize()),
DataHandlerType::MAP_CPU};
newHandlerType};
}
template <typename data_t>
......@@ -318,10 +339,15 @@ namespace elsa
if (dataDescriptor.getNumberOfCoefficients() != getSize())
throw std::invalid_argument("DataContainer: view must have same size as container");
DataHandlerType newHandlerType = (_dataHandlerType == DataHandlerType::CPU
|| _dataHandlerType == DataHandlerType::MAP_CPU)
? DataHandlerType::MAP_CPU
: DataHandlerType::MAP_GPU;
// getBlock() returns a pointer to non-const DH, but that's fine as it gets wrapped in a
// constant container
return DataContainer<data_t>{dataDescriptor, _dataHandler->getBlock(0, getSize()),
DataHandlerType::MAP_CPU};
newHandlerType};
}
template <typename data_t>
......@@ -396,22 +422,6 @@ namespace elsa
return const_reverse_iterator(cbegin());
}
/// Return the raw data handler pointer, downcast to the concrete type selected by
/// _dataHandlerType. For any handler type other than CPU or MAP_CPU the default-constructed
/// HandlerTypes_t is returned unchanged.
template <typename data_t>
typename DataContainer<data_t>::HandlerTypes_t DataContainer<data_t>::getHandlerPtr() const
{
    typename DataContainer<data_t>::HandlerTypes_t result;

    switch (_dataHandlerType) {
        case DataHandlerType::CPU:
            // owning CPU handler
            result = static_cast<DataHandlerCPU<data_t>*>(_dataHandler.get());
            break;
        case DataHandlerType::MAP_CPU:
            // CPU map handler
            result = static_cast<DataHandlerMapCPU<data_t>*>(_dataHandler.get());
            break;
        default:
            // leave result default-constructed
            break;
    }

    return result;
}
template <typename data_t>
DataHandlerType DataContainer<data_t>::getDataHandlerType() const
{
......
This diff is collapsed.
......@@ -4,6 +4,10 @@
#include "Cloneable.h"
#include "ExpressionPredicates.h"
#ifdef ELSA_CUDA_VECTOR
#include "Quickvec.h"
#endif
#include <Eigen/Core>
namespace elsa
......@@ -29,13 +33,6 @@ namespace elsa
template <typename data_t = real_t>
class DataHandler : public Cloneable<DataHandler<data_t>>
{
/// for enabling accessData()
template <class Operand, std::enable_if_t<isDataContainer<Operand>, int>>
friend constexpr auto evaluateOrReturn(Operand const& operand);
/// for enabling accessData()
friend DataContainer<data_t>;
protected:
/// convenience typedef for the Eigen::Matrix data vector
using DataVector_t = Eigen::Matrix<data_t, Eigen::Dynamic, 1>;
......@@ -178,11 +175,5 @@ namespace elsa
/// derived classes should override this method to implement move assignment
virtual void assign(DataHandler<data_t>&& other) = 0;
/// derived classes return underlying data
virtual DataMap_t accessData() = 0;
/// derived classes return underlying data
virtual DataMap_t accessData() const = 0;
};
} // namespace elsa
......@@ -18,7 +18,7 @@ namespace elsa
class DataHandlerCPU;
// forward declaration, used for testing and defined in test file (declared as friend)
template <typename data_t>
long useCount(const DataHandlerCPU<data_t>&);
long useCount(const DataHandlerCPU<data_t>& /*dh*/);
/**
* \brief Class representing and owning a vector stored in CPU main memory (using
......@@ -45,9 +45,16 @@ namespace elsa
/// declare DataHandlerMapCPU as friend, allows the use of Eigen for improved performance
friend DataHandlerMapCPU<data_t>;
/// for enabling accessData()
friend DataContainer<data_t>;
/// used for testing only and defined in test file
friend long useCount<>(const DataHandlerCPU<data_t>& dh);
/// friend constexpr function to implement expression templates
template <bool GPU, class Operand, std::enable_if_t<isDataContainer<Operand>, int>>
friend constexpr auto evaluateOrReturn(Operand const& operand);
protected:
/// convenience typedef for the Eigen::Matrix data vector
using DataVector_t = Eigen::Matrix<data_t, Eigen::Dynamic, 1>;
......@@ -176,10 +183,10 @@ namespace elsa
void assign(DataHandler<data_t>&& other) override;
/// return non-const version of data
DataMap_t accessData() override;
DataMap_t accessData();
/// return const version of data
DataMap_t accessData() const override;
DataMap_t accessData() const;
private:
/// creates the deep copy for the copy-on-write mechanism
......
This diff is collapsed.
#pragma once
#include "elsaDefines.h"
#include "DataHandler.h"
#include "Quickvec.h"
#include <list>
namespace elsa
{
// forward declaration, allows mutual friending
template <typename data_t>
class DataHandlerMapGPU;
// forward declaration for friend test function
template <typename data_t = real_t>
class DataHandlerGPU;
// forward declaration, used for testing and defined in test file (declared as friend)
template <typename data_t>
long useCount(const DataHandlerGPU<data_t>&);
/**
* \brief Class representing and owning a data vector held in a quickvec::Vector (the GPU
* counterpart of the CPU data handler).
*
* NOTE(review): the storage is presumably device memory managed by Quickvec -- confirm against
* the Quickvec implementation.
*
* \tparam data_t - data type that is stored, defaulting to real_t.
*
* \author David Frank - main code
* \author Tobias Lasser - modularization and modernization
* \author Nikola Dinev - integration of map and copy-on-write concepts
*
* The class implements copy-on-write. Therefore any non-const functions should call the
* detach() function first to trigger the copy-on-write mechanism.
*
* DataHandlerGPU declares DataHandlerMapGPU as a friend class, giving Maps direct access to the
* underlying quickvec::Vector. A strong bidirectional link exists
* between the two classes. A Map is associated with the DataHandlerGPU from which it was
* created for the entirety of its lifetime. If the DataHandlerGPU starts managing a new vector
* (e.g. through a call to detach()), all associated Maps will also be updated.
*/
template <typename data_t>
class DataHandlerGPU : public DataHandler<data_t>
{
/// for enabling accessData()
friend DataContainer<data_t>;
/// declare DataHandlerMapGPU as friend, so that Maps can reach the protected data of their owner
friend DataHandlerMapGPU<data_t>;
/// used for testing only and defined in test file
friend long useCount<>(const DataHandlerGPU<data_t>& dh);
/// friend constexpr function to implement expression templates (needs accessData())
template <bool GPU, class Operand, std::enable_if_t<isDataContainer<Operand>, int>>
friend constexpr auto evaluateOrReturn(Operand const& operand);
protected:
/// convenience typedef for the Eigen::Matrix data vector (used as constructor input)
using DataVector_t = Eigen::Matrix<data_t, Eigen::Dynamic, 1>;
/// convenience typedef for the Eigen::Map
using DataMap_t = Eigen::Map<DataVector_t>;
public:
/// delete default constructor (having no information makes no sense)
DataHandlerGPU() = delete;
/// destructor (declared here, defined out of line)
~DataHandlerGPU() override;
/**
* \brief Constructor initializing an appropriately sized vector with zeros
*
* \param[in] size of the vector
*
* NOTE(review): there is no `initialize` flag on this overload; zero-initialization as
* stated in the brief should be confirmed against the implementation.
*
* \throw std::invalid_argument if the size is non-positive
*/
explicit DataHandlerGPU(index_t size);
/**
* \brief Constructor initializing a data vector with a given Eigen vector
*
* \param[in] vector Eigen::Matrix column vector that is used for initializing the data
*/
explicit DataHandlerGPU(DataVector_t const& vector);
/**
* \brief Constructor initializing a data vector with a given quickvec::Vector
*
* \param[in] vector quickvec::Vector that is used for initializing the data
*/
explicit DataHandlerGPU(quickvec::Vector<data_t> const& vector);
/// copy constructor
DataHandlerGPU(const DataHandlerGPU<data_t>& other);
/// move constructor
DataHandlerGPU(DataHandlerGPU<data_t>&& other) noexcept;
/// return the size of the vector
index_t getSize() const override;
/// return the index-th element of the data vector (not bounds checked!)
data_t& operator[](index_t index) override;
/// return the index-th element of the data vector as read-only (not bound checked!)
const data_t& operator[](index_t index) const override;
/// return the dot product of the data vector with vector v
data_t dot(const DataHandler<data_t>& v) const override;
/// return the squared l2 norm of the data vector (dot product with itself)
GetFloatingPointType_t<data_t> squaredL2Norm() const override;
/// return the l1 norm of the data vector (sum of absolute values)
GetFloatingPointType_t<data_t> l1Norm() const override;
/// return the linf norm of the data vector (maximum of absolute values)
GetFloatingPointType_t<data_t> lInfNorm() const override;
/// return the sum of all elements of the data vector
data_t sum() const override;
/// copy assign another DataHandlerGPU
DataHandlerGPU<data_t>& operator=(const DataHandlerGPU<data_t>& v);
/// move assign another DataHandlerGPU
/// NOTE(review): unlike the move constructor this is not declared noexcept -- confirm intent
DataHandlerGPU<data_t>& operator=(DataHandlerGPU<data_t>&& v);
/// lift copy and move assignment operators from base class
using DataHandler<data_t>::operator=;
/// compute in-place element-wise addition of another vector v
DataHandler<data_t>& operator+=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise subtraction of another vector v
DataHandler<data_t>& operator-=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise multiplication by another vector v
DataHandler<data_t>& operator*=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise division by another vector v
DataHandler<data_t>& operator/=(const DataHandler<data_t>& v) override;
/// compute in-place addition of a scalar
DataHandler<data_t>& operator+=(data_t scalar) override;
/// compute in-place subtraction of a scalar
DataHandler<data_t>& operator-=(data_t scalar) override;
/// compute in-place multiplication by a scalar
DataHandler<data_t>& operator*=(data_t scalar) override;
/// compute in-place division by a scalar
DataHandler<data_t>& operator/=(data_t scalar) override;
/// assign a scalar to all elements of the data vector
DataHandler<data_t>& operator=(data_t scalar) override;
/// return a reference to the sequential block starting at startIndex and containing
/// numberOfElements elements
std::unique_ptr<DataHandler<data_t>> getBlock(index_t startIndex,
index_t numberOfElements) override;
/// return a const reference to the sequential block starting at startIndex and containing
/// numberOfElements elements
std::unique_ptr<const DataHandler<data_t>>
getBlock(index_t startIndex, index_t numberOfElements) const override;
protected:
/// the vector storing the data (shared between handlers until detach() is called)
std::shared_ptr<quickvec::Vector<data_t>> _data;
/// list of DataHandlerMaps referring to blocks of this
std::list<DataHandlerMapGPU<data_t>*> _associatedMaps;
/// implement the polymorphic clone operation
DataHandlerGPU<data_t>* cloneImpl() const override;
/// implement the polymorphic comparison operation
bool isEqual(const DataHandler<data_t>& other) const override;
/// copy the data stored in other
void assign(const DataHandler<data_t>& other) override;
/// move the data stored in other if other is of the same type, otherwise copy the data
void assign(DataHandler<data_t>&& other) override;
/// return non-const version of data (a quickvec::Vector returned by value)
quickvec::Vector<data_t> accessData();
/// return const version of data (a quickvec::Vector returned by value)
quickvec::Vector<data_t> accessData() const;
private:
/// creates the deep copy for the copy-on-write mechanism
void detach();
/// same as detach() but leaving an uninitialized block of numberOfElements elements
/// starting at index startIndex
void detachWithUninitializedBlock(index_t startIndex, index_t numberOfElements);
/// change the vector being handled
void attach(const std::shared_ptr<quickvec::Vector<data_t>>& data);
/// change the vector being handled (rvalue version)
void attach(std::shared_ptr<quickvec::Vector<data_t>>&& data);
};
} // namespace elsa
......@@ -45,6 +45,13 @@ namespace elsa
/// declare DataHandlerCPU as friend, allows the use of Eigen for improved performance
friend class DataHandlerCPU<data_t>;
/// friend constexpr function to implement expression templates
template <bool GPU, class Operand, std::enable_if_t<isDataContainer<Operand>, int>>
friend constexpr auto evaluateOrReturn(Operand const& operand);
/// for enabling accessData()
friend DataContainer<data_t>;
protected:
/// convenience typedef for the Eigen::Matrix data vector
using DataVector_t = Eigen::Matrix<data_t, Eigen::Dynamic, 1>;
......@@ -152,10 +159,10 @@ namespace elsa
void assign(DataHandler<data_t>&& other) override;
/// return non-const version of the data
DataMap_t accessData() override;
DataMap_t accessData();
/// return const version of the data
DataMap_t accessData() const override;
DataMap_t accessData() const;
private:
/**
......
This diff is collapsed.
#pragma once
#include "elsaDefines.h"
#include "DataHandler.h"
#include "Quickvec.h"
#include <Eigen/Core>
#include <list>
namespace elsa
{
/// forward declaration, allows mutual friending
template <typename data_t>
class DataHandlerGPU;
/**
* \brief Class referencing a vector stored in GPU main memory, or a part thereof
*
* \tparam data_t data type of vector
*
* \author David Frank - main code
* \author Tobias Lasser - modularization, fixes
* \author Nikola Dinev - integration with the copy-on-write mechanism
* \author Jens Petit - adaption of CPU version for GPU
*
* This class does not own or manage its own memory. It is bound to a DataHandlerGPU (the data
* owner) at its creation, and serves as a reference to a sequential block of memory owned by
* the DataHandlerGPU. As such, changes to the Map will affect the DataHandlerGPU and vice
* versa.
*
* Maps do not support move assignment, and remain bound to the original data owner until
* destructed.
* NOTE(review): a defaulted move assignment operator is nevertheless declared below -- confirm
* which of the two is intended.
*
* Maps provide only limited support for copy-on-write. Unless the Map is referencing the
* entirety of the vector managed by the data owner, assigning to the Map or cloning will always
* trigger a deep copy.
*
* Cloning a Map produces a new DataHandlerGPU, managing a new chunk of memory. The contents of
* the memory are equivalent to the contents of the block referenced by the Map, but the two are
* not associated.
*/
template <typename data_t = real_t>
class DataHandlerMapGPU : public DataHandler<data_t>
{
/// declare DataHandlerGPU as friend, so that the data owner can create and update its Maps
friend class DataHandlerGPU<data_t>;
/// friend constexpr function to implement expression templates (needs accessData())
template <bool GPU, class Operand, std::enable_if_t<isDataContainer<Operand>, int>>
friend constexpr auto evaluateOrReturn(Operand const& operand);
/// for enabling accessData()
friend DataContainer<data_t>;
public:
/// copy constructor
DataHandlerMapGPU(const DataHandlerMapGPU<data_t>& other);
/// default move constructor
DataHandlerMapGPU(DataHandlerMapGPU<data_t>&& other) = default;
/// destructor (declared here, defined out of line)
~DataHandlerMapGPU() override;
/// return the size of the vector
index_t getSize() const override;
/// return the index-th element of the data vector (not bounds checked!)
data_t& operator[](index_t index) override;
/// return the index-th element of the data vector as read-only (not bound checked!)
const data_t& operator[](index_t index) const override;
/// return the dot product of the data vector with vector v
data_t dot(const DataHandler<data_t>& v) const override;
/// return the squared l2 norm of the data vector (dot product with itself)
GetFloatingPointType_t<data_t> squaredL2Norm() const override;
/// return the l1 norm of the data vector (sum of absolute values)
GetFloatingPointType_t<data_t> l1Norm() const override;
/// return the linf norm of the data vector (maximum of absolute values)
GetFloatingPointType_t<data_t> lInfNorm() const override;
/// return the sum of all elements of the data vector
data_t sum() const override;
/// compute in-place element-wise addition of another vector v
DataHandler<data_t>& operator+=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise subtraction of another vector v
DataHandler<data_t>& operator-=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise multiplication by another vector v
DataHandler<data_t>& operator*=(const DataHandler<data_t>& v) override;
/// compute in-place element-wise division by another vector v
DataHandler<data_t>& operator/=(const DataHandler<data_t>& v) override;
/// copy assign another DataHandlerMapGPU to this, other types handled in assign()
DataHandlerMapGPU<data_t>& operator=(const DataHandlerMapGPU<data_t>& v);
/// defaulted move assignment
/// NOTE(review): the class description says Maps do not support move assignment -- confirm
DataHandlerMapGPU<data_t>& operator=(DataHandlerMapGPU<data_t>&&) = default;
/// lift copy and move assignment operators from base class
using DataHandler<data_t>::operator=;
/// compute in-place addition of a scalar
DataHandler<data_t>& operator+=(data_t scalar) override;
/// compute in-place subtraction of a scalar
DataHandler<data_t>& operator-=(data_t scalar) override;
/// compute in-place multiplication by a scalar
DataHandler<data_t>& operator*=(data_t scalar) override;
/// compute in-place division by a scalar
DataHandler<data_t>& operator/=(data_t scalar) override;
/// assign a scalar to all elements of the data vector
DataHandler<data_t>& operator=(data_t scalar) override;
/// return a reference to the sequential block starting at startIndex and containing
/// numberOfElements elements
std::unique_ptr<DataHandler<data_t>> getBlock(index_t startIndex,
index_t numberOfElements) override;
/// return a const reference to the sequential block starting at startIndex and containing
/// numberOfElements elements
std::unique_ptr<const DataHandler<data_t>>
getBlock(index_t startIndex, index_t numberOfElements) const override;
protected:
/// vector mapping of the data
quickvec::Vector<data_t> _map;
/// pointer to the data-owning handler
DataHandlerGPU<data_t>* _dataOwner;
/// handle to this in the list of Maps associated with the data-owning handler
typename std::list<DataHandlerMapGPU<data_t>*>::iterator _handle;
/// implement the polymorphic clone operation (the clone is a DataHandlerGPU, not a Map)
DataHandlerGPU<data_t>* cloneImpl() const override;
/// implement the polymorphic comparison operation
bool isEqual(const DataHandler<data_t>& other) const override;
/// assignment from an arbitrary handler (lvalue overload)
/// NOTE(review): presumably copies the data of other into the mapped block -- confirm
void assign(const DataHandler<data_t>& other) override;
/// assignment from an arbitrary handler (rvalue overload)
/// NOTE(review): whether this ever moves instead of copying is not visible here -- confirm
void assign(DataHandler<data_t>&& other) override;
/// return non-const version of the data (a quickvec::Vector returned by value)
quickvec::Vector<data_t> accessData();
/// return const version of the data (a quickvec::Vector returned by value)
quickvec::Vector<data_t> accessData() const;
private:
/**
* \brief Construct a DataHandlerMapGPU referencing a sequential block of data owned by
* DataHandlerGPU
*
* Private: together with the friend declaration above, only DataHandlerGPU can create Maps.
*
* \param[in] dataOwner pointer to the DataHandlerGPU owning the data vector
* \param[in] data pointer to start of segment
* \param[in] n number of elements in block
*/
DataHandlerMapGPU(DataHandlerGPU<data_t>* dataOwner, data_t* data, index_t n);
};
} // namespace elsa
......@@ -6,28 +6,56 @@
#include <optional>
#include "DataDescriptor.h"
#include "ExpressionPredicates.h"
#include "DataHandlerCPU.h"
#include "DataHandlerMapCPU.h"
#ifdef ELSA_CUDA_VECTOR
#include "DataHandlerGPU.h"
#endif
namespace elsa
{
/// Compile time switch to select to recursively evaluate for expression type
template <class Operand, std::enable_if_t<isExpression<Operand>, int> = 0>
template <bool GPU, class Operand, std::enable_if_t<isExpression<Operand>, int> = 0>
constexpr auto evaluateOrReturn(Operand const& operand)
{
return operand.eval();
return operand.template eval<GPU>();
}
/// Compile time switch to return-by-value of arithmetic types
template <class Operand, std::enable_if_t<isArithmetic<Operand>, int> = 0>
template <bool GPU, class Operand, std::enable_if_t<isArithmetic<Operand>, int> = 0>
constexpr auto evaluateOrReturn(Operand const operand)
{
return operand;
}
/// Compile time switch to return data in container
template <class Operand, std::enable_if_t<isDataContainer<Operand>, int> = 0>
template <bool GPU, class Operand, std::enable_if_t<isDataContainer<Operand>, int> = 0>
constexpr auto evaluateOrReturn(Operand const& operand)
{
return operand._dataHandler->accessData();
using data_t = typename Operand::value_type;
if constexpr (GPU) {
#ifdef ELSA_CUDA_VECTOR
if (auto handler = dynamic_cast<DataHandlerGPU<data_t>*>(operand._dataHandler.get())) {
return handler->accessData();
} else if (auto handler =
dynamic_cast<DataHandlerMapGPU<data_t>*>(operand._dataHandler.get())) {
return handler->accessData();
} else {
throw std::logic_error("Unknown handler type");
}
#endif
} else {
if (auto handler = dynamic_cast<DataHandlerCPU<data_t>*>(operand._dataHandler.get())) {
return handler->accessData();
} else if (auto handler =
dynamic_cast<DataHandlerMapCPU<data_t>*>(operand._dataHandler.get())) {
return handler->accessData();
} else {
throw std::logic_error("Unknown handler type");
}
}
}
/// Type trait which decides if const lvalue reference or not
......@@ -65,13 +93,19 @@ namespace elsa
}
/// Evaluates the expression
template <bool GPU = false>
auto eval() const
{
// generic lambda for evaluating tree, we need this to get a pack again out of the tuple
auto const callCallable = [this](Operands const&... args) {
// here evaluateOrReturn is called on each Operand within args as the unpacking
// takes place after the fcn call
return _callable(evaluateOrReturn(args)...);
// selects the right callable from the Callables struct with multiple lambdas
if constexpr (GPU) {
return _callable(evaluateOrReturn<GPU>(args)..., GPU);
} else {
return _callable(evaluateOrReturn<GPU>(args)...);
}
};
return std::apply(callCallable, _args);
}
......
......@@ -5,24 +5,6 @@
namespace elsa
{
/// Strip the reference and then the top-level const/volatile qualifiers from T
// TODO: Replace with std::remove_cvref_t once C++20 is available
template <typename T>
struct RemoveCvRef {
    typedef typename std::remove_cv<typename std::remove_reference<T>::type>::type type;
};
/// Shorthand alias exposing RemoveCvRef<T>::type
template <class T>
using RemoveCvRef_t = typename RemoveCvRef<T>::type;
/// Predicate to check if of complex type
template <typename T>
constexpr bool isComplex =
std::is_same_v<
RemoveCvRef_t<T>,
std::complex<float>> || std::is_same_v<RemoveCvRef_t<T>, std::complex<double>>;
/// Arithmetic predicate that, in addition to the built-in arithmetic types, also accepts the
/// complex types recognised by isComplex
template <typename T>
constexpr bool isArithmetic = std::is_arithmetic<T>::value || isComplex<T>;
......
......@@ -3,6 +3,7 @@
#include <complex>
#include <cstddef>
#include <Eigen/Core>
#include <type_traits>