Commit 3a8b37b2 authored by AndiBraimllari's avatar AndiBraimllari
Browse files

Merge branch 'master' into metrics

# Conflicts:
#	elsa/core/CMakeLists.txt
#	elsa/core/Utilities/Statistics.hpp
#	elsa/core/tests/CMakeLists.txt
parents 61586aff 65e71040
Pipeline #866807 failed with stages
in 14 minutes and 12 seconds
......@@ -202,7 +202,7 @@ with section("markup"):
with section("lint"):
# a list of lint codes to disable
disabled_codes = ["C0103", "C0111", "C0304", "C0306", "E1120", "R0912", "C0113", "C0301", "C0303", "C0305", "C0307", "W0105"]
disabled_codes = ["C0103", "C0111", "C0304", "C0306", "E1120", "R0912", "R0915", "C0113", "C0301", "C0303", "C0305", "C0307", "W0105"]
# regular expression pattern describing valid function names
function_pattern = '[0-9a-z_]+'
......
......@@ -16,7 +16,7 @@ variables:
GCC_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/gcc-pybinds:10"
CLANG9_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/clang-pybinds:9"
CLANG10_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/clang-pybinds:10"
CUDA_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/cuda:10.2"
CUDA_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/cuda:11.2"
UBUNTU_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/ubuntu:20.04"
GPU_ONLY_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/cuda-clang:10.0"
SANITIZER_IMAGE: "${DOCKER_REGISTRY_URL}/elsa/gcc:10"
......@@ -84,11 +84,6 @@ cache:
- install-elsa/include/elsa/**/*.cuh
- install-elsa/lib/cmake/elsa/**/*.cmake
- install-elsa/lib/libelsa*.a
# Add quickvec install to artifact
- install-elsa/include/quickvec/**/*.h
- install-elsa/include/quickvec/**/*.hpp
- install-elsa/lib/cmake/quickvec/*.cmake
- install-elsa/lib/libquickvec.a
# Add doctest install to artifact
- install-elsa/include/doctest/**/*.h
- install-elsa/lib/cmake/doctest/**/*.cmake
......@@ -264,20 +259,10 @@ build-clang10:
build-cuda:
<<: *build_job_artifact
image: $CUDA_IMAGE
script: *buildjob_normal
tags:
- linux
- elsa
- gcc
- cuda
build-gpu-only:
<<: *build_job_artifact
image: $GPU_ONLY_IMAGE
script:
- mkdir -p build
- cd build
- cmake .. -GNinja -DELSA_BENCHMARKS=ON -DELSA_CUDA_VECTOR=ON -DCMAKE_INSTALL_PREFIX="../install-elsa" -DCMAKE_CXX_FLAGS="-stdlib=libc++" -DCMAKE_EXE_LINKER_FLAGS="-lc++abi"
- cmake .. -GNinja -DELSA_CUDA_VECTOR=ON -DELSA_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX="../install-elsa"
- ninja
- ninja build-tests
- ninja build-benchmarks
......@@ -289,6 +274,25 @@ build-gpu-only:
- gcc
- cuda
# TODO: CUDA compilation with clang no longer supported, change to nvcc build with clang as host compiler later
# build-gpu-only:
# <<: *build_job_artifact
# image: $GPU_ONLY_IMAGE
# script:
# - mkdir -p build
# - cd build
# - cmake .. -GNinja -DELSA_BENCHMARKS=ON -DELSA_CUDA_VECTOR=ON -DCMAKE_INSTALL_PREFIX="../install-elsa" -DCMAKE_CXX_FLAGS="-stdlib=libc++" -DCMAKE_EXE_LINKER_FLAGS="-lc++abi"
# - ninja
# - ninja build-tests
# - ninja build-benchmarks
# - ninja build-examples
# - ninja install
# tags:
# - linux
# - elsa
# - gcc
# - cuda
### test jobs ###
......@@ -347,17 +351,17 @@ test-cuda:
- gcc
- cuda
test-gpu-only:
<<: *test_job_artifact
stage: test
image: $GPU_ONLY_IMAGE
dependencies:
- build-gpu-only
tags:
- linux
- elsa
- gcc
- cuda
# test-gpu-only:
# <<: *test_job_artifact
# stage: test
# image: $GPU_ONLY_IMAGE
# dependencies:
# - build-gpu-only
# tags:
# - linux
# - elsa
# - gcc
# - cuda
install-gcc:
<<: *install_job
......@@ -401,24 +405,11 @@ install-cuda:
image: $CUDA_IMAGE
dependencies:
- build-cuda
script: *installjob_script
tags:
- linux
- elsa
- gcc
- cuda
install-gpu-only:
<<: *install_job
stage: test
image: $GPU_ONLY_IMAGE
dependencies:
- build-gpu-only
script:
- git clone https://gitlab.lrz.de/IP/elsa_testing.git /tmp/elsa_testing
- cd /tmp/elsa_testing
- mkdir -p build/ && cd build/
- cmake .. -GNinja -DELSA_INSTALL_DIR="$CI_PROJECT_DIR/install-elsa" -Dquickvec_DIR="$CI_PROJECT_DIR/install-elsa/lib/cmake/quickvec" -DCMAKE_CXX_FLAGS="-stdlib=libc++" -DCMAKE_EXE_LINKER_FLAGS="-lc++abi" -DELSA_CUDA_VECTOR=ON
- cmake .. -GNinja -DELSA_INSTALL_DIR="$CI_PROJECT_DIR/install-elsa" -DELSA_CUDA_VECTOR=ON
- ninja
tags:
- linux
......@@ -426,16 +417,34 @@ install-gpu-only:
- gcc
- cuda
# install-gpu-only:
# <<: *install_job
# stage: test
# image: $GPU_ONLY_IMAGE
# dependencies:
# - build-gpu-only
# script:
# - git clone https://gitlab.lrz.de/IP/elsa_testing.git /tmp/elsa_testing
# - cd /tmp/elsa_testing
# - mkdir -p build/ && cd build/
# - cmake .. -GNinja -DELSA_INSTALL_DIR="$CI_PROJECT_DIR/install-elsa" -Dquickvec_DIR="$CI_PROJECT_DIR/install-elsa/lib/cmake/quickvec" -DCMAKE_CXX_FLAGS="-stdlib=libc++" -DCMAKE_EXE_LINKER_FLAGS="-lc++abi" -DELSA_CUDA_VECTOR=ON
# - ninja
# tags:
# - linux
# - elsa
# - gcc
# - cuda
### sanitizers ###
cuda-memcheck:
<<: *run_on_merge_requests
stage: sanitizer
image: $GPU_ONLY_IMAGE
image: $CUDA_IMAGE
script:
./tools/ci_scripts/cuda-memcheck.sh
dependencies:
- test-gpu-only
- test-cuda
tags:
- linux
- elsa
......
repos:
- repo: local
hooks:
- id: clang-format-ci
name: Check if C++-like files are formatted such that CI will be happy
entry: bash tools/ci_scripts/clang-format-test.sh
stages: [ commit ]
language: system
- id: doxygen-tag-lint-ci
name: Check that Doxygen tags use the @ instead of \, such that CI will be happy
entry: bash tools/ci_scripts/check-comment-style.sh
stages: [ commit ]
language: system
- id: cmake-lint-ci
name: Check linting for CMake files, such that CI will be happy
entry: bash tools/ci_scripts/cmake-lint-test.sh
stages: [ commit ]
language: system
- id: lint-commit
name: Lint commit message to ensure commit messages are nicely formatted
entry: bash tools/ci_scripts/commit-msg-test.sh
stages: [ commit-msg ]
language: system
- id: clang-format-ci
name: Check if C++-like files are formatted such that CI will be happy
entry: bash tools/ci_scripts/clang-format-test.sh
stages: [ commit ]
language: system
- id: doxygen-tag-lint-ci
name: Check that Doxygen tags use the @ instead of \, such that CI will be happy
entry: bash tools/ci_scripts/check-comment-style.sh
stages: [ commit ]
language: system
- id: cmake-lint-ci
name: Check linting for CMake files, such that CI will be happy
entry: bash tools/ci_scripts/cmake-lint-test.sh
stages: [ commit ]
language: system
- id: lint-commit
name: Lint commit message to ensure commit messages are nicely formatted
entry: bash tools/ci_scripts/commit-msg-test.sh
stages: [ commit-msg ]
language: system
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0 # Use the ref you want to point at
hooks:
- id: check-added-large-files
args: [--maxkb=500]
- id: check-merge-conflict
- id: no-commit-to-branch
args: [--branch, master]
- id: trailing-whitespace
......@@ -38,10 +38,12 @@ set(ELSA_SANITIZER
"Compile with a sanitizer. Options are: Address, Memory, MemoryWithOrigins, Undefined, Thread, Leak, 'Address;Undefined'"
)
set(ELSA_CUDA_ARCH_TYPE "auto" CACHE STRING "Set CUDA architectures")
set(ELSA_CUDA_ARCH_TYPE "auto" CACHE STRING "Set CUDA architectures")
option(SYSTEM_EIGEN "Build elsa using the system eigen installation" OFF)
option(SYSTEM_SPDLOG "Build elsa using the system spdlog installation" OFF)
option(SYSTEM_DOCTEST "Build elsa using the system doctest installation" OFF)
option(SYSTEM_CATCH "Build elsa using the system catch installation" OFF)
# ------------ general setup -----------
# ------------
......@@ -56,10 +58,7 @@ include(SetDefaultBuildType)
# Forbid extensions (such as gnu++) for this project
set(CMAKE_CXX_EXTENSIONS OFF)
# Include CPM dependency manager
include(CPM)
# export compile_commands.json for language servers such as ccls
# export compile_commands.json for lsp language servers (e.g. clangd)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# By default set all output to lib or bin directories
......@@ -76,6 +75,12 @@ set(INSTALL_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/elsa)
# ------------ dependencies ------------
# ------------
if (NOT (SYSTEM_EIGEN AND SYSTEM_SPDLOG AND SYSTEM_DOCTEST AND SYSTEM_CATCH))
# Include CPM dependency manager for bundling dependencies
include(CPM)
endif()
# only add the dependencies if elsa is stand-alone
if(ELSA_MASTER_PROJECT)
if(SYSTEM_EIGEN)
......@@ -164,46 +169,60 @@ if(ELSA_MASTER_PROJECT)
find_package(Npp)
endif(ELSA_BUILD_ML_CUDNN)
# Adding quickvec
if(ELSA_CUDA_VECTOR)
message(STATUS "CUDA support requested...")
include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
CPMAddPackage(
NAME Quickvec
GIT_REPOSITORY https://gitlab.lrz.de/IP/quickvec.git
GIT_TAG 960ac64 # current commit on master
)
if(Quickvec_ADDED)
message(STATUS "CUDA support enabled")
message(STATUS "Enabling DataHandlerGPU")
message(STATUS "The default container type will be on the GPU")
set(ELSA_BUILD_WITH_QUICKVEC TRUE)
add_definitions(-DELSA_CUDA_VECTOR)
else()
message(FATAL_ERROR "CUDA_VECTOR library quickvec could not be added.")
endif()
else()
message(FATAL_ERROR "CUDA_VECTOR language support not found")
endif()
endif()
if (${ELSA_BUILD_PYTHON_BINDINGS})
CPMAddPackage(
NAME pybind11
GITHUB_REPOSITORY pybind/pybind11
VERSION 2.6.0
VERSION 2.8.0
)
endif()
else()
message(STATUS " No dependencies added for elsa, as it is not stand-alone")
endif()
# Enable CUDA support if requested.
#
# The check runs as soon as one of ELSA_CUDA_VECTOR, ELSA_BUILD_CUDA_PROJECTORS or
# ELSA_BUILD_ML_CUDNN is set. On success the CUDA language is enabled and
# CMAKE_CUDA_ARCHITECTURES is extended by the entries of TARGET_GPUS (with the dot removed,
# e.g. "7.5" -> "75"), which is expected to be filled by set_cuda_arch_type().
if(ELSA_CUDA_VECTOR OR ELSA_BUILD_CUDA_PROJECTORS OR ELSA_BUILD_ML_CUDNN)
    message(STATUS "CUDA support requested...")

    include(CheckLanguage)
    check_language(CUDA)
    find_package(CUDA 11.0)

    # Accept every toolkit version that find_package(CUDA 11.0) accepts. A plain
    # "VERSION_GREATER 11" evaluates to false for CUDA 11.0 itself and would reject it.
    if(CMAKE_CUDA_COMPILER AND (CUDA_VERSION VERSION_GREATER_EQUAL 11.0))
        enable_language(CUDA)
        message(STATUS "CUDA support enabled")

        # set target CUDA architectures
        include(ComputeCapabilityCUDA)
        set_cuda_arch_type(${ELSA_CUDA_ARCH_TYPE})
        foreach(TARGET_GPU ${TARGET_GPUS})
            string(REPLACE "." "" TARGET_GPU ${TARGET_GPU})
            list(APPEND CMAKE_CUDA_ARCHITECTURES ${TARGET_GPU})
        endforeach()

        if(ELSA_CUDA_VECTOR)
            message(STATUS "Enabling DataHandlerGPU")
            message(STATUS "The default container type will be on the GPU")
            set(ELSA_BUILD_WITH_QUICKVEC TRUE)
            add_definitions(-DELSA_CUDA_VECTOR)
        endif()
    elseif(ELSA_CUDA_VECTOR)
        # The GPU data handler was requested explicitly, so a missing or too old toolkit is
        # fatal. FATAL_ERROR stops processing immediately; the CPU fallback statements that
        # used to follow this call could never run and have been removed.
        message(FATAL_ERROR "CUDA compiler not found!")
    endif()
endif()
# set where to install the exports/targets
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
set(INSTALL_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/elsa)
# include the install_elsa_module function
include(InstallElsaModule)
......@@ -225,14 +244,19 @@ endif(NOT ELSA_MASTER_PROJECT)
if(ELSA_TESTING OR ELSA_BENCHMARKS)
enable_testing()
CPMAddPackage(
NAME doctest
GITHUB_REPOSITORY onqtam/doctest
GIT_TAG 2.4.4
)
if(SYSTEM_DOCTEST)
message(STATUS "Using system-wide doctest library...")
find_package(doctest REQUIRED)
else()
CPMAddPackage(
NAME doctest
GITHUB_REPOSITORY onqtam/doctest
GIT_TAG 2.4.7
)
# add the CMake modules for automatic test discovery
set(CMAKE_MODULE_PATH "${doctest_SOURCE_DIR}/scripts/cmake" ${CMAKE_MODULE_PATH})
# add the CMake modules for automatic test discovery
set(CMAKE_MODULE_PATH "${doctest_SOURCE_DIR}/scripts/cmake" ${CMAKE_MODULE_PATH})
endif()
if(ELSA_TESTING)
message(STATUS "elsa testing is enabled")
......@@ -256,6 +280,7 @@ if(ELSA_TESTING OR ELSA_BENCHMARKS)
append_coverage_compiler_flags()
set(COVERAGE_LCOV_EXCLUDES
'${CPM_SOURCE_CACHE}/*'
'*/tests/*'
'${PROJECT_BINARY_DIR}/_deps/*'
'/usr/*')
setup_target_for_coverage_lcov(NAME test_coverage EXECUTABLE ctest)
......
......@@ -45,9 +45,9 @@ as an example for our usage.
It is recommended to set `CPM_SOURCE_CACHE` (see [here](https://github.com/TheLartians/CPM.cmake#cpm_source_cache) for
more info). It's an environment variable, that will save all dependencies outside of the build directory and -
for all projects using CPM - only one version of the dependency. This way no re-downloading is necessary.
for all projects using CPM - only one version of the dependency. This way no re-downloading is necessary.
Set it in your e.g. `.bashrc`.
## Testing
You can run the elsa unit tests by running `ctest` in the build folder. To specify which tests run,
filter with `ctest -R regular_expression`.
......@@ -57,14 +57,14 @@ Development](https://github.com/onqtam/doctest/blob/master/doc/markdown/testcase
follow this style when adding new tests. However, instead of using `SCENARIO`, use `TEST_CASE` with the name of the
class under test at the beginning of the test name. Also be sure to add the tests to the test suite associated
to the module of the test.
We're currently relying on [doctest](https://github.com/onqtam/doctest/) as our testing framework, when
using assertion macros, please try to use the
We're currently relying on [doctest](https://github.com/onqtam/doctest/) as our testing framework, when
using assertion macros, please try to use the
[binary and unary asserts](https://github.com/onqtam/doctest/blob/master/doc/markdown/assertions.md#binary-and-unary-asserts)
as much as possible.
as much as possible.
## Benchmarking
You can use the catch testing framework to do [benchmarking
](https://github.com/catchorg/Catch2/blob/master/docs/benchmarks.md). If so, add your benchmarking
case following this template
......@@ -92,8 +92,8 @@ currently use version 10.0.0, different versions might produce errors.
We use `clang-tidy` with the enabled checks specified in [the configuration file](.clang-tidy). Note
that currently all `readability-*` checks have to pass, otherwise the CI will fail. We encourage
developers to check their code with `clang-tidy` and remove all warnings if applicable.
#### CMake
#### CMake
We use [cmakelang](https://cmake-format.readthedocs.io/en/latest/index.html) to enforce
a certain style guide and reduce the chance of errors in our CMake code. Please check the guide to install it.
......@@ -118,9 +118,9 @@ local results to [the latest master coverage results](https://ciip.in.tum.de/els
## pre-commit
There is also a basic `.pre-commit-config.yaml` file to install pre-commit hooks using
There is also a basic `.pre-commit-config.yaml` file to install pre-commit hooks using
[pre-commit](https://pre-commit.com/). You are highly encouraged to install the pre-commits
with `pre-commit install` such that they are run before each commit.
with `pre-commit install -t pre-commit -t commit-msg` such that they are run before each commit.
None of the commit hooks will change anything in your commit; they merely check and error if
something is wrong.
......@@ -129,7 +129,7 @@ something is wrong.
The [elsa documentation](https://ciip.in.tum.de/elsadocs/) is automatically built and deployed through the CI for each commit to master.
To build it locally the following packages are required: `sphinx doxygen` which should be available in
most major linux distributions or via pip. Additionally, the following sphinx extensions need to be installed via pip:
`sphinx-rtd-theme m2r2 breathe`.
`sphinx-rtd-theme sphinxcontrib-katex m2r2 breathe`.
Then simply build the documentation using ninja
```
mkdir -p build
......
# Create binaries for benchmarks in bin/benchmarks
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/benchmarks)
# add Catch2 only here to still use it in benchmarks
CPMAddPackage(
NAME Catch2
GITHUB_REPOSITORY catchorg/Catch2
VERSION 2.10.1 # <-- Current, update to: 2.13.1
)
# add the CMake modules for automatic test discovery
set(CMAKE_MODULE_PATH "${Catch2_SOURCE_DIR}/contrib" ${CMAKE_MODULE_PATH})
if (SYSTEM_CATCH)
message(STATUS "Using system-wide catch library...")
find_package(Catch2 REQUIRED 2.10)
else()
# add Catch2 only here to still use it in benchmarks
CPMAddPackage(
NAME Catch2
GITHUB_REPOSITORY catchorg/Catch2
VERSION 2.10.1 # <-- Current, update to: 2.13.1
)
# add the CMake modules for automatic test discovery
set(CMAKE_MODULE_PATH "${Catch2_SOURCE_DIR}/contrib" ${CMAKE_MODULE_PATH})
endif()
# enable ctest and Catch test discovery
include(CTest)
include(Catch)
......@@ -26,15 +31,17 @@ macro(ELSA_BENCHMARK NAME)
# add catch and the corresponding elsa library
target_link_libraries(bench_${NAME} PRIVATE Catch2::Catch2 elsa::all)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories(bench_${NAME}
PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
if(${ELSA_BUILD_WITH_QUICKVEC})
# build with nvcc when using quickvec
set_source_files_properties(bench_${NAME}.cpp bench_main PROPERTIES LANGUAGE CUDA)
target_compile_definitions(bench_${NAME} PUBLIC ELSA_ENABLE_CUDA_VECTOR)
target_link_libraries(bench_${NAME} PUBLIC "elsa_quickvec")
target_compile_features(bench_${NAME} PUBLIC cuda_std_17)
else()
# enable C++17
target_compile_features(bench_${NAME} PUBLIC cxx_std_17)
endif()
# enable C++17
target_compile_features(bench_${NAME} PUBLIC cxx_std_17)
# include helpers
target_include_directories(bench_${NAME} PRIVATE ${CMAKE_SOURCE_DIR}/elsa/test_routines/)
......@@ -47,6 +54,9 @@ ELSA_BENCHMARK(Projectors)
ELSA_BENCHMARK(Intersection)
ELSA_BENCHMARK(DataContainerConstruction)
ELSA_BENCHMARK(ExpressionTemplates)
if (ELSA_BUILD_WITH_QUICKVEC)
ELSA_BENCHMARK(Quickvec)
endif()
# Add a single executable for all benchmarks, as CTest removes a lot of the output
add_executable(bench_all EXCLUDE_FROM_ALL bench_main.cpp ${BENCHMARK_SOURCES})
......
#define CATCH_CONFIG_ENABLE_BENCHMARKING
#include "catch2/catch.hpp"
#include "Vector.cuh"
#include "Eigen/Dense"
#include <cublas_v2.h>
using namespace quickvec;
// Edge length for the general benchmarks; the vectors hold SIZE^3 floats.
// Should be at most 256 to avoid memory issues.
static constexpr size_t SIZE = 128;
// Edge length for the memory critical benchmarks (SIZE_2^3 floats), which only use two vectors.
static constexpr size_t SIZE_2 = 512;
/**
 * @brief Hand-written reference kernel computing `dc1 * a - dc1 / dc1 + b * dc1` element-wise.
 *
 * Every thread starts at its global thread index and advances by the total number of
 * launched threads, so all n elements are covered for any launch configuration.
 *
 * @param n number of elements to process
 * @param dc1 input array, read at indices [0, n)
 * @param a scalar factor of the first term
 * @param b scalar factor of the last term
 * @param result output array, written at indices [0, n)
 */
__global__ void computeDirect(size_t n, float* dc1, float a, float b, float* result)
{
    // Index arithmetic is done in size_t: a 32-bit counter wraps around for n > UINT_MAX,
    // in which case `i < n` would stay true forever.
    const size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

    for (size_t i = index; i < n; i += stride) {
        result[i] = dc1[i] * a - dc1[i] / dc1[i] + b * dc1[i];
    }
}
/**
 * @brief Hand-written reference kernel computing the element-wise product `dc1 * dc2`.
 *
 * Every thread starts at its global thread index and advances by the total number of
 * launched threads, so all n elements are covered for any launch configuration.
 *
 * @param n number of elements to process
 * @param dc1 first input array, read at indices [0, n)
 * @param dc2 second input array, read at indices [0, n)
 * @param result output array, written at indices [0, n)
 */
__global__ void computeDirect2(size_t n, float* dc1, float* dc2, float* result)
{
    // Index arithmetic is done in size_t: a 32-bit counter wraps around for n > UINT_MAX,
    // in which case `i < n` would stay true forever.
    const size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

    for (size_t i = index; i < n; i += stride) {
        result[i] = dc1[i] * dc2[i];
    }
}
/**
 * @brief Hand-written reference kernel computing `dc1 * a - dc1 / (1 + dc1)` element-wise.
 *
 * Every thread starts at its global thread index and advances by the total number of
 * launched threads, so all n elements are covered for any launch configuration.
 *
 * @param n number of elements to process
 * @param dc1 input array, read at indices [0, n)
 * @param a scalar factor of the first term
 * @param result output array, written at indices [0, n)
 */
__global__ void computeDirect3(size_t n, float* dc1, float a, float* result)
{
    // Index arithmetic is done in size_t: a 32-bit counter wraps around for n > UINT_MAX,
    // in which case `i < n` would stay true forever.
    const size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

    for (size_t i = index; i < n; i += stride) {
        result[i] = dc1[i] * a - dc1[i] / (1 + dc1[i]);
    }
}
/**
 * @brief Hand-written reference kernel computing `result = dc1 * a + result` element-wise.
 *
 * Every thread starts at its global thread index and advances by the total number of
 * launched threads, so all n elements are covered for any launch configuration.
 *
 * @param n number of elements to process
 * @param dc1 input array, read at indices [0, n)
 * @param a scalar factor applied to dc1
 * @param result array that is both read and overwritten at indices [0, n)
 */
__global__ void computeSaxpy(size_t n, float* dc1, float a, float* result)
{
    // Index arithmetic is done in size_t: a 32-bit counter wraps around for n > UINT_MAX,
    // in which case `i < n` would stay true forever.
    const size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

    for (size_t i = index; i < n; i += stride) {
        result[i] = dc1[i] * a + result[i];
    }
}
// Benchmarks the element-wise product of two vectors with SIZE^3 float entries in three ways:
// on the host with Eigen ("Eigen"), with the hand-written computeDirect2 kernel ("CUDA direct")
// and by evaluating the quickvec expression `dc * dc2` ("CUDA over ET").
TEST_CASE("CUDA expression benchmark using Eigen with n=" + std::to_string(SIZE) + "^3")
{
size_t size = SIZE * SIZE * SIZE;
// launch configuration for the direct kernel: 256 threads per block, with the number of
// blocks rounded up such that numBlocks * blockSize >= size
unsigned int blockSize = 256;
unsigned int numBlocks = static_cast<unsigned int>((size + blockSize - 1) / blockSize);
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec(size);
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec2(size);
Eigen::Matrix<float, Eigen::Dynamic, 1> resultVec(size);
// fill both inputs with pseudo-random values in [0, 100]; rand() is not seeded in this test case
for (size_t i = 0; i < size; ++i) {
randVec[static_cast<long>(i)] =
static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
randVec2[static_cast<long>(i)] =
static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
}
// NOTE(review): presumably the Vector constructor copies the Eigen data into GPU memory -
// confirm against quickvec
Vector dc(randVec);
Vector dc2(randVec2);
Vector result(resultVec);
// the expression object is created once here, outside of the measured BENCHMARK bodies
auto expr = dc * dc2;
BENCHMARK("Eigen") { resultVec = (randVec.array() * randVec2.array()).matrix(); };
BENCHMARK("CUDA direct")
{
computeDirect2<<<numBlocks, blockSize>>>(size, dc._data.get(), dc2._data.get(),
result._data.get());
// kernel launches return before the kernel has finished, so wait here to include the
// kernel run time in the measurement
cudaDeviceSynchronize();
};
BENCHMARK("CUDA over ET") { result.eval(expr); };
}
TEST_CASE("CUDA expression benchmark for memory critical with n=" + std::to_string(SIZE_2) + "^3")
{
size_t size = SIZE_2 * SIZE_2 * SIZE_2;
unsigned int blockSize = 256;
unsigned int numBlocks = static_cast<unsigned int>((size + blockSize - 1) / blockSize);
float a = 1.22f;
float b = 2.222f;
Eigen::Matrix<float, Eigen::Dynamic, 1> randVec(size);
Eigen::Matrix<float, Eigen::Dynamic, 1> resultVec(size);
for (size_t i = 0; i < size; ++i) {
randVec[static_cast<long>(i)] =
static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / 100.0));
}
Vector dc(randVec);
Vector result(resultVec);
auto expr = dc * a - dc / dc + dc * b;
auto expr2 = dc * dc;
auto expr3 = dc * a - dc<