Commit 0ab07fec authored by Jens Petit's avatar Jens Petit Committed by Tobias Lasser

clang-format on cuda files

parent 432c010b
Pipeline #192517 passed with stages
in 7 minutes and 16 seconds
/**
* \file TraverseSiddonsCUDA.cuh
*
* \brief Provides interface definitions for the Siddon's CUDA projector. Allows for separable compilation of device and host code.
*
*
* \brief Provides interface definitions for the Siddon's CUDA projector. Allows for separable
* compilation of device and host code.
*
* \author Nikola Dinev (nikola.dinev@tum.de)
*/
#pragma once
......@@ -11,7 +12,8 @@
#include "stdint.h"
#include "elsaDefines.h"
namespace elsa {
namespace elsa
{
template <typename data_t = real_t, uint32_t dim = 3>
struct TraverseSiddonsCUDA {
......@@ -19,27 +21,23 @@ namespace elsa {
const static uint32_t MAX_THREADS_PER_BLOCK = 64;
/**
* Allows for the bounding box to be passed to the kernel by value.
* Kernel arguments are stored in constant memory, and should generally
* Kernel arguments are stored in constant memory, and should generally
* provide faster access to the variables than via global memory.
*/
struct BoundingBox
{
//min is always 0
*/
struct BoundingBox {
// min is always 0
uint32_t max[dim];
__device__ __forceinline__ const uint32_t &operator[](const uint32_t idx) const
{
return max[idx];
}
__device__ __forceinline__ uint32_t &operator[](const uint32_t idx)
__device__ __forceinline__ const uint32_t& operator[](const uint32_t idx) const
{
return max[idx];
}
__device__ __forceinline__ uint32_t& operator[](const uint32_t idx) { return max[idx]; }
};
/**
* \brief Forward projection using Siddon's method
*
*
* \param[in] blocks specifies the grid used for kernel execution
* \param[in] threads specifies the number of threads for each block
* \param[in] volume pointer to volume data
......@@ -52,31 +50,28 @@ namespace elsa {
* \param[in] projPitch pitch of inverse of projection matrices
* \param[in] boxMax specifies the size of the volume
* \param[in] stream handle to stream in which the kernel should be placed
*
* The variables blocks and threads should be picked based on the sinogram dimensions. To process all
* rays set blocks to (detectorSizeX, detectorSizeY, numAngles / threads), if numAngles is not a multiple of threads
* a second kernel call must be made to process the remaining rays with blocks = (detectorSizeX, detectorSizeY, 1)
* and threads = numAngles % threadsFirstCall. Sinogram, projection matrix, and ray origin pointers should be
* adjusted accordingly to point to the start of the (numAngles - numAngles % threadsFirstCall)-th element.
*
*
* The variables blocks and threads should be picked based on the sinogram dimensions. To
* process all rays set blocks to (detectorSizeX, detectorSizeY, numAngles / threads), if
* numAngles is not a multiple of threads a second kernel call must be made to process the
* remaining rays with blocks = (detectorSizeX, detectorSizeY, 1) and threads = numAngles %
* threadsFirstCall. Sinogram, projection matrix, and ray origin pointers should be adjusted
* accordingly to point to the start of the (numAngles - numAngles % threadsFirstCall)-th
* element.
*
* threads should ideally be a multiple of the warp size (32 for all current GPUs).
*/
static void traverseForward(const dim3 blocks,
const int threads,
int8_t* const __restrict__ volume,
const uint64_t volumePitch,
int8_t* const __restrict__ sinogram,
const uint64_t sinogramPitch,
const int8_t* const __restrict__ rayOrigins,
const uint32_t originPitch,
const int8_t* const __restrict__ projInv,
const uint32_t projPitch,
const BoundingBox& boxMax,
cudaStream_t stream = (cudaStream_t)0);
*/
static void
traverseForward(const dim3 blocks, const int threads, int8_t* const __restrict__ volume,
const uint64_t volumePitch, int8_t* const __restrict__ sinogram,
const uint64_t sinogramPitch,
const int8_t* const __restrict__ rayOrigins, const uint32_t originPitch,
const int8_t* const __restrict__ projInv, const uint32_t projPitch,
const BoundingBox& boxMax, cudaStream_t stream = (cudaStream_t) 0);
/**
* \brief Backward projection using Siddon's method
*
*
* \param[in] blocks specifies the grid used for kernel execution
* \param[in] threads specifies the number of threads for each block
* \param[out] volume pointer to output
......@@ -89,26 +84,23 @@ namespace elsa {
* \param[in] projPitch pitch of inverse of projection matrices
* \param[in] boxMax specifies the size of the volume
* \param[in] stream handle to stream in which the kernel should be placed
*
* The variables blocks and threads should be picked based on the sinogram dimensions. To process all
* rays set blocks to (detectorSizeX, detectorSizeY, numAngles / threads), if numAngles is not a multiple of threads
* a second kernel call must be made to process the remaining rays with blocks = (detectorSizeX, detectorSizeY, 1)
* and threads = numAngles % threadsFirstCall. Sinogram, projection matrix, and ray origin pointers should be
* adjusted accordingly to point to the start of the (numAngles - numAngles % threadsFirstCall)-th element.
*
*
* The variables blocks and threads should be picked based on the sinogram dimensions. To
* process all rays set blocks to (detectorSizeX, detectorSizeY, numAngles / threads), if
* numAngles is not a multiple of threads a second kernel call must be made to process the
* remaining rays with blocks = (detectorSizeX, detectorSizeY, 1) and threads = numAngles %
* threadsFirstCall. Sinogram, projection matrix, and ray origin pointers should be adjusted
* accordingly to point to the start of the (numAngles - numAngles % threadsFirstCall)-th
* element.
*
* threads should ideally be a multiple of the warp size (32 for all current GPUs).
*/
static void traverseAdjoint(const dim3 blocks,
const int threads,
int8_t* const __restrict__ volume,
const uint64_t volumePitch,
int8_t* const __restrict__ sinogram,
const uint64_t sinogramPitch,
const int8_t* const __restrict__ rayOrigins,
const uint32_t originPitch,
const int8_t* const __restrict__ projInv,
const uint32_t projPitch,
const BoundingBox& boxMax,
cudaStream_t stream = (cudaStream_t)0);
*/
static void
traverseAdjoint(const dim3 blocks, const int threads, int8_t* const __restrict__ volume,
const uint64_t volumePitch, int8_t* const __restrict__ sinogram,
const uint64_t sinogramPitch,
const int8_t* const __restrict__ rayOrigins, const uint32_t originPitch,
const int8_t* const __restrict__ projInv, const uint32_t projPitch,
const BoundingBox& boxMax, cudaStream_t stream = (cudaStream_t) 0);
};
}
\ No newline at end of file
} // namespace elsa
\ No newline at end of file
......@@ -14,7 +14,7 @@ clang-format --version
echo
# perform clang-format on all cpp-files
find elsa/ -name '*.h' -or -name '*.hpp' -or -name '*.cpp' | xargs clang-format -i -style=file $1
find elsa/ -name '*.h' -or -name '*.hpp' -or -name '*.cpp' -or -name '*.cu' -or -name '*.cuh' | xargs clang-format -i -style=file $1
# check if something was modified
notcorrectlist=`git status --porcelain | grep '^ M' | cut -c4-`
......@@ -28,7 +28,7 @@ else
git diff --stat $notcorrectlist
echo "Please run"
echo
echo "find elsa/ -name '*.h' -or -name '*.hpp' -or -name '*.cpp' | xargs clang-format -i -style=file $1"
echo "find elsa/ -name '*.h' -or -name '*.hpp' -or -name '*.cpp' -or -name '*.cu' -or -name '*.cuh' | xargs clang-format -i -style=file $1"
echo
echo "to solve the issue."
# cleanup changes in git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment