Commit 2fce6808 authored by Nikola Dinev's avatar Nikola Dinev
Browse files

CUDA building no longer verbose; Adjusted code to suppress warnings

parent d880034a
Pipeline #151981 passed with stages
in 51 seconds
......@@ -68,7 +68,7 @@ endif()
#fmad can reduce accuracy, but only gives a negligible boost to speed
#use expt-relaxed-constexpr flag to suppress warnings caused by including the Eigen header
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --fmad=false --verbose --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --fmad=false --expt-relaxed-constexpr")
#set target GPU architectures
foreach(TARGET_GPU ${TARGET_GPUS})
......
......@@ -29,15 +29,23 @@ __device__ __forceinline__ void gesqmv(const int8_t* const __restrict__ matrix,
}
}
/// normalizes a vector of length 2 or 3 using device inbuilt functions
/// computes the reciprocal norm (1 / Euclidean length) of a vector of length 2 or 3 using device built-in functions; returns -1 for any other dim
template <typename real_t, uint32_t dim>
__device__ __forceinline__ void normalize(real_t* const __restrict__ vector)
__device__ __forceinline__ real_t rnorm(real_t* const __restrict__ vector)
{
real_t rn;
if(dim==3)
rn = rnorm3d(vector[0],vector[1],vector[2]);
return rnorm3d(vector[0],vector[1],vector[2]);
else if(dim==2)
rn = rhypot(vector[0],vector[1]);
return rhypot(vector[0],vector[1]);
else
return -1.0;
}
/// normalizes a vector of length 2 or 3 using device inbuilt norm
template <typename real_t, uint32_t dim>
__device__ __forceinline__ void normalize(real_t* const __restrict__ vector)
{
real_t rn = rnorm<real_t,dim>(vector);
#pragma unroll
for (int i=0;i<dim;i++) {
......@@ -310,11 +318,7 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_THR
const uint32_t idx = maxAbsIndex<real_t,dim>(rd);
const real_t rdMax = abs(rd[idx]);
real_t rn;
if(dim==3)
rn = rnorm3d(rd[0],rd[1],rd[2]);
else if(dim==2)
rn = rhypot(rd[0],rd[1]);
real_t rn = rnorm<real_t,dim>(rd);
real_t weight = rn/rdMax;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment