Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
IP
elsa
Commits
ac48bd43
Commit
ac48bd43
authored
Sep 05, 2019
by
Nikola Dinev
Browse files
JosephsMethodCUDA accuracy issues fixed
parent
dff81cdd
Pipeline
#151648
passed with stages
in 50 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
elsa/projectors_cuda/projector_kernels/TraverseJosephsCUDA.cu
View file @
ac48bd43
...
...
@@ -206,7 +206,7 @@ __device__ __forceinline__ data_t tex(cudaTextureObject_t texObj, const elsa::re
return
tex2D
<
data_t
>
(
texObj
,
p
[
0
],
p
[
1
]);
}
__device__
__forceinline__
double
tex2Dd
(
cudaTextureObject_t
texObj
,
const
elsa
::
real_t
x
,
const
elsa
::
real_
t
y
)
{
__device__
__forceinline__
double
tex2Dd
(
cudaTextureObject_t
texObj
,
const
float
x
,
const
floa
t
y
)
{
uint2
rt
=
tex2D
<
uint2
>
(
texObj
,
x
,
y
);
return
__hiloint2double
(
rt
.
y
,
rt
.
x
);
}
...
...
@@ -231,7 +231,7 @@ __device__ __forceinline__ double tex<double,2>(cudaTextureObject_t texObj, cons
return
(
1
-
a
)
*
(
1
-
b
)
*
T
[
0
][
0
]
+
a
*
(
1
-
b
)
*
T
[
1
][
0
]
+
(
1
-
a
)
*
b
*
T
[
0
][
1
]
+
a
*
b
*
T
[
1
][
1
];
}
__device__
__forceinline__
double
tex3Dd
(
cudaTextureObject_t
texObj
,
const
elsa
::
real_t
x
,
const
elsa
::
real_
t
y
,
const
elsa
::
real_t
z
)
{
__device__
__forceinline__
double
tex3Dd
(
cudaTextureObject_t
texObj
,
const
float
x
,
const
floa
t
y
,
const
elsa
::
real_t
z
)
{
uint2
rt
=
tex3D
<
uint2
>
(
texObj
,
x
,
y
,
z
);
return
__hiloint2double
(
rt
.
y
,
rt
.
x
);
}
...
...
@@ -304,7 +304,22 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_THR
//compute ray direction
real_t
rd
[
dim
];
gesqmv
<
real_t
,
dim
>
(
projInvPtr
,
pixelCoord
,
rd
,
projPitch
);
normalize
<
real_t
,
dim
>
(
rd
);
// determine main direction
const
uint32_t
idx
=
maxAbsIndex
<
real_t
,
dim
>
(
rd
);
const
real_t
rdMax
=
abs
(
rd
[
idx
]);
real_t
rn
;
if
(
dim
==
3
)
rn
=
rnorm3d
(
rd
[
0
],
rd
[
1
],
rd
[
2
]);
else
if
(
dim
==
2
)
rn
=
rhypot
(
rd
[
0
],
rd
[
1
]);
real_t
weight
=
rn
/
rdMax
;
// normalize ray direction to have length 1/-1 in the main direction
#pragma unroll
for
(
int
i
=
0
;
i
<
dim
;
++
i
)
rd
[
i
]
/=
rdMax
;
//find volume intersections
real_t
tmin
,
tmax
;
...
...
@@ -314,10 +329,6 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_THR
real_t
currentPosition
[
dim
];
pointAt
<
real_t
,
dim
>
(
rayOrigin
,
rd
,
tmin
,
currentPosition
);
//determine primary direction
const
uint32_t
idx
=
maxAbsIndex
<
real_t
,
dim
>
(
rd
);
const
real_t
tdelta
=
1.0
f
/
fabs
(
rd
[
idx
]);
// truncate as currentPosition is non-negative
const
real_t
fl
=
trunc
(
currentPosition
[
idx
]);
// for Joseph's also consider rays running along the "left" boundary
...
...
@@ -333,7 +344,7 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_THR
//use midpoint of entire ray intersection as a constant integration value
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,
intersectionLength
*
0.5
f
);
*
sinogramPtr
=
intersectionLength
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
*
sinogramPtr
=
weight
*
intersectionLength
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
return
;
}
...
...
@@ -342,33 +353,32 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_THR
* add first line segment and move to first interior point
*/
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,
minDelta
*
0.5
f
);
data_t
pixelValue
=
minDelta
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
data_t
pixelValue
=
weight
*
minDelta
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
//from here on use tmin as an indication of the current position along the ray
tmin
+=
minDelta
;
//if next point isn't last
if
(
tmax
-
tmin
>
tdelta
){
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,(
minDelta
+
tdelta
)
*
0.5
f
);
tmin
+=
tdelta
;
minDelta
=
tdelta
;
pixelValue
+=
minDelta
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
if
(
tmax
-
tmin
>
1.0
f
){
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,(
minDelta
+
1.0
f
)
*
0.5
f
);
tmin
+=
1.0
f
;
pixelValue
+=
weight
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
//while interior intersection points remain
while
(
tm
in
+
minDelta
<
tmax
)
{
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,
minDelta
);
tmin
+=
minDelta
;
pixelValue
+=
minDelta
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
while
(
tm
ax
-
tmin
>
1.0
f
)
{
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,
1.0
f
);
tmin
+=
1.0
f
;
pixelValue
+=
weight
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
}
}
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,(
tmax
+
minDelta
-
tmin
)
*
0.5
f
);
pixelValue
+=
(
tmax
-
tmin
)
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
updateTraverse
<
real_t
,
dim
>
(
currentPosition
,
rd
,(
tmax
-
tmin
+
1.0
f
)
*
0.5
f
);
pixelValue
+=
weight
*
(
tmax
-
tmin
)
*
tex
<
data_t
,
dim
>
(
volume
,
currentPosition
);
*
sinogramPtr
=
pixelValue
;
}
__device__
__forceinline__
double
tex1DLayeredd
(
cudaTextureObject_t
texObj
,
const
elsa
::
real_
t
x
,
const
int
layer
)
{
__device__
__forceinline__
double
tex1DLayeredd
(
cudaTextureObject_t
texObj
,
const
floa
t
x
,
const
int
layer
)
{
uint2
rt
=
tex1DLayered
<
uint2
>
(
texObj
,
x
,
layer
);
return
__hiloint2double
(
rt
.
y
,
rt
.
x
);
}
...
...
@@ -388,7 +398,7 @@ double tex1DLayered<double>(cudaTextureObject_t texObj, elsa::real_t x, const in
return
(
1
-
a
)
*
T
[
0
]
+
a
*
T
[
1
];
}
__device__
__forceinline__
double
tex2DLayeredd
(
cudaTextureObject_t
texObj
,
const
elsa
::
real_t
x
,
const
elsa
::
real_
t
y
,
const
int
layer
)
{
__device__
__forceinline__
double
tex2DLayeredd
(
cudaTextureObject_t
texObj
,
const
float
x
,
const
floa
t
y
,
const
int
layer
)
{
uint2
rt
=
tex2DLayered
<
uint2
>
(
texObj
,
x
,
y
,
layer
);
return
__hiloint2double
(
rt
.
y
,
rt
.
x
);
}
...
...
@@ -444,7 +454,7 @@ __global__ void __launch_bounds__(elsa::TraverseJosephsCUDA<data_t,dim>::MAX_TH
voxelCenter
[
1
]
=
y
+
offset
+
0.5
f
;
}
real
_t
val
=
0.0
f
;
data
_t
val
=
0.0
f
;
for
(
uint
i
=
0
;
i
<
numAngles
;
i
++
)
{
const
int8_t
*
const
projPtr
=
proj
+
i
*
projPitch
*
dim
;
const
real_t
*
const
rayOrigin
=
(
real_t
*
)(
rayOrigins
+
i
*
originPitch
);
...
...
@@ -832,7 +842,6 @@ void TraverseJosephsCUDA<data_t,dim>::traverseAdjointFast(const dim3 blocks, con
template
struct
TraverseJosephsCUDA
<
float
,
2
>;
template
struct
TraverseJosephsCUDA
<
float
,
3
>;
// TODO: support for double precision textures
template
struct
TraverseJosephsCUDA
<
double
,
2
>;
template
struct
TraverseJosephsCUDA
<
double
,
3
>;
}
\ No newline at end of file
elsa/projectors_cuda/tests/test_JosephsMethodCUDA.cpp
View file @
ac48bd43
...
...
@@ -412,10 +412,24 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
volume
(
j
,
volSize
/
2
)
=
1
;
fast
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
1
);
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
1
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
1.0
));
slow
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
1
);
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
1
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
1.0
));
}
AND_THEN
(
"The backprojection sets the values of all hit pixels to the detector value"
)
{
...
...
@@ -471,7 +485,18 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
%
2
])));
fast
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
%
2
])));
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
%
2
])));
else
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
%
2
]),
0.001
));
}
}
}
...
...
@@ -506,7 +531,18 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
0
])));
fast
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
())
));
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
())));
else
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
()),
0.001
));
}
}
}
...
...
@@ -539,7 +575,17 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
0
])));
fast
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
())));
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
())));
else
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
0
]
/
2
).
eval
()),
0.001
));
}
}
}
...
...
@@ -620,10 +666,23 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
volume
(
volSize
/
2
,
j
,
volSize
/
2
)
=
1
;
fast
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
1
);
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
1
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
1.0
));
slow
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
1
);
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
1
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
1.0
));
}
AND_THEN
(
"The backprojection sets the values of all hit voxels to the detector value"
)
{
...
...
@@ -694,17 +753,40 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
volume
(
volSize
/
2
,
j
,
volSize
/
2
)
=
1
;
fast
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
0.75
);
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
0.75
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
0.75
));
slow
.
apply
(
volume
,
sino
);
REQUIRE
(
sino
[
0
]
==
0.75
);
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
sino
[
0
]
==
0.75
);
else
REQUIRE
(
sino
[
0
]
==
Approx
(
0.75
));
}
AND_THEN
(
"The slow backprojection yields the exact adjoint, the fast backprojection is also exact for a very distant x-ray source"
)
{
sino
[
0
]
=
1
;
fast
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
/
2
])));
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
/
2
])));
else
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
/
2
]),
0.005
));
slow
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
/
2
])));
...
...
@@ -850,7 +932,17 @@ TEMPLATE_TEST_CASE("Scenario: Axis-aligned rays are present", "", JosephsMethodC
sino
[
0
]
=
1
;
fast
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
i
]
/
(
i
>
3
?
4
:
2
)).
eval
())));
/** Using doubles significantly increases interpolation accuracy
* For example: when using floats, points very near the border (less than ~1/500th of
* a pixel away from the border) are rounded to actually lie on the border. This then yields more accurate
* results when using floats in some of the axis-aligned test cases, despite the lower interpolation accuracy.
*
* => different requirements for floats and doubles, looser requirements for doubles
*/
if
constexpr
(
std
::
is_same_v
<
data_t
,
float
>
)
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
i
]
/
(
i
>
3
?
4
:
2
)).
eval
())));
else
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
(
backProj
[
i
]
/
(
i
>
3
?
4
:
2
)).
eval
()),
0.005
));
slow
.
applyAdjoint
(
sino
,
volume
);
REQUIRE
(
isApprox
(
volume
,
DataContainer
<
data_t
>
(
volumeDescriptor
,
backProj
[
i
])));
...
...
@@ -1270,12 +1362,6 @@ TEMPLATE_TEST_CASE("Scenario: Projection under an angle", "", JosephsMethodCUDA<
THEN
(
"Ray intersects the correct pixels"
)
{
sino
[
0
]
=
1
;
slow
.
applyAdjoint
(
sino
,
volume
);
for
(
int
i
=
0
;
i
<
volSize
;
i
++
)
{
for
(
int
j
=
0
;
j
<
volSize
;
j
++
)
{
printf
(
"%f "
,
volume
(
j
,
i
));
}
printf
(
"
\n
"
);
}
volume
=
1
;
volume
(
0
,
0
)
=
0
;
...
...
@@ -1289,7 +1375,6 @@ TEMPLATE_TEST_CASE("Scenario: Projection under an angle", "", JosephsMethodCUDA<
volume
(
3
,
3
)
=
0
;
fast
.
apply
(
volume
,
sino
);
printf
(
"%f "
,
sino
[
0
]);
REQUIRE
(
sino
==
DataContainer
<
data_t
>
(
sinoDescriptor
));
slow
.
apply
(
volume
,
sino
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment