Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
CAMP
campvis-public
Commits
ab9a7bff
Commit
ab9a7bff
authored
Nov 19, 2014
by
Declara Denis
Committed by
Christian Schulte zu Berge
Feb 18, 2015
Browse files
Added initial stup for Cuda Confidence Maps Pipeline.
parent
f9428f6f
Changes
368
Expand all
Hide whitespace changes
Inline
Side-by-side
modules/cudaconfidencemaps/core/cm.cpp
0 → 100644
View file @
ab9a7bff
#include
<chrono>
#include
<iostream>
#include
<iomanip>
#include
"cm.h"
#include
"cm_cusp.h"
void
measureExecution
(
std
::
string
message
,
const
std
::
function
<
void
()
>
&
function
)
{
std
::
cout
<<
"started: "
<<
message
<<
std
::
endl
;
auto
start
=
std
::
chrono
::
high_resolution_clock
::
now
();
function
();
auto
stop
=
std
::
chrono
::
high_resolution_clock
::
now
();
float
milliseconds
=
std
::
chrono
::
duration_cast
<
std
::
chrono
::
microseconds
>
(
stop
-
start
).
count
()
/
1000.0
f
;
std
::
cout
<<
"finished: "
<<
std
::
left
<<
std
::
setw
(
35
)
<<
message
<<
"["
<<
milliseconds
<<
"ms]"
<<
std
::
endl
;
}
std
::
unique_ptr
<
ConfidenceMapSolver
>
createCUSPSolver
(
int
width
,
int
height
,
float
gradientScaling
,
float
alpha
,
float
beta
,
float
gamma
)
{
return
std
::
make_unique
<
CuspConfidenceMapSolver
>
(
width
,
height
,
gradientScaling
,
alpha
,
beta
,
gamma
);
}
\ No newline at end of file
modules/cudaconfidencemaps/core/cm.h
0 → 100644
View file @
ab9a7bff
#pragma once
typedef
unsigned
char
uint8
;
#include
<memory>
#include
<vector>
#include
<functional>
void
measureExecution
(
std
::
string
message
,
const
std
::
function
<
void
()
>
&
function
);
class
ConfidenceMapSolver
{
public:
virtual
~
ConfidenceMapSolver
()
{}
virtual
void
createSystem
(
const
uint8
*
image
,
int
width
,
int
height
)
=
0
;
virtual
void
setInitialSolution
(
const
std
::
vector
<
float
>
&
val
)
=
0
;
virtual
void
solve
()
=
0
;
virtual
const
float
*
getSolution
(
int
&
width
,
int
&
height
)
const
=
0
;
};
std
::
unique_ptr
<
ConfidenceMapSolver
>
createCUSPSolver
(
int
width
,
int
height
,
float
gradientScaling
,
float
alpha
,
float
beta
,
float
gamma
);
/*std::unique_ptr<ConfidenceMapSolver> createEIGENSolver(int width, int height, float gradientScaling,
float alpha, float beta, float gamma);*/
\ No newline at end of file
modules/cudaconfidencemaps/core/cm_cusp.cpp
0 → 100644
View file @
ab9a7bff
#include
"cm_cusp.h"
#include
"cm_cusp_cuda_exports.h"
CuspConfidenceMapSolver
::
CuspConfidenceMapSolver
(
int
width
,
int
height
,
float
gradientScaling
,
float
alpha
,
float
beta
,
float
gamma
)
:
width
(
width
),
height
(
height
),
gradientScaling
(
gradientScaling
),
alpha
(
alpha
),
beta
(
beta
),
gamma
(
gamma
),
gpuData
(
CUSP_CM_createGpuData
(
width
,
height
))
{
}
CuspConfidenceMapSolver
::~
CuspConfidenceMapSolver
()
{
CUSP_CM_destroyGpuData
(
gpuData
);
}
void
CuspConfidenceMapSolver
::
createSystem
(
const
uint8
*
image
,
int
width
,
int
height
)
{
if
(
width
!=
this
->
width
||
height
!=
this
->
height
)
{
CUSP_CM_resizeGpuData
(
*
gpuData
,
width
,
height
);
}
this
->
width
=
width
;
this
->
height
=
height
;
CUSP_CM_buildEquationSystem
(
*
gpuData
,
image
,
width
,
height
,
alpha
,
beta
,
gamma
,
gradientScaling
);
}
void
CuspConfidenceMapSolver
::
setInitialSolution
(
const
std
::
vector
<
float
>
&
val
)
{
CUSP_CM_setInitialSolution
(
*
gpuData
,
val
);
}
void
CuspConfidenceMapSolver
::
solve
()
{
measureExecution
(
" . uploading system"
,
[
=
]()
{
CUSP_CM_uploadSystem
(
*
gpuData
);
});
measureExecution
(
" . solving system"
,
[
=
]()
{
CUSP_CM_solveSystem
(
*
gpuData
,
100
,
1e-20
);
});
measureExecution
(
" . downloading solution"
,
[
=
]()
{
CUSP_CM_downloadSolution
(
*
gpuData
);
});
}
const
float
*
CuspConfidenceMapSolver
::
getSolution
(
int
&
width
,
int
&
height
)
const
{
const
float
*
ptr
=
CUSP_CM_getSolutionPtr
(
*
gpuData
);
width
=
width
;
height
=
height
;
// FIXME: height and width are not actually checked against the size of the buffer...
return
ptr
;
}
modules/cudaconfidencemaps/core/cm_cusp.cu
0 → 100644
View file @
ab9a7bff
#include
<vector>
#include
<cmath>
#include
<cusp/dia_matrix.h>
#include
<cusp/krylov/cg.h>
#include
<cusp/precond/diagonal.h>
#include
"cm_cusp_cuda_exports.h"
typedef
cusp
::
dia_matrix
<
int
,
float
,
cusp
::
host_memory
>
CuspHostDiaMatrix
;
typedef
cusp
::
dia_matrix
<
int
,
float
,
cusp
::
device_memory
>
CuspDeviceDiaMatrix
;
typedef
cusp
::
array1d
<
float
,
cusp
::
host_memory
>
CuspHostVector
;
typedef
cusp
::
array1d
<
float
,
cusp
::
device_memory
>
CuspDeviceVector
;
class
CuspGPUData
{
public:
CuspGPUData
(
int
width
,
int
height
)
:
L_h
(
width
*
height
,
width
*
height
,
width
*
height
*
9
,
9
),
b_h
(
width
*
height
),
x_h
(
width
*
height
),
L_d
(
width
*
height
,
width
*
height
,
width
*
height
*
9
,
9
),
b_d
(
width
*
height
),
x_d
(
width
*
height
)
{};
void
resize
(
int
width
,
int
height
)
{
this
->
L_h
.
resize
(
width
*
height
,
width
*
height
,
width
*
height
*
9
,
9
);
this
->
b_h
.
resize
(
width
*
height
);
this
->
x_h
.
resize
(
width
*
height
);
}
CuspHostDiaMatrix
L_h
;
CuspHostVector
b_h
;
CuspHostVector
x_h
;
CuspDeviceDiaMatrix
L_d
;
CuspDeviceVector
b_d
;
CuspDeviceVector
x_d
;
};
CuspGPUData
*
CUSP_CM_createGpuData
(
int
width
,
int
height
)
{
CuspGPUData
*
data
=
new
CuspGPUData
(
width
,
height
);
return
data
;
}
void
CUSP_CM_destroyGpuData
(
CuspGPUData
*
data
)
{
delete
data
;
}
void
CUSP_CM_resizeGpuData
(
CuspGPUData
&
data
,
int
width
,
int
height
)
{
data
.
resize
(
width
,
height
);
}
void
CUSP_CM_setInitialSolution
(
CuspGPUData
&
data
,
const
std
::
vector
<
float
>&
values
)
{
for
(
int
i
=
0
;
i
<
data
.
x_h
.
size
()
&&
i
<
values
.
size
();
++
i
)
{
data
.
x_h
[
i
]
=
values
[
i
];
}
}
void
CUSP_CM_uploadSystem
(
CuspGPUData
&
data
)
{
// Check for NaN values
/*
for (int r = 0; r < data.x_h.size(); ++r) {
for (int d = 0; d < 9; ++d) {
if (std::isnan(data.L_h.values(r, d))) {
std::cout << "NaN detected in A: " << r << std::endl;
break;
}
}
if (std::isnan(data.x_h[r])) {
std::cout << "NaN detected in x: " << r << std::endl;
}
if (std::isnan(data.b_h[r])) {
std::cout << "NaN detected in b: " << r << std::endl;
}
}*/
data
.
x_d
=
data
.
x_h
;
data
.
b_d
=
data
.
b_h
;
data
.
L_d
=
data
.
L_h
;
}
void
CUSP_CM_downloadSolution
(
CuspGPUData
&
data
)
{
data
.
x_h
=
data
.
x_d
;
// FIXME: Should not be needed... (Clamp solution)
for
(
int
i
=
0
;
i
<
data
.
x_h
.
size
();
++
i
)
{
data
.
x_h
[
i
]
=
min
(
1.0
f
,
data
.
x_h
[
i
]);
}
}
void
CUSP_CM_solveSystem
(
CuspGPUData
&
data
,
int
iterations
,
float
precision
)
{
cusp
::
default_monitor
<
float
>
monitor
(
data
.
b_d
,
iterations
,
precision
);
//cusp::precond::bridson_ainv<float, cusp::device_memory> M(A, 1);
cusp
::
precond
::
diagonal
<
float
,
cusp
::
device_memory
>
M
(
data
.
L_d
);
//cusp::krylov::bicgstab(A, x, b, monitor);
cusp
::
krylov
::
cg
(
data
.
L_d
,
data
.
x_d
,
data
.
b_d
,
monitor
,
M
);
}
const
float
*
CUSP_CM_getSolutionPtr
(
const
CuspGPUData
&
data
)
{
//const_cast<CuspGPUData&>(data).x_h = const_cast<CuspGPUData&>(data).x_d;
return
&
data
.
x_h
[
0
];
}
struct
ComputeLaplacianData
{
float
alpha
,
beta
,
gamma
;
float
gradientScaling
;
const
unsigned
char
*
image
;
int
width
,
height
;
int
centralDiagonal
;
int
offsets
[
9
];
float
gammaList
[
9
];
};
static
inline
float
_getAttenuation
(
int
y
,
int
height
,
float
alpha
)
{
return
(
1
-
exp
(
-
alpha
*
((
float
)
y
/
(
float
)(
height
-
1
))));
}
static
inline
float
_getGradient
(
const
ComputeLaplacianData
&
data
,
int
idx
,
int
offset
)
{
const
unsigned
char
*
image
=
data
.
image
;
int
y1
=
idx
/
data
.
width
;
int
y2
=
(
idx
+
offset
)
/
data
.
width
;
float
a1
=
_getAttenuation
(
y1
,
data
.
height
,
data
.
alpha
);
float
a2
=
_getAttenuation
(
y2
,
data
.
height
,
data
.
alpha
);
return
abs
(
image
[
idx
]
*
a1
/
255.0
f
-
image
[
idx
+
offset
]
*
a2
/
255.0
f
);
}
static
inline
float
_calculateWeight
(
float
gradient
,
float
beta
,
float
gamma
,
float
scaling
)
{
return
exp
(
-
beta
*
(
gradient
*
scaling
+
gamma
));
}
static
inline
float
_getWeight
(
const
ComputeLaplacianData
&
data
,
int
x
,
int
y
,
int
diagonal
)
{
float
gradient
=
_getGradient
(
data
,
y
*
data
.
width
+
x
,
data
.
offsets
[
diagonal
]);
float
weight
=
_calculateWeight
(
gradient
,
data
.
beta
,
data
.
gammaList
[
diagonal
],
data
.
gradientScaling
);
return
weight
+
1e-4
;
}
void
CUSP_CM_buildEquationSystem
(
CuspGPUData
&
gpudata
,
const
unsigned
char
*
image
,
int
width
,
int
height
,
float
alpha
,
float
beta
,
float
gamma
,
float
gradientScaling
)
{
// Gather all of the options together
ComputeLaplacianData
data
;
data
.
alpha
=
alpha
;
data
.
beta
=
beta
;
data
.
gamma
=
gamma
;
data
.
gradientScaling
=
1.0
f
/
0.1764
f
;
data
.
image
=
image
;
data
.
width
=
width
;
data
.
height
=
height
;
data
.
centralDiagonal
=
4
;
int
offsets
[
9
]
=
{
-
width
-
1
,
-
width
,
-
width
+
1
,
-
1
,
0
,
1
,
width
-
1
,
width
,
width
+
1
};
float
gammaList
[
9
]
=
{
sqrt
(
2.0
f
)
*
gamma
,
0.0
f
,
sqrt
(
2.0
f
)
*
gamma
,
gamma
,
0.0
f
,
gamma
,
sqrt
(
2.0
f
)
*
gamma
,
0.0
f
,
sqrt
(
2.0
f
)
*
gamma
};
for
(
int
i
=
0
;
i
<
9
;
++
i
)
{
data
.
offsets
[
i
]
=
offsets
[
i
];
data
.
gammaList
[
i
]
=
gammaList
[
i
];
}
// Prepare equation system data structure
for
(
int
i
=
0
;
i
<
9
;
++
i
)
{
gpudata
.
L_h
.
diagonal_offsets
[
i
]
=
data
.
offsets
[
i
];
}
// Reset B
for
(
int
x
=
0
;
x
<
width
*
height
;
++
x
)
{
gpudata
.
b_h
[
x
]
=
0.0
f
;
}
// Fill in first row of b, which sholud be one
for
(
int
x
=
0
;
x
<
width
;
++
x
)
{
gpudata
.
b_h
[
x
]
=
1.0
f
;
}
// Fill in rows of A corresponding to first and last row of the image
for
(
int
x
=
0
;
x
<
width
;
++
x
)
{
for
(
int
d
=
0
;
d
<
9
;
++
d
)
{
gpudata
.
L_h
.
values
(
x
,
d
)
=
(
d
==
data
.
centralDiagonal
?
1.0
f
:
0.0
f
);
gpudata
.
L_h
.
values
(
width
*
(
height
-
1
)
+
x
,
d
)
=
(
d
==
data
.
centralDiagonal
?
1.0
f
:
0.0
f
);
}
}
// Fill in the rest of the matrix
for
(
int
y
=
1
;
y
<
height
-
1
;
++
y
)
{
for
(
int
x
=
0
;
x
<
width
;
++
x
)
{
int
idx
=
y
*
width
+
x
;
// Filter off out-of-bounds edges
unsigned
short
filter
=
495
;
// 111 101 111
if
(
x
==
0
)
filter
&=
203
;
// 011 001 011
if
(
x
==
width
-
1
)
filter
&=
422
;
// 110 100 110
if
(
y
==
1
)
filter
&=
47
;
// 000 101 111
if
(
y
==
height
-
2
)
filter
&=
488
;
// 111 101 000
float
valueSum
=
0.0
f
;
for
(
int
d
=
0
;
d
<
9
;
++
d
)
{
gpudata
.
L_h
.
values
(
idx
,
d
)
=
0
;
float
value
=
0.0
f
;
if
(((
256
>>
d
)
&
filter
)
!=
0
)
{
value
=
_getWeight
(
data
,
x
,
y
,
d
);
gpudata
.
L_h
.
values
(
idx
,
d
)
=
-
value
;
}
else
if
(
y
==
1
||
y
==
height
-
2
)
{
unsigned
short
filter2
=
495
;
// 111 101 11
if
(
x
==
0
)
filter2
&=
203
;
// 011 001 011
if
(
x
==
width
-
1
)
filter2
&=
422
;
// 110 100 110
if
(((
256
>>
d
)
&
filter2
)
!=
0
)
{
value
=
_getWeight
(
data
,
x
,
y
,
d
);
if
(
y
==
1
)
gpudata
.
b_h
[
idx
]
+=
value
;
}
}
valueSum
+=
value
;
}
gpudata
.
L_h
.
values
(
idx
,
data
.
centralDiagonal
)
=
valueSum
;
}
}
}
\ No newline at end of file
modules/cudaconfidencemaps/core/cm_cusp.h
0 → 100644
View file @
ab9a7bff
#pragma once
#include
"cm.h"
class
CuspGPUData
;
/* Laplacian construction parameters */
class
CuspConfidenceMapSolver
:
public
ConfidenceMapSolver
{
public:
CuspConfidenceMapSolver
(
int
width
,
int
height
,
float
gradientScaling
,
float
alpha
,
float
beta
,
float
gamma
);
virtual
~
CuspConfidenceMapSolver
();
virtual
void
createSystem
(
const
uint8
*
image
,
int
width
,
int
height
);
virtual
void
setInitialSolution
(
const
std
::
vector
<
float
>
&
val
);
virtual
void
solve
();
virtual
const
float
*
getSolution
(
int
&
width
,
int
&
height
)
const
;
private:
// Input image data
int
width
,
height
;
// Solver parameters
float
alpha
,
beta
,
gamma
;
float
gradientScaling
;
// Matrices and Vectors
CuspGPUData
*
gpuData
;
};
\ No newline at end of file
modules/cudaconfidencemaps/core/cm_cusp_build_system.cu
0 → 100644
View file @
ab9a7bff
/*<
__global__ void k_buildCMSystem(cudaTextureObject_t image, int width, int height,
float alpha, float beta, float gamma,
float *out_weights)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int idx = y * width + x;
if (x >= width || y >= height) return;
float attenuation_center = exp(-alpha * (float)y / height);
float attenuation_bottom = exp(-alpha * (float)(y+1) / height);
float c_center = tex2D(image, x, y) * attenuation_center;
float c_right = tex2D(image, x+1, y) * attenuation_center;
float c_bottom = tex2D(image, x, y+1) * attenuation_bottom;
float c_se = tex2D(image, x+1, y+1) * attenuation_bottom;
float c_sw = tex2D(image, x-1, y+1) * attenuation_bottom;
float w_right = exp(-beta * (abs(c_center - c_right) + gamma));
float w_bottom = exp(-beta * (abs(c_center - c_bottom)));
float w_se = exp(-beta * (abs(c_center - c_se) + sqrt(2.0f) * gamma));
float w_sw = exp(-beta * (abs(c_center - c_sw) + sqrt(2.0f) * gamma));
out_weights[idx*4 + 0] = w_right;
out_weights[idx*4 + 1] = w_sw;
out_weights[idx*4 + 2] = w_bottom;
out_weights[idx*4 + 3] = w_se;
}
int main()
{
// Create texture object
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypePitch2D
return 0;
}*/
\ No newline at end of file
modules/cudaconfidencemaps/core/cm_cusp_cuda_exports.h
0 → 100644
View file @
ab9a7bff
#pragma once
#include
<vector>
class
CuspGPUData
;
CuspGPUData
*
CUSP_CM_createGpuData
(
int
width
,
int
height
);
void
CUSP_CM_destroyGpuData
(
CuspGPUData
*
data
);
void
CUSP_CM_resizeGpuData
(
CuspGPUData
&
data
,
int
width
,
int
height
);
void
CUSP_CM_buildEquationSystem
(
CuspGPUData
&
data
,
const
unsigned
char
*
image
,
int
width
,
int
height
,
float
alpha
,
float
beta
,
float
gamma
,
float
gradientScaling
);
void
CUSP_CM_setInitialSolution
(
CuspGPUData
&
data
,
const
std
::
vector
<
float
>&
values
);
void
CUSP_CM_uploadSystem
(
CuspGPUData
&
data
);
void
CUSP_CM_downloadSolution
(
CuspGPUData
&
data
);
void
CUSP_CM_solveSystem
(
CuspGPUData
&
data
,
int
iterations
,
float
precision
);
const
float
*
CUSP_CM_getSolutionPtr
(
const
CuspGPUData
&
data
);
\ No newline at end of file
modules/cudaconfidencemaps/cudaconfidencemaps.cmake
0 → 100644
View file @
ab9a7bff
# CMake file for vis module
IF
(
${
ModuleEnabled
}
)
# Find CUDA
FIND_PACKAGE
(
CUDA REQUIRED
)
if
(
CUDA_FOUND
)
# Source files:
FILE
(
GLOB ThisModSources RELATIVE
${
ModulesDir
}
modules/cudaconfidencemaps/pipelines/*.cpp
modules/cudaconfidencemaps/processors/*.cpp
)
# Header files (including GLSL files so that they'll appear in VS projects)
FILE
(
GLOB ThisModHeaders RELATIVE
${
ModulesDir
}
modules/cudaconfidencemaps/glsl/*.frag
modules/cudaconfidencemaps/glsl/*.geom
modules/cudaconfidencemaps/glsl/*.vert
modules/cudaconfidencemaps/pipelines/*.h
modules/cudaconfidencemaps/processors/*.h
)
SET
(
ThisModShaderDirectories
"modules/cudaconfidencemaps/glsl"
)
SET
(
ThisModDependencies base io
)
else
()
MESSAGE
(
FATAL_ERROR
"Could not find CUDA SDK."
)
endif
()
ENDIF
(
${
ModuleEnabled
}
)
SET
(
ThisModStatus EXPERIMENTAL
)
SET
(
ThisModExternalDependencies TRUE
)
modules/cudaconfidencemaps/ext/cusplibrary-0.4.0/.gitignore
0 → 100644
View file @
ab9a7bff
syntax: glob
# Ignore files left by editors
*~
*.bak
*.swp
# Ignore compiled files
*.o
*.obj
*.exe
*.dll
*.pyc
*.so
*.pdb
# Ignore CUDA auxiliary files
*.linkinfo
*.cubin
# Other things to ignore
*.vtu
*.dblite
nvcc_options_file.txt
print_sm_version
tester
# Ignore build files
#<AutoIgnore>
examples/Solvers/gmres
examples/Solvers/bicgstab
examples/Solvers/cg_m
examples/Solvers/cg
examples/InputOutput/matrix_market
examples/MatrixFormats/csr
examples/MatrixFormats/ell
examples/MatrixFormats/hyb
examples/MatrixFormats/dia
examples/MatrixFormats/coo
examples/MatrixAssembly/unordered_triplets
examples/Gallery/diffusion
examples/Gallery/poisson
examples/Monitors/convergence_monitor
examples/Monitors/verbose_monitor
examples/Monitors/default_monitor
examples/Preconditioners/ainv
examples/Preconditioners/smoothed_aggregation
examples/Preconditioners/diagonal
examples/Views/csr
examples/Views/cg_raw
examples/Views/array1d
examples/Views/csr_raw
examples/Views/array2d_raw
examples/Algorithms/maximal_independent_set
examples/Algorithms/transpose
examples/Algorithms/multiply
examples/Algorithms/blas
examples/LinearOperator/stencil
examples/Version/version
#<AutoIgnore/>
modules/cudaconfidencemaps/ext/cusplibrary-0.4.0/CHANGELOG
0 → 100644
View file @
ab9a7bff
#######################################
# Cusp v0.4.0 #
#######################################
Summary
Cusp v0.4.0 provides support for CUDA 5.5 and Thrust v1.7. Version
0.4.0 includes several new graph algorithms based on backend integration
with back40computing, a refactored smoothed aggregation solver allowing
user-defined functions for core components and experimental work concerning
sparse matrix visualization.
New Features
array2d formats allow access to row and column elements regardless of orientation
gallery method to generate grids
OpenGL implementation of spy to view sparse matrices
Generalized multilevel class
Added Sparse * Dense multiplication routine
Algorithms
cusp::graph::bfs
cusp::graph::connected_components
cusp::graph::hilbert_curve
cusp::graph::maximum_flow
cusp::graph::max_flow_to_min_cut
cusp::graph::pseudo_peripheral_vertex
cusp::graph::symmetric_rcm
cusp::krylov::cr
Bug Fixes
Resolved issues related to Thrust 1.6 & Thrust 1.7
Resolved issues related to CUDA 5.5
Silenced warnings concerning uninitialized memory in generalized SpMV
#######################################
# Cusp v0.3.1 #
#######################################
Summary
Minor compatibility fixes for Thrust 1.6.x
#######################################
# Cusp v0.3.0 #
#######################################
Summary
Cusp v0.3.0 provides support for CUDA 4.1 and Thrust v1.5. Version
0.3.0 includes incremental improvements to MatrixMarket I/O support
and array format conversions.
New Features
MatrixMarket I/O routines support complex matrices
array1d is convertible to and from all other formats (e.g. array1d<->array2d)
Bug Fixes
Resize final output of CSR * CSR to account for excluded zeros
Acknowledgements
Justin Luitjens and Simon Layton for diagnosing CSR SpMM bug
Kashif Rasul for Thrust v1.6 support patch
Filipe Maia and Steven Dalton for general support and maintenance