Commit e82f00cf authored by Declara Denis's avatar Declara Denis Committed by Christian Schulte zu Berge
Browse files

Implemented confidence-maps equation-system creation using CUDA

On my machine this means a performance increase from 8ms to <1ms. Time that can
be spent running more iterations of the CG algorithm to get more accurate solutions.
parent d309ea25
......@@ -9,6 +9,8 @@ typedef cusp::dia_matrix<int, float, cusp::device_memory> CuspDeviceDiaMatrix;
typedef cusp::array1d<float, cusp::host_memory> CuspHostVector;
typedef cusp::array1d<float, cusp::device_memory> CuspDeviceVector;
#define CUDACONFIDENCEMAPS_USE_GPU_FOR_SYSTEM_CREATION 1;
namespace campvis {
namespace cuda {
......@@ -27,6 +29,8 @@ namespace cuda {
int imageWidth;
int imageHeight;
float solutionResidualNorm;
float systemCreationTime;
float systemSolveTime;
};
struct ComputeLaplacianData
......@@ -50,6 +54,121 @@ namespace cuda {
_gpuData->solutionResidualNorm = 0.0f;
}
void CudaConfidenceMapsSystemSolver::uploadImage(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma,
bool useGPU, bool isUpsideDown) {
resizeDataStructures(imageWidth, imageHeight, isUpsideDown);
// Measure execution time
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
if (useGPU) {
createSystemGPU(imageData, imageWidth, imageHeight, gradientScaling, alpha, beta, gamma, isUpsideDown);
} else {
createSystemCPU(imageData, imageWidth, imageHeight, gradientScaling, alpha, beta, gamma, isUpsideDown);
}
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&_gpuData->systemCreationTime, start, stop);
}
void CudaConfidenceMapsSystemSolver::resetSolution() {
int width = _gpuData->imageWidth;
int height = _gpuData->imageHeight;
bool isUpsideDown = _gpuData->isUpsideDown;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
float value;
if (!isUpsideDown)
value = y / (height - 1.0f);
else
value = 1.0f - y / (height - 1.0f);
_gpuData->x_h[y * width + x] = value;
}
}
// Uplaod solution vector on the GPU
_gpuData->x_d = _gpuData->x_h;
}
void CudaConfidenceMapsSystemSolver::solve(int maximumIterations, float errorTolerance) {
// Measure execution time
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
cusp::default_monitor<float> monitor(_gpuData->b_d, maximumIterations, errorTolerance);
cusp::precond::diagonal<float, cusp::device_memory> M(_gpuData->L_d);
cusp::krylov::cg(_gpuData->L_d, _gpuData->x_d, _gpuData->b_d, monitor, M);
// Downlaod data
_gpuData->x_h = _gpuData->x_d;
_gpuData->solutionResidualNorm = monitor.residual_norm();
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&_gpuData->systemSolveTime, start, stop);
}
const float* CudaConfidenceMapsSystemSolver::getSolution(int& width, int& height) {
width = _gpuData->imageWidth;
height = _gpuData->imageHeight;
return &_gpuData->x_h[0];
}
float CudaConfidenceMapsSystemSolver::getSolutionResidualNorm() const {
return _gpuData->solutionResidualNorm;
}
float CudaConfidenceMapsSystemSolver::getSystemCreationTime() const {
return _gpuData->systemCreationTime;
}
float CudaConfidenceMapsSystemSolver::getSystemSolveTime() const {
return _gpuData->systemSolveTime;
}
void CudaConfidenceMapsSystemSolver::resizeDataStructures(int imageWidth, int imageHeight, bool isUpsideDown) {
// If the problem size changed, reset the solution vector, as well as resizing the vectors and matrices
if (_gpuData->imageWidth != imageWidth || _gpuData->imageHeight != imageHeight || _gpuData->isUpsideDown != isUpsideDown) {
// Resize the system vectors and matrices to accomodate the different image sze
_gpuData->imageWidth = imageWidth;
_gpuData->imageHeight = imageHeight;
int numElements = imageWidth * imageHeight;
_gpuData->x_h.resize(numElements);
_gpuData->b_d.resize(numElements);
_gpuData->x_d.resize(numElements);
_gpuData->L_d.resize(numElements, numElements, numElements * 9, 9);
_gpuData->L_h.resize(numElements, numElements, numElements * 9, 9);
// Set the b vector to 0 (except for the row corresponding to the seed points set to 1)
if (isUpsideDown) {
CuspDeviceVector::view lastRow(_gpuData->b_d.begin() + (numElements - imageWidth), _gpuData->b_d.end());
CuspDeviceVector::view rest(_gpuData->b_d.begin(), _gpuData->b_d.begin() + (numElements - imageWidth));
cusp::blas::fill(rest, 0.0f);
cusp::blas::fill(lastRow, 1.0f);
}
else {
CuspDeviceVector::view firstRow(_gpuData->b_d.begin(), _gpuData->b_d.begin() + imageWidth);
CuspDeviceVector::view rest(_gpuData->b_d.begin() + imageWidth, _gpuData->b_d.end());
cusp::blas::fill(firstRow, 1.0f);
cusp::blas::fill(rest, 0.0f);
}
resetSolution();
}
}
static inline float _getWeight(const ComputeLaplacianData &data, int x, int y, int diagonal)
{
const unsigned char *image = data.image;
......@@ -66,11 +185,9 @@ namespace cuda {
return weight + 1e-4;
}
void CudaConfidenceMapsSystemSolver::uploadImage(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma,
bool isUpsideDown) {
resizeDataStructures(imageWidth, imageHeight, isUpsideDown);
void CudaConfidenceMapsSystemSolver::createSystemCPU(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma,
bool isUpsideDown) {
// Gather all the parameters needed to create the system in one place
ComputeLaplacianData data;
data.alpha = alpha;
......@@ -88,6 +205,7 @@ namespace cuda {
data.offsets[i] = offsets[i];
data.gammaList[i] = gammaList[i];
_gpuData->L_h.diagonal_offsets[i] = offsets[i];
_gpuData->L_d.diagonal_offsets[i] = offsets[i];
}
// Precompute attenuation tables
......@@ -116,15 +234,13 @@ namespace cuda {
if (y == 0 || y == imageHeight - 1) valueSum = 1.0f;
for (int d = 0; d < 9; ++d) {
_gpuData->L_h.values(idx, d) = 0;
float value = 0.0f;
if (((256>>d) & filter) != 0) {
value = _getWeight(data, x, y, d);
_gpuData->L_h.values(idx, d) = -value;
}
_gpuData->L_h.values(idx, d) = -value;
valueSum += value;
}
......@@ -136,74 +252,85 @@ namespace cuda {
_gpuData->L_d = _gpuData->L_h;
}
void CudaConfidenceMapsSystemSolver::resetSolution() {
int width = _gpuData->imageWidth;
int height = _gpuData->imageHeight;
bool isUpsideDown = _gpuData->isUpsideDown;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
float value;
if (!isUpsideDown)
value = y / (height - 1.0f);
else
value = 1.0f - y / (height - 1.0f);
_gpuData->x_h[y * width + x] = value;
}
}
// Uplaod solution vector on the GPU
_gpuData->x_d = _gpuData->x_h;
static __device__ float d_getWeight(float v1, float v2, float gradientScaling, float beta, float gamma)
{
float grad = abs(v1 - v2) * gradientScaling / 255.0f;
return exp(-beta * (grad + gamma)) + 1e-4;
}
void CudaConfidenceMapsSystemSolver::solve(int maximumIterations, float errorTolerance) {
cusp::default_monitor<float> monitor(_gpuData->b_d, maximumIterations, errorTolerance);
cusp::precond::diagonal<float, cusp::device_memory> M(_gpuData->L_d);
cusp::krylov::cg(_gpuData->L_d, _gpuData->x_d, _gpuData->b_d, monitor, M);
_gpuData->solutionResidualNorm = monitor.residual_norm();
}
static __global__ void k_buildSystem(float* L, int pitch, const unsigned char* image, int width, int height,
float gradientScaling, float alpha, float beta, float gamma)
{
const float* CudaConfidenceMapsSystemSolver::getSolution(int& width, int& height) {
// Downlaod data
_gpuData->x_h = _gpuData->x_d;
return &_gpuData->x_h[0];
}
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
const int pidx = y * width + x;
if (x >= width || y >= height) return;
const float gamma_sq2 = gamma * 1.4142; // Fixme....
const int offsets[] = {-width-1, -width, -width+1, -1, 0, 1, width-1, width, width+1};
const float gammas[] = {gamma_sq2, 0.0f, gamma_sq2, gamma, 0.0f, gamma, gamma_sq2, 0.0f, gamma_sq2};
const float attenuations[] = {
1.0f - (y - 1.0f) / (height - 1.0f),
1.0f - (y ) / (height - 1.0f),
1.0f - (y + 1.0f) / (height - 1.0f)
};
// Filter off out-of-bounds edges
unsigned short filter = 495; // 111 101 111
// 8 - neighbourhood filter
if (x == 0) filter &= 203; // 011 001 011
if (x == width-1) filter &= 422; // 110 100 110
if (y == 0) filter &= 47; // 000 101 111
if (y == height-1) filter &= 488; // 111 101 000
// get central pixel
float centralValue = image[pidx] * attenuations[1];
// If the pixel is at the top or at the bottom, add a value of 1 to the diagonal, to
// account for the edge to the seed points
float valueSum = 0.0f;
if (y == 0 || y == height - 1)
valueSum = 1.0f;
for (int d = 0; d < 9; ++d) {
float value = 0.0f;
if (((256>>d) & filter) != 0) {
int pidx_2 = pidx + offsets[d];
float v = image[pidx_2] * attenuations[d/3];
value = d_getWeight(centralValue, v, gradientScaling, beta, gammas[d]);
}
float CudaConfidenceMapsSystemSolver::getSolutionResidualNorm() const {
return _gpuData->solutionResidualNorm;
L[d * pitch + pidx] = -value;
valueSum += value;
}
L[4 * pitch + pidx] = valueSum;
}
void CudaConfidenceMapsSystemSolver::resizeDataStructures(int imageWidth, int imageHeight, bool isUpsideDown) {
// If the problem size changed, reset the solution vector, as well as resizing the vectors and matrices
if (_gpuData->imageWidth != imageWidth || _gpuData->imageHeight != imageHeight || _gpuData->isUpsideDown != isUpsideDown) {
// Resize the system vectors and matrices to accomodate the different image sze
_gpuData->imageWidth = imageWidth;
_gpuData->imageHeight = imageHeight;
int numElements = imageWidth * imageHeight;
_gpuData->x_h.resize(numElements);
_gpuData->b_d.resize(numElements);
_gpuData->x_d.resize(numElements);
_gpuData->L_d.resize(numElements, numElements, numElements * 9, 9);
_gpuData->L_h.resize(numElements, numElements, numElements * 9, 9);
void CudaConfidenceMapsSystemSolver::createSystemGPU(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma,
bool isUpsideDown) {
// Set the b vector to 0 (except for the row corresponding to the seed points set to 1)
if (isUpsideDown) {
CuspDeviceVector::view lastRow(_gpuData->b_d.begin() + (numElements - imageWidth), _gpuData->b_d.end());
CuspDeviceVector::view rest(_gpuData->b_d.begin(), _gpuData->b_d.begin() + (numElements - imageWidth));
cusp::blas::fill(rest, 0.0f);
cusp::blas::fill(lastRow, 1.0f);
}
else {
CuspDeviceVector::view firstRow(_gpuData->b_d.begin(), _gpuData->b_d.begin() + imageWidth);
CuspDeviceVector::view rest(_gpuData->b_d.begin() + imageWidth, _gpuData->b_d.end());
cusp::blas::fill(firstRow, 1.0f);
cusp::blas::fill(rest, 0.0f);
}
int offsets[9] = {-imageWidth-1, -imageWidth, -imageWidth+1, -1, 0, 1, imageWidth-1, imageWidth, imageWidth+1};
resetSolution();
for (int i = 0; i < 9; ++i) {
_gpuData->L_d.diagonal_offsets[i] = offsets[i];
}
int numElements = imageWidth * imageHeight;
dim3 dimBlock(32, 32, 1);
dim3 dimGrid((imageWidth + 31) / 32, (imageHeight + 31) / 32, 1);
unsigned char *deviceImage;
cudaMalloc((void**)&deviceImage, numElements);
cudaMemcpy(deviceImage, imageData, numElements, cudaMemcpyHostToDevice);
k_buildSystem<<<dimGrid, dimBlock>>>(thrust::raw_pointer_cast(&_gpuData->L_d.values.values[0]), _gpuData->L_d.values.pitch,
deviceImage, imageWidth, imageHeight,
gradientScaling, alpha, beta, gamma);
cudaFree(deviceImage);
}
} // cuda
......
......@@ -25,7 +25,7 @@ namespace cuda {
* \param isUpsideDown if set to true, the image is interpreted as being upside down (as common in OpenGL)
*/
void uploadImage(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma, bool isUpsideDown=true);
float gradientScaling, float alpha, float beta, float gamma, bool useGPU, bool isUpsideDown=true);
/**
* Resets the current solution vector to a linear fallof from white (top of the image) to black
......@@ -56,8 +56,25 @@ namespace cuda {
*/
float getSolutionResidualNorm() const;
/**
* Returns the number of milliseconds that were needed to build the equation system
* when calling \see uploadImage()
*/
float getSystemCreationTime() const;
/**
* Returns the number of milliseconds that were needed to solve the system when
* \see solve() was called
*/
float getSystemSolveTime() const;
private:
void resizeDataStructures(int imageWidth, int imageHeight, bool isUpsideDown);
void createSystemCPU(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma, bool isUpsideDown=true);
void createSystemGPU(const unsigned char* imageData, int imageWidth, int imageHeight,
float gradientScaling, float alpha, float beta, float gamma, bool isUpsideDown=true);
CudaConfidenceMapsSystemGPUData *_gpuData;
};
......
......@@ -32,8 +32,6 @@
#include "core/datastructures/imagerepresentationgl.h"
#include "core/tools/stringutils.h"
#include "modules/cudaconfidencemaps/core/cm_cusp_cuda_exports.h"
namespace campvis {
const std::string CudaConfidenceMapsSolver::loggerCat_ = "CAMPVis.modules.cudaconfidencemaps.solver";
......@@ -47,6 +45,8 @@ namespace campvis {
, p_paramAlpha("Alpha", "Alpha (TGC)", 2.0f, 0.001, 10)
, p_paramBeta("Beta", "Beta (Weight mapping)", 20.0f, 0.001, 200)
, p_paramGamma("Gamma", "Gamma (Diagonal penalty)", 0.03f, 0.001, 0.5)
, p_createSystemOnGPU("CreateSystemOnGPU", "Use the GPU to build the equation system", true)
, p_printStatistics("PrintStatistics", "Print execution times and residual values", false)
, _solver()
{
......@@ -59,6 +59,9 @@ namespace campvis {
addProperty(p_paramAlpha);
addProperty(p_paramBeta);
addProperty(p_paramGamma);
addProperty(p_createSystemOnGPU);
addProperty(p_printStatistics);
}
CudaConfidenceMapsSolver::~CudaConfidenceMapsSolver() { }
......@@ -87,7 +90,7 @@ namespace campvis {
size_t elementCount = cgt::hmul(size);
auto image = (unsigned char*)img->getWeaklyTypedPointer()._pointer;
_solver.uploadImage(image, size.x, size.y, gradientScaling, alpha, beta, gamma);
_solver.uploadImage(image, size.x, size.y, gradientScaling, alpha, beta, gamma, p_createSystemOnGPU.getValue());
_solver.solve(iterations, 1e-10);
const float *solution = _solver.getSolution(size.x, size.y);
......@@ -101,7 +104,11 @@ namespace campvis {
id->setMappingInformation(img->getParent()->getMappingInformation());
data.addData(p_outputConfidenceMap.getValue(), id);
std::cout << "Residual: " << _solver.getSolutionResidualNorm() << std::endl;
if (p_printStatistics.getValue()) {
std::cout << "Residual: " << _solver.getSolutionResidualNorm() << std::endl;
std::cout << "System Creation Time: " << _solver.getSystemCreationTime() << std::endl;
std::cout << "System Solve Time: " << _solver.getSystemSolveTime() << std::endl;
}
}
}
......
......@@ -91,6 +91,8 @@ namespace campvis {
FloatProperty p_paramAlpha;
FloatProperty p_paramBeta;
FloatProperty p_paramGamma;
BoolProperty p_createSystemOnGPU;
BoolProperty p_printStatistics;
protected:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment