11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 376f75c4 authored by Benedikt Zoennchen's avatar Benedikt Zoennchen

OpenCL bug-fix: memory leaks.

parent 50d9ef0d
......@@ -17,6 +17,7 @@ public class CLGaussianCalculator {
private SimulationModel<DefaultSimulationConfig> model;
private IGaussianFilter filterObstacles;
private IGaussianFilter filterPedestrians;
public CLGaussianCalculator(final SimulationModel model,
final double scale,
......@@ -44,7 +45,7 @@ public class CLGaussianCalculator {
IGaussianFilter.Type.OpenCL);
filterPedestrians.filterImage();
filterObstacles.filterImage();
filterPedestrians.destroy();
return convertFilterToImage(filterPedestrians, filterObstacles);
}
......@@ -56,7 +57,7 @@ public class CLGaussianCalculator {
int maxColorValue = 255 * 255 * 255;
ColorHelper colorHelper = new ColorHelper(maxColorValue);
// double max = filter.getMaxFilteredValue();
// double bound = filter.getMaxFilteredValue();
double max = 1.00;
double factor = maxColorValue / max;
System.out.println(filterPedestrians.getMaxFilteredValue()); // 0.1259
......@@ -80,6 +81,10 @@ public class CLGaussianCalculator {
return image;
}
/**
 * Destroys the obstacle filter by delegating to its {@code destroy()} method.
 *
 * NOTE(review): only {@code filterObstacles} is handled here; confirm that
 * {@code filterPedestrians} is destroyed elsewhere in this class.
 */
public void destroy() {
    filterObstacles.destroy();
}
/**
* Helper method which create a new standard BufferedImage with the needed
* configurations.
......
......@@ -7,6 +7,7 @@ import java.awt.Stroke;
import java.awt.geom.Path2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.util.Collection;
import java.util.stream.Stream;
import org.apache.log4j.LogManager;
......@@ -17,6 +18,7 @@ import org.vadere.gui.components.utils.ColorHelper;
import org.vadere.gui.components.utils.Resources;
import org.vadere.state.scenario.Agent;
import org.vadere.util.geometry.shapes.VPoint;
import org.vadere.util.geometry.shapes.VTriangle;
public abstract class SimulationRenderer extends DefaultRenderer {
......@@ -133,6 +135,12 @@ public abstract class SimulationRenderer extends DefaultRenderer {
// g.setStroke(stroke);
}
/**
 * Draws every triangle of the given collection in gray, using the grid line
 * width as stroke width.
 *
 * @param g            graphics context that is drawn on
 * @param triangleList triangles to draw
 */
protected void renderTriangulation(final Graphics2D g, final Collection<VTriangle> triangleList) {
    g.setColor(Color.GRAY);
    final BasicStroke gridStroke = new BasicStroke(getGridLineWidth());
    g.setStroke(gridStroke);
    for (final VTriangle triangle : triangleList) {
        g.draw(triangle);
    }
}
private void renderDensity(final Graphics2D g) {
CLGaussianCalculator densityCalculator = new CLGaussianCalculator(model, model.config.getDensityScale(),
model.config.getDensityMeasurementRadius(),
......@@ -158,6 +166,7 @@ public abstract class SimulationRenderer extends DefaultRenderer {
g.drawImage(densityImage, 0, 0, null);
// g.drawImage(pedestrianDensity, 0, 0, null);
g.scale(model.config.getDensityScale(), model.config.getDensityScale());
densityCalculator.destroy();
}
private void renderPotentialField(final Graphics2D g, final int width, final int height) {
......@@ -192,4 +201,8 @@ public abstract class SimulationRenderer extends DefaultRenderer {
}
protected abstract void renderSimulationContent(final Graphics2D g);
/**
 * Computes the stroke width used for grid lines.
 *
 * @return 0.5 divided by the model's current scale factor, narrowed to float
 */
private float getGridLineWidth() {
    final double baseWidth = 0.5;
    final double scaledWidth = baseWidth / model.getScaleFactor();
    return (float) scaledWidth;
}
}
\ No newline at end of file
......@@ -13,11 +13,17 @@ class CLGaussianFilter extends GaussianFilter {
/**
 * Creates a Gaussian filter whose convolution is computed by a {@link CLConvolution}.
 * The convolution is constructed once, from the matrix dimensions and the kernel
 * prepared by the super constructor, and is initialized immediately.
 *
 * @param scenarioBounds passed through to the super constructor
 * @param scale          passed through to the super constructor
 * @param f              passed through to the super constructor
 * @param normalize      passed through to the super constructor
 * @throws IOException declared by this constructor; the throwing call is not visible here
 */
CLGaussianFilter(final Rectangle2D scenarioBounds, final double scale, final BiFunction<Integer, Integer, Float> f,
        final boolean normalize) throws IOException {
    super(scenarioBounds, scale, f, normalize);
    // Build the convolution exactly once; an extra throw-away instance would never be released.
    this.convolution = new CLConvolution(matrixWidth, matrixHeight, kernelWidth, kernel);
    this.convolution.init();
}
/**
 * Convolves the input matrix with the already initialized convolution and
 * stores the result in {@code outputMatrix}.
 */
@Override
public void filterImage() {
    // Dimensions and kernel were handed to the convolution at construction time,
    // so a single call with the input matrix is all that is needed.
    outputMatrix = this.convolution.convolve(inputMatrix);
}
/**
 * Tears down the underlying convolution by calling its {@code clearCL()} method.
 */
@Override
public void destroy() {
    convolution.clearCL();
}
}
......@@ -50,6 +50,7 @@ abstract class GaussianFilter implements IGaussianFilter {
kernelWidth = kernelWidth % 2 == 0 ? kernelWidth + 1 : kernelWidth;
kernelHeight = kernelWidth;
this.kernel = Convolution.floatGaussian1DKernel(kernelWidth, f, noramized);
//this.kernel = Convolution.generateFloatGaussianKernel(kernelWidth, 0.1f);
}
@Override
......
......@@ -46,7 +46,7 @@ public interface IGaussianFilter {
void setInputValue(final int x, final int y, final double value);
/** refresh or update the values of the image that contains all values. */
/** Refresh or update the values of the image that contains all values. */
void filterImage();
void clear();
......@@ -61,6 +61,11 @@ public interface IGaussianFilter {
double getMinFilteredValue();
/**
* This method has to be called when the filter is no longer needed!
*/
void destroy();
static <E extends Agent> IGaussianFilter create(final Rectangle2D scenarioBounds,
Collection<E> pedestrians, final double scale,
final double standardDerivation,
......
......@@ -16,4 +16,7 @@ public class JGaussianFilter extends GaussianFilter {
public void filterImage() {
outputMatrix = Convolution.convolveSeperate(inputMatrix, kernel, kernel, matrixWidth, matrixHeight, kernelWidth);
}
/** No-op: this implementation performs no cleanup when it is destroyed. */
@Override
public void destroy() {}
}
......@@ -68,6 +68,11 @@ public class ObstacleGaussianFilter implements IGaussianFilter {
return filter.getMinFilteredValue();
}
/**
 * Destroys the wrapped filter by forwarding the call to it.
 */
@Override
public void destroy() {
    filter.destroy();
}
private void setValues() {
for (int x = 0; x < getMatrixWidth(); x++) {
for (int y = 0; y < getMatrixHeight(); y++) {
......
......@@ -68,7 +68,6 @@ public class PedestrianGaussianFilter<E extends Pedestrian> implements IGaussian
/**
 * Calls {@code setValues()} and afterwards runs {@code filterImage()} on the
 * wrapped filter.
 */
@Override
public void filterImage() {
setValues();
filter.filterImage();
}
......@@ -98,6 +97,11 @@ public class PedestrianGaussianFilter<E extends Pedestrian> implements IGaussian
return filter.getMinFilteredValue();
}
/**
 * Destroys the wrapped filter by forwarding the call to it.
 */
@Override
public void destroy() {
    filter.destroy();
}
private void setValue(E pedestrian) {
VPoint position = pedestrian.getPosition();
VPoint filteredPosition = new VPoint(Math.max(0, position.x), Math.max(0, position.y));
......
......@@ -93,6 +93,7 @@ public class QueueDetector extends EikonalSolverFMM {
}
});
orderedPoints.clear();
filter.destroy();
super.initialize();
}
......
//KERNEL_SIMPLE with crop strategy
/*__kernel void convolve(const __global float * pInput,
__constant float * pFilter,
__global float * pOutput,
const int nInWidth,
const int nInHeight,
const int nFilterWidth)
{
int nWidth = get_global_size(0);
int xOut = get_global_id(0);
int yOut = get_global_id(1);
if(xOut < nInWidth && yOut < nInHeight) {
int bottomBorder = yOut + nFilterWidth / 2;
bottomBorder = bottomBorder < nInHeight ? bottomBorder : nInHeight-1;
int topBorder = yOut - (nFilterWidth / 2);
topBorder = topBorder > 0 ? topBorder : 0;
int rightBorder = xOut + nFilterWidth / 2;
rightBorder = rightBorder < nInWidth ? rightBorder : nInWidth - 1;
int leftBorder = xOut - (nFilterWidth / 2);
leftBorder = leftBorder > 0 ? leftBorder : 0;
float sum = 0;
int kernelX = 0;
int kernelY = 0;
for(int y = topBorder; y <= bottomBorder; y++) {
for(int x = leftBorder; x <= rightBorder; x++) {
int inputIndex = y * nInWidth + x;
int kernelIndex = kernelY * nFilterWidth + kernelX;
sum += pFilter[kernelIndex] * pInput[inputIndex];
kernelX++;
}
kernelX = 0;
kernelY++;
}
int idxOut = yOut * nInWidth + xOut;
pOutput[idxOut] = sum;
}
}*/
__kernel void convolve(const __global float * pInput,
__constant float * pFilter,
__global float * pOutput,
......@@ -68,10 +113,14 @@ __kernel void convolveRow(const __global float * pInput,
int idxF = (r + nFilterWidth / 2);
int yIn = yInTopLeft * nInWidth;
int idxIn = yIn + xInTopLeft + r * nInWidth;
if(idxF >= 0 && idxF < nFilterWidth && idxIn >= 0 && idxIn < nInWidth * nInHeight) {
sum += pFilter[idxF] * pInput[idxIn];
}
}
int idxOut = yOut * nWidth + xOut;
if(idxOut >= 0 && idxOut < nInWidth * nInHeight) {
pOutput[idxOut] = sum;
}
}
__kernel void convolveCol(const __global float * pInput,
......@@ -100,8 +149,14 @@ __kernel void convolveCol(const __global float * pInput,
int idxF = (r + nFilterWidth / 2);
int yIn = yInTopLeft * nInWidth;
int idxIn = yIn + xInTopLeft + r;
if(idxF >= 0 && idxF < nFilterWidth && idxIn >= 0 && idxIn < nInWidth * nInHeight) {
sum += pFilter[idxF] * pInput[idxIn];
}
}
int idxOut = yOut * nWidth + xOut;
if(idxOut >= 0 && idxOut < nInWidth * nInHeight) {
pOutput[idxOut] = sum;
}
}
\ No newline at end of file
......@@ -2,11 +2,15 @@ package org.vadere.util.opencl;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;
import org.lwjgl.BufferUtils;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.CLContextCallback;
import org.lwjgl.opencl.CLProgramCallback;
import org.lwjgl.system.Configuration;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;
import org.vadere.util.math.Convolution;
import java.io.IOException;
import java.nio.ByteBuffer;
......@@ -14,19 +18,19 @@ import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.*;
import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.system.MemoryUtil.NULL;
import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* Class to compute the (separate) convolutions on the GPU.
*
* @author Benedikt Zoennchen
*/
public class CLConvolution {
private static Logger log = LogManager.getLogger(CLConvolution.class);
private Convolution gaussianFilter;
// CL ids
private MemoryStack stack;
private long clPlatform;
private long clDevice;
private long clContext;
......@@ -47,6 +51,9 @@ public class CLConvolution {
private FloatBuffer hostGaussKernel;
private FloatBuffer output;
private PointerBuffer strings;
private PointerBuffer lengths;
// CL callbacks
private CLContextCallback contextCB;
private CLProgramCallback programCB;
......@@ -56,102 +63,111 @@ public class CLConvolution {
private long clKernelConvolveRow;
private long clKernelConvolveCol;
public CLConvolution() {
this.stack = MemoryStack.stackPush();
private long clKernel;
private int matrixWidth;
private int matrixHeight;
private int kernelWidth;
private float[] kernel;
private KernelType type;
/** Selects which OpenCL convolution kernel(s) an instance of this class runs. */
public enum KernelType {
/** Runs the column kernel followed by the row kernel. */
Separate,
/** Runs only the column kernel. */
Col,
/** Runs only the row kernel. */
Row,
/** Runs the single, non-separated convolve kernel. */
NonSeparate
}
public void init() {
initCallbacks();
initCL();
buildProgram();
/**
 * Creates a convolution of type {@link KernelType#Separate} by delegating to
 * the constructor that takes an explicit kernel type.
 *
 * @param matrixWidth  width of the matrix that will be convolved
 * @param matrixHeight height of the matrix that will be convolved
 * @param kernelWidth  width of the kernel
 * @param kernel       kernel values, must not be null
 */
public CLConvolution(
final int matrixWidth,
final int matrixHeight,
final int kernelWidth, @NotNull final float[] kernel) {
this(KernelType.Separate, matrixWidth, matrixHeight, kernelWidth, kernel);
}
public float[] convolve(final float[] input,
public CLConvolution(
@NotNull final KernelType type,
final int matrixWidth,
final int matrixHeight,
final float[] kernel,
final int kernelWidth) {
final int kernelWidth, @NotNull final float[] kernel) {
this.type = type;
this.matrixHeight = matrixHeight;
this.matrixWidth = matrixWidth;
this.kernelWidth = kernelWidth;
this.kernel = kernel;
init();
float[] result = convolve(input, matrixWidth, matrixHeight, kernel, kernelWidth, clKernelConvolve);
clearCL();
clReleaseKernel(clKernelConvolve);
return result;
Configuration.DEBUG.set(true);
Configuration.DEBUG_MEMORY_ALLOCATOR.set(true);
Configuration.DEBUG_STACK.set(true);
}
public float[] convolveRow(final float[] input, final int matrixWidth, final int matrixHeight, final float[] kernel,
final int kernelWidth) {
init();
float[] result = convolve(input, matrixWidth, matrixHeight, kernel, kernelWidth, clKernelConvolveRow);
clearCL();
clReleaseKernel(clKernelConvolveRow);
return result;
public void init() {
initCallbacks();
initCL();
buildProgram();
hostScenario = MemoryUtil.memAllocFloat(matrixWidth * matrixHeight);
output = MemoryUtil.memAllocFloat(matrixWidth * matrixHeight);
switch (type) {
case NonSeparate: clKernel = clKernelConvolve; break;
case Col: clKernel = clKernelConvolveCol; break;
case Row: clKernel = clKernelConvolveRow; break;
case Separate: clKernel = -1; break;
default: throw new IllegalArgumentException("unsupported kernel type = " + type);
}
public float[] convolveCol(final float[] input, final int matrixWidth, final int matrixHeight, final float[] kernel,
final int kernelWidth) {
init();
float[] result = convolve(input, matrixWidth, matrixHeight, kernel, kernelWidth, clKernelConvolveCol);
clearCL();
clReleaseKernel(clKernelConvolveCol);
return result;
if(type != KernelType.Separate) {
setArguments(clKernel);
}
else {
setArguments(clKernelConvolveCol, clKernelConvolveRow);
}
}
public float[] convolveSeparate(final float[] input, final int matrixWidth, final int matrixHeight, final float[] kernel,
final int kernelWidth) {
assert matrixWidth * matrixHeight == input.length;
init();
hostScenario = CLUtils.toFloatBuffer(input);
output = CLUtils.toFloatBuffer(input);
hostGaussKernel = CLUtils.toFloatBuffer(kernel);
public float[] convolve(final float[] input) {
// 1. write input to native-c-like-memory
CLUtils.toFloatBuffer(input, hostScenario);
// host memory to gpu memory
clInput = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hostScenario, errcode_ret);
clTmp = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4 * input.length, errcode_ret);
clOutput = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY, 4 * input.length, errcode_ret);
clGaussianKernel = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hostGaussKernel, errcode_ret);
// 2. write this memory to the GPU
clEnqueueWriteBuffer(clQueue, clInput, true, 0, hostScenario, null, null);
clSetKernelArg1p(clKernelConvolveCol, 0, clInput);
clSetKernelArg1p(clKernelConvolveCol, 1, clGaussianKernel);
clSetKernelArg1p(clKernelConvolveCol, 2, clTmp);
clSetKernelArg1i(clKernelConvolveCol, 3, matrixWidth);
clSetKernelArg1i(clKernelConvolveCol, 4, matrixHeight);
clSetKernelArg1i(clKernelConvolveCol, 5, kernelWidth);
// 2. convolve
switch (type) {
case NonSeparate: convolve(clKernelConvolve); break;
case Col: convolve(clKernelConvolveCol); break;
case Row: convolve(clKernelConvolveRow); break;
case Separate: convolveSeparate(); break;
default: throw new IllegalArgumentException("unsupported kernel type = " + type);
}
clSetKernelArg1p(clKernelConvolveRow, 0, clTmp);
clSetKernelArg1p(clKernelConvolveRow, 1, clGaussianKernel);
clSetKernelArg1p(clKernelConvolveRow, 2, clOutput);
clSetKernelArg1i(clKernelConvolveRow, 3, matrixWidth);
clSetKernelArg1i(clKernelConvolveRow, 4, matrixHeight);
clSetKernelArg1i(clKernelConvolveRow, 5, kernelWidth);
// 4. read result from the GPU to a native-c-like-memory
clEnqueueReadBuffer(clQueue, clOutput, true, 0, output, null, null);
// 5. read this memory and transform it back into a java array.
float[] foutput = CLUtils.toFloatArray(output, matrixWidth * matrixHeight);
return foutput;
}
private void convolveSeparate() {
//init();
try (MemoryStack stack = stackPush()) {
PointerBuffer clGlobalWorkSizeEdges = BufferUtils.createPointerBuffer(2);
clGlobalWorkSizeEdges.put(0, matrixWidth);
clGlobalWorkSizeEdges.put(1, matrixHeight);
PointerBuffer ev = stack.callocPointer(1);
// run the kernel and read the result
clEnqueueNDRangeKernel(clQueue, clKernelConvolveCol, 2, null, clGlobalWorkSizeEdges, null, null, null);
clEnqueueNDRangeKernel(clQueue, clKernelConvolveRow, 2, null, clGlobalWorkSizeEdges, null, null, null);
clFinish(clQueue);
clEnqueueReadBuffer(clQueue, clOutput, true, 0, output, null, null);
float[] foutput = CLUtils.toFloatArray(output, input.length);
clearCL();
clReleaseKernel(clTmp);
clReleaseKernel(clKernelConvolve);
clReleaseKernel(clKernelConvolveRow);
clReleaseKernel(clKernelConvolveCol);
return foutput;
}
}
private float[] convolve(final float[] input,
final int matrixWidth,
final int matrixHeight,
final float[] kernel,
final int kernelWidth, final long clKernel) {
assert matrixWidth * matrixHeight == input.length;
setArguments(input, matrixWidth, matrixHeight, kernel, kernelWidth, clKernel);
private void convolve(final long clKernel) {
PointerBuffer clGlobalWorkSizeEdges = BufferUtils.createPointerBuffer(2);
clGlobalWorkSizeEdges.put(0, matrixWidth);
clGlobalWorkSizeEdges.put(1, matrixHeight);
......@@ -159,21 +175,13 @@ public class CLConvolution {
// run the kernel and read the result
clEnqueueNDRangeKernel(clQueue, clKernel, 2, null, clGlobalWorkSizeEdges, null, null, null);
clFinish(clQueue);
clEnqueueReadBuffer(clQueue, clOutput, true, 0, output, null, null);
float[] foutput = CLUtils.toFloatArray(output, input.length);
return foutput;
}
private void setArguments(final float[] input, final int matrixWidth, final int matrixHeight, final float[] kernel, final int kernelWidth, final long clKernel) {
hostScenario = CLUtils.toFloatBuffer(input);
output = CLUtils.toFloatBuffer(input);
hostGaussKernel = CLUtils.toFloatBuffer(kernel);
private void setArguments(final long clKernel) {
// host memory to gpu memory
clInput = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hostScenario, errcode_ret);
clOutput = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY, 4 * input.length, errcode_ret);
clGaussianKernel = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hostGaussKernel, errcode_ret);
clInput = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4 * matrixWidth * matrixHeight, errcode_ret);
clOutput = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY, 4 * matrixWidth * matrixHeight, errcode_ret);
clGaussianKernel = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, hostGaussKernel, errcode_ret);
clSetKernelArg1p(clKernel, 0, clInput);
clSetKernelArg1p(clKernel, 1, clGaussianKernel);
......@@ -183,17 +191,57 @@ public class CLConvolution {
clSetKernelArg1i(clKernel, 5, kernelWidth);
}
private void clearCL() {
/**
 * Creates the device buffers for the two-pass convolution and binds the
 * arguments of both kernels: the column kernel reads {@code clInput} and
 * writes {@code clTmp}, the row kernel reads {@code clTmp} and writes
 * {@code clOutput}.
 *
 * @param clKernelConvolveCol handle of the column kernel
 * @param clKernelConvolveRow handle of the row kernel
 */
private void setArguments(final long clKernelConvolveCol, final long clKernelConvolveRow) {
    // 4 bytes per float element of the matrix
    final int matrixBytes = 4 * matrixWidth * matrixHeight;

    clTmp = clCreateBuffer(clContext, CL_MEM_READ_WRITE, matrixBytes, errcode_ret);
    clInput = clCreateBuffer(clContext, CL_MEM_READ_WRITE, matrixBytes, errcode_ret);
    clOutput = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY, matrixBytes, errcode_ret);
    clGaussianKernel = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, hostGaussKernel, errcode_ret);

    // first pass: input -> temporary buffer
    bindKernelArguments(clKernelConvolveCol, clInput, clTmp);
    // second pass: temporary buffer -> output
    bindKernelArguments(clKernelConvolveRow, clTmp, clOutput);
}

/** Binds source buffer, Gaussian kernel, target buffer and the three size arguments to one kernel. */
private void bindKernelArguments(final long kernelHandle, final long sourceBuffer, final long targetBuffer) {
    clSetKernelArg1p(kernelHandle, 0, sourceBuffer);
    clSetKernelArg1p(kernelHandle, 1, clGaussianKernel);
    clSetKernelArg1p(kernelHandle, 2, targetBuffer);
    clSetKernelArg1i(kernelHandle, 3, matrixWidth);
    clSetKernelArg1i(kernelHandle, 4, matrixHeight);
    clSetKernelArg1i(kernelHandle, 5, kernelWidth);
}
/**
 * Releases the OpenCL memory objects and kernels and frees the native host
 * buffers of this convolution.
 *
 * The callbacks, the command queue, the program and the context are
 * deliberately left alone here: {@code clearCL()} frees and releases them
 * after calling this method, and doing it in both places would free the
 * callbacks twice and release the same handles twice.
 */
private void clearMemory() {
    // release device memory objects
    clReleaseMemObject(clInput);
    clReleaseMemObject(clOutput);
    clReleaseMemObject(clGaussianKernel);

    // the temporary buffer is only created for the two-pass convolution
    if (type == KernelType.Separate) {
        clReleaseMemObject(clTmp);
    }

    clReleaseKernel(clKernelConvolve);
    clReleaseKernel(clKernelConvolveRow);
    clReleaseKernel(clKernelConvolveCol);

    // free the native host-side buffers
    MemoryUtil.memFree(hostScenario);
    MemoryUtil.memFree(output);
    MemoryUtil.memFree(hostGaussKernel);
    //CL.destroy();
    // strings.free();
    // lengths.free();
}
/**
 * Tears down this convolution: calls {@code clearMemory()}, frees both
 * callbacks, then releases the command queue, the program and the context,
 * logging for each whether the release returned {@code CL_SUCCESS}.
 */
public void clearCL() {
    clearMemory();
    contextCB.free();
    programCB.free();

    final boolean queueReleased = clReleaseCommandQueue(clQueue) == CL_SUCCESS;
    log.info("release command queue: " + queueReleased);

    final boolean programReleased = clReleaseProgram(clProgram) == CL_SUCCESS;
    log.info("release program: " + programReleased);

    final boolean contextReleased = clReleaseContext(clContext) == CL_SUCCESS;
    log.info("release context: " + contextReleased);
}
// private helpers
......@@ -211,6 +259,7 @@ public class CLConvolution {
}
private void initCL() {
try (MemoryStack stack = stackPush()) {