2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d380bf90 authored by Benedikt Zoennchen's avatar Benedikt Zoennchen
Browse files

GPU osm copy potential field onto the gpu.

parent e3c08bfb
......@@ -211,6 +211,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -202,6 +202,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -552,6 +552,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -571,6 +572,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -590,6 +592,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -609,6 +612,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -628,6 +632,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -647,6 +652,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -666,6 +672,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 2 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -685,6 +692,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 2 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -704,6 +712,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -723,6 +732,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 2 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -742,6 +752,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 2 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
}, {
"id" : -1,
......@@ -761,6 +772,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -190,6 +190,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -154,6 +154,7 @@
"spawnAtRandomPositions" : true,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -150,6 +150,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -150,6 +150,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
......@@ -200,6 +200,7 @@
"spawnAtRandomPositions" : false,
"useFreeSpaceOnly" : false,
"targetIds" : [ 1 ],
"groupSizeDistribution" : [ 0.0, 0.0, 1.0 ],
"dynamicElementType" : "PEDESTRIAN"
} ],
"dynamicElements" : [ ],
......
package org.vadere.simulator.models.osm.opencl;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.CLContextCallback;
import org.lwjgl.opencl.CLProgramCallback;
import org.lwjgl.system.Configuration;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;
import org.vadere.simulator.models.osm.PedestrianOSM;
import org.vadere.state.attributes.models.AttributesFloorField;
import org.vadere.state.scenario.Pedestrian;
import org.vadere.util.geometry.shapes.VPoint;
import org.vadere.util.geometry.shapes.VRectangle;
import org.vadere.util.opencl.CLInfo;
import org.vadere.util.opencl.CLUtils;
import org.vadere.util.opencl.OpenCLException;
import org.vadere.util.potential.calculators.EikonalSolver;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.List;
import java.util.stream.Collectors;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_ONLY;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE;
import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS;
import static org.lwjgl.opencl.CL10.CL_SUCCESS;
import static org.lwjgl.opencl.CL10.clBuildProgram;
import static org.lwjgl.opencl.CL10.clCreateBuffer;
import static org.lwjgl.opencl.CL10.clCreateCommandQueue;
import static org.lwjgl.opencl.CL10.clCreateContext;
import static org.lwjgl.opencl.CL10.clCreateKernel;
import static org.lwjgl.opencl.CL10.clCreateProgramWithSource;
import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel;
import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer;
import static org.lwjgl.opencl.CL10.clEnqueueWriteBuffer;
import static org.lwjgl.opencl.CL10.clFinish;
import static org.lwjgl.opencl.CL10.clGetDeviceIDs;
import static org.lwjgl.opencl.CL10.clGetDeviceInfo;
import static org.lwjgl.opencl.CL10.clGetPlatformIDs;
import static org.lwjgl.opencl.CL10.clReleaseCommandQueue;
import static org.lwjgl.opencl.CL10.clReleaseContext;
import static org.lwjgl.opencl.CL10.clReleaseKernel;
import static org.lwjgl.opencl.CL10.clReleaseMemObject;
import static org.lwjgl.opencl.CL10.clReleaseProgram;
import static org.lwjgl.opencl.CL10.clSetKernelArg;
import static org.lwjgl.opencl.CL10.clSetKernelArg1f;
import static org.lwjgl.opencl.CL10.clSetKernelArg1i;
import static org.lwjgl.opencl.CL10.clSetKernelArg1p;
import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.system.MemoryUtil.NULL;
import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* @author Benedikt Zoennchen
*
* This class offers the methods to compute an array based linked-cell which contains 2D-coordinates i.e. {@link VPoint}
* using the GPU (see. green-2007 Building the Grid using Sorting).
*/
public class CLOptimalStepsModel {
// NOTE(review): a second logger for this class, `logger`, is declared further down; only `logger`
// is used in the part of the class visible around allocHostMemory().
private static Logger log = LogManager.getLogger(CLOptimalStepsModel.class);
// OpenCL object handles: platform, device, context, command queue and program.
private long clPlatform;
private long clDevice;
private long clContext;
private long clQueue;
private long clProgram;
// OpenCL memory object handles. They are handed to the kernel wrappers (clCalcHash, clBitonicSort,
// clNextPosition) and to clEnqueueReadBuffer in the public methods of this class.
private long clHashes;
private long clIndices;
private long clCellStarts;
private long clCellEnds;
private long clReorderedPedestrians;
private long clPedestrians;
private long clCellSize;
private long clWorldOrigin;
private long clGridSize;
private long clTargetPotential;
private long clObstaclePotential;
private long clPedestrianNextPositions;
// Host-side NIO buffers. `hashes` and `indices` are the read-back targets of clEnqueueReadBuffer.
private IntBuffer hashes;
private IntBuffer indices;
private IntBuffer cellStarts;
private IntBuffer cellEnds;
private FloatBuffer reorderedPedestrians;
// Filled by allocHostMemory() with three floats per pedestrian: x, y and stepRadius.
private FloatBuffer pedestrians;
private FloatBuffer worldOrigin;
private FloatBuffer cellSize;
// Sampled potential fields, created in the constructor by generatePotentialFieldApproximation().
private FloatBuffer targetPotentialField;
private FloatBuffer obstaclePotentialField;
private IntBuffer gridSize;
private IntBuffer inValues;
private IntBuffer outValues;
// presumably hold the OpenCL program sources — not used in the visible part of the class, TODO confirm
private ByteBuffer source;
private ByteBuffer particleSource;
// CL callbacks
private CLContextCallback contextCB;
private CLProgramCallback programCB;
// OpenCL kernel handles
private long clBitonicSortLocal;
private long clBitonicSortLocal1;
private long clBitonicMergeGlobal;
private long clBitonicMergeLocal;
private long clCalcHash;
private long clFindCellBoundsAndReorder;
private long clNextPositions;
// Number of pedestrians this model was constructed for; the public methods assert that the
// lists they receive have exactly this size.
private int numberOfElements;
// iGridSize[0] * iGridSize[1], computed in the constructor.
private int numberOfGridCells;
// Spatial bound of the linked cell (constructor argument).
private VRectangle bound;
// Cell size of the linked cell as float (constructor argument narrowed from double).
private float iCellSize;
// Number of cells in x (index 0) and y (index 1) direction: ceil(bound extent / cellSize).
private int[] iGridSize;
// The pedestrian list of the current call; set by each public compute method before allocHostMemory().
private List<PedestrianOpenCL> pedestrianList;
// Supplies the potential field resolution and the parameters used when sampling the solvers.
private final AttributesFloorField attributesFloorField;
// NOTE(review): the four arrays below are not used in the visible part of the class.
private int[] keys;
private int[] values;
private int[] resultValues;
private int[] resultKeys;
// NOTE(review): duplicates `log` declared at the top of the class (same class, same LogManager call).
private static final Logger logger = LogManager.getLogger(CLOptimalStepsModel.class);
// presumably the device's CL_DEVICE_MAX_WORK_GROUP_SIZE — assigned outside the visible part, TODO confirm
private int max_work_group_size;
// When true, the constructor switches on LWJGL's DEBUG, DEBUG_MEMORY_ALLOCATOR and DEBUG_STACK options.
private boolean debug = false;
/**
* Kernel variants.
*
* NOTE(review): the enum is not referenced in the visible part of this class; the names suggest a
* distinction between separable (row / column) and non-separable kernels — TODO confirm.
*/
public enum KernelType {
Separate,
Col,
Row,
NonSeparate
}
/**
 * Creates the model for a fixed number of pedestrians, samples both potential fields into native
 * buffers and finally calls {@code init()}.
 *
 * @param numberOfElements     the number of pedestrians contained in the linked cell.
 * @param bound                the spatial bound of the linked cell.
 * @param cellSize             the cellSize (in x and y direction) of the linked cell.
 * @param attributesFloorField floor field attributes; provides the potential field resolution and the
 *                             parameters handed to the solvers while sampling.
 * @param targetPotential      solver that is sampled to build the target potential field.
 * @param obstaclePotential    solver that is sampled to build the obstacle potential field.
 *
 * @throws OpenCLException as declared; {@code init()} is the only call in this constructor that can
 *                         raise it
 */
public CLOptimalStepsModel(
        final int numberOfElements,
        @NotNull final VRectangle bound,
        final double cellSize,
        @NotNull final AttributesFloorField attributesFloorField,
        @NotNull final EikonalSolver targetPotential,
        @NotNull final EikonalSolver obstaclePotential) throws OpenCLException {
    // LWJGL evaluates its debug options when the affected classes are initialised. They therefore
    // have to be set before the first MemoryUtil allocation below, otherwise they may be ignored.
    if (debug) {
        Configuration.DEBUG.set(true);
        Configuration.DEBUG_MEMORY_ALLOCATOR.set(true);
        Configuration.DEBUG_STACK.set(true);
    }

    this.numberOfElements = numberOfElements;
    this.iGridSize = new int[]{
            (int) Math.ceil(bound.getWidth() / cellSize),
            (int) Math.ceil(bound.getHeight() / cellSize)};
    this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1];
    this.bound = bound;
    this.iCellSize = (float) cellSize;
    this.attributesFloorField = attributesFloorField;

    //TODO: this should be done in mallocHostMemory().
    // Both calls read this.bound and this.attributesFloorField, so they must stay below the assignments.
    this.targetPotentialField = generatePotentialFieldApproximation(targetPotential);
    this.obstaclePotentialField = generatePotentialFieldApproximation(obstaclePotential);

    init();
}
/**
 * Number of sample points of the potential field in x direction:
 * {@code floor(bound width / potential field resolution) + 1}.
 */
private int getPotentialFieldWidth() {
    final double stepsInX = Math.floor(bound.getWidth() / attributesFloorField.getPotentialFieldResolution());
    return (int) stepsInX + 1;
}
/**
 * Number of sample points of the potential field in y direction:
 * {@code floor(bound height / potential field resolution) + 1}.
 */
private int getPotentialFieldHeight() {
    final double stepsInY = Math.floor(bound.getHeight() / attributesFloorField.getPotentialFieldResolution());
    return (int) stepsInY + 1;
}
/**
 * Total number of sample points of a potential field, i.e. width times height.
 */
private int getPotentialFieldSize() {
    final int columns = getPotentialFieldWidth();
    final int rows = getPotentialFieldHeight();
    return columns * rows;
}
/**
 * Samples the potential of the given solver on a regular grid covering {@code bound} and stores the
 * values row by row (x runs fastest) in a natively allocated buffer.
 *
 * <p>The grid has {@code getPotentialFieldWidth()} x {@code getPotentialFieldHeight()} points, starts
 * at (minX, minY) of the bound and uses the potential field resolution as spacing. Integer counters
 * are used so that exactly {@code getPotentialFieldSize()} values are written, independent of
 * floating point rounding and of the origin of the bound.</p>
 *
 * @param eikonalSolver the solver whose potential is sampled
 * @return a buffer of {@code getPotentialFieldSize()} floats allocated with
 *         {@code MemoryUtil.memAllocFloat}; it is off-heap memory that has to be released
 *         explicitly with {@code MemoryUtil.memFree}
 */
private FloatBuffer generatePotentialFieldApproximation(@NotNull EikonalSolver eikonalSolver) {
    final int width = getPotentialFieldWidth();
    final int height = getPotentialFieldHeight();
    final double resolution = attributesFloorField.getPotentialFieldResolution();
    final double minX = bound.getMinX();
    final double minY = bound.getMinY();

    final FloatBuffer floatBuffer = MemoryUtil.memAllocFloat(getPotentialFieldSize());
    for (int row = 0; row < height; row++) {
        final double y = minY + row * resolution;
        for (int col = 0; col < width; col++) {
            final double x = minX + col * resolution;
            floatBuffer.put(row * width + col,
                    (float) eikonalSolver.getPotential(new VPoint(x, y),
                            attributesFloorField.getObstacleGridPenalty(),
                            attributesFloorField.getTargetAttractionStrength()));
        }
    }
    return floatBuffer;
}
/**
 * Plain data holder describing one pedestrian for the OpenCL computation: its current position,
 * its step radius and, once assigned, the position it moves to.
 */
public static class PedestrianOpenCL {
    /** Step radius of the pedestrian. */
    public float stepRadius;
    /** Current position of the pedestrian. */
    public VPoint position;
    /** Next position; not set by the constructor, so it is {@code null} until assigned. */
    public VPoint newPosition;

    /**
     * @param position   the current position
     * @param stepRadius the step radius
     */
    public PedestrianOpenCL(final VPoint position, final float stepRadius) {
        this.stepRadius = stepRadius;
        this.position = position;
    }

    /**
     * @return the current and the next position in the form {@code "position -> newPosition"}
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(position);
        text.append(" -> ");
        text.append(newPosition);
        return text.toString();
    }
}
/**
* The data structure representing the linked cell. The elements of cell i lie
* between (reorderedPositions[cellStarts[i]*2], reorderedPositions[cellStarts[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]),
* i.e. cellStarts[i] is inclusive and cellEnds[i] is exclusive.
*
* NOTE(review): no method in the visible part of the enclosing class currently creates an instance;
* the code that filled this structure is commented out in getNextSteps.
*/
public class LinkedCell {
/**
* the index at which each cell starts, i.e. cell i starts at cellStarts[i].
*/
public int[] cellStarts;
/**
* the index at which each cell ends, i.e. cell i ends (exclusively) at cellEnds[i].
*/
public int[] cellEnds;
/**
* the 2D-coordinates ordered by cell, stored as consecutive (x, y) pairs.
*/
public float[] reorderedPositions;
/**
* the mapping between the unordered (original) positions and the reordered positions,
* i.e. the reordered position i is the original position indices[i].
*/
public int[] indices;
/**
* the hashes, i.e. the cells of the pedestrians: hashes[i] is the cell of pedestrian i.
*/
public int[] hashes;
/**
* the original positions in original order.
*/
public float[] positions;
}
/**
 * Computes the next position of every pedestrian with the {@code clNextPosition} kernel wrapper and
 * stores it in {@link PedestrianOpenCL#newPosition}.
 *
 * <p>Host and device memory are allocated at the start of the call. {@code clearMemory()} and
 * {@code clearCL()} are always invoked before the method returns, also if a call in between throws.
 * The construction of the linked cell (hashing, bitonic sort, cell bounds) is currently not part
 * of this method.</p>
 *
 * @param pedestrians the pedestrians to move; the list has to contain exactly as many elements as
 *                    this model was constructed for (only checked by an assertion)
 * @return the list passed in, each element now carrying its {@code newPosition}
 *
 * @throws OpenCLException
 */
public List<PedestrianOpenCL> getNextSteps(@NotNull final List<PedestrianOpenCL> pedestrians) throws OpenCLException {
    // The stack frame is kept so that stack allocations made further down the call chain are
    // popped when this method exits.
    try (MemoryStack ignored = stackPush()) {
        assert pedestrians.size() == numberOfElements;
        this.pedestrianList = pedestrians;
        allocHostMemory();
        allocDeviceMemory();

        // Two floats per pedestrian. Allocated off-heap instead of on the LWJGL MemoryStack because
        // the stack has a small fixed capacity that a large crowd would exceed.
        final FloatBuffer nextPositions = MemoryUtil.memAllocFloat(numberOfElements * 2);
        try {
            clNextPosition(clPedestrianNextPositions, clPedestrians, clCellStarts, clCellEnds, clObstaclePotential, clTargetPotential, clWorldOrigin);

            clEnqueueReadBuffer(clQueue, clPedestrianNextPositions, true, 0, nextPositions, null, null);
            // NOTE(review): the indices are read back but not consumed in this method; the transfer
            // is kept as is — confirm whether it can be dropped.
            clEnqueueReadBuffer(clQueue, clIndices, true, 0, indices, null, null);

            final float[] coordinates = CLUtils.toFloatArray(nextPositions, numberOfElements * 2);
            for (int i = 0; i < numberOfElements; i++) {
                final float x = coordinates[i * 2];
                final float y = coordinates[i * 2 + 1];
                pedestrians.get(i).newPosition = new VPoint(x, y);
            }
            return pedestrians;
        } finally {
            MemoryUtil.memFree(nextPositions);
            try {
                clearMemory();
            } finally {
                clearCL();
            }
        }
    }
}
/**
 * Computes all the hash values, i.e. the cell of each position, sorts these hashes and constructs a
 * mapping of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
 *
 * <p>{@code clearMemory()} and {@code clearCL()} are always invoked before the method returns, also
 * if one of the kernel calls throws.</p>
 *
 * @param positions the pedestrians which will be hashed; the list has to contain exactly as many
 *                  elements as this model was constructed for (only checked by an assertion)
 * @return the sorted hashes.
 * @throws OpenCLException
 */
public int[] calcSortedHashes(@NotNull final List<PedestrianOpenCL> positions) throws OpenCLException {
    assert positions.size() == numberOfElements;
    this.pedestrianList = positions;
    allocHostMemory();
    allocDeviceMemory();
    try {
        clCalcHash(clHashes, clIndices, clPedestrians, clCellSize, clWorldOrigin, clGridSize, numberOfElements);
        // Sorts in place: the hash and index buffers serve as both input and output.
        clBitonicSort(clHashes, clIndices, clHashes, clIndices, numberOfElements, 1);
        clEnqueueReadBuffer(clQueue, clHashes, true, 0, hashes, null, null);
        // Copy to a Java array before the host buffers are cleared in the finally block.
        return CLUtils.toIntArray(hashes, numberOfElements);
    } finally {
        try {
            clearMemory();
        } finally {
            clearCL();
        }
    }
}
/**
 * Computes all the hash values, i.e. the cell of each position.
 * This method exists to test the hash computation on the GPU.
 *
 * <p>{@code clearMemory()} and {@code clearCL()} are always invoked before the method returns, also
 * if the kernel call throws.</p>
 *
 * @param positions the pedestrians which will be hashed; the list has to contain exactly as many
 *                  elements as this model was constructed for (only checked by an assertion)
 * @return the (unsorted) hashes.
 * @throws OpenCLException
 */
public int[] calcHashes(@NotNull final List<PedestrianOpenCL> positions) throws OpenCLException {
    assert positions.size() == numberOfElements;
    this.pedestrianList = positions;
    allocHostMemory();
    allocDeviceMemory();
    try {
        clCalcHash(clHashes, clIndices, clPedestrians, clCellSize, clWorldOrigin, clGridSize, numberOfElements);
        clEnqueueReadBuffer(clQueue, clHashes, true, 0, hashes, null, null);
        // Copy to a Java array before the host buffers are cleared in the finally block.
        return CLUtils.toIntArray(hashes, numberOfElements);
    } finally {
        try {
            clearMemory();
        } finally {
            clearCL();
        }
    }
}
/**
 * Returns the number of grid cells of the linked cell per direction as a freshly created array,
 * so the caller cannot modify the internal state.
 *
 * @return a two-element array: index 0 is the x direction, index 1 the y direction.
 */
public int[] getGridSize() {
    final int[] sizes = new int[2];
    sizes[0] = iGridSize[0];
    sizes[1] = iGridSize[1];
    return sizes;
}
/**
 * Returns the size of a single grid cell, which is equal in x and y direction.
 *
 * @return the cell size
 */
public float getCellSize() {
    return this.iCellSize;
}
/**
 * Returns the origin of the linked cell, i.e. the corner (minX, minY) of its spatial bound.
 *
 * @return a new point at the minimal x and y coordinate of the bound
 */
public VPoint getWorldOrign() {
    final double originX = bound.getMinX();
    final double originY = bound.getMinY();
    return new VPoint(originX, originY);
}
public void allocHostMemory() {
assert pedestrianList.size() == numberOfElements;
float[] posAndRadius = new float[numberOfElements*3];
for(int i = 0; i < numberOfElements; i++) {
posAndRadius[i*3] = (float) pedestrianList.get(i).position.getX();
posAndRadius[i*3+1] = (float) pedestrianList.get(i).position.getY();
posAndRadius[i*3+2] = pedestrianList.get(i).stepRadius;
}
this.pedestrians = CLUtils.toFloatBuffer(posAndRadius);
for(int i = 0; i < numberOfElements; i++) {
logger.info(this.pedestrians .get(i*3));
logger.info(this.pedestrians .get(i*3+1));
logger.info(this.pedestrians .get(i*3+2));
}
this.hashes = MemoryUtil.memAllocInt(numberOfElements);