11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 36865d52 authored by Benedikt Zoennchen's avatar Benedikt Zoennchen

documentation (adding comments), the hardware parameters are no longer hardcoded for the sorting.

parent fdc22cfc
...@@ -8,9 +8,6 @@ ...@@ -8,9 +8,6 @@
* is strictly prohibited. * is strictly prohibited.
* *
*/ */
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate( inline void ComparatorPrivate(
uint *keyA, uint *keyA,
uint *valA, uint *valA,
...@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal( ...@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal, __global uint *d_SrcVal,
uint arrayLength, uint arrayLength,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subbatch and load data //Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1( ...@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey, __global uint *d_DstKey,
__global uint *d_DstVal, __global uint *d_DstVal,
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal __global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subarray and load data //Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal( ...@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength, uint arrayLength,
uint stride, uint stride,
uint size, uint size,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
...@@ -63,8 +63,6 @@ typedef struct { ...@@ -63,8 +63,6 @@ typedef struct {
float stepLength; float stepLength;
} pedestrian; } pedestrian;
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate( inline void ComparatorPrivate(
uint *keyA, uint *keyA,
uint *valA, uint *valA,
...@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal( ...@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal, __global uint *d_SrcVal,
uint arrayLength, uint arrayLength,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subbatch and load data //Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1( ...@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey, __global uint *d_DstKey,
__global uint *d_DstVal, __global uint *d_DstVal,
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal __global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subarray and load data //Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal( ...@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength, uint arrayLength,
uint stride, uint stride,
uint size, uint size,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
...@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil; ...@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer; import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM; import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME; import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU; import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR; import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR; import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_ONLY;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE; import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE;
import static org.lwjgl.opencl.CL10.CL_MEM_WRITE_ONLY;
import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS; import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS;
import static org.lwjgl.opencl.CL10.CL_SUCCESS; import static org.lwjgl.opencl.CL10.CL_SUCCESS;
import static org.lwjgl.opencl.CL10.clBuildProgram; import static org.lwjgl.opencl.CL10.clBuildProgram;
...@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel; ...@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
import static org.lwjgl.opencl.CL10.clCreateProgramWithSource; import static org.lwjgl.opencl.CL10.clCreateProgramWithSource;
import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel; import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel;
import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer; import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer;
import static org.lwjgl.opencl.CL10.clEnqueueWriteBuffer;
import static org.lwjgl.opencl.CL10.clFinish; import static org.lwjgl.opencl.CL10.clFinish;
import static org.lwjgl.opencl.CL10.clGetDeviceIDs; import static org.lwjgl.opencl.CL10.clGetDeviceIDs;
import static org.lwjgl.opencl.CL10.clGetDeviceInfo;
import static org.lwjgl.opencl.CL10.clGetPlatformIDs; import static org.lwjgl.opencl.CL10.clGetPlatformIDs;
import static org.lwjgl.opencl.CL10.clReleaseCommandQueue; import static org.lwjgl.opencl.CL10.clReleaseCommandQueue;
import static org.lwjgl.opencl.CL10.clReleaseContext; import static org.lwjgl.opencl.CL10.clReleaseContext;
import static org.lwjgl.opencl.CL10.clReleaseKernel; import static org.lwjgl.opencl.CL10.clReleaseKernel;
import static org.lwjgl.opencl.CL10.clReleaseMemObject; import static org.lwjgl.opencl.CL10.clReleaseMemObject;
import static org.lwjgl.opencl.CL10.clReleaseProgram; import static org.lwjgl.opencl.CL10.clReleaseProgram;
import static org.lwjgl.opencl.CL10.clSetKernelArg;
import static org.lwjgl.opencl.CL10.clSetKernelArg1i; import static org.lwjgl.opencl.CL10.clSetKernelArg1i;
import static org.lwjgl.opencl.CL10.clSetKernelArg1p; import static org.lwjgl.opencl.CL10.clSetKernelArg1p;
import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryStack.stackPush;
...@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8; ...@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/** /**
* @author Benedikt Zoennchen * @author Benedikt Zoennchen
*
* This class implements the bitonic sort using the GPU via OpenCL.
*/ */
public class CLBitonicSort { public class CLBitonicSort {
private static Logger log = LogManager.getLogger(CLBitonicSort.class); private static Logger log = LogManager.getLogger(CLBitonicSort.class);
...@@ -93,7 +94,7 @@ public class CLBitonicSort { ...@@ -93,7 +94,7 @@ public class CLBitonicSort {
private int[] resultKeys; private int[] resultKeys;
//Note: logically shared with BitonicSort.cl! //Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16; private int max_work_group_size = 16;
private boolean debug = false; private boolean debug = false;
...@@ -154,7 +155,7 @@ public class CLBitonicSort { ...@@ -154,7 +155,7 @@ public class CLBitonicSort {
// small sorts // small sorts
if(keys.length <= LOCAL_SIZE_LIMIT) if(keys.length <= max_work_group_size)
{ {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues));
...@@ -162,6 +163,8 @@ public class CLBitonicSort { ...@@ -162,6 +163,8 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, keys.length / 2); clLocalWorkSize.put(0, keys.length / 2);
...@@ -175,20 +178,22 @@ public class CLBitonicSort { ...@@ -175,20 +178,22 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2); clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
for(int size = 2 * LOCAL_SIZE_LIMIT; size <= keys.length; size <<= 1) for(int size = 2 * max_work_group_size; size <= keys.length; size <<= 1)
{ {
for(int stride = size / 2; stride > 0; stride >>= 1) for(int stride = size / 2; stride > 0; stride >>= 1)
{ {
if(stride >= LOCAL_SIZE_LIMIT) if(stride >= max_work_group_size)
{ {
//Launch bitonicMergeGlobal //Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys));
...@@ -204,7 +209,7 @@ public class CLBitonicSort { ...@@ -204,7 +209,7 @@ public class CLBitonicSort {
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4); clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
...@@ -221,11 +226,13 @@ public class CLBitonicSort { ...@@ -221,11 +226,13 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4 )); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2); clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
...@@ -368,6 +375,9 @@ public class CLBitonicSort { ...@@ -368,6 +375,9 @@ public class CLBitonicSort {
clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret); clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret);
CLInfo.checkCLError(errcode_ret); CLInfo.checkCLError(errcode_ret);
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = (int)pp.get(0);
} }
} }
......
...@@ -19,7 +19,6 @@ import java.nio.IntBuffer; ...@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
import java.util.List; import java.util.List;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM; import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_ADDRESS_BITS;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE; import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME; import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU; import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
...@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8; ...@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/** /**
* @author Benedikt Zoennchen * @author Benedikt Zoennchen
*
* This class offers the methods to compute an array based linked-cell which contains 2D-coordinates i.e. {@link VPoint}
* using the GPU (see. green-2007 Building the Grid using Sorting).
*/ */
public class CLUniformHashedGrid { public class CLLinkedCell {
private static Logger log = LogManager.getLogger(CLUniformHashedGrid.class); private static Logger log = LogManager.getLogger(CLLinkedCell.class);
// CL ids // CL ids
private long clPlatform; private long clPlatform;
...@@ -122,12 +124,9 @@ public class CLUniformHashedGrid { ...@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
private int[] resultValues; private int[] resultValues;
private int[] resultKeys; private int[] resultKeys;
//Note: logically shared with BitonicSort.cl! private static final Logger logger = LogManager.getLogger(CLLinkedCell.class);
private static final int LOCAL_SIZE_LIMIT = 16;
private static final Logger logger = LogManager.getLogger(CLUniformHashedGrid.class); private int max_work_group_size;
private long max_work_group_size;
private boolean debug = false; private boolean debug = false;
...@@ -138,7 +137,16 @@ public class CLUniformHashedGrid { ...@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
NonSeparate NonSeparate
} }
public CLUniformHashedGrid(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException { /**
* Default constructor.
*
* @param numberOfElements the number of positions contained in the linked cell.
* @param bound the spatial bound of the linked cell.
* @param cellSize the cellSize (in x and y direction) of the linked cell.
*
* @throws OpenCLException
*/
public CLLinkedCell(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
this.numberOfElements = numberOfElements; this.numberOfElements = numberOfElements;
this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)}; this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)};
this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1]; this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1];
...@@ -153,16 +161,53 @@ public class CLUniformHashedGrid { ...@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
init(); init();
} }
public class GridCells { /**
* The data structure representing the linked cell. The elements of cell i
* between (reorderedPositions[cellStart[i]*2], reorderedPositions[cellStart[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]).
*/
public class LinkedCell {
/**
* the starting index at which the cell starts, i.e. cell i starts at cellStart[i].
*/
public int[] cellStarts; public int[] cellStarts;
public int[] cellEnds;
/**
* the ending index at which the cell starts, i.e. cell i ends at cellStart[i].
*/
public int[] cellEnds;
/**
* the ordered 2D-coordinates.
*/
public float[] reorderedPositions; public float[] reorderedPositions;
/**
* the mapping between the unordered (original) positions and the reorderedPositions,
* i.e. reorderedPositions[i] == positions[indices[i]]
*/
public int[] indices; public int[] indices;
public int[] hashes;
public float[] positions; /**
* the hashes i.e. the cell of the positions, i.e. hashes[i] is the cell of positions[i].
*/
public int[] hashes;
/**
* the original positions in original order.
*/
public float[] positions;
} }
public GridCells calcPositionsInCell(@NotNull final List<VPoint> positions) throws OpenCLException { /**
* Computes the {@link LinkedCell} of the list of positions.
*
* @param positions a list of position contained in {@link CLLinkedCell#bound}.
* @return {@link LinkedCell} which is the linked list in an array based structure.
*
* @throws OpenCLException
*/
public LinkedCell calcLinkedCell(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
allocHostMemory(); allocHostMemory();
...@@ -186,7 +231,7 @@ public class CLUniformHashedGrid { ...@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements); int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements);
float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2); float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2);
GridCells gridCells = new GridCells(); LinkedCell gridCells = new LinkedCell();
gridCells.cellEnds = aCellEnds; gridCells.cellEnds = aCellEnds;
gridCells.cellStarts = aCellStarts; gridCells.cellStarts = aCellStarts;
gridCells.reorderedPositions = aReorderedPositions; gridCells.reorderedPositions = aReorderedPositions;
...@@ -202,6 +247,14 @@ public class CLUniformHashedGrid { ...@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
/**
* Computes all the hash values, i.e. cells of each position and sort these hashes and construct a mapping
* of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
*
* @param positions the positions which will be hashed.
* @return the sorted hashes.
* @throws OpenCLException
*/
public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException { public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
...@@ -221,6 +274,14 @@ public class CLUniformHashedGrid { ...@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
/**
* Computes all the hash values, i.e. cells of each position.
* This method exists to test the hash computation on the GPU.
*
* @param positions the positions which will be hashed.
* @return the (unsorted) hashes.
* @throws OpenCLException
*/
public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException { public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
...@@ -239,11 +300,22 @@ public class CLUniformHashedGrid { ...@@ -239,11 +300,22 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
public int[] getGridSize() { /**
* Returns the gridSizes of the linked cell, i.e. result[0] is the x and
* result[1] the y direction.
*
* @return the gridSizes (2D) stored in an array.
*/
public int[] getGridSize() {
return new int[]{iGridSize[0], iGridSize[1]}; return new int[]{iGridSize[0], iGridSize[1]};
} }
public float getCellSize() { /**
* Returns the gridSize which is equal in x and y direction.
*
* @return the gridSize
*/
public float getCellSize() {
return iCellSize; return iCellSize;
} }
...@@ -299,7 +371,7 @@ public class CLUniformHashedGrid { ...@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
return resultValues; return resultValues;
} }