Commit 36865d52 authored by Benedikt Zoennchen's avatar Benedikt Zoennchen
Browse files

documentation (adding comments), the hardware parameters are no longer hardcoded for the sorting.

parent fdc22cfc
......@@ -8,9 +8,6 @@
* is strictly prohibited.
*
*/
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate(
uint *keyA,
uint *valA,
......@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey,
__global uint *d_SrcVal,
uint arrayLength,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey,
__global uint *d_DstVal,
__global uint *d_SrcKey,
__global uint *d_SrcVal
__global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength,
uint stride,
uint size,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
......@@ -63,8 +63,6 @@ typedef struct {
float stepLength;
} pedestrian;
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate(
uint *keyA,
uint *valA,
......@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey,
__global uint *d_SrcVal,
uint arrayLength,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey,
__global uint *d_DstVal,
__global uint *d_SrcKey,
__global uint *d_SrcVal
__global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength,
uint stride,
uint size,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
......@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_ONLY;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE;
import static org.lwjgl.opencl.CL10.CL_MEM_WRITE_ONLY;
import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS;
import static org.lwjgl.opencl.CL10.CL_SUCCESS;
import static org.lwjgl.opencl.CL10.clBuildProgram;
......@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
import static org.lwjgl.opencl.CL10.clCreateProgramWithSource;
import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel;
import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer;
import static org.lwjgl.opencl.CL10.clEnqueueWriteBuffer;
import static org.lwjgl.opencl.CL10.clFinish;
import static org.lwjgl.opencl.CL10.clGetDeviceIDs;
import static org.lwjgl.opencl.CL10.clGetDeviceInfo;
import static org.lwjgl.opencl.CL10.clGetPlatformIDs;
import static org.lwjgl.opencl.CL10.clReleaseCommandQueue;
import static org.lwjgl.opencl.CL10.clReleaseContext;
import static org.lwjgl.opencl.CL10.clReleaseKernel;
import static org.lwjgl.opencl.CL10.clReleaseMemObject;
import static org.lwjgl.opencl.CL10.clReleaseProgram;
import static org.lwjgl.opencl.CL10.clSetKernelArg;
import static org.lwjgl.opencl.CL10.clSetKernelArg1i;
import static org.lwjgl.opencl.CL10.clSetKernelArg1p;
import static org.lwjgl.system.MemoryStack.stackPush;
......@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* @author Benedikt Zoennchen
*
* This class implements the bitonic sort using the GPU via OpenCL.
*/
public class CLBitonicSort {
private static Logger log = LogManager.getLogger(CLBitonicSort.class);
......@@ -93,7 +94,7 @@ public class CLBitonicSort {
private int[] resultKeys;
//Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16;
private int max_work_group_size = 16;
private boolean debug = false;
......@@ -154,7 +155,7 @@ public class CLBitonicSort {
// small sorts
if(keys.length <= LOCAL_SIZE_LIMIT)
if(keys.length <= max_work_group_size)
{
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues));
......@@ -162,6 +163,8 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, keys.length / 2);
......@@ -175,20 +178,22 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
for(int size = 2 * LOCAL_SIZE_LIMIT; size <= keys.length; size <<= 1)
for(int size = 2 * max_work_group_size; size <= keys.length; size <<= 1)
{
for(int stride = size / 2; stride > 0; stride >>= 1)
{
if(stride >= LOCAL_SIZE_LIMIT)
if(stride >= max_work_group_size)
{
//Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys));
......@@ -204,7 +209,7 @@ public class CLBitonicSort {
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4);
clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -221,11 +226,13 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4 )); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -368,6 +375,9 @@ public class CLBitonicSort {
clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret);
CLInfo.checkCLError(errcode_ret);
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = (int)pp.get(0);
}
}
......
......@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
import java.util.List;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_ADDRESS_BITS;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
......@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* @author Benedikt Zoennchen
*
* This class offers the methods to compute an array based linked-cell which contains 2D-coordinates i.e. {@link VPoint}
* using the GPU (see. green-2007 Building the Grid using Sorting).
*/
public class CLUniformHashedGrid {
private static Logger log = LogManager.getLogger(CLUniformHashedGrid.class);
public class CLLinkedCell {
private static Logger log = LogManager.getLogger(CLLinkedCell.class);
// CL ids
private long clPlatform;
......@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
private int[] resultValues;
private int[] resultKeys;
//Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16;
private static final Logger logger = LogManager.getLogger(CLLinkedCell.class);
private static final Logger logger = LogManager.getLogger(CLUniformHashedGrid.class);
private long max_work_group_size;
private int max_work_group_size;
private boolean debug = false;
......@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
NonSeparate
}
public CLUniformHashedGrid(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
/**
* Default constructor.
*
* @param numberOfElements the number of positions contained in the linked cell.
* @param bound the spatial bound of the linked cell.
* @param cellSize the cellSize (in x and y direction) of the linked cell.
*
* @throws OpenCLException
*/
public CLLinkedCell(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
this.numberOfElements = numberOfElements;
this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)};
this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1];
......@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
init();
}
public class GridCells {
/**
* The data structure representing the linked cell. The elements of cell i
* between (reorderedPositions[cellStart[i]*2], reorderedPositions[cellStart[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]).
*/
public class LinkedCell {
/**
* the starting index at which the cell starts, i.e. cell i starts at cellStart[i].
*/
public int[] cellStarts;
public int[] cellEnds;
/**
* the ending index at which the cell starts, i.e. cell i ends at cellStart[i].
*/
public int[] cellEnds;
/**
* the ordered 2D-coordinates.
*/
public float[] reorderedPositions;
/**
* the mapping between the unordered (original) positions and the reorderedPositions,
* i.e. reorderedPositions[i] == positions[indices[i]]
*/
public int[] indices;
public int[] hashes;
public float[] positions;
/**
* the hashes i.e. the cell of the positions, i.e. hashes[i] is the cell of positions[i].
*/
public int[] hashes;
/**
* the original positions in original order.
*/
public float[] positions;
}
public GridCells calcPositionsInCell(@NotNull final List<VPoint> positions) throws OpenCLException {
/**
* Computes the {@link LinkedCell} of the list of positions.
*
* @param positions a list of position contained in {@link CLLinkedCell#bound}.
* @return {@link LinkedCell} which is the linked list in an array based structure.
*
* @throws OpenCLException
*/
public LinkedCell calcLinkedCell(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
allocHostMemory();
......@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements);
float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2);
GridCells gridCells = new GridCells();
LinkedCell gridCells = new LinkedCell();
gridCells.cellEnds = aCellEnds;
gridCells.cellStarts = aCellStarts;
gridCells.reorderedPositions = aReorderedPositions;
......@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
/**
* Computes all the hash values, i.e. cells of each position and sort these hashes and construct a mapping
* of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
*
* @param positions the positions which will be hashed.
* @return the sorted hashes.
* @throws OpenCLException
*/
public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
......@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
/**
* Computes all the hash values, i.e. cells of each position.
* This method exists to test the hash computation on the GPU.
*
* @param positions the positions which will be hashed.
* @return the (unsorted) hashes.
* @throws OpenCLException
*/
public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
......@@ -239,11 +300,22 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
public int[] getGridSize() {
/**
* Returns the gridSizes of the linked cell, i.e. result[0] is the x and
* result[1] the y direction.
*
* @return the gridSizes (2D) stored in an array.
*/
public int[] getGridSize() {
return new int[]{iGridSize[0], iGridSize[1]};
}
public float getCellSize() {
/**
* Returns the gridSize which is equal in x and y direction.
*
* @return the gridSize
*/
public float getCellSize() {
return iCellSize;
}
......@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
return resultValues;
}
public void init() throws OpenCLException {
private void init() throws OpenCLException {
initCallbacks();
initCL();
buildProgram();
......@@ -373,7 +445,7 @@ public class CLUniformHashedGrid {
IntBuffer errcode_ret = stack.callocInt(1);
// small sorts
if (numberOfElements <= LOCAL_SIZE_LIMIT) {
if (numberOfElements <= max_work_group_size) {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clKeysOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clValuesOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 2, clKeysIn));
......@@ -381,6 +453,8 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, numberOfElements));
//TODO: check the hard coded 1, and the waiting of the queue
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, numberOfElements / 2);
......@@ -393,18 +467,20 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clValuesOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clKeysIn));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clValuesIn));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
for (int size = 2 * LOCAL_SIZE_LIMIT; size <= numberOfElements; size <<= 1) {
for (int size = 2 * max_work_group_size; size <= numberOfElements; size <<= 1) {
for (int stride = size / 2; stride > 0; stride >>= 1) {
if (stride >= LOCAL_SIZE_LIMIT) {
if (stride >= max_work_group_size) {
//Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clKeysOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 1, clValuesOut));
......@@ -419,7 +495,7 @@ public class CLUniformHashedGrid {
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4);
clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -434,11 +510,13 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -594,7 +672,7 @@ public class CLUniformHashedGrid {
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = pp.get(0);
max_work_group_size = (int)pp.get(0);
logger.info("CL_DEVICE_MAX_WORK_GROUP_SIZE = " + max_work_group_size);
}
......
......@@ -28,8 +28,8 @@ public class TestBitonicSort {
@Test
public void testLocalSort() throws IOException, OpenCLException {
int[] keys = randomArray(256);
int[] values = randomArray(256);
int[] keys = randomArray(32);
int[] values = randomArray(32);
CLBitonicSort clBitonicSort = new CLBitonicSort();
clBitonicSort.sort(keys, values);
......
......@@ -6,7 +6,7 @@ import org.junit.Before;
import org.junit.Test;
import org.vadere.util.geometry.shapes.VPoint;
import org.vadere.util.geometry.shapes.VRectangle;
import org.vadere.util.opencl.CLUniformHashedGrid;
import org.vadere.util.opencl.CLLinkedCell;
import org.vadere.util.opencl.OpenCLException;
import java.io.IOException;
......@@ -19,7 +19,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Benedikt Zoennchen
*/
public class TestCellGridSort {
public class TestCLLinkedList {
private static Logger logger = LogManager.getLogger(TestConvolution.class);
......@@ -30,7 +30,7 @@ public class TestCellGridSort {
@Test
public void testCalcHash() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
......@@ -49,7 +49,7 @@ public class TestCellGridSort {
@Test
public void testCalcAndSortHash() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
......@@ -72,12 +72,12 @@ public class TestCellGridSort {
@Test
public void testGridCell() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
}
CLUniformHashedGrid.GridCells gridCells = clUniformHashedGrid.calcPositionsInCell(positions);
CLLinkedCell.LinkedCell gridCells = clUniformHashedGrid.calcLinkedCell(positions);
int numberOfCells = clUniformHashedGrid.getGridSize()[0] * clUniformHashedGrid.getGridSize()[1];
for(int cell = 0; cell < numberOfCells; cell++) {
int cellStart = gridCells.cellStarts[cell];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment