Commit 09ccf88f authored by Benedikt Zoennchen

GPU linkedCell algorithm fix, has to be tested on other systems.

parent 42bd1f01
Pipeline #65627 failed with stages
in 46 minutes and 28 seconds
......@@ -95,7 +95,7 @@ inline void ComparatorLocal(
////////////////////////////////////////////////////////////////////////////////
// Save particle grid cell hashes and indices
////////////////////////////////////////////////////////////////////////////////
static uint2 getGridPos(float2 p, __constant float* cellSize, __constant float2* worldOrigin){
inline uint2 getGridPos(float2 p, __constant float* cellSize, __constant float2* worldOrigin){
uint2 gridPos;
float2 wordOr = (*worldOrigin);
gridPos.x = (int)floor((p.x - wordOr.x) / (*cellSize));
......@@ -104,7 +104,7 @@ static uint2 getGridPos(float2 p, __constant float* cellSize, __constant float2*
}
//Calculate address in grid from position (wrapping at the edges)
static uint getGridHash(uint2 gridPos, __constant uint2* gridSize){
inline uint getGridHash(uint2 gridPos, __constant uint2* gridSize){
//Wrap addressing, assume power-of-two grid dimensions
gridPos.x = gridPos.x & ((*gridSize).x - 1);
gridPos.y = gridPos.y & ((*gridSize).y - 1);
......
......@@ -424,13 +424,33 @@ public class CLLinkedCell {
CLInfo.checkCLError(clSetKernelArg(clFindCellBoundsAndReorder, 6, (Math.min(numberOfElements, max_work_group_size)+1) * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg1i(clFindCellBoundsAndReorder, 7, numberOfElements));
clGlobalWorkSize.put(0, Math.min(max_work_group_size, numberOfElements));
clLocalWorkSize.put(0, Math.min(max_work_group_size, numberOfElements));
int globalWorkSize;
int localWorkSize;
if(numberOfElements <= max_work_group_size){
localWorkSize = numberOfElements;
globalWorkSize = numberOfElements;
}
else {
localWorkSize = max_work_group_size;
globalWorkSize = multipleOf(numberOfElements, localWorkSize);
}
clGlobalWorkSize.put(0, globalWorkSize);
clLocalWorkSize.put(0, localWorkSize);
//TODO: local work size? + check 2^n constraint!
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clFindCellBoundsAndReorder, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
}
}
/**
 * Rounds {@code value} up to the nearest positive multiple of {@code multiple}.
 *
 * <p>The result is never smaller than {@code multiple} itself: every
 * {@code value <= multiple} (including zero and negative values) yields
 * {@code multiple}.
 *
 * @param value    the number to round up
 * @param multiple the step size; must be strictly positive
 * @return the smallest {@code k * multiple} with {@code k >= 1} that is {@code >= value}
 * @throws IllegalArgumentException if {@code multiple} is not strictly positive
 * @throws ArithmeticException      if the rounded value does not fit into an {@code int}
 */
private int multipleOf(int value, int multiple) {
    // A step of zero can never reach a positive value; the loop-based version hung here.
    if (multiple <= 0) {
        throw new IllegalArgumentException("multiple must be positive, but was " + multiple);
    }
    if (value <= multiple) {
        return multiple;
    }
    // Ceiling division in long arithmetic so the intermediate sum cannot wrap around.
    long steps = ((long) value + multiple - 1) / multiple;
    // Fail loudly instead of wrapping when the next multiple exceeds Integer.MAX_VALUE.
    return Math.toIntExact(steps * multiple);
}
private void clBitonicSort(
final long clKeysIn,
final long clValuesIn,
......
......@@ -127,17 +127,20 @@ public class TestCLLinkedList {
}
CLLinkedCell.LinkedCell gridCells = clUniformHashedGrid.calcLinkedCell(positions);
int numberOfCells = clUniformHashedGrid.getGridSize()[0] * clUniformHashedGrid.getGridSize()[1];
int sum = 0;
for(int cell = 0; cell < numberOfCells; cell++) {
int cellStart = gridCells.cellStarts[cell];
int cellEnd = gridCells.cellEnds[cell];
for(int i = cellStart; i < cellEnd; i++) {
sum++;
VPoint point = new VPoint(gridCells.reorderedPositions[i*2], gridCells.reorderedPositions[i*2+1]);
int[] gridPosition = getGridPosition(point, clUniformHashedGrid.getCellSize(), clUniformHashedGrid.getWorldOrign());
int gridHash = getGridHash(gridPosition, clUniformHashedGrid.getGridSize());
assertEquals(gridHash, cell);
}
}
assertEquals(sum, size);
}
@Test
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment