Commit ab95ee8a authored by schultezub's avatar schultezub
Browse files

Furhter updates in KissCL:

 * CLRuntime is a resource manager for programs and command queues
 * added Image and SharedTexture
 * introduced WeaklyTypedPointer::getClChannelOrder/Type()

Started implementing proof of concept processor CLRaycaster

git-svn-id: https://camplinux.in.tum.de/svn/campvis/trunk@245 bb408c1c-ae56-11e1-83d9-df6b3e0c105e
parent 021c94b9
......@@ -96,7 +96,7 @@ namespace TUMVis {
tgt::GLContextScopedLock lock(_localContext->getContext());
tgt::initGL(featureset);
ShdrMgr.setGlobalHeader("#version 130\n");
//ShdrMgr.setGlobalHeader("#version 130\n");
LGL_ERROR;
// ensure matching OpenGL specs
......@@ -113,10 +113,15 @@ namespace TUMVis {
if (_argc > 0) {
// ugly hack
std::string programPath(_argv[0]);
programPath = tgt::FileSystem::parentDir(tgt::FileSystem::parentDir(tgt::FileSystem::parentDir(programPath)));
ShdrMgr.addPath(programPath);
ShdrMgr.addPath(programPath + "/core/glsl");
std::string basePath(_argv[0]);
basePath = tgt::FileSystem::parentDir(tgt::FileSystem::parentDir(tgt::FileSystem::parentDir(basePath)));
ShdrMgr.addPath(basePath);
ShdrMgr.addPath(basePath + "/core/glsl");
if (_useOpenCL) {
CLRtm.addPath(basePath);
CLRtm.addPath(basePath + "/core/cl");
}
}
// init pipeline first
......
......@@ -45,20 +45,20 @@ namespace TUMVis {
size_t WeaklyTypedPointer::numBytes(BaseType pt, size_t numChannels) {
switch (pt) {
case WeaklyTypedPointer::UINT8:
case WeaklyTypedPointer::INT8:
return 1 * numChannels;
case WeaklyTypedPointer::UINT16:
case WeaklyTypedPointer::INT16:
return 2 * numChannels;
case WeaklyTypedPointer::UINT32:
case WeaklyTypedPointer::INT32:
return 4 * numChannels;
case WeaklyTypedPointer::FLOAT:
return sizeof(float) * numChannels;
default:
tgtAssert(false, "Should not reach this - called WeaklyTypedPointer::numBytes() with wrong argument!");
return 1;
case WeaklyTypedPointer::UINT8:
case WeaklyTypedPointer::INT8:
return 1 * numChannels;
case WeaklyTypedPointer::UINT16:
case WeaklyTypedPointer::INT16:
return 2 * numChannels;
case WeaklyTypedPointer::UINT32:
case WeaklyTypedPointer::INT32:
return 4 * numChannels;
case WeaklyTypedPointer::FLOAT:
return sizeof(float) * numChannels;
default:
tgtAssert(false, "Should not reach this - called WeaklyTypedPointer::numBytes() with wrong argument!");
return 1;
}
};
......@@ -227,4 +227,40 @@ namespace TUMVis {
return (_baseType == rhs._baseType) && (_numChannels == rhs._numChannels) && (_pointer == rhs._pointer);
}
cl_channel_type WeaklyTypedPointer::getClChannelType() const {
switch (_baseType) {
case WeaklyTypedPointer::UINT8:
return CL_UNSIGNED_INT8;
case WeaklyTypedPointer::INT8:
return CL_SIGNED_INT8;
case WeaklyTypedPointer::UINT16:
return CL_UNSIGNED_INT16;
case WeaklyTypedPointer::INT16:
return CL_SIGNED_INT16;
case WeaklyTypedPointer::UINT32:
return CL_UNSIGNED_INT32;
case WeaklyTypedPointer::INT32:
return CL_SIGNED_INT32;
case WeaklyTypedPointer::FLOAT:
return CL_FLOAT;
default:
tgtAssert(false, "Should not reach this - wrong base data type!");
return CL_SIGNED_INT8;
}
}
cl_channel_order WeaklyTypedPointer::getClChannelOrder() const {
switch (_numChannels) {
case 1:
return CL_A;
case 2:
return CL_RA;
case 4:
return CL_RGBA;
default:
LERROR("Unsupported number of channels.");
return CL_A;
}
}
}
......@@ -35,6 +35,8 @@
#include "tgt/tgt_gl.h"
#include "tgt/types.h"
#include "kisscl/kisscl.h"
namespace TUMVis {
/**
......@@ -115,6 +117,10 @@ namespace TUMVis {
GLint getGlInternalFormat() const;
cl_channel_type getClChannelType() const;
cl_channel_order getClChannelOrder() const;
BaseType _baseType; ///< Base data type of the pointer
size_t _numChannels; ///< Number of channels, must be in [1, 4]!
void* _pointer; ///< Pointer to the data
......
......@@ -25,7 +25,9 @@ namespace kisscl {
const std::string CLRuntime::loggerCat_ = "kisscl.CLRuntime";
CLRuntime::CLRuntime() {
CLRuntime::CLRuntime()
: tgt::ResourceManager<Program>(false)
{
initPlatforms();
}
......@@ -88,4 +90,57 @@ namespace kisscl {
return 0;
}
Program* CLRuntime::loadProgram(Context* context, const std::string& filename) {
std::vector<std::string> v;
v.push_back(filename);
return loadProgram(context, v);
}
Program* CLRuntime::loadProgram(Context* context, const std::vector<std::string>& filenames) {
tgtAssert(context != 0, "Context must not be 0.");
// complete paths and build unique identifier for resource manager
std::string concatenatedFilenamens;
std::vector<std::string> completedFilenmaes;
completedFilenmaes.reserve(filenames.size());
for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end(); ++it) {
completedFilenmaes.push_back(completePath(*it));
concatenatedFilenamens.append(*it + "#");
}
Program* toReturn = new Program(context);
toReturn->setHeader(_globalHeader);
toReturn->loadFromFiles(completedFilenmaes);
reg(toReturn, concatenatedFilenamens);
return toReturn;
}
const std::string& CLRuntime::getGlobalHeader() const {
return _globalHeader;
}
void CLRuntime::setGlobalHeader(const std::string& header) {
_globalHeader = header;
}
CommandQueue* CLRuntime::getCommandQueue(Context* context, cl_command_queue_properties properties /*= 0*/) {
return getCommandQueue(context, context->getDevices().front());
}
CommandQueue* CLRuntime::getCommandQueue(Context* context, Device* device, cl_command_queue_properties properties /*= 0*/) {
std::pair<Context*, Device*> p = std::make_pair(context, device);
auto lb = _commandQueues.lower_bound(p);
if (lb == _commandQueues.end() || lb ->first != p) {
CommandQueue* queue = new CommandQueue(context, device, properties);
_commandQueues.insert(lb, std::make_pair(p, queue));
return queue;
}
else {
return lb->second;
}
}
}
\ No newline at end of file
......@@ -26,18 +26,26 @@
#include "kisscl/kisscl.h"
#include "kisscl/context.h"
#include "kisscl/commandqueue.h"
#include "kisscl/device.h"
#include "kisscl/platform.h"
#include "kisscl/program.h"
#include <map>
#include <string>
#include <utility>
namespace kisscl {
/**
* Singleton class for managing the OpenCL runtime.
* Gathers all available OpenCL platforms/devices and offers methods to create OpenCL contexts on them.
* Gathers all available OpenCL platforms/devices, offers methods to create OpenCL contexts on them and
* manages the command queue for each context-device pair.Furthermore, CLRuntime acts as resource manager
* for OpenCL programs. Caching of them is currently disabled.
*
* \see tgt::Singleton, tgt::ResourceManager
*/
class CLRuntime : public tgt::Singleton<CLRuntime> {
class CLRuntime : public tgt::Singleton<CLRuntime>, public tgt::ResourceManager<Program> {
friend class tgt::Singleton<CLRuntime>;
public:
......@@ -66,6 +74,53 @@ namespace kisscl {
*/
const std::vector<Device*> getGPUDevices() const;
/**
* Creates a new OpenCL program from the given context from the file specified by \a filename.
* \param context OpenCL context the program shall live in.
* \param filename Filename of the source file.
* \return
*/
Program* loadProgram(Context* context, const std::string& filename);
/**
* Creates a new OpenCL program from the given context from the files specified by \a filename.
* \param context OpenCL context the program shall live in.
* \param filenames List of the filenames of the source files.
* \return
*/
Program* loadProgram(Context* context, const std::vector<std::string>& filenames);
/**
* Gets the global header for OpenCL programs.
* \return _header
*/
const std::string& getGlobalHeader() const;
/**
* Sets the global header for OpenCL programs to \a header.
* \param header The new global header for OpenCL programs.
*/
void setGlobalHeader(const std::string& header);
/**
* Gets the command queue for the given OpenCL context and its first device.
* If no such command queue has yet been requested, a new one will be created.
* \param context OpenCL context to create the command queue for.
* \param properties Command queue properties bitfield.
* \return The command queue for the given context and its first device.
*/
CommandQueue* getCommandQueue(Context* context, cl_command_queue_properties properties = 0);
/**
* Gets the command queue for the given OpenCL context-device pair.
* If no such command queue has yet been requested, a new one will be created.
* \param context OpenCL context to create the command queue for.
* \param device OpenCL device to create the command queue for.
* \param properties Command queue properties bitfield.
* \return The command queue for the given context-device pair.
*/
CommandQueue* getCommandQueue(Context* context, Device* device, cl_command_queue_properties properties = 0);
private:
/**
* Gathers and inits all available platforms and their devices.
......@@ -77,6 +132,10 @@ namespace kisscl {
std::vector<Device*> _cpuDevices; ///< List of all OpenCL CPU devices (just a shortcut to the corresponding devices in _platforms)
std::vector<Device*> _gpuDevices; ///< List of all OpenCL GPU devices (just a shortcut to the corresponding devices in _platforms)
std::map< std::pair<Context*, Device*>, CommandQueue*> _commandQueues;
std::string _globalHeader; ///< The global header for OpenCL programs.
/**
* Private constructor for singleton pattern.
*/
......@@ -85,7 +144,7 @@ namespace kisscl {
static const std::string loggerCat_;
};
#define CLMgr tgt::Singleton<kisscl::CLRuntime>::getRef()
#define CLRtm tgt::Singleton<kisscl::CLRuntime>::getRef()
}
......
......@@ -95,8 +95,8 @@ namespace kisscl {
Event CommandQueue::enqueueKernel(const Kernel* kernel, size_t globalWorkSize, size_t localWorkSize /*= 0*/, size_t offset /*= 0*/, const EventList& eventsToWaitFor /*= EventList()*/) {
tgtAssert(kernel != 0, "Kernel must not be 0.");
tgtAssert(localWorkSize != 0 && localWorkSize > globalWorkSize, "Global work size must be greater than local work size.");
tgtAssert(localWorkSize != 0 && (globalWorkSize % localWorkSize != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == 0 || localWorkSize < globalWorkSize, "Global work size must be greater than local work size.");
tgtAssert(localWorkSize == 0 || (globalWorkSize % localWorkSize == 0), "Global work size must be a multiple than local work size.");
cl_event e;
LCL_ERROR(clEnqueueNDRangeKernel(
......@@ -114,9 +114,9 @@ namespace kisscl {
Event CommandQueue::enqueueKernel(const Kernel* kernel, tgt::svec2 globalWorkSize, tgt::svec2 localWorkSize /*= tgt::svec2::zero*/, tgt::svec2 offset /*= tgt::svec2::zero*/, const EventList& eventsToWaitFor /*= EventList()*/) {
tgtAssert(kernel != 0, "Kernel must not be 0.");
tgtAssert(localWorkSize != tgt::svec2::zero && tgt::hor(tgt::greaterThan(localWorkSize, globalWorkSize)), "Global work size must be greater than local work size.");
tgtAssert(localWorkSize != tgt::svec2::zero && (globalWorkSize.x % localWorkSize.x != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize != tgt::svec2::zero && (globalWorkSize.y % localWorkSize.y != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == tgt::svec2::zero || tgt::hor(tgt::lessThan(localWorkSize, globalWorkSize)), "Global work size must be greater than local work size.");
tgtAssert(localWorkSize == tgt::svec2::zero || (globalWorkSize.x % localWorkSize.x == 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == tgt::svec2::zero || (globalWorkSize.y % localWorkSize.y == 0), "Global work size must be a multiple than local work size.");
cl_event e;
LCL_ERROR(clEnqueueNDRangeKernel(
......@@ -134,10 +134,10 @@ namespace kisscl {
Event CommandQueue::enqueueKernel(const Kernel* kernel, tgt::svec3 globalWorkSize, tgt::svec3 localWorkSize /*= tgt::svec3::zero*/, tgt::svec3 offset /*= tgt::svec3::zero*/, const EventList& eventsToWaitFor /*= EventList()*/) {
tgtAssert(kernel != 0, "Kernel must not be 0.");
tgtAssert(localWorkSize != tgt::svec3::zero && tgt::hor(tgt::greaterThan(localWorkSize, globalWorkSize)), "Global work size must be greater than local work size.");
tgtAssert(localWorkSize != tgt::svec3::zero && (globalWorkSize.x % localWorkSize.x != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize != tgt::svec3::zero && (globalWorkSize.y % localWorkSize.y != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize != tgt::svec3::zero && (globalWorkSize.z % localWorkSize.z != 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == tgt::svec3::zero || tgt::hor(tgt::lessThan (localWorkSize, globalWorkSize)), "Global work size must be greater than local work size.");
tgtAssert(localWorkSize == tgt::svec3::zero || (globalWorkSize.x % localWorkSize.x == 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == tgt::svec3::zero || (globalWorkSize.y % localWorkSize.y == 0), "Global work size must be a multiple than local work size.");
tgtAssert(localWorkSize == tgt::svec3::zero || (globalWorkSize.z % localWorkSize.z == 0), "Global work size must be a multiple than local work size.");
cl_event e;
LCL_ERROR(clEnqueueNDRangeKernel(
......@@ -189,6 +189,20 @@ namespace kisscl {
return Event(e);
}
Event CommandQueue::enqueueAcquireGLObject(const SharedTexture* texture, const EventList& eventsToWaitFor /*= EventList()*/) {
cl_event e;
cl_mem mem = texture->getId();
LCL_ERROR(clEnqueueAcquireGLObjects(_id, 1, &mem, eventsToWaitFor._size, eventsToWaitFor._events, &e));
return Event(e);
}
Event CommandQueue::enqueueReleaseGLObject(const SharedTexture* texture, const EventList& eventsToWaitFor /*= EventList()*/) {
cl_event e;
cl_mem mem = texture->getId();
LCL_ERROR(clEnqueueReleaseGLObjects(_id, 1, &mem, eventsToWaitFor._size, eventsToWaitFor._events, &e));
return Event(e);
}
}
......@@ -32,6 +32,7 @@ namespace kisscl {
class Context;
class Device;
class Kernel;
class SharedTexture;
/**
* Wrapper class for an OpenCL command queue.
......@@ -182,6 +183,10 @@ namespace kisscl {
// TODO: buffers, images, etc.
Event enqueueAcquireGLObject(const SharedTexture* texture, const EventList& eventsToWaitFor = EventList());
Event enqueueReleaseGLObject(const SharedTexture* texture, const EventList& eventsToWaitFor = EventList());
private:
Context* _context; ///< OpenCL context for which the command queue is created.
......
......@@ -24,6 +24,15 @@
#include "kisscl/device.h"
#include "kisscl/platform.h"
#ifdef WIN32
#include <windows.h>
#endif
#if !(defined(WIN32) || defined(__APPLE__))
#include <GL/glx.h>
#define CL_GLX_DISPLAY_KHR 0x200A
#endif
namespace kisscl {
void CL_API_CALL clContextCallback(const char* errinfo, const void* private_info, size_t cb, void* user_data) {
......
......@@ -29,7 +29,6 @@ namespace kisscl {
Event::Event(cl_event id)
: CLWrapper<cl_event>(id)
{
tgtAssert(_id != 0, "Event ID must not be 0.");
}
......
......@@ -36,4 +36,9 @@ namespace kisscl {
return getInfo<cl_uint>(CL_KERNEL_NUM_ARGS);
}
void Kernel::setMemoryArgument(cl_uint index, const MemoryObject* memoryObject) {
cl_mem mem = memoryObject->getId();
LCL_ERROR(clSetKernelArg(_id, index, sizeof(cl_mem), &mem));
}
}
......@@ -22,7 +22,7 @@
#include "tgt/vector.h"
#include "kisscl/kisscl.h"
#include "kisscl/memory.h"
namespace kisscl {
......@@ -112,6 +112,12 @@ namespace kisscl {
void setArgument(cl_uint index, const T& data);
// TODO: samplers, buffers, etc.
/**
* Sets the kernel argument with index \a index to the given MemoryObject
* \param index Argument index
* \param memoryObject Memory object to set as kernel argument
*/
void setMemoryArgument(cl_uint index, const MemoryObject* memoryObject);
private:
/**
......@@ -126,7 +132,7 @@ namespace kisscl {
template<class T>
void kisscl::Kernel::setArgument(cl_uint index, const T& data) {
return LCL_ERROR(clSetKernelArg(id_, index, KernelArgumentTypeTraits<T>::size(), KernelArgumentTypeTraits<T>::pointer(data)));
LCL_ERROR(clSetKernelArg(_id, index, KernelArgumentTypeTraits<T>::size(), KernelArgumentTypeTraits<T>::pointer(data)));
}
}
......
......@@ -21,7 +21,12 @@
#define KISSCL_H__
#include "tgt/tgt_gl.h"
#include "CL/cl.hpp"
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif // !__APPLE__
#include <string>
......
......@@ -21,6 +21,7 @@
#include "tgt/assert.h"
#include "tgt/logmanager.h"
#include "tgt/texture.h"
#include "kisscl/context.h"
namespace kisscl {
......@@ -61,4 +62,177 @@ namespace kisscl {
return _size;
}
// ================================================================================================
Image::Image(const Context* context, cl_mem_flags flags, const tgt::Texture* texture)
: MemoryObject(context)
{
tgtAssert(texture != 0, "Texture must not be 0");
if (!texture->getPixelData()) {
LERRORC("kisscl.Image", "Invalid pixel data in given texture.");
return;
}
cl_int err;
const tgt::ivec3& dims = texture->getDimensions();
GLenum texDataType = texture->getDataType();
GLint texFormat = texture->getFormat();
cl_image_format imageFormat;
switch(texDataType) {
case GL_BYTE:
imageFormat.image_channel_data_type = CL_SNORM_INT8;
break;
case GL_UNSIGNED_BYTE:
imageFormat.image_channel_data_type = CL_UNORM_INT8;
break;
case GL_SHORT:
imageFormat.image_channel_data_type = CL_SNORM_INT16;
break;
case GL_UNSIGNED_SHORT:
imageFormat.image_channel_data_type = CL_UNORM_INT16;
break;
case GL_INT:
imageFormat.image_channel_data_type = CL_SIGNED_INT32;
break;
case GL_UNSIGNED_INT:
imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case GL_FLOAT:
imageFormat.image_channel_data_type = CL_FLOAT;
break;
}
switch(texFormat) {
case GL_ALPHA:
case GL_LUMINANCE:
imageFormat.image_channel_order = CL_A;
break;
case GL_LUMINANCE_ALPHA:
imageFormat.image_channel_order = CL_RA;
break;
case GL_RGBA:
imageFormat.image_channel_order = CL_RGBA;
break;
}
#if defined(CL_VERSION_1_2)
cl_image_desc desc;
if (dims.y == 1 && dims.z == 1)
desc.image_type = CL_MEM_OBJECT_IMAGE1D;
else if (dims.z == 1)
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
else
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
desc.image_width = dims.x;
desc.image_height = dims.y;
desc.image_depth = dims.z
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = 0;
object_ = clCreateImage(context->getId(), flags, &imageFormat, &desc, texture->getPixelData(), &err);
#else // defined(CL_VERSION_1_2)
if (dims.z == 1) {
// 1D or 2D image
_id = clCreateImage2D(context->getId(), flags, &imageFormat, dims.x, dims.y, 0, const_cast<GLubyte*>(texture->getPixelData()), &err);
}
else {
// 3D image
_id = clCreateImage3D(context->getId(), flags, &imageFormat, dims.x, dims.y, dims.z, 0, 0, const_cast<GLubyte*>(texture->getPixelData()), &err);
}
#endif // defined(CL_VERSION_1_2)
LCL_ERROR(err);
}
Image::Image(const Context* context, cl_mem_flags flags, const tgt::svec3& dimensions, cl_channel_order channelOrder, cl_channel_type channelType, void* hostPtr)
: MemoryObject(context)
{
tgtAssert(hostPtr != 0, "Texture must not be 0");
cl_int err;
cl_image_format imageFormat;
imageFormat.image_channel_data_type = channelType;
imageFormat.image_channel_order = channelOrder;
#if defined(CL_VERSION_1_2)
cl_image_desc desc;
if (dimensions.y == 1 && dimensions.z == 1)
desc.image_type = CL_MEM_OBJECT_IMAGE1D;
else if (dimensions.z == 1)
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
else
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
desc.image_width = dimensions.x;
desc.image_height = dimensions.y;
desc.image_depth = dimensions.z
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = 0;
object_ = clCreateImage(context->getId(), flags, &imageFormat, &desc, texture->getPixelData(), &err);
#else // defined(CL_VERSION_1_2)
if (dimensions.z == 1) {
// 1D or 2D image
_id = clCreateImage2D(context->getId(), flags, &imageFormat, dimensions.x, dimensions.y, 0, hostPtr, &err);
}
else {
// 3D image
_id = clCreateImage3D(context->getId(), flags, &imageFormat, dimensions.x, dimensions.y, dimensions.z, 0, 0, hostPtr, &err);
}
#endif // defined(CL_VERSION_1_2)