FreeWRL / FreeX3D 4.3.0
OpenCL_Utils.c
1/*
2
3
4Render the children of nodes.
5
6*/
7
8/****************************************************************************
9 This file is part of the FreeWRL/FreeX3D Distribution.
10
11 Copyright 2009 CRC Canada. (http://www.crc.gc.ca)
12
13 FreeWRL/FreeX3D is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Lesser Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 FreeWRL/FreeX3D is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with FreeWRL/FreeX3D. If not, see <http://www.gnu.org/licenses/>.
25****************************************************************************/
26
27
28#include <config.h>
29#include <system.h>
30#include <display.h>
31#include <internal.h>
32
33#include <libFreeWRL.h>
34
35#include "../scenegraph/Viewer.h"
36#include "../scenegraph/RenderFuncs.h"
37#include "../vrml_parser/Structs.h"
38
39#include "../main/headers.h"
40#include "../vrml_parser/CRoutes.h"
41#include "../scenegraph/Collision.h"
42
43#include "OpenCL_Utils.h"
44
45#ifdef HAVE_OPENCL
46
47
48static bool initialize_OpenCL();
49static void createGPUInterpolators();
50static const char* coordinateInterpolator_kernel;
51static const char* interpolator_headers;
52
53
54static void *OpenCL_Utils_constructor(){
55 void *v = malloc(sizeof(struct pOpenCL_Utils));
56 memset(v,0,sizeof(struct pOpenCL_Utils));
57 return v;
58}
59
60
61void OpenCL_Utils_init(struct tOpenCL_Utils *t)
62{
63 //printf ("start calling OpenCL_Utils_init - t is %p\n",t);
64 //private
65 t->prv = OpenCL_Utils_constructor();
66 {
67 //ppOpenCL_Utils p = (ppOpenCL_Utils)t->prv;
68 }
69
70 t->OpenCL_Initialized = FALSE;
71 t->OpenCL_OK = FALSE;
72 //printf ("done calling OpenCL_Utils_init\n");
73
74}
75
76void fwl_OpenCL_startup(struct tOpenCL_Utils *t) {
77 //printf ("called fwl_OpenCL_startup...\n");
78if (t->OpenCL_Initialized) printf (".... fwl_opencl already done?\n"); else printf ("..... OpenCL init currently false\n");
79
80 initialize_OpenCL();
81
82 //printf ("past the initialize_OpenCL call\n");
83 createGPUCollisionProgram();
84
85 createGPUInterpolators();
86 //printf ("finished called fwl_OpenCL_startup...\n");
87 t->OpenCL_Initialized = TRUE;
88
89}
90
91
92static char *getCLErrorString(cl_int err) {
93 switch (err) {
94#ifdef CL_VERSION_1_2
95 case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; break;
96 case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE"; break;
97 case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE"; break;
98 case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED"; break;
99 case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; break;
100 case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; break;
101 case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS"; break;
102 case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS"; break;
103 case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; break;
104
105#endif //CL_VERSION_1_2
106
107 case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND"; break;
108 case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE"; break;
109 case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE"; break;
110 case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; break;
111 case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES"; break;
112 case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; break;
113 case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE"; break;
114 case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP"; break;
115 case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH"; break;
116 case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; break;
117 case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE"; break;
118 case CL_MAP_FAILURE: return "CL_MAP_FAILURE"; break;
119 case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; break;
120 case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; break;
121 case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; break;
122 case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE"; break;
123 case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM"; break;
124 case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE"; break;
125 case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; break;
126 case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES"; break;
127 case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE"; break;
128 case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR"; break;
129 case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; break;
130 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break;
131 case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE"; break;
132 case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; break;
133 case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; break;
134 case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; break;
135 case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; break;
136 case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; break;
137 case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME"; break;
138 case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION"; break;
139 case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL"; break;
140 case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX"; break;
141 case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE"; break;
142 case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE"; break;
143 case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS"; break;
144 case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION"; break;
145 case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE"; break;
146 case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE"; break;
147 case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET"; break;
148 case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST"; break;
149 case CL_INVALID_EVENT: return "CL_INVALID_EVENT"; break;
150 case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; break;
151 case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT"; break;
152 case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE"; break;
153 case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL"; break;
154 case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; break;
155 case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; break;
156 case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; break;
157 default :{return "hmmm - error message makes no sense";}
158 }
159
160}
161void printCLError(const char *where, cl_int err) {
162 printf ("OpenCL fn %s, error %s (%d)\n",where,getCLErrorString(err),err);
163}
164
165/********************************************************************************/
166/* */
167/* */
168/********************************************************************************/
169
170static bool initialize_OpenCL() {
171
172 ppOpenCL_Utils p;
173 ttglobal tg = gglobal();
174 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
175
176 cl_int err;
177
178 // debugging information
179 cl_int rv;
180 size_t rvlen;
181
182 #ifdef GPU_DEBUG
183 size_t wg_size;
184 cl_ulong longish;
185 size_t xyz;
186 char rvstring[1000];
187 int gpu;
188 #endif // GPU_DEBUG
189
190// get the current context.
191// windows - IntPtr curDC = wglGetCurrentDC();
192// then in the new compute context, we pass in the context
193
194 /* initialized yet? */
195 //if (p->kernel != NULL) return false;
196
197
198 // get the device id
199
200#if defined (_MSC_VER)
201
202 if(1)
203 err = extraInitFromNvidiaSamples(p);
204 else
205 {
206 cl_int ciErrNum;
207 cl_platform_id cpPlatform = NULL; // OpenCL platform
208 // Get the NVIDIA platform
209 //ciErrNum = oclGetPlatformID(&cpPlatform);
210 {
211 /* from OpenCL Programming Guide, pg 338 */
212 cl_context_properties properties[] = {
213 CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
214 CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
215 CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
216 0};
217
218 p->context = clCreateContext(properties, 1, &p->device_id, NULL, NULL, &err);
219 }
220 }
221#endif // _MSC_VER
222
223/* is this Linux? */
224#if defined (__linux__)
225
226 #define MAX_OPENCL_PLATFORMS 10
227 #define MAX_OPENCL_DEVICES 32
228 cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
229 cl_device_id devices[MAX_OPENCL_DEVICES];
230 cl_uint numPlats;
231 cl_uint numDevs;
232
233 // we may have OpenCL, but maybe we dont have cl_khr_gl_sharing, so look for it
234 int selectedPlatform = -1;
235 int selectedDevice = -1;
236
237 // printf ("have linux uere\n");
238 // printf ("OpenCL - before clGetPlatformIDs\n");
239
240 err = clGetPlatformIDs(10,platforms,&numPlats);
241 TEST_ERR("clGetPlatformIDs",err);
242
243 //printf ("looking for up to 10 platforms, got %d\n",numPlats);
244 if (numPlats <1) {
245 printf ("OpenCL init - numPlats is %d, OpenCL device not found\n",numPlats);
246 return FALSE;
247 }
248
249 // bounds check
250 if (numPlats >= MAX_OPENCL_PLATFORMS) {
251 printf ("OpenCL init - numPlats is %d, setting to %d\n",numPlats,MAX_OPENCL_PLATFORMS);
252 numPlats = MAX_OPENCL_PLATFORMS;
253 }
254
255 /* not sure what platform to choose, if more than 1...
256 {
257 int i;
258
259 //printf ("printing out the platform names:\n");
260 for (i=0; i<numPlats; i++) {
261 char platname[500];
262 cl_int err = clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME,sizeof(platname),platname,NULL);
263 TEST_ERR("clGetPlatformInfo",err);
264 printf ("GetPlatfromInfo for %d is :%s:\n",i,platname);
265 }
266
267 }
268 */
269
270 //printf ("now, trying to get the device IDS\n");
271
272
273
274
275 {
276 int i,j;
277 printf ("printing out the device names for each platform found\n");
278 for (i=0; i<numPlats; i++) {
279 err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
280 printf ("done the clGetDeviceIDS call for platform %d - devices %d\n",i,numDevs);
281
282/* XXX */
283 for (j=0; j<numDevs; j++) {
284 char crv[1000];
285 size_t crvs;
286
287 /*
288 err = clGetDeviceInfo(devices[j],CL_DEVICE_NAME,1000,crv,&crvs);
289 printf ("NAME for %d is %s\n",j,crv);
290 err = clGetDeviceInfo(devices[j],CL_DEVICE_VENDOR,1000,crv,&crvs);
291 printf ("VENDOR for %d is %s\n",j,crv);
292 err = clGetDeviceInfo(devices[j],CL_DEVICE_PROFILE,1000,crv,&crvs);
293 printf ("PROFILE for %d is %s\n",j,crv);
294 */
295
296 err = clGetDeviceInfo(devices[j],CL_DEVICE_EXTENSIONS,1000,crv,&crvs);
297 if (err != CL_SUCCESS) {
298 printCLError("clGetDeviceIDs",err);
299 return FALSE;
300 }
301 // printf ("EXTENSIONS for %d is %s\n",j,crv);
302
303 if (strstr(crv,"cl_khr_gl_sharing") != NULL) {
304 printf ("**** Found cl_khr_gl_sharing ****\n");
305 selectedPlatform = i;
306 selectedDevice = j;
307 p->CL_device_id = devices[j];
308 }
309 }
310 }
311 }
312
313 //printf ("Linux, have device id...\n");
314 if ((selectedPlatform <0) || (selectedDevice<0)) {
315 printCLError("No good OpenCL device or platform found, error ",err);
316 return FALSE;
317 }
318
319 // redo the calls, now that we have (the best?) match
320 if ((selectedPlatform != 0) && (selectedDevice != 0)) {
321 //printf ("regetting platform %d and device %d\n",selectedPlatform, selectedDevice);
322 err = clGetDeviceIDs(platforms[selectedPlatform], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
323 }
324
325 // now save the device id
326 p->CL_device_id = devices[selectedDevice];
327
328
329 // printf ("\n.....now doing the context sharing getting.....\n\n");
330
331 cl_context_properties properties[] = {
332 CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
333 CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
334 CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[selectedPlatform],
335 0 };
336
337// function pointer typedefs must use the
338// following naming convention
339typedef CL_API_ENTRY cl_int
340 (CL_API_CALL *clGetGLContextInfoKHR_fn)(
341 const cl_context_properties * /* properties */,
342 cl_gl_context_info /* param_name */,
343 size_t /* param_value_size */,
344 void * /* param_value */,
345 size_t * /*param_value_size_ret*/);
346
347clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = NULL;
348
349#ifdef CL_VERSION_1_2
350clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platforms[selectedPlatform],"clGetGLContextInfoKHR");
351#else
352#ifdef CL_VERSION_1_1
353clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddress("clGetGLContextInfoKHR");
354#endif
355#endif
356 // find CL capable devices in the current GL context
357 size_t size;
358 err = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
359 MAX_OPENCL_DEVICES*sizeof(cl_device_id), devices, &size);
360
361 TEST_ERR("clGetGLContextInfoKHR",err);
362
363 printf ("clGetGLContextInfoKHR returns size of %d\n",size);
364
365 //printf ("going to clCreateContextFromType:\n");
366 //printf ("just so we know, p is %p\n",p);
367 //printf ("just so we know, p->CL_context is %p\n",p->CL_context);
368
369 p->CL_context=clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
370
371 //printf ("done the createContextFromType, it is %p\n",p->CL_context);
372
373 TEST_ERR("clCreateContextFromType",err);
374
375#endif // linux
376
377/* how about Android (and maybe IPHONE) using OpenCL-ES 2.0? */
378#ifdef GL_ES_VERSION_2_0
379
380 cl_platform_id platforms[10];
381 cl_uint numPlats;
382
383 err = getFunctionHandles();
384
385
386
387 if (err != CL_SUCCESS) {
388 printCLError("clCreateContext",err);
389 return FALSE;
390 }
391
392
393 err = clGetPlatformIDs(10,platforms,&numPlats);
394 TEST_ERR("clGetPlatformIDs",err);
395 printf ("looking for up to 10 platforms, got %d",numPlats);
396
397 cl_platform_id platform;
398 err = clGetPlatformIDs(1,&platform,NULL);
399 TEST_ERR("clGetPlatformIDs",err);
400
401 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &p->device_id, NULL);
402
403 if (err != CL_SUCCESS) {
404 printCLError("clGetDeviceIDs",err);
405 return FALSE;
406 }
407
408/*
409 cl_context_properties properties[] = {
410 CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
411 CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
412 CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
413 0 };
414 p->context=clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
415 TEST_ERR("clCreateContextFromType",err);
416*/
417
418 p->CL_context=clCreateContextFromType(NULL, CL_DEVICE_TYPE_ANY, NULL, NULL, &err);
419 TEST_ERR("clCreateContextFromType",err);
420
421 printf ("remember - building currently without the CL_KHR_gl_sharing enabled - the clCreateFromGLBuffer will error out, so return code removed.");
422
423#endif //GL_ES_VERSION_2_0
424
425
426 // create a command queue
427
428 p->CL_queue = clCreateCommandQueue(p->CL_context, p->CL_device_id, 0, &err);
429 //printf ("CL_queue is %p for context %p, device %d\n",p->CL_queue, p->CL_context, p->CL_device_id);
430
431
432 if (!p->CL_queue || (err != CL_SUCCESS)) {
433 printCLError("clCreateCommandQueue",err);
434 return FALSE;
435 }
436
437 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &(p->CL_default_workgroup_size), &rvlen);
438 if ((rv != CL_SUCCESS) || (err != CL_SUCCESS)) {
439 printCLError("clGetDeviceInfo",err);
440 return FALSE;
441 }
442
443
444 #ifdef GPU_DEBUG
445 // Find the work group size
446 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, &rvlen);
447 TEST_ERR("clGetDeviceInfo",rv);
448 printf ("CL_DEVICE_MAX_WORK_GROUP_SIZE %d\n",wg_size);
449
450 // debugging information
451 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_PROFILE,1000,rvstring,&rvlen);
452 printf ("CL_PLATFORM_PROFILE :%s:\n",rvstring);
453 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VERSION,1000,rvstring,&rvlen);
454 printf ("CL_PLATFORM_VERSION :%s:\n",rvstring);
455 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_NAME,1000,rvstring,&rvlen);
456 printf ("CL_PLATFORM_NAME :%s:\n",rvstring);
457 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VENDOR,1000,rvstring,&rvlen);
458 printf ("CL_PLATFORM_VENDOR :%s:\n",rvstring);
459 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_EXTENSIONS,1000,rvstring,&rvlen);
460 printf ("CL_PLATFORM_EXTENSIONS :%s:\n",rvstring);
461
462 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &xyz, &rvlen);
463 printf ("CL_DEVICE_MAX_COMPUTE_UNITS %d\n",xyz);
464 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), &longish, &rvlen);
465 printf ("CL_DEVICE_GLOBAL_MEM_CACHE_SIZE %lu\n",longish);
466 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &longish, &rvlen);
467 printf ("CL_DEVICE_GLOBAL_MEM_SIZE %lu\n",longish);
468 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &longish, &rvlen);
469 printf ("CL_DEVICE_LOCAL_MEM_SIZE %lu\n",longish);
470 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_EXTENSIONS, 1000, rvstring,&rvlen);
471 printf ("CL_DEVICE_EXTENSIONS :%s:\n",rvstring);
472 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_PROFILE, 1000, rvstring,&rvlen);
473 printf ("CL_DEVICE_PROFILE :%s:\n",rvstring);
474 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_NAME, 1000, rvstring,&rvlen);
475 printf ("CL_DEVICE_NAME :%s:\n",rvstring);
476 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VENDOR, 1000, rvstring,&rvlen);
477 printf ("CL_DEVICE_VENDOR :%s:\n",rvstring);
478 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VERSION, 1000, rvstring,&rvlen);
479 printf ("CL_DEVICE_VERSION :%s:\n",rvstring);
480
481
482 #endif //GPU_DEBUG
483#undef GPU_DEBUG
484
485 // do this when we need collision - collision_initGPUCollide(p);
486
487 return TRUE;
488}
489
490
491/********************************************************************************/
492/* */
493/* Android, (code might work on IPHONE) OpenGL ES 2.0, CL integration */
494/* */
495/********************************************************************************/
496
497#ifdef GL_ES_VERSION_2_0
498
499#include <dlfcn.h> // possibly Android only
500
501#define clGetPlatformIDs(aa,bb,cc) rclGetPlatformIDs(aa,bb,cc)
502#define clGetPlatformInfo(aa,bb,cc,dd,ee) rclGetPlatformInfo(aa,bb,cc,dd,ee)
503#define clGetDeviceIDs(aa,bb,cc,dd,ee) rclGetDeviceIDs(aa,bb,cc,dd,ee)
504#define clGetDeviceInfo(aa,bb,cc,dd,ee) rclGetDeviceInfo(aa,bb,cc,dd,ee)
505#define clCreateKernel(aa,bb,cc) rclCreateKernel(aa,bb,cc)
506#define clBuildProgram(aa,bb,cc,dd,ee,ff) rclBuildProgram(aa,bb,cc,dd,ee,ff)
507#define clCreateBuffer(aa,bb,cc,dd,ee) rclCreateBuffer(aa,bb,cc,dd,ee)
508#define clCreateCommandQueue(aa,bb,cc,dd) rclCreateCommandQueue(aa,bb,cc,dd)
509#define clCreateContextFromType(aa,bb,cc,dd,ee) rclCreateContextFromType(aa,bb,cc,dd,ee)
510#define clCreateFromGLBuffer(aa,bb,cc,dd) rclCreateFromGLBuffer(aa,bb,cc,dd)
511#define clCreateProgramWithSource(aa,bb,cc,dd,ee) rclCreateProgramWithSource(aa,bb,cc,dd,ee)
512#define clEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii)
513#define clEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
514#define clEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
515#define clGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff) rclGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff)
516#define clGetProgramBuildInfo(aa,bb,cc,dd,ee,ff) rclGetProgramBuildInfo(aa,bb,cc,dd,ee,ff)
517#define clReleaseMemObject(aa) rclReleaseMemObject(aa)
518#define clSetKernelArg(aa,bb,cc,dd) rclSetKernelArg(aa,bb,cc,dd)
519
520static void *getCLHandle(){
521 void *res = NULL;
522 int which=0;
523
524 res = dlopen("/system/lib/libOpenCL.so",RTLD_LAZY);
525 if(res==NULL){
526 res = dlopen("/system/vendor/lib/egl/libGLES_mali.so",RTLD_LAZY);
527 which = 1;
528 }
529 if(res==NULL){
530 res = dlopen("/system/lib/libllvm-a3xx.so",RTLD_LAZY);
531 which = 2;
532 }
533 if(res==NULL) {
534 ConsoleMessage("Could not open library :(\n");
535 return NULL;
536 }
537
538 if (which==0) {
539 ConsoleMessage ("OpenCL lib - libOpenCL.so");
540 } else if (which == 1) {
541 ConsoleMessage ("OpenCL lib libGLES_mali.so");
542 } else if (which == 2) {
543 ConsoleMessage ("OpenCL Lib - liblvm-a3xx.so");
544 }
545 return res;
546}
547
548cl_int (*rclGetPlatformIDs)(cl_uint /* num_entries */,
549 cl_platform_id * /* platforms */,
550 cl_uint * /* num_platforms */);
551
552
553cl_int (*rclGetPlatformInfo)(cl_platform_id /* platform */,
554 cl_platform_info /* param_name */,
555 size_t /* param_value_size */,
556 void * /* param_value */,
557 size_t * /* param_value_size_ret */);
558
559cl_int (*rclGetDeviceIDs)(cl_platform_id /* platform */,
560 cl_device_type /* device_type */,
561 cl_uint /* num_entries */,
562 cl_device_id * /* devices */,
563 cl_uint * /* num_devices */);
564
565
566cl_int (*rclGetDeviceInfo)(cl_device_id /* device */,
567 cl_device_info /* param_name */,
568 size_t /* param_value_size */,
569 void * /* param_value */,
570 size_t * /* param_value_size_ret */);
571
572cl_kernel (*rclCreateKernel)(cl_program /*program */,
573 const char * /* kernel name */,
574 cl_int * /* errorcode_ret */);
575
576cl_int (*rclBuildProgram)(cl_program /* program */,
577 cl_uint /* num_devices */,
578 const cl_device_id * /* device_list */,
579 const char * /* options */,
580 void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
581 void * /* user_data */);
582
583cl_mem (*rclCreateBuffer)(cl_context /* context */,
584 cl_mem_flags /* flags */,
585 size_t /* size */,
586 void * /* host_ptr */,
587 cl_int * /* errcode_ret */);
588
589cl_command_queue (*rclCreateCommandQueue)(cl_context /* context */,
590 cl_device_id /* device */,
591 cl_command_queue_properties /* properties */,
592 cl_int * /* errcode_ret */);
593
594
595cl_context (*rclCreateContextFromType)(const cl_context_properties * /* properties */,
596 cl_device_type /* device_type */,
597 void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
598 void * /* user_data */,
599 cl_int * /* errcode_ret */);
600
601cl_program (*rclCreateProgramWithSource)(cl_context /* context */,
602 cl_uint /* count */,
603 const char ** /* strings */,
604 const size_t * /* lengths */,
605 cl_int * /* errcode_ret */);
606
607
608cl_int (*rclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */,
609 cl_kernel /* kernel */,
610 cl_uint /* work_dim */,
611 const size_t * /* global_work_offset */,
612 const size_t * /* global_work_size */,
613 const size_t * /* local_work_size */,
614 cl_uint /* num_events_in_wait_list */,
615 const cl_event * /* event_wait_list */,
616 cl_event * /* event */);
617
618cl_int (*rclEnqueueReadBuffer)(cl_command_queue /* command_queue */,
619 cl_mem /* buffer */,
620 cl_bool /* blocking_read */,
621 size_t /* offset */,
622 size_t /* size */,
623 void * /* ptr */,
624 cl_uint /* num_events_in_wait_list */,
625 const cl_event * /* event_wait_list */,
626 cl_event * /* event */);
627
628
629cl_int (*rclEnqueueWriteBuffer)(cl_command_queue /* command_queue */,
630 cl_mem /* buffer */,
631 cl_bool /* blocking_write */,
632 size_t /* offset */,
633 size_t /* size */,
634 const void * /* ptr */,
635 cl_uint /* num_events_in_wait_list */,
636 const cl_event * /* event_wait_list */,
637 cl_event * /* event */);
638
639
640cl_int (*rclGetKernelWorkGroupInfo)(cl_kernel /* kernel */,
641 cl_device_id /* device */,
642 cl_kernel_work_group_info /* param_name */,
643 size_t /* param_value_size */,
644 void * /* param_value */,
645 size_t * /* param_value_size_ret */);
646
647
648cl_int (*rclReleaseMemObject)(cl_mem /* memobj */);
649
650cl_int (*rclSetKernelArg)(cl_kernel /* kernel */,
651 cl_uint /* arg_index */,
652 size_t /* arg_size */,
653 const void * /* arg_value */);
654
655cl_mem (*rclCreateFromGLBuffer)(cl_context, cl_mem_flags, GLuint, cl_int *);
656
657cl_int (*rclGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
658
659static int getFunctionHandles(){
660 static void* getCLHandle();
661
662 void *handle = getCLHandle();
663 if(handle==NULL) return CL_DEVICE_NOT_AVAILABLE;
664 rclGetPlatformIDs = (cl_int (*)(cl_uint,cl_platform_id *,cl_uint*))dlsym(handle,"clGetPlatformIDs");
665 rclGetPlatformInfo = (cl_int (*)(cl_platform_id, cl_platform_info, size_t, void *, size_t*))dlsym(handle,"clGetPlatformInfo");
666 rclGetDeviceIDs = (cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint*))dlsym(handle,"clGetDeviceIDs");
667 rclGetDeviceInfo = (cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t*))dlsym(handle,"clGetDeviceInfo");
668 rclBuildProgram = (cl_int (*)(cl_program,cl_uint,const cl_device_id *, const char *, void (CL_CALLBACK*)(cl_program,void*), void *))dlsym(handle,"clBuildProgram");
669 rclCreateBuffer = (cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *))dlsym(handle,"clCreateBuffer");
670 rclCreateKernel = (cl_kernel (*)(cl_program,const char*,cl_int*))dlsym(handle,"clCreateKernel");
671 rclCreateCommandQueue = (cl_command_queue (*) (cl_context,cl_device_id,cl_command_queue_properties,cl_int*))dlsym(handle,"clCreateCommandQueue");
672 rclCreateContextFromType = (cl_context (*)(const cl_context_properties*,cl_device_type,void(CL_CALLBACK*)(const char*,const void*,size_t,void*),void*,cl_int*))dlsym(handle,"clCreateContextFromType");
673 rclCreateProgramWithSource = (cl_program (*) (cl_context,cl_uint,const char**,const size_t*,cl_int*))dlsym(handle,"clCreateProgramWithSource");
674 rclEnqueueNDRangeKernel=(cl_int(*)(cl_command_queue,cl_kernel,cl_uint,const size_t*,const size_t*,const size_t*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueNDRangeKernel");
675 rclEnqueueReadBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,void*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueReadBuffer");
676 rclEnqueueWriteBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,const void*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueWriteBuffer");
677 rclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel,cl_device_id,cl_kernel_work_group_info,size_t, void*, size_t *))dlsym(handle,"clGetKernelWorkGroupInfo");
678 rclReleaseMemObject = (cl_int (*)(cl_mem))dlsym(handle,"clReleaseMemObject");
679 rclSetKernelArg = (cl_int (*)(cl_kernel,cl_uint,size_t,const void *))dlsym(handle,"clSetKernelArg");
680 rclGetProgramBuildInfo = (cl_int (*)(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *))dlsym(handle,"clGetProgramBuildInfo");
681 rclCreateFromGLBuffer = (cl_mem (*)(cl_context, cl_mem_flags, GLuint, cl_int *))dlsym(handle,"clCreateFromGLBuffer");
682
683
684
685 if (!(rclGetPlatformIDs) || !(rclGetPlatformInfo) || !(rclGetDeviceIDs) || !(rclGetDeviceInfo) ||
686 !(rclBuildProgram) || !(rclCreateBuffer) || !(rclCreateKernel) || !(rclCreateCommandQueue) ||
687 !(rclCreateContextFromType) || !(rclCreateProgramWithSource) || !(rclEnqueueNDRangeKernel) || !(rclEnqueueReadBuffer) ||
688 !(rclEnqueueWriteBuffer) || !(rclGetKernelWorkGroupInfo) || !(rclReleaseMemObject) || !(rclSetKernelArg) ||
689 !(rclGetProgramBuildInfo) || !(rclCreateFromGLBuffer)) {
690 ConsoleMessage ("did not find one of the functions in this OpenCL Library");
691 if (!rclGetPlatformIDs) ConsoleMessage ("did not find rclGetPlatformIDs");
692 if (!rclGetPlatformInfo) ConsoleMessage ("did not find rclGetPlatformInfo");
693 if (!rclGetDeviceIDs) ConsoleMessage ("did not find rclGetDeviceIDs");
694 if (!rclGetDeviceInfo) ConsoleMessage ("did not find rclGetDeviceInfo");
695 if (!rclBuildProgram) ConsoleMessage ("did not find rclBuildProgram");
696 if (!rclCreateBuffer) ConsoleMessage ("did not find rclCreateBuffer");
697 if (!rclCreateKernel) ConsoleMessage ("did not find rclCreateKernel");
698 if (!rclCreateCommandQueue) ConsoleMessage ("did not find rclCreateCommandQueue");
699 if (!rclCreateContextFromType) ConsoleMessage ("did not find rclCreateContextFromType");
700 if (!rclCreateProgramWithSource) ConsoleMessage ("did not find rclCreateProgramWithSource");
701 if (!rclEnqueueNDRangeKernel) ConsoleMessage ("did not find rclEnqueueNDRangeKernel");
702 if (!rclEnqueueReadBuffer) ConsoleMessage ("did not find rclEnqueueReadBuffer");
703 if (!rclEnqueueWriteBuffer) ConsoleMessage ("did not find rclEnqueueWriteBuffer");
704 if (!rclGetKernelWorkGroupInfo) ConsoleMessage ("did not find rclGetKernelWorkGroupInfo");
705 if (!rclReleaseMemObject) ConsoleMessage ("did not find rclReleaseMemObject");
706 if (!rclSetKernelArg) ConsoleMessage ("did not find rclSetKernelArg");
707 if (!rclGetProgramBuildInfo) ConsoleMessage ("did not find rclGetProgramBuildInfo");
708 if (!rclCreateFromGLBuffer) ConsoleMessage ("did not find rclCreateFromGLBuffer");
709
710 // JAS return !CL_SUCCESS;
711 }
712 return CL_SUCCESS;
713}
714
715
716#endif
717
718/* create the Interpolators for the GPU */
719static void createGPUInterpolators() {
720 ppOpenCL_Utils p;
721 ttglobal tg = gglobal();
722 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
723
724 //bool collision_initGPUCollide (struct sCollisionGPU* initme) {
725 char *kp[2];
726 cl_int err;
727 size_t kernel_wg_size;
728 size_t rvlen;
729
730 //printf ("called initGPUCollide, p is %p\n",p);
731 //printf ("called initGPUCollide, context is %p\n",p->CL_context);
732 //printf ("called initGPUCollide, initme is %p\n",initme);
733
734 kp[0] = (char *)interpolator_headers;
735 kp[1] = (char *)coordinateInterpolator_kernel;
736
737 p->coordinateInterpolatorProgram = clCreateProgramWithSource(p->CL_context, 2, (const char **) kp, NULL, &err);
738 //printf ("past the clCreateProgramWithSource call\n");
739
740 if (!p->coordinateInterpolatorProgram || (err != CL_SUCCESS)) {
741 printCLError("clCreateProgramWithSource",err);
742 return;
743 }
744
745 // build the compute program executable
746 //char *opts = "-Werror -cl-single-precision-constant -cl-nv-verbose -g -cl-opt-disable -cl-strict-aliasing";
747 //char *opts = "-Werror -cl-single-precision-constant -cl-opt-disable -cl-strict-aliasing";
748 //err = clBuildProgram(p->program, 0, NULL, opts, NULL, NULL);
749 //ConsoleMessage ("calling clBuildProgram with program %p\n",p->program);
750
751 // build the program, hard code in devices to 1 device, with the device list, no options
752 char *opts = NULL;
753 err = clBuildProgram(p->coordinateInterpolatorProgram, 1, &(p->CL_device_id), opts, NULL, NULL);
754 //printf ("past the clBuildProgram call\n");
755
756 //ConsoleMessage ("called clBuildProgram error %d\n",err);
757 if (err != CL_SUCCESS) {
758 size_t len;
759 char buffer[16384];
760
761 ConsoleMessage("Error: Failed to build program executable\n");
762 printCLError("clBuildProgram",err);
763 err = clGetProgramBuildInfo(p->coordinateInterpolatorProgram, p->CL_device_id, CL_PROGRAM_BUILD_LOG,
764 sizeof(buffer), buffer, &len);
765 TEST_ERR("clGetProgramBuildInfo",err);
766 ConsoleMessage ("error string len %d\n",(int)len);
767 ConsoleMessage("%s\n", buffer);
768 return;
769 }
770
771 // create the compute kernel
772 p->coordinateInterpolatorKernel = clCreateKernel(p->coordinateInterpolatorProgram, "compute_collide", &err);
773 //printf ("kernel is %p %p\n",p, p->coordinateInterpolatorKernel);
774
775 if (!p->coordinateInterpolatorKernel || (err != CL_SUCCESS)) {
776 printCLError("clCreateKernel",err);
777 return;
778 }
779
780
781 // Kernel Workgroup size
782 // rv = clGetDeviceInfo (p->device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, &rvlen);
783 err = clGetKernelWorkGroupInfo (p->coordinateInterpolatorKernel, p->CL_device_id,
784 CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernel_wg_size, &rvlen);
785
786 if (err!=CL_SUCCESS) {
787 printCLError( "clGetKernelWorkGroupInfo",err);
788 return;
789 }
790
791 // try the smaller of the two
792 if (kernel_wg_size < p->CL_default_workgroup_size) p->coordinateInterpolator_workgroup_size = kernel_wg_size;
793 else p->coordinateInterpolator_workgroup_size = p->CL_default_workgroup_size;
794
795#ifdef GPU_DEBUG
796 ConsoleMessage ("MAX_WORK_GROUP_SIZE %d\n",kernel_wg_size);
797 ConsoleMessage ("We are going to set our workgroup size to %d\n",wg_size);
798
799
800 /*
801 1. Get workGroupSize from clGetDeviceInfo with CL_DEVICE_mum of two values and use that value as your optimal workGroupSize
802 2. Get KernelWorkGroupSize from from clGetKernelWorkGroupInfo with CL_KERNEL_WORK_GPOUP_SIZE
803 3. Get minimum of two values and use that value as your optimal workGroupSize
804 */
805
806#endif // GPU_DEBUG
807}
808
809static int printOnce = FALSE;
810
811/* actually do the interpolator for each and every parent - eg, a Coordinate destination may have multiple
812 parents, of course, (DEF, multi USE) */
813
814#define TEST_GLOBAL
815#ifdef TEST_GLOBAL
816cl_mem myK = NULL;
817cl_mem myKV = NULL;
818cl_mem myVert = NULL;
819#endif
820
821
822static void runItOnce(cl_kernel myKernel, GLuint keyVBO, GLuint keyValueVBO, GLuint destVBO, int keysIn, int keyValuesIn, float frac) {
823 cl_int err;
824 size_t global_work_size;
825 size_t local_work_size;
826
827 //printf ("runItOnce...\n");
828
829#ifdef TESTING
830 /* TESTING */
831 float rvs[2000];
832 int i;
833 cl_mem output_buffer;
834#endif //TESTING
835
836#ifdef TESTING
837 printf ("calling glFinish()\n");
838 glFinish();
839
840 printf ("runItOnce, frac %f keysIn %d keyValuesIn %d\n",frac,keysIn,keyValuesIn);
841
842#endif //TESTING
843
844#ifndef TEST_GLOBAL
845 // set up pointers to buffers
846 cl_mem myK, myKV,myVert;
847#endif
848
849 ppOpenCL_Utils p;
850 ttglobal tg = gglobal();
851 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
852
853#ifdef TEST_GLOBAL
854 if (myK==NULL) myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
855 TEST_ERR("clCreateFromGLBuffer 1",err);
856
857 if (myKV == NULL) myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
858 TEST_ERR("clCreateFromGLBuffer 2",err);
859
860
861 if (myVert==NULL) myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
862 TEST_ERR("clCreateFromGLBuffer 3",err);
863 clFinish(p->CL_queue);
864
865#else
866 myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
867 TEST_ERR("clCreateFromGLBuffer 1",err);
868
869 myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
870 TEST_ERR("clCreateFromGLBuffer 2",err);
871
872
873 myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
874 TEST_ERR("clCreateFromGLBuffer 3",err);
875 clFinish(p->CL_queue);
876#endif //TEST_GLOBAL
877
878
879
880 /* TESTING */
881 //printf ("acquiring objects\n");
882 err = clEnqueueAcquireGLObjects(p->CL_queue, 1, &myVert, 0, NULL, NULL);
883 TEST_ERR("clEnqueueAcquire",err);
884
885
886
887
888 //send along the values as arguments to the CL kernel
889 err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), &myK);
890 TEST_ERR("clSetKernelArg",err);
891 err = clSetKernelArg(myKernel, 1, sizeof(cl_mem), &myKV);
892 TEST_ERR("clSetKernelArg",err);
893 err = clSetKernelArg(myKernel, 2, sizeof(cl_mem), &myVert);
894 TEST_ERR("clSetKernelArg",err);
895 err =clSetKernelArg(myKernel, 3, sizeof(int), &keysIn);
896 TEST_ERR("clSetKernelArg",err);
897 err =clSetKernelArg(myKernel, 4, sizeof(int), &keyValuesIn);
898 TEST_ERR("clSetKernelArg",err);
899 err =clSetKernelArg(myKernel, 5, sizeof(float), &frac);
900 TEST_ERR("clSetKernelArg",err);
901
902
903 /* testing */
904#ifdef TESTING
905 output_buffer = clCreateBuffer(p->CL_context, CL_MEM_WRITE_ONLY, sizeof(float) * keyValuesIn/keysIn, NULL, NULL);
906 err = clSetKernelArg(myKernel, 6, sizeof(cl_mem), &output_buffer);
907 TEST_ERR("clSetKernelArg",err);
908#endif //TESTING
909
910
911
912
913
914 // global work group size
915#define MYWG (p->CL_default_workgroup_size)
916 // find out how many "blocks" we can have
917 if (MYWG > 0)
918 global_work_size = (size_t) (keysIn) / MYWG;
919 else global_work_size = 0;
920
921 // add 1 to it, because we have to round up
922 global_work_size += 1;
923
924 // now, global_work_size will be an exact multiple of local_work_size
925 global_work_size *= MYWG;
926
927 //ConsoleMessage ("global_work_size is %d %x right now...\n",global_work_size, global_work_size);
928
929 local_work_size = MYWG;
930 //ConsoleMessage ("local_work_size %d\n",local_work_size);
931 //ConsoleMessage ("ntri %d, global_work_size %d, local_work_size %d\n",ntri,global_work_size,local_work_size);
932
933 //printf ("calling kernel local_work_size %d, global_Work_size %d\n",local_work_size, global_work_size);
934 err = clEnqueueNDRangeKernel(p->CL_queue, myKernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
935 TEST_ERR("clEnqueueNDRangeKernel", err);
936
937 //printf ("called kernel\n");
938
939#ifdef TESTING
940 clFinish(p->CL_queue);
941
942 printf ("past clFinish\n");
943#endif //TESTING
944
945
946 /* TESTING */
947 /* glBindBuffer(GL_ARRAY_BUFFER,destVBO);
948 glGetBufferSubData(GL_ARRAY_BUFFER, 0, sizeof (float) * 24, rvs);
949 for (i=0; i<8; i++) {
950 printf ("glBufferData is %d %f\n",i,rvs[i]);
951 }
952 */
953
954#ifndef TEST_GLOBAL
955 err = clEnqueueReleaseGLObjects(p->CL_queue, 1, &myKV, 0, NULL, NULL);
956 TEST_ERR("clEnqueueRelease",err);
957
958 err = clReleaseMemObject(myK) || clReleaseMemObject(myKV) || clReleaseMemObject(myVert);
959 TEST_ERR("clReleaseMemObject",err);
960
961#endif // TEST_GLOBAL
962
963#ifdef TESTING
964
965 err = clEnqueueReadBuffer (p->CL_queue, output_buffer,
966 CL_TRUE, 0, sizeof(float) * 6 /* keyValuesIn/keysIn */,
967 rvs, 0, NULL, NULL);
968
969 if (err != CL_SUCCESS) {
970 printCLError("clEnqueueReadBuffer",err);
971 return;
972 }
973
974
975 for (i=0; i < (keyValuesIn/keysIn); i++) {
976 printf ("rv %d is %f\n", i, rvs[i]);
977 }
978 clReleaseMemObject(output_buffer);
979
980#endif //TESTING
981
982
983
984
985}
986
987/* do an interpolator on the GPU - the destination will be on the GPU, too */
988void runOpenCLInterpolator(struct CRStruct *route, struct X3D_Node * toNode, int toOffset) {
989 GLuint keyVBO = 0;
990 GLuint keyValueVBO = 0;
991 GLuint destVBO = 0;
992 int keysIn = 0;
993 int keyValuesIn = 0;
994 float frac = 0.0;
995
996
997
998 if (!printOnce) {
999 printf ("RUNNING OPENCL INTERPOLATOR PROGRAM %p\n",route->CL_Interpolator);
1000 printf ("it is coming from a %s\n",stringNodeType(route->routeFromNode->_nodeType));
1001 printf ("and, it is going to a %s\n",stringNodeType(toNode->_nodeType));
1002 printf ("with a length of %d\n",route->len);
1003 }
1004
1005 if (route->CL_Interpolator == NULL) {
1006 printf ("runCLInterpolator - interpolator is NULL??\n");
1007 return;
1008 }
1009
1010 if ((toNode == NULL) || (route->routeFromNode == NULL)) {
1011 printf ("runCLInterpolator - error - destination or source NULL\n");
1012 return;
1013 }
1014
1015 // gather info here
1016 switch (route->routeFromNode->_nodeType) {
1017 case NODE_CoordinateInterpolator: {
1018 struct X3D_CoordinateInterpolator *px = (struct X3D_CoordinateInterpolator *) route->routeFromNode;
1019 keyVBO = px->_keyVBO;
1020 keyValueVBO = px->_keyValueVBO;
1021 keysIn = px->key.n;
1022 keyValuesIn = px->keyValue.n;
1023 frac = px->set_fraction;
1024 break;
1025 }
1026 default: ConsoleMessage ("do not route from a node of %s on the GPU - help!\n",
1027 stringNodeType(route->routeFromNode->_nodeType));
1028 }
1029
1030 if ((keyVBO == 0) || (keyValueVBO == 0)) {
1031 printf ("runCLInterpolator - error - source VBOS are %d %d, should not be zero\n",
1032 keyVBO, keyValueVBO);
1033 return;
1034 }
1035
1036 switch (toNode->_nodeType) {
1037 case NODE_Coordinate: {
1038 struct X3D_Coordinate *px = X3D_COORDINATE(toNode);
1039 int i;
1040 //printf ("haveCoordinateHere...\n");
1041
1042 for (i=0; i<vectorSize(px->_parentVector); i++) {
1043 struct X3D_Node * me = vector_get(struct X3D_Node *, px->_parentVector, i);
1044 //printf ("parent %d of %d is %s\n",i,vectorSize(px->_parentVector), stringNodeType(me->_nodeType));
1045 struct X3D_PolyRep pr = *(me->_intern);
1046 //printf ("polyrep buffer is %d\n",pr.VBO_buffers[VERTEX_VBO]);
1047 destVBO = pr.VBO_buffers[VERTEX_VBO];
1048
1049 if (destVBO != 0) {
1050 runItOnce(route->CL_Interpolator, keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1051 }
1052 }
1053 //destVBO = px->
1054 break;
1055 }
1056 default: ConsoleMessage ("do not route from a node of %s on the GPU - help!\n",
1057 stringNodeType(route->routeFromNode->_nodeType));
1058 }
1059
1060 if (!printOnce) {
1061 printf ("so, if we were to run the interp, keyVBO %d keyValueVBO %d destVBO %d keysIn %d keyValuesIn %d frac %f\n",
1062 keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1063 printOnce = TRUE;
1064 }
1065}
1066
1067#ifdef GL_ES_VERSION_2_0
1068static const char* interpolator_headers = " \
1069//#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n\
1070#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1071//#pragma OPENCL EXTENSION CL_APPLE_gl_sharing : enable \n\
1072//#pragma OPENCL EXTENSION CL_KHR_gl_sharing : enable \n\
1073//#pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable \n\
1074";
1075#else
1076
1077// this seems to be ok on AMD drivers under Linux
1078static const char* interpolator_headers = " \
1079#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1080//#pragma OPENCL EXTENSION cl_khr_gl_sharing : enable \n\
1081";
1082
1083#endif
1084
1085static const char* coordinateInterpolator_kernel = " \
1086/* Function prototypes */ \n \
1087int find_key (int kin, float frac, __global float *keys); \n \
1088\n \
1089int find_key (int kin, float frac, __global float *keys) { \n \
1090int counter; \n \
1091\
1092for (counter=1; counter <= kin; counter++) { \n \
1093 if (frac <keys[counter]) { \n \
1094 return counter; \n \
1095 } \n \
1096} \n \
1097return kin; /* huh? not found! */ \n \
1098} \
1099\
1100/********************************************************************************/ \n\
1101\n\
1102__kernel void compute_collide ( \n\
1103__global float *keys, /* 0 */ \n\
1104__global float *keyValues, /* 1 */ \n \
1105__global float *destVertices, /* 2 */ \n \
1106const int kin, /* 3 */ \n\
1107const int kvin, /* 4 */ \n \
1108const float frac /* 5 */ \n \
1109/* , __global float *output */ \n \
1110) { \n\
1111\n\
1112 int i_am_canadian = get_global_id(0); \n\
1113\
1114 /* get keysPerKeyValue */ \n \
1115 int kpkv = kvin/kin; \n\
1116if (i_am_canadian > kpkv) return; /* this invocation is above our bounds */ \n\
1117\
1118//output[i_am_canadian] = -999.9f; /* convert_float(get_global_id(0)); */ /* keys[kin-1]; */ \n \
1119\
1120\
1121//output[i_am_canadian] = destVertices[i_am_canadian]; \n \
1122 \
1123 /* set fraction less than or greater than keys */ \n\
1124 if (frac <= keys[0]) { \n\
1125//output[i_am_canadian] = -100.0f; \n \
1126 destVertices[i_am_canadian*3+0] = keyValues[i_am_canadian*3+0]; \n \
1127 destVertices[i_am_canadian*3+1] = keyValues[i_am_canadian*3+1]; \n \
1128 destVertices[i_am_canadian*3+2] = keyValues[i_am_canadian*3+2]; \n \
1129 } else if (frac >=keys[kin-1]) { \n \
1130//output[i_am_canadian] = 100.0f; \n\
1131 destVertices[i_am_canadian*3+0] = keyValues[(kvin - kpkv + i_am_canadian)*3+0]; \n \
1132 destVertices[i_am_canadian*3+1] = keyValues[(kvin - kpkv + i_am_canadian)*3+1]; \n \
1133 destVertices[i_am_canadian*3+2] = keyValues[(kvin - kpkv + i_am_canadian)*3+2]; \n \
1134 } else { \n \
1135 int myKey = find_key(kin,frac,keys); \n \
1136 float interval = (frac - keys[myKey-1]) / (keys[myKey] - keys[myKey-1]); \n \
1137//output[i_am_canadian] = convert_float(myKey*100+kpkv); \n \
1138\
1139 int thisone = myKey*kpkv*3; \n \
1140 int prevone = (myKey-1) * kpkv *3; \n \
1141//output[i_am_canadian] = convert_float(thisone * 100 + prevone)+interval; \n \
1142 prevone = prevone + i_am_canadian*3; \n \
1143 thisone = thisone + i_am_canadian*3; \n \
1144 destVertices[i_am_canadian*3+0] = keyValues[prevone+0] + interval*(keyValues[thisone+0]-keyValues[prevone+0]); \n \
1145 destVertices[i_am_canadian*3+1] = keyValues[prevone+1] + interval*(keyValues[thisone+1]-keyValues[prevone+1]); \n \
1146 destVertices[i_am_canadian*3+2] = keyValues[prevone+2] + interval*(keyValues[thisone+2]-keyValues[prevone+2]); \n \
1147 } \n \
1148 //output[i_am_canadian] = destVertices[i_am_canadian*3]; \n \
1149\n \
1150}";
1151
1152
1153
1154#endif //HAVE_OPENCL