Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
21: PetscFunctionBegin;
22: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
23: *a = 0;
24: PetscCall(VecViennaCLCopyToGPU(v));
25: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
26: ViennaCLWaitForGPU();
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
31: {
32: PetscFunctionBegin;
33: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
34: v->offloadmask = PETSC_OFFLOAD_GPU;
36: PetscCall(PetscObjectStateIncrease((PetscObject)v));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
41: {
42: PetscFunctionBegin;
43: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
44: *a = 0;
45: PetscCall(VecViennaCLCopyToGPU(v));
46: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
47: ViennaCLWaitForGPU();
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
52: {
53: PetscFunctionBegin;
54: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
59: {
60: PetscFunctionBegin;
61: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
62: *a = 0;
63: PetscCall(VecViennaCLAllocateCheck(v));
64: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
65: ViennaCLWaitForGPU();
66: PetscFunctionReturn(PETSC_SUCCESS);
67: }
69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
70: {
71: PetscFunctionBegin;
72: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
73: v->offloadmask = PETSC_OFFLOAD_GPU;
75: PetscCall(PetscObjectStateIncrease((PetscObject)v));
76: PetscFunctionReturn(PETSC_SUCCESS);
77: }
79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
80: {
81: char string[20];
82: PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
84: PetscFunctionBegin;
85: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
86: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
87: if (flg) {
88: try {
89: PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
90: PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
91: PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));
93: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
94: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
95: #if defined(PETSC_HAVE_CUDA)
96: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
97: #endif
98: #if defined(PETSC_HAVE_OPENCL)
99: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102: } catch (std::exception const &ex) {
103: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104: }
105: }
107: #if defined(PETSC_HAVE_OPENCL)
108: /* ViennaCL OpenCL device type configuration */
109: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110: if (flg) {
111: try {
112: PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
115: PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
118: PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120: } catch (std::exception const &ex) {
121: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122: }
123: }
124: #endif
126: /* Print available backends */
127: PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128: if (flg) {
129: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
143: /* Print selected backends */
144: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157: }
158: PetscFunctionReturn(PETSC_SUCCESS);
159: }
161: /*
162: Allocates space for the vector array on the Host if it does not exist.
163: Does NOT change the PetscViennaCLFlag for the vector
164: Does NOT zero the ViennaCL array
165: */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168: PetscScalar *array;
169: Vec_Seq *s;
170: PetscInt n = v->map->n;
172: PetscFunctionBegin;
173: s = (Vec_Seq *)v->data;
174: PetscCall(VecViennaCLAllocateCheck(v));
175: if (s->array == 0) {
176: PetscCall(PetscMalloc1(n, &array));
177: s->array = array;
178: s->array_allocated = array;
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: /*
184: Allocates space for the vector array on the GPU if it does not exist.
185: Does NOT change the PetscViennaCLFlag for the vector
186: Does NOT zero the ViennaCL array
188: */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191: PetscFunctionBegin;
192: if (!v->spptr) {
193: try {
194: v->spptr = new Vec_ViennaCL;
195: ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196: ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
198: } catch (std::exception const &ex) {
199: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200: }
201: }
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208: PetscFunctionBegin;
209: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210: PetscCall(VecViennaCLAllocateCheck(v));
211: if (v->map->n > 0) {
212: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214: try {
215: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216: viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217: ViennaCLWaitForGPU();
218: } catch (std::exception const &ex) {
219: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220: }
221: PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223: v->offloadmask = PETSC_OFFLOAD_BOTH;
224: }
225: }
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: /*
230: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234: PetscFunctionBegin;
235: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236: PetscCall(VecViennaCLAllocateCheckHost(v));
237: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239: try {
240: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241: viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242: ViennaCLWaitForGPU();
243: } catch (std::exception const &ex) {
244: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245: }
246: PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248: v->offloadmask = PETSC_OFFLOAD_BOTH;
249: }
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256: PetscScalar *ya;
257: const PetscScalar *xa;
259: PetscFunctionBegin;
260: PetscCall(VecViennaCLAllocateCheckHost(xin));
261: PetscCall(VecViennaCLAllocateCheckHost(yin));
262: if (xin != yin) {
263: PetscCall(VecGetArrayRead(xin, &xa));
264: PetscCall(VecGetArray(yin, &ya));
265: PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266: PetscCall(VecRestoreArrayRead(xin, &xa));
267: PetscCall(VecRestoreArray(yin, &ya));
268: }
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274: PetscInt n = xin->map->n, i;
275: PetscScalar *xx;
277: PetscFunctionBegin;
278: PetscCall(VecGetArray(xin, &xx));
279: for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280: PetscCall(VecRestoreArray(xin, &xx));
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286: Vec_Seq *vs = (Vec_Seq *)v->data;
288: PetscFunctionBegin;
289: PetscCall(PetscObjectSAWsViewOff(v));
290: PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291: PetscCall(PetscFree(vs->array_allocated));
292: PetscCall(PetscFree(vs));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297: {
298: Vec_Seq *v = (Vec_Seq *)vin->data;
300: PetscFunctionBegin;
301: v->array = v->unplacedarray;
302: v->unplacedarray = 0;
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: /*MC
307: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
309: Options Database Keys:
310: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
312: Level: beginner
314: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315: M*/
317: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318: {
319: const ViennaCLVector *xgpu;
320: ViennaCLVector *ygpu;
322: PetscFunctionBegin;
323: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324: PetscCall(VecViennaCLGetArray(yin, &ygpu));
325: PetscCall(PetscLogGpuTimeBegin());
326: try {
327: if (alpha != 0.0 && xin->map->n > 0) {
328: *ygpu = *xgpu + alpha * *ygpu;
329: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330: } else {
331: *ygpu = *xgpu;
332: }
333: ViennaCLWaitForGPU();
334: } catch (std::exception const &ex) {
335: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336: }
337: PetscCall(PetscLogGpuTimeEnd());
338: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340: PetscFunctionReturn(PETSC_SUCCESS);
341: }
343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344: {
345: const ViennaCLVector *xgpu;
346: ViennaCLVector *ygpu;
348: PetscFunctionBegin;
349: if (alpha != 0.0 && xin->map->n > 0) {
350: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351: PetscCall(VecViennaCLGetArray(yin, &ygpu));
352: PetscCall(PetscLogGpuTimeBegin());
353: try {
354: *ygpu += alpha * *xgpu;
355: ViennaCLWaitForGPU();
356: } catch (std::exception const &ex) {
357: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358: }
359: PetscCall(PetscLogGpuTimeEnd());
360: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363: }
364: PetscFunctionReturn(PETSC_SUCCESS);
365: }
367: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368: {
369: const ViennaCLVector *xgpu, *ygpu;
370: ViennaCLVector *wgpu;
372: PetscFunctionBegin;
373: if (xin->map->n > 0) {
374: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377: PetscCall(PetscLogGpuTimeBegin());
378: try {
379: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380: ViennaCLWaitForGPU();
381: } catch (std::exception const &ex) {
382: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383: }
384: PetscCall(PetscLogGpuTimeEnd());
385: PetscCall(PetscLogGpuFlops(win->map->n));
386: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389: }
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
393: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394: {
395: const ViennaCLVector *xgpu, *ygpu;
396: ViennaCLVector *wgpu;
398: PetscFunctionBegin;
399: if (alpha == 0.0 && xin->map->n > 0) PetscCall(VecCopy_SeqViennaCL(yin, win));
400: else {
401: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
402: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
403: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
404: PetscCall(PetscLogGpuTimeBegin());
405: if (alpha == 1.0) {
406: try {
407: *wgpu = *ygpu + *xgpu;
408: } catch (std::exception const &ex) {
409: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
410: }
411: PetscCall(PetscLogGpuFlops(win->map->n));
412: } else if (alpha == -1.0) {
413: try {
414: *wgpu = *ygpu - *xgpu;
415: } catch (std::exception const &ex) {
416: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
417: }
418: PetscCall(PetscLogGpuFlops(win->map->n));
419: } else {
420: try {
421: *wgpu = *ygpu + alpha * *xgpu;
422: } catch (std::exception const &ex) {
423: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
424: }
425: PetscCall(PetscLogGpuFlops(2 * win->map->n));
426: }
427: ViennaCLWaitForGPU();
428: PetscCall(PetscLogGpuTimeEnd());
429: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
430: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
431: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
432: }
433: PetscFunctionReturn(PETSC_SUCCESS);
434: }
436: /*
437: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
438: *
439: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
440: * hence there is an iterated application of these until the final result is obtained
441: */
442: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
443: {
444: PetscInt j;
446: PetscFunctionBegin;
447: for (j = 0; j < nv; ++j) {
448: if (j + 1 < nv) {
449: PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
450: ++j;
451: } else {
452: PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
453: }
454: }
455: ViennaCLWaitForGPU();
456: PetscFunctionReturn(PETSC_SUCCESS);
457: }
459: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
460: {
461: const ViennaCLVector *xgpu, *ygpu;
463: PetscFunctionBegin;
464: if (xin->map->n > 0) {
465: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
466: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
467: PetscCall(PetscLogGpuTimeBegin());
468: try {
469: *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
470: } catch (std::exception const &ex) {
471: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
472: }
473: ViennaCLWaitForGPU();
474: PetscCall(PetscLogGpuTimeEnd());
475: if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
476: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
477: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
478: } else *z = 0.0;
479: PetscFunctionReturn(PETSC_SUCCESS);
480: }
482: /*
483: * Operation z[j] = dot(x, y[j])
484: *
485: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
486: */
487: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
488: {
489: PetscInt n = xin->map->n, i;
490: const ViennaCLVector *xgpu, *ygpu;
491: Vec *yyin = (Vec *)yin;
492: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
494: PetscFunctionBegin;
495: if (xin->map->n > 0) {
496: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
497: for (i = 0; i < nv; i++) {
498: PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
499: ygpu_array[i] = ygpu;
500: }
501: PetscCall(PetscLogGpuTimeBegin());
502: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
503: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
504: viennacl::copy(result.begin(), result.end(), z);
505: for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
506: ViennaCLWaitForGPU();
507: PetscCall(PetscLogGpuTimeEnd());
508: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
509: PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
510: } else {
511: for (i = 0; i < nv; i++) z[i] = 0.0;
512: }
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
516: static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
517: {
518: PetscFunctionBegin;
519: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
520: PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
521: ViennaCLWaitForGPU();
522: PetscFunctionReturn(PETSC_SUCCESS);
523: }
525: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
526: {
527: ViennaCLVector *xgpu;
529: PetscFunctionBegin;
530: if (xin->map->n > 0) {
531: PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
532: PetscCall(PetscLogGpuTimeBegin());
533: try {
534: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
535: ViennaCLWaitForGPU();
536: } catch (std::exception const &ex) {
537: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
538: }
539: PetscCall(PetscLogGpuTimeEnd());
540: PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
541: }
542: PetscFunctionReturn(PETSC_SUCCESS);
543: }
545: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
546: {
547: ViennaCLVector *xgpu;
549: PetscFunctionBegin;
550: if (alpha == 0.0 && xin->map->n > 0) {
551: PetscCall(VecSet_SeqViennaCL(xin, alpha));
552: PetscCall(PetscLogGpuFlops(xin->map->n));
553: } else if (alpha != 1.0 && xin->map->n > 0) {
554: PetscCall(VecViennaCLGetArray(xin, &xgpu));
555: PetscCall(PetscLogGpuTimeBegin());
556: try {
557: *xgpu *= alpha;
558: ViennaCLWaitForGPU();
559: } catch (std::exception const &ex) {
560: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
561: }
562: PetscCall(PetscLogGpuTimeEnd());
563: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
564: PetscCall(PetscLogGpuFlops(xin->map->n));
565: }
566: PetscFunctionReturn(PETSC_SUCCESS);
567: }
569: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
570: {
571: PetscFunctionBegin;
572: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
573: PetscCall(VecDot_SeqViennaCL(xin, yin, z));
574: ViennaCLWaitForGPU();
575: PetscFunctionReturn(PETSC_SUCCESS);
576: }
578: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
579: {
580: const ViennaCLVector *xgpu;
581: ViennaCLVector *ygpu;
583: PetscFunctionBegin;
584: if (xin != yin && xin->map->n > 0) {
585: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
586: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
587: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
588: PetscCall(PetscLogGpuTimeBegin());
589: try {
590: *ygpu = *xgpu;
591: ViennaCLWaitForGPU();
592: } catch (std::exception const &ex) {
593: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
594: }
595: PetscCall(PetscLogGpuTimeEnd());
596: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
597: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
599: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
600: /* copy in CPU if we are on the CPU*/
601: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
602: ViennaCLWaitForGPU();
603: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
604: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
605: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
606: /* copy in CPU */
607: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
608: ViennaCLWaitForGPU();
609: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
610: /* copy in GPU */
611: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
612: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
613: PetscCall(PetscLogGpuTimeBegin());
614: try {
615: *ygpu = *xgpu;
616: ViennaCLWaitForGPU();
617: } catch (std::exception const &ex) {
618: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
619: }
620: PetscCall(PetscLogGpuTimeEnd());
621: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
622: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
623: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
624: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
625: default to copy in GPU (this is an arbitrary choice) */
626: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
627: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
628: PetscCall(PetscLogGpuTimeBegin());
629: try {
630: *ygpu = *xgpu;
631: ViennaCLWaitForGPU();
632: } catch (std::exception const &ex) {
633: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
634: }
635: PetscCall(PetscLogGpuTimeEnd());
636: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
637: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
638: } else {
639: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
640: ViennaCLWaitForGPU();
641: }
642: }
643: }
644: PetscFunctionReturn(PETSC_SUCCESS);
645: }
647: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
648: {
649: ViennaCLVector *xgpu, *ygpu;
651: PetscFunctionBegin;
652: if (xin != yin && xin->map->n > 0) {
653: PetscCall(VecViennaCLGetArray(xin, &xgpu));
654: PetscCall(VecViennaCLGetArray(yin, &ygpu));
655: PetscCall(PetscLogGpuTimeBegin());
656: try {
657: viennacl::swap(*xgpu, *ygpu);
658: ViennaCLWaitForGPU();
659: } catch (std::exception const &ex) {
660: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
661: }
662: PetscCall(PetscLogGpuTimeEnd());
663: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
664: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
665: }
666: PetscFunctionReturn(PETSC_SUCCESS);
667: }
669: // y = alpha * x + beta * y
670: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
671: {
672: PetscScalar a = alpha, b = beta;
673: const ViennaCLVector *xgpu;
674: ViennaCLVector *ygpu;
676: PetscFunctionBegin;
677: if (a == 0.0 && xin->map->n > 0) PetscCall(VecScale_SeqViennaCL(yin, beta));
678: else if (b == 1.0 && xin->map->n > 0) PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
679: else if (a == 1.0 && xin->map->n > 0) PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
680: else if (b == 0.0 && xin->map->n > 0) {
681: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
682: PetscCall(VecViennaCLGetArray(yin, &ygpu));
683: PetscCall(PetscLogGpuTimeBegin());
684: try {
685: *ygpu = *xgpu * alpha;
686: ViennaCLWaitForGPU();
687: } catch (std::exception const &ex) {
688: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
689: }
690: PetscCall(PetscLogGpuTimeEnd());
691: PetscCall(PetscLogGpuFlops(xin->map->n));
692: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
693: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
694: } else if (xin->map->n > 0) {
695: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
696: PetscCall(VecViennaCLGetArray(yin, &ygpu));
697: PetscCall(PetscLogGpuTimeBegin());
698: try {
699: *ygpu = *xgpu * alpha + *ygpu * beta;
700: ViennaCLWaitForGPU();
701: } catch (std::exception const &ex) {
702: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
703: }
704: PetscCall(PetscLogGpuTimeEnd());
705: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
706: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
707: PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
708: }
709: PetscFunctionReturn(PETSC_SUCCESS);
710: }
712: /* operation z = alpha * x + beta *y + gamma *z*/
713: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
714: {
715: PetscInt n = zin->map->n;
716: const ViennaCLVector *xgpu, *ygpu;
717: ViennaCLVector *zgpu;
719: PetscFunctionBegin;
720: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
721: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
722: PetscCall(VecViennaCLGetArray(zin, &zgpu));
723: if (alpha == 0.0 && xin->map->n > 0) {
724: PetscCall(PetscLogGpuTimeBegin());
725: try {
726: if (beta == 0.0) {
727: *zgpu = gamma * *zgpu;
728: ViennaCLWaitForGPU();
729: PetscCall(PetscLogGpuFlops(1.0 * n));
730: } else if (gamma == 0.0) {
731: *zgpu = beta * *ygpu;
732: ViennaCLWaitForGPU();
733: PetscCall(PetscLogGpuFlops(1.0 * n));
734: } else {
735: *zgpu = beta * *ygpu + gamma * *zgpu;
736: ViennaCLWaitForGPU();
737: PetscCall(PetscLogGpuFlops(3.0 * n));
738: }
739: } catch (std::exception const &ex) {
740: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
741: }
742: PetscCall(PetscLogGpuTimeEnd());
743: PetscCall(PetscLogGpuFlops(3.0 * n));
744: } else if (beta == 0.0 && xin->map->n > 0) {
745: PetscCall(PetscLogGpuTimeBegin());
746: try {
747: if (gamma == 0.0) {
748: *zgpu = alpha * *xgpu;
749: ViennaCLWaitForGPU();
750: PetscCall(PetscLogGpuFlops(1.0 * n));
751: } else {
752: *zgpu = alpha * *xgpu + gamma * *zgpu;
753: ViennaCLWaitForGPU();
754: PetscCall(PetscLogGpuFlops(3.0 * n));
755: }
756: } catch (std::exception const &ex) {
757: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
758: }
759: PetscCall(PetscLogGpuTimeEnd());
760: } else if (gamma == 0.0 && xin->map->n > 0) {
761: PetscCall(PetscLogGpuTimeBegin());
762: try {
763: *zgpu = alpha * *xgpu + beta * *ygpu;
764: ViennaCLWaitForGPU();
765: } catch (std::exception const &ex) {
766: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
767: }
768: PetscCall(PetscLogGpuTimeEnd());
769: PetscCall(PetscLogGpuFlops(3.0 * n));
770: } else if (xin->map->n > 0) {
771: PetscCall(PetscLogGpuTimeBegin());
772: try {
773: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
774: if (gamma != 1.0) *zgpu *= gamma;
775: *zgpu += alpha * *xgpu + beta * *ygpu;
776: ViennaCLWaitForGPU();
777: } catch (std::exception const &ex) {
778: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
779: }
780: PetscCall(PetscLogGpuTimeEnd());
781: PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
782: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
783: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
784: PetscCall(PetscLogGpuFlops(5.0 * n));
785: }
786: PetscFunctionReturn(PETSC_SUCCESS);
787: }
789: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
790: {
791: PetscInt n = win->map->n;
792: const ViennaCLVector *xgpu, *ygpu;
793: ViennaCLVector *wgpu;
795: PetscFunctionBegin;
796: if (xin->map->n > 0) {
797: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
798: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
799: PetscCall(VecViennaCLGetArray(win, &wgpu));
800: PetscCall(PetscLogGpuTimeBegin());
801: try {
802: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
803: ViennaCLWaitForGPU();
804: } catch (std::exception const &ex) {
805: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
806: }
807: PetscCall(PetscLogGpuTimeEnd());
808: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
809: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
810: PetscCall(VecViennaCLRestoreArray(win, &wgpu));
811: PetscCall(PetscLogGpuFlops(n));
812: }
813: PetscFunctionReturn(PETSC_SUCCESS);
814: }
816: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
817: {
818: PetscInt n = xin->map->n;
819: PetscBLASInt bn;
820: const ViennaCLVector *xgpu;
822: PetscFunctionBegin;
823: if (xin->map->n > 0) {
824: PetscCall(PetscBLASIntCast(n, &bn));
825: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
826: if (type == NORM_2 || type == NORM_FROBENIUS) {
827: PetscCall(PetscLogGpuTimeBegin());
828: try {
829: *z = viennacl::linalg::norm_2(*xgpu);
830: ViennaCLWaitForGPU();
831: } catch (std::exception const &ex) {
832: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
833: }
834: PetscCall(PetscLogGpuTimeEnd());
835: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
836: } else if (type == NORM_INFINITY) {
837: PetscCall(PetscLogGpuTimeBegin());
838: try {
839: *z = viennacl::linalg::norm_inf(*xgpu);
840: ViennaCLWaitForGPU();
841: } catch (std::exception const &ex) {
842: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
843: }
844: PetscCall(PetscLogGpuTimeEnd());
845: } else if (type == NORM_1) {
846: PetscCall(PetscLogGpuTimeBegin());
847: try {
848: *z = viennacl::linalg::norm_1(*xgpu);
849: ViennaCLWaitForGPU();
850: } catch (std::exception const &ex) {
851: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
852: }
853: PetscCall(PetscLogGpuTimeEnd());
854: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
855: } else if (type == NORM_1_AND_2) {
856: PetscCall(PetscLogGpuTimeBegin());
857: try {
858: *z = viennacl::linalg::norm_1(*xgpu);
859: *(z + 1) = viennacl::linalg::norm_2(*xgpu);
860: ViennaCLWaitForGPU();
861: } catch (std::exception const &ex) {
862: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
863: }
864: PetscCall(PetscLogGpuTimeEnd());
865: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
866: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
867: }
868: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
869: } else if (type == NORM_1_AND_2) {
870: *z = 0.0;
871: *(z + 1) = 0.0;
872: } else *z = 0.0;
873: PetscFunctionReturn(PETSC_SUCCESS);
874: }
876: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
877: {
878: PetscFunctionBegin;
879: PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
880: xin->offloadmask = PETSC_OFFLOAD_CPU;
881: PetscFunctionReturn(PETSC_SUCCESS);
882: }
884: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
885: {
886: PetscFunctionBegin;
887: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
888: PetscCall(VecViennaCLCopyFromGPU(vin));
889: PetscCall(VecResetArray_SeqViennaCL_Private(vin));
890: vin->offloadmask = PETSC_OFFLOAD_CPU;
891: PetscFunctionReturn(PETSC_SUCCESS);
892: }
894: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
895: {
896: PetscFunctionBegin;
897: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
898: PetscCall(VecViennaCLCopyFromGPU(vin));
899: PetscCall(VecPlaceArray_Seq(vin, a));
900: vin->offloadmask = PETSC_OFFLOAD_CPU;
901: PetscFunctionReturn(PETSC_SUCCESS);
902: }
904: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
905: {
906: PetscFunctionBegin;
907: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
908: PetscCall(VecViennaCLCopyFromGPU(vin));
909: PetscCall(VecReplaceArray_Seq(vin, a));
910: vin->offloadmask = PETSC_OFFLOAD_CPU;
911: PetscFunctionReturn(PETSC_SUCCESS);
912: }
914: /*@C
915: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
917: Collective
919: Input Parameters:
920: + comm - the communicator, should be PETSC_COMM_SELF
921: - n - the vector length
923: Output Parameter:
924: . v - the vector
926: Notes:
927: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
928: same type as an existing vector.
930: Level: intermediate
932: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
933: @*/
934: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
935: {
936: PetscFunctionBegin;
937: PetscCall(VecCreate(comm, v));
938: PetscCall(VecSetSizes(*v, n, n));
939: PetscCall(VecSetType(*v, VECSEQVIENNACL));
940: PetscFunctionReturn(PETSC_SUCCESS);
941: }
943: /*@C
944: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
945: where the user provides the array space to store the vector values.
947: Collective
949: Input Parameters:
950: + comm - the communicator, should be PETSC_COMM_SELF
951: . bs - the block size
952: . n - the vector length
953: - array - viennacl array where the vector elements are to be stored.
955: Output Parameter:
956: . V - the vector
958: Notes:
959: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
960: same type as an existing vector.
962: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
963: at a later stage to SET the array for storing the vector values.
965: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
966: The user should not free the array until the vector is destroyed.
968: Level: intermediate
970: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
971: `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
972: `VecCreateMPIWithArray()`
973: @*/
974: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V) PeNS
975: {
976: PetscMPIInt size;
978: PetscFunctionBegin;
979: PetscCall(VecCreate(comm, V));
980: PetscCall(VecSetSizes(*V, n, n));
981: PetscCall(VecSetBlockSize(*V, bs));
982: PetscCallMPI(MPI_Comm_size(comm, &size));
983: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
984: PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
985: PetscFunctionReturn(PETSC_SUCCESS);
986: }
988: /*@C
989: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
990: the user provides the array space to store the vector values.
992: Collective
994: Input Parameters:
995: + comm - the communicator, should be PETSC_COMM_SELF
996: . bs - the block size
997: . n - the vector length
998: . cpuarray - CPU memory where the vector elements are to be stored.
999: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1001: Output Parameter:
1002: . V - the vector
1004: Notes:
1005: If both cpuarray and viennaclvec are provided, the caller must ensure that
1006: the provided arrays have identical values.
1008: PETSc does NOT free the provided arrays when the vector is destroyed via
1009: VecDestroy(). The user should not free the array until the vector is
1010: destroyed.
1012: Level: intermediate
1014: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1015: `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1016: `VecViennaCLAllocateCheckHost()`
1017: @*/
1018: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V) PeNS
1019: {
1020: PetscMPIInt size;
1022: PetscFunctionBegin;
1023: PetscCallMPI(MPI_Comm_size(comm, &size));
1024: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
1026: // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1027: PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));
1029: if (cpuarray && viennaclvec) {
1030: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1031: s->array = (PetscScalar *)cpuarray;
1032: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1033: } else if (cpuarray) {
1034: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1035: s->array = (PetscScalar *)cpuarray;
1036: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1037: } else if (viennaclvec) {
1038: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1039: } else {
1040: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1041: }
1042: PetscFunctionReturn(PETSC_SUCCESS);
1043: }
1045: /*@C
1046: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1047: the one provided by the user. This is useful to avoid a copy.
1049: Not Collective
1051: Input Parameters:
1052: + vin - the vector
1053: - a - the ViennaCL vector
1055: Notes:
1056: You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1057: It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1058: the same vector.
1060: Level: intermediate
1062: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1063: `VecCUDAPlaceArray()`,
1065: @*/
1066: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1067: {
1068: PetscFunctionBegin;
1069: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1070: PetscCall(VecViennaCLCopyToGPU(vin));
1071: PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1072: ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1073: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1074: vin->offloadmask = PETSC_OFFLOAD_GPU;
1075: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1076: PetscFunctionReturn(PETSC_SUCCESS);
1077: }
1079: /*@C
1080: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1081: after the use of VecViennaCLPlaceArray().
1083: Not Collective
1085: Input Parameters:
1086: . vin - the vector
1088: Level: developer
1090: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1091: @*/
1092: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1093: {
1094: PetscFunctionBegin;
1095: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1096: PetscCall(VecViennaCLCopyToGPU(vin));
1097: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1098: ((Vec_Seq *)vin->data)->unplacedarray = 0;
1099: vin->offloadmask = PETSC_OFFLOAD_GPU;
1100: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1101: PetscFunctionReturn(PETSC_SUCCESS);
1102: }
1104: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1105: *
1106: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1107: */
1108: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1109: {
1110: PetscFunctionBegin;
1111: PetscCall(VecDot_SeqViennaCL(s, t, dp));
1112: PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1113: *nm *= *nm; //squared norm required
1114: PetscFunctionReturn(PETSC_SUCCESS);
1115: }
1117: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1118: {
1119: PetscFunctionBegin;
1120: PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1121: PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1122: PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1123: PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1124: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1125: PetscFunctionReturn(PETSC_SUCCESS);
1126: }
1128: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1129: {
1130: PetscFunctionBegin;
1131: try {
1132: if (v->spptr) {
1133: delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1134: delete (Vec_ViennaCL *)v->spptr;
1135: }
1136: } catch (char *ex) {
1137: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1138: }
1139: PetscCall(VecDestroy_SeqViennaCL_Private(v));
1140: PetscFunctionReturn(PETSC_SUCCESS);
1141: }
1143: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1144: {
1145: PetscFunctionBegin;
1146: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1147: PetscCall(VecViennaCLCopyFromGPU(v));
1148: } else {
1149: PetscCall(VecViennaCLAllocateCheckHost(v));
1150: }
1151: *a = *((PetscScalar **)v->data);
1152: PetscFunctionReturn(PETSC_SUCCESS);
1153: }
1155: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1156: {
1157: PetscFunctionBegin;
1158: v->offloadmask = PETSC_OFFLOAD_CPU;
1159: PetscFunctionReturn(PETSC_SUCCESS);
1160: }
1162: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1163: {
1164: PetscFunctionBegin;
1165: PetscCall(VecViennaCLAllocateCheckHost(v));
1166: *a = *((PetscScalar **)v->data);
1167: PetscFunctionReturn(PETSC_SUCCESS);
1168: }
1170: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1171: {
1172: PetscFunctionBegin;
1173: V->boundtocpu = flg;
1174: if (flg) {
1175: PetscCall(VecViennaCLCopyFromGPU(V));
1176: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1177: V->ops->dot = VecDot_Seq;
1178: V->ops->norm = VecNorm_Seq;
1179: V->ops->tdot = VecTDot_Seq;
1180: V->ops->scale = VecScale_Seq;
1181: V->ops->copy = VecCopy_Seq;
1182: V->ops->set = VecSet_Seq;
1183: V->ops->swap = VecSwap_Seq;
1184: V->ops->axpy = VecAXPY_Seq;
1185: V->ops->axpby = VecAXPBY_Seq;
1186: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1187: V->ops->pointwisemult = VecPointwiseMult_Seq;
1188: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1189: V->ops->setrandom = VecSetRandom_Seq;
1190: V->ops->dot_local = VecDot_Seq;
1191: V->ops->tdot_local = VecTDot_Seq;
1192: V->ops->norm_local = VecNorm_Seq;
1193: V->ops->mdot_local = VecMDot_Seq;
1194: V->ops->mtdot_local = VecMTDot_Seq;
1195: V->ops->maxpy = VecMAXPY_Seq;
1196: V->ops->mdot = VecMDot_Seq;
1197: V->ops->mtdot = VecMTDot_Seq;
1198: V->ops->aypx = VecAYPX_Seq;
1199: V->ops->waxpy = VecWAXPY_Seq;
1200: V->ops->dotnorm2 = NULL;
1201: V->ops->placearray = VecPlaceArray_Seq;
1202: V->ops->replacearray = VecReplaceArray_Seq;
1203: V->ops->resetarray = VecResetArray_Seq;
1204: V->ops->duplicate = VecDuplicate_Seq;
1205: } else {
1206: V->ops->dot = VecDot_SeqViennaCL;
1207: V->ops->norm = VecNorm_SeqViennaCL;
1208: V->ops->tdot = VecTDot_SeqViennaCL;
1209: V->ops->scale = VecScale_SeqViennaCL;
1210: V->ops->copy = VecCopy_SeqViennaCL;
1211: V->ops->set = VecSet_SeqViennaCL;
1212: V->ops->swap = VecSwap_SeqViennaCL;
1213: V->ops->axpy = VecAXPY_SeqViennaCL;
1214: V->ops->axpby = VecAXPBY_SeqViennaCL;
1215: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1216: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1217: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1218: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1219: V->ops->dot_local = VecDot_SeqViennaCL;
1220: V->ops->tdot_local = VecTDot_SeqViennaCL;
1221: V->ops->norm_local = VecNorm_SeqViennaCL;
1222: V->ops->mdot_local = VecMDot_SeqViennaCL;
1223: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1224: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1225: V->ops->mdot = VecMDot_SeqViennaCL;
1226: V->ops->mtdot = VecMTDot_SeqViennaCL;
1227: V->ops->aypx = VecAYPX_SeqViennaCL;
1228: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1229: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1230: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1231: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1232: V->ops->resetarray = VecResetArray_SeqViennaCL;
1233: V->ops->destroy = VecDestroy_SeqViennaCL;
1234: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1235: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1236: V->ops->getarray = VecGetArray_SeqViennaCL;
1237: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1238: }
1239: V->ops->duplicatevecs = VecDuplicateVecs_Default;
1240: PetscFunctionReturn(PETSC_SUCCESS);
1241: }
1243: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1244: {
1245: PetscMPIInt size;
1247: PetscFunctionBegin;
1248: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1249: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1250: PetscCall(VecCreate_Seq_Private(V, 0));
1251: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1253: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1254: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1256: PetscCall(VecViennaCLAllocateCheck(V));
1257: PetscCall(VecSet_SeqViennaCL(V, 0.0));
1258: PetscFunctionReturn(PETSC_SUCCESS);
1259: }
1261: /*@C
1262: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1264: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1265: invoking clReleaseContext().
1267: Input Parameter:
1268: . v - the vector
1270: Output Parameter:
1271: . ctx - pointer to the underlying CL context
1273: Level: intermediate
1275: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1276: @*/
1277: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1278: {
1279: #if !defined(PETSC_HAVE_OPENCL)
1280: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1281: #else
1283: PetscFunctionBegin;
1284: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1285: const ViennaCLVector *v_vcl;
1286: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1287: try {
1288: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1289: const cl_context ocl_ctx = vcl_ctx.handle().get();
1290: clRetainContext(ocl_ctx);
1291: *ctx = (PETSC_UINTPTR_T)ocl_ctx;
1292: } catch (std::exception const &ex) {
1293: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1294: }
1295: PetscFunctionReturn(PETSC_SUCCESS);
1296: #endif
1297: }
1299: /*@C
1300: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1301: operations of the Vec are enqueued.
1303: Caller should cast (*queue) to (const cl_command_queue). Caller is
1304: responsible for invoking clReleaseCommandQueue().
1306: Input Parameter:
1307: . v - the vector
1309: Output Parameter:
1310: . queue - pointer to the CL command queue
1312: Level: intermediate
1314: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1315: @*/
1316: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1317: {
1318: #if !defined(PETSC_HAVE_OPENCL)
1319: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1320: #else
1321: PetscFunctionBegin;
1322: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1323: const ViennaCLVector *v_vcl;
1324: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1325: try {
1326: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1327: const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1328: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1329: clRetainCommandQueue(ocl_queue);
1330: *queue = (PETSC_UINTPTR_T)ocl_queue;
1331: } catch (std::exception const &ex) {
1332: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1333: }
1334: PetscFunctionReturn(PETSC_SUCCESS);
1335: #endif
1336: }
1338: /*@C
1339: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1341: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1342: invoking clReleaseMemObject().
1344: Input Parameter:
1345: . v - the vector
1347: Output Parameter:
1348: . mem - pointer to the device buffer
1350: Level: intermediate
1352: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1353: @*/
1354: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1355: {
1356: #if !defined(PETSC_HAVE_OPENCL)
1357: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1358: #else
1359: PetscFunctionBegin;
1360: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1361: const ViennaCLVector *v_vcl;
1362: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1363: try {
1364: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1365: clRetainMemObject(ocl_mem);
1366: *mem = (PETSC_UINTPTR_T)ocl_mem;
1367: } catch (std::exception const &ex) {
1368: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1369: }
1370: PetscFunctionReturn(PETSC_SUCCESS);
1371: #endif
1372: }
1374: /*@C
1375: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1377: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1378: invoking clReleaseMemObject().
1380: The device pointer has to be released by calling
1381: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1382: the host will be marked as out of date. A subsequent access of the host data
1383: will thus incur a data transfer from the device to the host.
1385: Input Parameter:
1386: . v - the vector
1388: Output Parameter:
1389: . mem - pointer to the device buffer
1391: Level: intermediate
1393: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1394: @*/
1395: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1396: {
1397: #if !defined(PETSC_HAVE_OPENCL)
1398: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1399: #else
1400: PetscFunctionBegin;
1401: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1402: ViennaCLVector *v_vcl;
1403: PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1404: try {
1405: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1406: clRetainMemObject(ocl_mem);
1407: *mem = (PETSC_UINTPTR_T)ocl_mem;
1408: } catch (std::exception const &ex) {
1409: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1410: }
1411: PetscFunctionReturn(PETSC_SUCCESS);
1412: #endif
1413: }
1415: /*@C
1416: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1417: acquired with VecViennaCLGetCLMemWrite().
1419: This marks the host data as out of date. Subsequent access to the
1420: vector data on the host side with for instance VecGetArray() incurs a
1421: data transfer.
1423: Input Parameter:
1424: . v - the vector
1426: Level: intermediate
1428: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1429: @*/
1430: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1431: {
1432: #if !defined(PETSC_HAVE_OPENCL)
1433: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1434: #else
1435: PetscFunctionBegin;
1436: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1437: PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1438: PetscFunctionReturn(PETSC_SUCCESS);
1439: #endif
1440: }
1442: /*@C
1443: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1445: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1446: invoking clReleaseMemObject().
1448: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1449: Upon restoring the vector data the data on the host will be marked as out of
1450: date. A subsequent access of the host data will thus incur a data transfer
1451: from the device to the host.
1453: Input Parameter:
1454: . v - the vector
1456: Output Parameter:
1457: . mem - pointer to the device buffer
1459: Level: intermediate
1461: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1462: @*/
1463: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1464: {
1465: #if !defined(PETSC_HAVE_OPENCL)
1466: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1467: #else
1468: PetscFunctionBegin;
1469: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1470: ViennaCLVector *v_vcl;
1471: PetscCall(VecViennaCLGetArray(v, &v_vcl));
1472: try {
1473: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1474: clRetainMemObject(ocl_mem);
1475: *mem = (PETSC_UINTPTR_T)ocl_mem;
1476: } catch (std::exception const &ex) {
1477: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1478: }
1479: PetscFunctionReturn(PETSC_SUCCESS);
1480: #endif
1481: }
1483: /*@C
1484: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1485: acquired with VecViennaCLGetCLMem().
1487: This marks the host data as out of date. Subsequent access to the vector
1488: data on the host side with for instance VecGetArray() incurs a data
1489: transfer.
1491: Input Parameter:
1492: . v - the vector
1494: Level: intermediate
1496: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1497: @*/
1498: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1499: {
1500: #if !defined(PETSC_HAVE_OPENCL)
1501: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1502: #else
1503: PetscFunctionBegin;
1504: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1505: PetscCall(VecViennaCLRestoreArray(v, NULL));
1506: PetscFunctionReturn(PETSC_SUCCESS);
1507: #endif
1508: }
1510: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1511: {
1512: Vec_ViennaCL *vecviennacl;
1513: PetscMPIInt size;
1515: PetscFunctionBegin;
1516: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1517: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1518: PetscCall(VecCreate_Seq_Private(V, 0));
1519: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1520: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1521: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1523: if (array) {
1524: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1525: vecviennacl = (Vec_ViennaCL *)V->spptr;
1526: vecviennacl->GPUarray_allocated = 0;
1527: vecviennacl->GPUarray = (ViennaCLVector *)array;
1528: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1529: }
1530: PetscFunctionReturn(PETSC_SUCCESS);
1531: }