Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
22: *a = 0;
23: VecViennaCLCopyToGPU(v);
24: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
25: ViennaCLWaitForGPU();
26: return 0;
27: }
29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
30: {
32: v->offloadmask = PETSC_OFFLOAD_GPU;
34: PetscObjectStateIncrease((PetscObject)v);
35: return 0;
36: }
38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
39: {
41: *a = 0;
42: VecViennaCLCopyToGPU(v);
43: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
44: ViennaCLWaitForGPU();
45: return 0;
46: }
48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
49: {
51: return 0;
52: }
54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
55: {
57: *a = 0;
58: VecViennaCLAllocateCheck(v);
59: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
60: ViennaCLWaitForGPU();
61: return 0;
62: }
64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
65: {
67: v->offloadmask = PETSC_OFFLOAD_GPU;
69: PetscObjectStateIncrease((PetscObject)v);
70: return 0;
71: }
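/* Usage sketch for the access routines above (hypothetical helper): run an
   arbitrary ViennaCL expression on the device data of a Vec through the
   Get/Restore pair. Error handling mirrors the rest of this file. */
static PetscErrorCode MyScaleOnDevice(Vec v, PetscScalar alpha)
{
  ViennaCLVector *vgpu;
  VecViennaCLGetArray(v, &vgpu); /* brings the device copy up to date */
  try {
    *vgpu *= alpha; /* any ViennaCL vector expression can go here */
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }
  VecViennaCLRestoreArray(v, &vgpu); /* marks the device copy as the valid one */
  return 0;
}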
73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
74: {
75: char string[20];
76: PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
78: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
79: PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg);
80: if (flg) {
81: try {
82: PetscStrcasecmp(string, "cuda", &flg_cuda);
83: PetscStrcasecmp(string, "opencl", &flg_opencl);
84: PetscStrcasecmp(string, "openmp", &flg_openmp);
86: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
87: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
88: #if defined(PETSC_HAVE_CUDA)
89: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
90: #endif
91: #if defined(PETSC_HAVE_OPENCL)
92: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
93: #endif
94: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
95: } catch (std::exception const &ex) {
96: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
97: }
98: }
100: if (PetscDefined(HAVE_CUDA)) {
101: /* For CUDA event timers */
102: PetscDeviceInitialize(PETSC_DEVICE_CUDA);
103: }
105: #if defined(PETSC_HAVE_OPENCL)
106: /* ViennaCL OpenCL device type configuration */
107: PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg);
108: if (flg) {
109: try {
110: PetscStrcasecmp(string, "cpu", &flg);
111: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
113: PetscStrcasecmp(string, "gpu", &flg);
114: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
116: PetscStrcasecmp(string, "accelerator", &flg);
117: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
118: } catch (std::exception const &ex) {
119: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
120: }
121: }
122: #endif
124: /* Print available backends */
125: PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg);
126: if (flg) {
127: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
128: #if defined(PETSC_HAVE_CUDA)
129: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
130: #endif
131: #if defined(PETSC_HAVE_OPENCL)
132: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
133: #endif
134: #if defined(PETSC_HAVE_OPENMP)
135: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
136: #else
137: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
138: #endif
139: PetscPrintf(PETSC_COMM_WORLD, "\n");
141: /* Print selected backends */
142: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
143: #if defined(PETSC_HAVE_CUDA)
144: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
145: #endif
146: #if defined(PETSC_HAVE_OPENCL)
147: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
148: #endif
149: #if defined(PETSC_HAVE_OPENMP)
150: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
151: #else
152: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
153: #endif
154: PetscPrintf(PETSC_COMM_WORLD, "\n");
155: }
156: return 0;
157: }
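/* Usage sketch: the options handled above are given on the command line of a
   (hypothetical) application binary, e.g.

     ./app -vec_type seqviennacl -viennacl_backend opencl \
           -viennacl_opencl_device_type gpu -viennacl_view

   which selects the OpenCL backend on a GPU device and prints the available
   and selected backends at startup. */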
159: /*
160: Allocates space for the vector array on the Host if it does not exist.
161: Does NOT change the PetscViennaCLFlag for the vector
162: Does NOT zero the ViennaCL array
163: */
164: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
165: {
166: PetscScalar *array;
167: Vec_Seq *s;
168: PetscInt n = v->map->n;
170: s = (Vec_Seq *)v->data;
171: VecViennaCLAllocateCheck(v);
172: if (s->array == 0) {
173: PetscMalloc1(n, &array);
174: s->array = array;
175: s->array_allocated = array;
176: }
177: return 0;
178: }
180: /*
181: Allocates space for the vector array on the GPU if it does not exist.
182: Does NOT change the PetscViennaCLFlag for the vector
183: Does NOT zero the ViennaCL array
185: */
186: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
187: {
188: if (!v->spptr) {
189: try {
190: v->spptr = new Vec_ViennaCL;
191: ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
192: ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
194: } catch (std::exception const &ex) {
195: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
196: }
197: }
198: return 0;
199: }
201: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
202: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
203: {
205: VecViennaCLAllocateCheck(v);
206: if (v->map->n > 0) {
207: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
208: PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
209: try {
210: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
211: viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
212: ViennaCLWaitForGPU();
213: } catch (std::exception const &ex) {
214: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
215: }
216: PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar));
217: PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
218: v->offloadmask = PETSC_OFFLOAD_BOTH;
219: }
220: }
221: return 0;
222: }
224: /*
225: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
226: */
227: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
228: {
230: VecViennaCLAllocateCheckHost(v);
231: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
232: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
233: try {
234: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
235: viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
236: ViennaCLWaitForGPU();
237: } catch (std::exception const &ex) {
238: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
239: }
240: PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar));
241: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
242: v->offloadmask = PETSC_OFFLOAD_BOTH;
243: }
244: return 0;
245: }
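/* Sketch of the lazy mirroring implemented by the two copy routines above:
   host access leaves the vector in PETSC_OFFLOAD_CPU, the next device
   operation calls VecViennaCLCopyToGPU() and the state becomes
   PETSC_OFFLOAD_BOTH; the reverse path goes through VecViennaCLCopyFromGPU().
   The helper below is illustrative only and assumes v has at least one local
   entry. */
static PetscErrorCode MyTouchHostThenDevice(Vec v)
{
  PetscScalar *a;
  VecGetArray(v, &a); /* host access */
  a[0] = 1.0;
  VecRestoreArray(v, &a); /* offloadmask becomes PETSC_OFFLOAD_CPU */
  VecScale(v, 2.0);       /* device kernel: triggers the CPU-to-GPU copy first */
  return 0;
}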
247: /* Copy on CPU */
248: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
249: {
250: PetscScalar *ya;
251: const PetscScalar *xa;
253: VecViennaCLAllocateCheckHost(xin);
254: VecViennaCLAllocateCheckHost(yin);
255: if (xin != yin) {
256: VecGetArrayRead(xin, &xa);
257: VecGetArray(yin, &ya);
258: PetscArraycpy(ya, xa, xin->map->n);
259: VecRestoreArrayRead(xin, &xa);
260: VecRestoreArray(yin, &ya);
261: }
262: return 0;
263: }
265: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
266: {
267: PetscInt n = xin->map->n, i;
268: PetscScalar *xx;
270: VecGetArray(xin, &xx);
271: for (i = 0; i < n; i++) PetscRandomGetValue(r, &xx[i]);
272: VecRestoreArray(xin, &xx);
273: return 0;
274: }
276: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
277: {
278: Vec_Seq *vs = (Vec_Seq *)v->data;
280: PetscObjectSAWsViewOff(v);
281: #if defined(PETSC_USE_LOG)
282: PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
283: #endif
284: if (vs->array_allocated) PetscFree(vs->array_allocated);
285: PetscFree(vs);
286: return 0;
287: }
289: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
290: {
291: Vec_Seq *v = (Vec_Seq *)vin->data;
293: v->array = v->unplacedarray;
294: v->unplacedarray = 0;
295: return 0;
296: }
298: /*MC
299: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
301: Options Database Keys:
302: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
304: Level: beginner
306: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
307: M*/
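/* Usage sketch (hypothetical helper): the standard VecCreate() path selects
   this type either explicitly or via -vec_type seqviennacl together with
   VecSetFromOptions(). */
static PetscErrorCode MyCreateViennaCLVec(PetscInt n, Vec *v)
{
  VecCreate(PETSC_COMM_SELF, v);
  VecSetSizes(*v, n, n);
  VecSetType(*v, VECSEQVIENNACL); /* or VecSetFromOptions(*v) with -vec_type seqviennacl */
  return 0;
}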
309: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
310: {
311: const ViennaCLVector *xgpu;
312: ViennaCLVector *ygpu;
314: VecViennaCLGetArrayRead(xin, &xgpu);
315: VecViennaCLGetArray(yin, &ygpu);
316: PetscLogGpuTimeBegin();
317: try {
318: if (alpha != 0.0 && xin->map->n > 0) {
319: *ygpu = *xgpu + alpha * *ygpu;
320: PetscLogGpuFlops(2.0 * yin->map->n);
321: } else {
322: *ygpu = *xgpu;
323: }
324: ViennaCLWaitForGPU();
325: } catch (std::exception const &ex) {
326: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
327: }
328: PetscLogGpuTimeEnd();
329: VecViennaCLRestoreArrayRead(xin, &xgpu);
330: VecViennaCLRestoreArray(yin, &ygpu);
331: return 0;
332: }
334: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
335: {
336: const ViennaCLVector *xgpu;
337: ViennaCLVector *ygpu;
339: if (alpha != 0.0 && xin->map->n > 0) {
340: VecViennaCLGetArrayRead(xin, &xgpu);
341: VecViennaCLGetArray(yin, &ygpu);
342: PetscLogGpuTimeBegin();
343: try {
344: *ygpu += alpha * *xgpu;
345: ViennaCLWaitForGPU();
346: } catch (std::exception const &ex) {
347: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
348: }
349: PetscLogGpuTimeEnd();
350: VecViennaCLRestoreArrayRead(xin, &xgpu);
351: VecViennaCLRestoreArray(yin, &ygpu);
352: PetscLogGpuFlops(2.0 * yin->map->n);
353: }
354: return 0;
355: }
357: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
358: {
359: const ViennaCLVector *xgpu, *ygpu;
360: ViennaCLVector *wgpu;
362: if (xin->map->n > 0) {
363: VecViennaCLGetArrayRead(xin, &xgpu);
364: VecViennaCLGetArrayRead(yin, &ygpu);
365: VecViennaCLGetArrayWrite(win, &wgpu);
366: PetscLogGpuTimeBegin();
367: try {
368: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
369: ViennaCLWaitForGPU();
370: } catch (std::exception const &ex) {
371: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
372: }
373: PetscLogGpuTimeEnd();
374: PetscLogGpuFlops(win->map->n);
375: VecViennaCLRestoreArrayRead(xin, &xgpu);
376: VecViennaCLRestoreArrayRead(yin, &ygpu);
377: VecViennaCLRestoreArrayWrite(win, &wgpu);
378: }
379: return 0;
380: }
382: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
383: {
384: const ViennaCLVector *xgpu, *ygpu;
385: ViennaCLVector *wgpu;
387: if (alpha == 0.0 && xin->map->n > 0) {
388: VecCopy_SeqViennaCL(yin, win);
389: } else {
390: VecViennaCLGetArrayRead(xin, &xgpu);
391: VecViennaCLGetArrayRead(yin, &ygpu);
392: VecViennaCLGetArrayWrite(win, &wgpu);
393: PetscLogGpuTimeBegin();
394: if (alpha == 1.0) {
395: try {
396: *wgpu = *ygpu + *xgpu;
397: } catch (std::exception const &ex) {
398: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
399: }
400: PetscLogGpuFlops(win->map->n);
401: } else if (alpha == -1.0) {
402: try {
403: *wgpu = *ygpu - *xgpu;
404: } catch (std::exception const &ex) {
405: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
406: }
407: PetscLogGpuFlops(win->map->n);
408: } else {
409: try {
410: *wgpu = *ygpu + alpha * *xgpu;
411: } catch (std::exception const &ex) {
412: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
413: }
414: PetscLogGpuFlops(2 * win->map->n);
415: }
416: ViennaCLWaitForGPU();
417: PetscLogGpuTimeEnd();
418: VecViennaCLRestoreArrayRead(xin, &xgpu);
419: VecViennaCLRestoreArrayRead(yin, &ygpu);
420: VecViennaCLRestoreArrayWrite(win, &wgpu);
421: }
422: return 0;
423: }
425: /*
426: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
427: *
428: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
429: * hence these operations are applied repeatedly until the final result is obtained.
430: */
431: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
432: {
433: PetscInt j;
435: for (j = 0; j < nv; ++j) {
436: if (j + 1 < nv) {
437: VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]);
438: ++j;
439: } else {
440: VecAXPY_SeqViennaCL(xin, alpha[j], y[j]);
441: }
442: }
443: ViennaCLWaitForGPU();
444: return 0;
445: }
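/* Worked example of the pairing above for nv = 3: the loop issues

     VecAXPBYPCZ_SeqViennaCL(xin, alpha[0], alpha[1], 1.0, y[0], y[1]);
     VecAXPY_SeqViennaCL(xin, alpha[2], y[2]);

   i.e. terms are consumed in pairs and a single AXPY handles an odd
   remainder. */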
447: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
448: {
449: const ViennaCLVector *xgpu, *ygpu;
451: if (xin->map->n > 0) {
452: VecViennaCLGetArrayRead(xin, &xgpu);
453: VecViennaCLGetArrayRead(yin, &ygpu);
454: PetscLogGpuTimeBegin();
455: try {
456: *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
457: } catch (std::exception const &ex) {
458: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
459: }
460: ViennaCLWaitForGPU();
461: PetscLogGpuTimeEnd();
462: if (xin->map->n > 0) PetscLogGpuFlops(2.0 * xin->map->n - 1);
463: VecViennaCLRestoreArrayRead(xin, &xgpu);
464: VecViennaCLRestoreArrayRead(yin, &ygpu);
465: } else *z = 0.0;
466: return 0;
467: }
469: /*
470: * Operation z[j] = dot(x, y[j])
471: *
472: * The y vectors are gathered into a viennacl::vector_tuple and a single fused inner_prod() call computes all z[j] at once, avoiding repeated passes over x.
473: */
474: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
475: {
476: PetscInt n = xin->map->n, i;
477: const ViennaCLVector *xgpu, *ygpu;
478: Vec *yyin = (Vec *)yin;
479: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
481: if (xin->map->n > 0) {
482: VecViennaCLGetArrayRead(xin, &xgpu);
483: for (i = 0; i < nv; i++) {
484: VecViennaCLGetArrayRead(yyin[i], &ygpu);
485: ygpu_array[i] = ygpu;
486: }
487: PetscLogGpuTimeBegin();
488: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
489: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
490: viennacl::copy(result.begin(), result.end(), z);
491: for (i = 0; i < nv; i++) VecViennaCLRestoreArrayRead(yyin[i], &ygpu);
492: ViennaCLWaitForGPU();
493: PetscLogGpuTimeEnd();
494: VecViennaCLRestoreArrayRead(xin, &xgpu);
495: PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0));
496: } else {
497: for (i = 0; i < nv; i++) z[i] = 0.0;
498: }
499: return 0;
500: }
502: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
503: {
504: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
505: VecMDot_SeqViennaCL(xin, nv, yin, z);
506: ViennaCLWaitForGPU();
507: return 0;
508: }
510: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
511: {
512: ViennaCLVector *xgpu;
514: if (xin->map->n > 0) {
515: VecViennaCLGetArrayWrite(xin, &xgpu);
516: PetscLogGpuTimeBegin();
517: try {
518: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
519: ViennaCLWaitForGPU();
520: } catch (std::exception const &ex) {
521: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
522: }
523: PetscLogGpuTimeEnd();
524: VecViennaCLRestoreArrayWrite(xin, &xgpu);
525: }
526: return 0;
527: }
529: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
530: {
531: ViennaCLVector *xgpu;
533: if (alpha == 0.0 && xin->map->n > 0) {
534: VecSet_SeqViennaCL(xin, alpha);
535: PetscLogGpuFlops(xin->map->n);
536: } else if (alpha != 1.0 && xin->map->n > 0) {
537: VecViennaCLGetArray(xin, &xgpu);
538: PetscLogGpuTimeBegin();
539: try {
540: *xgpu *= alpha;
541: ViennaCLWaitForGPU();
542: } catch (std::exception const &ex) {
543: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
544: }
545: PetscLogGpuTimeEnd();
546: VecViennaCLRestoreArray(xin, &xgpu);
547: PetscLogGpuFlops(xin->map->n);
548: }
549: return 0;
550: }
552: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
553: {
554: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
555: VecDot_SeqViennaCL(xin, yin, z);
556: ViennaCLWaitForGPU();
557: return 0;
558: }
560: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
561: {
562: const ViennaCLVector *xgpu;
563: ViennaCLVector *ygpu;
565: if (xin != yin && xin->map->n > 0) {
566: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
567: VecViennaCLGetArrayRead(xin, &xgpu);
568: VecViennaCLGetArrayWrite(yin, &ygpu);
569: PetscLogGpuTimeBegin();
570: try {
571: *ygpu = *xgpu;
572: ViennaCLWaitForGPU();
573: } catch (std::exception const &ex) {
574: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
575: }
576: PetscLogGpuTimeEnd();
577: VecViennaCLRestoreArrayRead(xin, &xgpu);
578: VecViennaCLRestoreArrayWrite(yin, &ygpu);
580: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
581: /* copy on the CPU if we are on the CPU */
582: VecCopy_SeqViennaCL_Private(xin, yin);
583: ViennaCLWaitForGPU();
584: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
585: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
586: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
587: /* copy in CPU */
588: VecCopy_SeqViennaCL_Private(xin, yin);
589: ViennaCLWaitForGPU();
590: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
591: /* copy in GPU */
592: VecViennaCLGetArrayRead(xin, &xgpu);
593: VecViennaCLGetArrayWrite(yin, &ygpu);
594: PetscLogGpuTimeBegin();
595: try {
596: *ygpu = *xgpu;
597: ViennaCLWaitForGPU();
598: } catch (std::exception const &ex) {
599: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
600: }
601: PetscLogGpuTimeEnd();
602: VecViennaCLRestoreArrayRead(xin, &xgpu);
603: VecViennaCLRestoreArrayWrite(yin, &ygpu);
604: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
605: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
606: default to copying on the GPU (this is an arbitrary choice) */
607: VecViennaCLGetArrayRead(xin, &xgpu);
608: VecViennaCLGetArrayWrite(yin, &ygpu);
609: PetscLogGpuTimeBegin();
610: try {
611: *ygpu = *xgpu;
612: ViennaCLWaitForGPU();
613: } catch (std::exception const &ex) {
614: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
615: }
616: PetscLogGpuTimeEnd();
617: VecViennaCLRestoreArrayRead(xin, &xgpu);
618: VecViennaCLRestoreArrayWrite(yin, &ygpu);
619: } else {
620: VecCopy_SeqViennaCL_Private(xin, yin);
621: ViennaCLWaitForGPU();
622: }
623: }
624: }
625: return 0;
626: }
628: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
629: {
630: ViennaCLVector *xgpu, *ygpu;
632: if (xin != yin && xin->map->n > 0) {
633: VecViennaCLGetArray(xin, &xgpu);
634: VecViennaCLGetArray(yin, &ygpu);
635: PetscLogGpuTimeBegin();
636: try {
637: viennacl::swap(*xgpu, *ygpu);
638: ViennaCLWaitForGPU();
639: } catch (std::exception const &ex) {
640: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
641: }
642: PetscLogGpuTimeEnd();
643: VecViennaCLRestoreArray(xin, &xgpu);
644: VecViennaCLRestoreArray(yin, &ygpu);
645: }
646: return 0;
647: }
649: // y = alpha * x + beta * y
650: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
651: {
652: PetscScalar a = alpha, b = beta;
653: const ViennaCLVector *xgpu;
654: ViennaCLVector *ygpu;
656: if (a == 0.0 && xin->map->n > 0) {
657: VecScale_SeqViennaCL(yin, beta);
658: } else if (b == 1.0 && xin->map->n > 0) {
659: VecAXPY_SeqViennaCL(yin, alpha, xin);
660: } else if (a == 1.0 && xin->map->n > 0) {
661: VecAYPX_SeqViennaCL(yin, beta, xin);
662: } else if (b == 0.0 && xin->map->n > 0) {
663: VecViennaCLGetArrayRead(xin, &xgpu);
664: VecViennaCLGetArray(yin, &ygpu);
665: PetscLogGpuTimeBegin();
666: try {
667: *ygpu = *xgpu * alpha;
668: ViennaCLWaitForGPU();
669: } catch (std::exception const &ex) {
670: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
671: }
672: PetscLogGpuTimeEnd();
673: PetscLogGpuFlops(xin->map->n);
674: VecViennaCLRestoreArrayRead(xin, &xgpu);
675: VecViennaCLRestoreArray(yin, &ygpu);
676: } else if (xin->map->n > 0) {
677: VecViennaCLGetArrayRead(xin, &xgpu);
678: VecViennaCLGetArray(yin, &ygpu);
679: PetscLogGpuTimeBegin();
680: try {
681: *ygpu = *xgpu * alpha + *ygpu * beta;
682: ViennaCLWaitForGPU();
683: } catch (std::exception const &ex) {
684: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
685: }
686: PetscLogGpuTimeEnd();
687: VecViennaCLRestoreArrayRead(xin, &xgpu);
688: VecViennaCLRestoreArray(yin, &ygpu);
689: PetscLogGpuFlops(3.0 * xin->map->n);
690: }
691: return 0;
692: }
694: /* operation z = alpha * x + beta * y + gamma * z */
695: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
696: {
697: PetscInt n = zin->map->n;
698: const ViennaCLVector *xgpu, *ygpu;
699: ViennaCLVector *zgpu;
701: VecViennaCLGetArrayRead(xin, &xgpu);
702: VecViennaCLGetArrayRead(yin, &ygpu);
703: VecViennaCLGetArray(zin, &zgpu);
704: if (alpha == 0.0 && xin->map->n > 0) {
705: PetscLogGpuTimeBegin();
706: try {
707: if (beta == 0.0) {
708: *zgpu = gamma * *zgpu;
709: ViennaCLWaitForGPU();
710: PetscLogGpuFlops(1.0 * n);
711: } else if (gamma == 0.0) {
712: *zgpu = beta * *ygpu;
713: ViennaCLWaitForGPU();
714: PetscLogGpuFlops(1.0 * n);
715: } else {
716: *zgpu = beta * *ygpu + gamma * *zgpu;
717: ViennaCLWaitForGPU();
718: PetscLogGpuFlops(3.0 * n);
719: }
720: } catch (std::exception const &ex) {
721: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
722: }
723: PetscLogGpuTimeEnd();
725: } else if (beta == 0.0 && xin->map->n > 0) {
726: PetscLogGpuTimeBegin();
727: try {
728: if (gamma == 0.0) {
729: *zgpu = alpha * *xgpu;
730: ViennaCLWaitForGPU();
731: PetscLogGpuFlops(1.0 * n);
732: } else {
733: *zgpu = alpha * *xgpu + gamma * *zgpu;
734: ViennaCLWaitForGPU();
735: PetscLogGpuFlops(3.0 * n);
736: }
737: } catch (std::exception const &ex) {
738: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
739: }
740: PetscLogGpuTimeEnd();
741: } else if (gamma == 0.0 && xin->map->n > 0) {
742: PetscLogGpuTimeBegin();
743: try {
744: *zgpu = alpha * *xgpu + beta * *ygpu;
745: ViennaCLWaitForGPU();
746: } catch (std::exception const &ex) {
747: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
748: }
749: PetscLogGpuTimeEnd();
750: PetscLogGpuFlops(3.0 * n);
751: } else if (xin->map->n > 0) {
752: PetscLogGpuTimeBegin();
753: try {
754: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
755: if (gamma != 1.0) *zgpu *= gamma;
756: *zgpu += alpha * *xgpu + beta * *ygpu;
757: ViennaCLWaitForGPU();
758: } catch (std::exception const &ex) {
759: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
760: }
761: PetscLogGpuTimeEnd();
762: PetscLogGpuFlops(5.0 * n);
763: }
764: VecViennaCLRestoreArray(zin, &zgpu);
765: VecViennaCLRestoreArrayRead(xin, &xgpu);
766: VecViennaCLRestoreArrayRead(yin, &ygpu);
767: return 0;
768: }
770: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
771: {
772: PetscInt n = win->map->n;
773: const ViennaCLVector *xgpu, *ygpu;
774: ViennaCLVector *wgpu;
776: if (xin->map->n > 0) {
777: VecViennaCLGetArrayRead(xin, &xgpu);
778: VecViennaCLGetArrayRead(yin, &ygpu);
779: VecViennaCLGetArray(win, &wgpu);
780: PetscLogGpuTimeBegin();
781: try {
782: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
783: ViennaCLWaitForGPU();
784: } catch (std::exception const &ex) {
785: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
786: }
787: PetscLogGpuTimeEnd();
788: VecViennaCLRestoreArrayRead(xin, &xgpu);
789: VecViennaCLRestoreArrayRead(yin, &ygpu);
790: VecViennaCLRestoreArray(win, &wgpu);
791: PetscLogGpuFlops(n);
792: }
793: return 0;
794: }
796: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
797: {
798: PetscInt n = xin->map->n;
799: PetscBLASInt bn;
800: const ViennaCLVector *xgpu;
802: if (xin->map->n > 0) {
803: PetscBLASIntCast(n, &bn);
804: VecViennaCLGetArrayRead(xin, &xgpu);
805: if (type == NORM_2 || type == NORM_FROBENIUS) {
806: PetscLogGpuTimeBegin();
807: try {
808: *z = viennacl::linalg::norm_2(*xgpu);
809: ViennaCLWaitForGPU();
810: } catch (std::exception const &ex) {
811: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
812: }
813: PetscLogGpuTimeEnd();
814: PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
815: } else if (type == NORM_INFINITY) {
816: PetscLogGpuTimeBegin();
817: try {
818: *z = viennacl::linalg::norm_inf(*xgpu);
819: ViennaCLWaitForGPU();
820: } catch (std::exception const &ex) {
821: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
822: }
823: PetscLogGpuTimeEnd();
824: } else if (type == NORM_1) {
825: PetscLogGpuTimeBegin();
826: try {
827: *z = viennacl::linalg::norm_1(*xgpu);
828: ViennaCLWaitForGPU();
829: } catch (std::exception const &ex) {
830: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
831: }
832: PetscLogGpuTimeEnd();
833: PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
834: } else if (type == NORM_1_AND_2) {
835: PetscLogGpuTimeBegin();
836: try {
837: *z = viennacl::linalg::norm_1(*xgpu);
838: *(z + 1) = viennacl::linalg::norm_2(*xgpu);
839: ViennaCLWaitForGPU();
840: } catch (std::exception const &ex) {
841: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
842: }
843: PetscLogGpuTimeEnd();
844: PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
845: PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
846: }
847: VecViennaCLRestoreArrayRead(xin, &xgpu);
848: } else if (type == NORM_1_AND_2) {
849: *z = 0.0;
850: *(z + 1) = 0.0;
851: } else *z = 0.0;
852: return 0;
853: }
855: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
856: {
857: VecSetRandom_SeqViennaCL_Private(xin, r);
858: xin->offloadmask = PETSC_OFFLOAD_CPU;
859: return 0;
860: }
862: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
863: {
865: VecViennaCLCopyFromGPU(vin);
866: VecResetArray_SeqViennaCL_Private(vin);
867: vin->offloadmask = PETSC_OFFLOAD_CPU;
868: return 0;
869: }
871: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
872: {
874: VecViennaCLCopyFromGPU(vin);
875: VecPlaceArray_Seq(vin, a);
876: vin->offloadmask = PETSC_OFFLOAD_CPU;
877: return 0;
878: }
880: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
881: {
883: VecViennaCLCopyFromGPU(vin);
884: VecReplaceArray_Seq(vin, a);
885: vin->offloadmask = PETSC_OFFLOAD_CPU;
886: return 0;
887: }
889: /*@C
890: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
892: Collective
894: Input Parameters:
895: + comm - the communicator, should be PETSC_COMM_SELF
896: - n - the vector length
898: Output Parameter:
899: . V - the vector
901: Notes:
902: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
903: same type as an existing vector.
905: Level: intermediate
907: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
908: @*/
909: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
910: {
911: VecCreate(comm, v);
912: VecSetSizes(*v, n, n);
913: VecSetType(*v, VECSEQVIENNACL);
914: return 0;
915: }
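/* Usage sketch for the constructor above (hypothetical helper): */
static PetscErrorCode MyCreateAndFill(void)
{
  Vec x;
  VecCreateSeqViennaCL(PETSC_COMM_SELF, 100, &x);
  VecSet(x, 1.0); /* executed on the device via VecSet_SeqViennaCL() */
  VecDestroy(&x);
  return 0;
}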
917: /*@C
918: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
919: where the user provides the array space to store the vector values.
921: Collective
923: Input Parameters:
924: + comm - the communicator, should be PETSC_COMM_SELF
925: . bs - the block size
926: . n - the vector length
927: - array - viennacl array where the vector elements are to be stored.
929: Output Parameter:
930: . V - the vector
932: Notes:
933: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
934: same type as an existing vector.
936: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
937: at a later stage to SET the array for storing the vector values.
939: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
940: The user should not free the array until the vector is destroyed.
942: Level: intermediate
944: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
945: `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
946: `VecCreateMPIWithArray()`
947: @*/
948: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
949: {
950: PetscMPIInt size;
952: VecCreate(comm, V);
953: VecSetSizes(*V, n, n);
954: VecSetBlockSize(*V, bs);
955: MPI_Comm_size(comm, &size);
957: VecCreate_SeqViennaCL_Private(*V, array);
958: return 0;
959: }
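/* Usage sketch: wrapping an existing device vector without copying it. The
   caller keeps ownership of 'device_data', which must outlive the Vec. */
static PetscErrorCode MyWrapDeviceVector(const ViennaCLVector *device_data, PetscInt n, Vec *x)
{
  VecCreateSeqViennaCLWithArray(PETSC_COMM_SELF, 1, n, device_data, x); /* bs = 1 */
  return 0;
}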
961: /*@C
962: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
963: the user provides the array space to store the vector values.
965: Collective
967: Input Parameters:
968: + comm - the communicator, should be PETSC_COMM_SELF
969: . bs - the block size
970: . n - the vector length
971: . cpuarray - CPU memory where the vector elements are to be stored.
972: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
974: Output Parameter:
975: . V - the vector
977: Notes:
978: If both cpuarray and viennaclvec are provided, the caller must ensure that
979: the provided arrays have identical values.
981: PETSc does NOT free the provided arrays when the vector is destroyed via
982: VecDestroy(). The user should not free the array until the vector is
983: destroyed.
985: Level: intermediate
987: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
988: `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
989: `VecViennaCLAllocateCheckHost()`
990: @*/
991: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
992: {
993: PetscMPIInt size;
996: MPI_Comm_size(comm, &size);
999: // set V's device vector to the given viennaclvec; do not allocate host memory yet.
1000: VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V);
1002: if (cpuarray && viennaclvec) {
1003: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1004: s->array = (PetscScalar *)cpuarray;
1005: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1006: } else if (cpuarray) {
1007: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1008: s->array = (PetscScalar *)cpuarray;
1009: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1010: } else if (viennaclvec) {
1011: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1012: } else {
1013: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1014: }
1016: return 0;
1017: }
1019: /*@C
1020: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1021: the one provided by the user. This is useful to avoid a copy.
1023: Not Collective
1025: Input Parameters:
1026: + vec - the vector
1027: - array - the ViennaCL vector
1029: Notes:
1030: You can return to the original viennacl vector with a call to
1031: VecViennaCLResetArray(). It is not possible to use VecViennaCLPlaceArray()
1032: and VecPlaceArray() at the same time on the same vector.
1034: Level: intermediate
1036: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1037: `VecCUDAPlaceArray()`,
1039: @*/
1040: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1041: {
1043: VecViennaCLCopyToGPU(vin);
1045: ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1046: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1047: vin->offloadmask = PETSC_OFFLOAD_GPU;
1048: PetscObjectStateIncrease((PetscObject)vin);
1049: return 0;
1050: }
1052: /*@C
1053: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1054: after the use of VecViennaCLPlaceArray().
1056: Not Collective
1058: Input Parameters:
1059: . vec - the vector
1061: Level: developer
1063: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1064: @*/
1065: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1066: {
1068: VecViennaCLCopyToGPU(vin);
1069: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1070: ((Vec_Seq *)vin->data)->unplacedarray = 0;
1071: vin->offloadmask = PETSC_OFFLOAD_GPU;
1072: PetscObjectStateIncrease((PetscObject)vin);
1073: return 0;
1074: }
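/* Usage sketch for the place/reset pair above (hypothetical helper):
   temporarily let a Vec operate on a user-supplied ViennaCL vector, then
   return to its own storage. */
static PetscErrorCode MyComputeOnPlacedArray(Vec x, const ViennaCLVector *work)
{
  VecViennaCLPlaceArray(x, work); /* x now uses 'work' as its device storage */
  VecScale(x, 2.0);               /* operates on the placed vector */
  VecViennaCLResetArray(x);       /* back to x's original device storage */
  return 0;
}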
1076: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1077: *
1078: * Simply reuses VecDot() and VecNorm(). A performance improvement through a custom kernel (kernel generator) would be possible.
1079: */
1080: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1081: {
1082: VecDot_SeqViennaCL(s, t, dp);
1083: VecNorm_SeqViennaCL(t, NORM_2, nm);
1084: *nm *= *nm; //squared norm required
1085: return 0;
1086: }
1088: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1089: {
1090: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V);
1091: PetscLayoutReference(win->map, &(*V)->map);
1092: PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
1093: PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
1094: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1095: return 0;
1096: }
1098: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1099: {
1100: try {
1101: if (v->spptr) {
1102: delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1103: delete (Vec_ViennaCL *)v->spptr;
1104: }
1105: } catch (char *ex) {
1106: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1107: }
1108: VecDestroy_SeqViennaCL_Private(v);
1109: return 0;
1110: }
1112: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1113: {
1114: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1115: VecViennaCLCopyFromGPU(v);
1116: } else {
1117: VecViennaCLAllocateCheckHost(v);
1118: }
1119: *a = *((PetscScalar **)v->data);
1120: return 0;
1121: }
1123: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1124: {
1125: v->offloadmask = PETSC_OFFLOAD_CPU;
1126: return 0;
1127: }
1129: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1130: {
1131: VecViennaCLAllocateCheckHost(v);
1132: *a = *((PetscScalar **)v->data);
1133: return 0;
1134: }
1136: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1137: {
1138: V->boundtocpu = flg;
1139: if (flg) {
1140: VecViennaCLCopyFromGPU(V);
1141: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1142: V->ops->dot = VecDot_Seq;
1143: V->ops->norm = VecNorm_Seq;
1144: V->ops->tdot = VecTDot_Seq;
1145: V->ops->scale = VecScale_Seq;
1146: V->ops->copy = VecCopy_Seq;
1147: V->ops->set = VecSet_Seq;
1148: V->ops->swap = VecSwap_Seq;
1149: V->ops->axpy = VecAXPY_Seq;
1150: V->ops->axpby = VecAXPBY_Seq;
1151: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1152: V->ops->pointwisemult = VecPointwiseMult_Seq;
1153: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1154: V->ops->setrandom = VecSetRandom_Seq;
1155: V->ops->dot_local = VecDot_Seq;
1156: V->ops->tdot_local = VecTDot_Seq;
1157: V->ops->norm_local = VecNorm_Seq;
1158: V->ops->mdot_local = VecMDot_Seq;
1159: V->ops->mtdot_local = VecMTDot_Seq;
1160: V->ops->maxpy = VecMAXPY_Seq;
1161: V->ops->mdot = VecMDot_Seq;
1162: V->ops->mtdot = VecMTDot_Seq;
1163: V->ops->aypx = VecAYPX_Seq;
1164: V->ops->waxpy = VecWAXPY_Seq;
1165: V->ops->dotnorm2 = NULL;
1166: V->ops->placearray = VecPlaceArray_Seq;
1167: V->ops->replacearray = VecReplaceArray_Seq;
1168: V->ops->resetarray = VecResetArray_Seq;
1169: V->ops->duplicate = VecDuplicate_Seq;
1170: } else {
1171: V->ops->dot = VecDot_SeqViennaCL;
1172: V->ops->norm = VecNorm_SeqViennaCL;
1173: V->ops->tdot = VecTDot_SeqViennaCL;
1174: V->ops->scale = VecScale_SeqViennaCL;
1175: V->ops->copy = VecCopy_SeqViennaCL;
1176: V->ops->set = VecSet_SeqViennaCL;
1177: V->ops->swap = VecSwap_SeqViennaCL;
1178: V->ops->axpy = VecAXPY_SeqViennaCL;
1179: V->ops->axpby = VecAXPBY_SeqViennaCL;
1180: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1181: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1182: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1183: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1184: V->ops->dot_local = VecDot_SeqViennaCL;
1185: V->ops->tdot_local = VecTDot_SeqViennaCL;
1186: V->ops->norm_local = VecNorm_SeqViennaCL;
1187: V->ops->mdot_local = VecMDot_SeqViennaCL;
1188: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1189: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1190: V->ops->mdot = VecMDot_SeqViennaCL;
1191: V->ops->mtdot = VecMTDot_SeqViennaCL;
1192: V->ops->aypx = VecAYPX_SeqViennaCL;
1193: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1194: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1195: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1196: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1197: V->ops->resetarray = VecResetArray_SeqViennaCL;
1198: V->ops->destroy = VecDestroy_SeqViennaCL;
1199: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1200: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1201: V->ops->getarray = VecGetArray_SeqViennaCL;
1202: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1203: }
1204: return 0;
1205: }
1207: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1208: {
1209: PetscMPIInt size;
1211: MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1213: VecCreate_Seq_Private(V, 0);
1214: PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1216: VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1217: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1219: VecViennaCLAllocateCheck(V);
1220: VecSet_SeqViennaCL(V, 0.0);
1221: return 0;
1222: }
1224: /*@C
1225: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1227: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1228: invoking clReleaseContext().
1230: Input Parameters:
1231: . v - the vector
1233: Output Parameters:
1234: . ctx - pointer to the underlying CL context
1236: Level: intermediate
1238: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1239: @*/
1240: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1241: {
1242: #if !defined(PETSC_HAVE_OPENCL)
1243: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1244: #else
1247: const ViennaCLVector *v_vcl;
1248: VecViennaCLGetArrayRead(v, &v_vcl);
1249: try {
1250: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1251: const cl_context ocl_ctx = vcl_ctx.handle().get();
1252: clRetainContext(ocl_ctx);
1253: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1254: } catch (std::exception const &ex) {
1255: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1256: }
1258: return 0;
1259: #endif
1260: }
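/* Usage sketch (OpenCL builds only, hypothetical helper): retrieving the raw
   cl_context behind a Vec; the caller receives a retained handle and must
   release it. */
static PetscErrorCode MyGetContext(Vec v)
{
#if defined(PETSC_HAVE_OPENCL)
  PETSC_UINTPTR_T handle;
  VecViennaCLGetCLContext(v, &handle);
  cl_context ctx = (cl_context)handle;
  /* ... use ctx with custom OpenCL code ... */
  clReleaseContext(ctx);
#endif
  return 0;
}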
1262: /*@C
1263: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1264: operations of the Vec are enqueued.
1266: Caller should cast (*queue) to (const cl_command_queue). Caller is
1267: responsible for invoking clReleaseCommandQueue().
1269: Input Parameters:
1270: . v - the vector
1272: Output Parameters:
1273: . queue - pointer to the CL command queue
1275: Level: intermediate
1277: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1278: @*/
1279: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1280: {
1281: #if !defined(PETSC_HAVE_OPENCL)
1282: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1283: #else
1285: const ViennaCLVector *v_vcl;
1286: VecViennaCLGetArrayRead(v, &v_vcl);
1287: try {
1288: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1289: const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1290: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1291: clRetainCommandQueue(ocl_queue);
1292: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1293: } catch (std::exception const &ex) {
1294: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1295: }
1297: return 0;
1298: #endif
1299: }
1301: /*@C
1302: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1304: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1305: invoking clReleaseMemObject().
1307: Input Parameters:
1308: . v - the vector
1310: Output Parameters:
1311: . mem - pointer to the device buffer
1313: Level: intermediate
1315: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1316: @*/
1317: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1318: {
1319: #if !defined(PETSC_HAVE_OPENCL)
1320: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1321: #else
1323: const ViennaCLVector *v_vcl;
1324: VecViennaCLGetArrayRead(v, &v_vcl);
1325: try {
1326: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1327: clRetainMemObject(ocl_mem);
1328: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1329: } catch (std::exception const &ex) {
1330: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1331: }
1332: return 0;
1333: #endif
1334: }
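/* Usage sketch (OpenCL builds only): binding a Vec's device buffer as a
   read-only argument of a hand-written kernel; 'user_kernel' and the helper
   name are hypothetical. */
#if defined(PETSC_HAVE_OPENCL)
static PetscErrorCode MyBindReadBuffer(Vec v, cl_kernel user_kernel)
{
  PETSC_UINTPTR_T handle;
  VecViennaCLGetCLMemRead(v, &handle);
  cl_mem buf = (cl_mem)handle;
  clSetKernelArg(user_kernel, 0, sizeof(cl_mem), &buf);
  /* ... enqueue user_kernel on the queue obtained from VecViennaCLGetCLQueue() ... */
  clReleaseMemObject(buf);
  return 0;
}
#endif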
1336: /*@C
1337: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1339: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1340: invoking clReleaseMemObject().
1342: The device pointer has to be released by calling
1343: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1344: the host will be marked as out of date. A subsequent access of the host data
1345: will thus incur a data transfer from the device to the host.
1347: Input Parameters:
1348: . v - the vector
1350: Output Parameters:
1351: . mem - pointer to the device buffer
1353: Level: intermediate
1355: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1356: @*/
1357: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1358: {
1359: #if !defined(PETSC_HAVE_OPENCL)
1360: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1361: #else
1363: ViennaCLVector *v_vcl;
1364: VecViennaCLGetArrayWrite(v, &v_vcl);
1365: try {
1366: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1367: clRetainMemObject(ocl_mem);
1368: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1369: } catch (std::exception const &ex) {
1370: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1371: }
1373: return 0;
1374: #endif
1375: }
1377: /*@C
1378: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1379: acquired with VecViennaCLGetCLMemWrite().
1381: This marks the host data as out of date. Subsequent access to the
1382: vector data on the host side with, for instance, VecGetArray() incurs a
1383: data transfer.
1385: Input Parameters:
1386: . v - the vector
1388: Level: intermediate
1390: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1391: @*/
1392: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1393: {
1394: #if !defined(PETSC_HAVE_OPENCL)
1395: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1396: #else
1398: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1400: return 0;
1401: #endif
1402: }
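/* Usage sketch of the write path (hypothetical helper): after a user kernel
   has filled the buffer, restoring it marks the host copy as stale so the
   next host access triggers a device-to-host transfer. */
#if defined(PETSC_HAVE_OPENCL)
static PetscErrorCode MyFillBufferOnDevice(Vec v)
{
  PETSC_UINTPTR_T handle;
  VecViennaCLGetCLMemWrite(v, &handle);
  cl_mem buf = (cl_mem)handle;
  /* ... enqueue a kernel that writes into buf ... */
  clReleaseMemObject(buf);
  VecViennaCLRestoreCLMemWrite(v);
  return 0;
}
#endif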
1404: /*@C
1405: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1407: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1408: invoking clReleaseMemObject().
1410: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1411: Upon restoring the vector data the data on the host will be marked as out of
1412: date. A subsequent access of the host data will thus incur a data transfer
1413: from the device to the host.
1415: Input Parameters:
1416: . v - the vector
1418: Output Parameters:
1419: . mem - pointer to the device buffer
1421: Level: intermediate
1423: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1424: @*/
1425: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1426: {
1427: #if !defined(PETSC_HAVE_OPENCL)
1428: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1429: #else
1431: ViennaCLVector *v_vcl;
1432: VecViennaCLGetArray(v, &v_vcl);
1433: try {
1434: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1435: clRetainMemObject(ocl_mem);
1436: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1437: } catch (std::exception const &ex) {
1438: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1439: }
1441: return 0;
1442: #endif
1443: }
1445: /*@C
1446: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1447: acquired with VecViennaCLGetCLMem().
1449: This marks the host data as out of date. Subsequent access to the vector
1450: data on the host side with, for instance, VecGetArray() incurs a data
1451: transfer.
1453: Input Parameters:
1454: . v - the vector
1456: Level: intermediate
1458: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1459: @*/
1460: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1461: {
1462: #if !defined(PETSC_HAVE_OPENCL)
1463: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1464: #else
1466: VecViennaCLRestoreArray(v, PETSC_NULL);
1468: return 0;
1469: #endif
1470: }
1472: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1473: {
1474: Vec_ViennaCL *vecviennacl;
1475: PetscMPIInt size;
1477: MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1479: VecCreate_Seq_Private(V, 0);
1480: PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1481: VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1482: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1484: if (array) {
1485: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1486: vecviennacl = (Vec_ViennaCL *)V->spptr;
1487: vecviennacl->GPUarray_allocated = 0;
1488: vecviennacl->GPUarray = (ViennaCLVector *)array;
1489: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1490: }
1492: return 0;
1493: }