Actual source code: vecviennacl.cxx

  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16:   #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 22:   *a = 0;
 23:   VecViennaCLCopyToGPU(v);
 24:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 25:   ViennaCLWaitForGPU();
 26:   return 0;
 27: }

 29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 30: {
 32:   v->offloadmask = PETSC_OFFLOAD_GPU;

 34:   PetscObjectStateIncrease((PetscObject)v);
 35:   return 0;
 36: }

 38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 39: {
 41:   *a = 0;
 42:   VecViennaCLCopyToGPU(v);
 43:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 44:   ViennaCLWaitForGPU();
 45:   return 0;
 46: }

 48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 49: {
 51:   return 0;
 52: }

 54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 55: {
 57:   *a = 0;
 58:   VecViennaCLAllocateCheck(v);
 59:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 60:   ViennaCLWaitForGPU();
 61:   return 0;
 62: }

 64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 65: {
 67:   v->offloadmask = PETSC_OFFLOAD_GPU;

 69:   PetscObjectStateIncrease((PetscObject)v);
 70:   return 0;
 71: }

 73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 74: {
 75:   char      string[20];
 76:   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;

 78:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 79:   PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg);
 80:   if (flg) {
 81:     try {
 82:       PetscStrcasecmp(string, "cuda", &flg_cuda);
 83:       PetscStrcasecmp(string, "opencl", &flg_opencl);
 84:       PetscStrcasecmp(string, "openmp", &flg_openmp);

 86:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 87:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 88: #if defined(PETSC_HAVE_CUDA)
 89:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 90: #endif
 91: #if defined(PETSC_HAVE_OPENCL)
 92:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
 93: #endif
 94:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
 95:     } catch (std::exception const &ex) {
 96:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
 97:     }
 98:   }

100:   if (PetscDefined(HAVE_CUDA)) {
101:     /* For CUDA event timers */
102:     PetscDeviceInitialize(PETSC_DEVICE_CUDA);
103:   }

105: #if defined(PETSC_HAVE_OPENCL)
106:   /* ViennaCL OpenCL device type configuration */
107:   PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg);
108:   if (flg) {
109:     try {
110:       PetscStrcasecmp(string, "cpu", &flg);
111:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

113:       PetscStrcasecmp(string, "gpu", &flg);
114:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

116:       PetscStrcasecmp(string, "accelerator", &flg);
117:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
118:     } catch (std::exception const &ex) {
119:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
120:     }
121:   }
122: #endif

124:   /* Print available backends */
125:   PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg);
126:   if (flg) {
127:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
128: #if defined(PETSC_HAVE_CUDA)
129:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
130: #endif
131: #if defined(PETSC_HAVE_OPENCL)
132:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
133: #endif
134: #if defined(PETSC_HAVE_OPENMP)
135:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
136: #else
137:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
138: #endif
139:     PetscPrintf(PETSC_COMM_WORLD, "\n");

141:     /* Print selected backends */
142:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
143: #if defined(PETSC_HAVE_CUDA)
144:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
145: #endif
146: #if defined(PETSC_HAVE_OPENCL)
147:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
148: #endif
149: #if defined(PETSC_HAVE_OPENMP)
150:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
151: #else
152:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
153: #endif
154:     PetscPrintf(PETSC_COMM_WORLD, "\n");
155:   }
156:   return 0;
157: }

159: /*
160:     Allocates space for the vector array on the Host if it does not exist.
161:     Does NOT change the PetscViennaCLFlag for the vector
162:     Does NOT zero the ViennaCL array
163:  */
164: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
165: {
166:   PetscScalar *array;
167:   Vec_Seq     *s;
168:   PetscInt     n = v->map->n;

170:   s = (Vec_Seq *)v->data;
171:   VecViennaCLAllocateCheck(v);
172:   if (s->array == 0) {
173:     PetscMalloc1(n, &array);
174:     s->array           = array;
175:     s->array_allocated = array;
176:   }
177:   return 0;
178: }

180: /*
181:     Allocates space for the vector array on the GPU if it does not exist.
182:     Does NOT change the PetscViennaCLFlag for the vector
183:     Does NOT zero the ViennaCL array

185:  */
186: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
187: {
188:   if (!v->spptr) {
189:     try {
190:       v->spptr                                       = new Vec_ViennaCL;
191:       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
192:       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;

194:     } catch (std::exception const &ex) {
195:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
196:     }
197:   }
198:   return 0;
199: }

201: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
202: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
203: {
205:   VecViennaCLAllocateCheck(v);
206:   if (v->map->n > 0) {
207:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
208:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
209:       try {
210:         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
211:         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
212:         ViennaCLWaitForGPU();
213:       } catch (std::exception const &ex) {
214:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
215:       }
216:       PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar));
217:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
218:       v->offloadmask = PETSC_OFFLOAD_BOTH;
219:     }
220:   }
221:   return 0;
222: }

224: /*
225:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
226: */
227: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
228: {
230:   VecViennaCLAllocateCheckHost(v);
231:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
232:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
233:     try {
234:       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
235:       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
236:       ViennaCLWaitForGPU();
237:     } catch (std::exception const &ex) {
238:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
239:     }
240:     PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar));
241:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
242:     v->offloadmask = PETSC_OFFLOAD_BOTH;
243:   }
244:   return 0;
245: }

247: /* Copy on CPU */
248: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
249: {
250:   PetscScalar       *ya;
251:   const PetscScalar *xa;

253:   VecViennaCLAllocateCheckHost(xin);
254:   VecViennaCLAllocateCheckHost(yin);
255:   if (xin != yin) {
256:     VecGetArrayRead(xin, &xa);
257:     VecGetArray(yin, &ya);
258:     PetscArraycpy(ya, xa, xin->map->n);
259:     VecRestoreArrayRead(xin, &xa);
260:     VecRestoreArray(yin, &ya);
261:   }
262:   return 0;
263: }

265: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
266: {
267:   PetscInt     n = xin->map->n, i;
268:   PetscScalar *xx;

270:   VecGetArray(xin, &xx);
271:   for (i = 0; i < n; i++) PetscRandomGetValue(r, &xx[i]);
272:   VecRestoreArray(xin, &xx);
273:   return 0;
274: }

276: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
277: {
278:   Vec_Seq *vs = (Vec_Seq *)v->data;

280:   PetscObjectSAWsViewOff(v);
281: #if defined(PETSC_USE_LOG)
282:   PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
283: #endif
284:   if (vs->array_allocated) PetscFree(vs->array_allocated);
285:   PetscFree(vs);
286:   return 0;
287: }

289: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
290: {
291:   Vec_Seq *v = (Vec_Seq *)vin->data;

293:   v->array         = v->unplacedarray;
294:   v->unplacedarray = 0;
295:   return 0;
296: }

298: /*MC
299:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

301:    Options Database Keys:
302: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

304:   Level: beginner

306: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
307: M*/

309: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
310: {
311:   const ViennaCLVector *xgpu;
312:   ViennaCLVector       *ygpu;

314:   VecViennaCLGetArrayRead(xin, &xgpu);
315:   VecViennaCLGetArray(yin, &ygpu);
316:   PetscLogGpuTimeBegin();
317:   try {
318:     if (alpha != 0.0 && xin->map->n > 0) {
319:       *ygpu = *xgpu + alpha * *ygpu;
320:       PetscLogGpuFlops(2.0 * yin->map->n);
321:     } else {
322:       *ygpu = *xgpu;
323:     }
324:     ViennaCLWaitForGPU();
325:   } catch (std::exception const &ex) {
326:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
327:   }
328:   PetscLogGpuTimeEnd();
329:   VecViennaCLRestoreArrayRead(xin, &xgpu);
330:   VecViennaCLRestoreArray(yin, &ygpu);
331:   return 0;
332: }

334: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
335: {
336:   const ViennaCLVector *xgpu;
337:   ViennaCLVector       *ygpu;

339:   if (alpha != 0.0 && xin->map->n > 0) {
340:     VecViennaCLGetArrayRead(xin, &xgpu);
341:     VecViennaCLGetArray(yin, &ygpu);
342:     PetscLogGpuTimeBegin();
343:     try {
344:       *ygpu += alpha * *xgpu;
345:       ViennaCLWaitForGPU();
346:     } catch (std::exception const &ex) {
347:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
348:     }
349:     PetscLogGpuTimeEnd();
350:     VecViennaCLRestoreArrayRead(xin, &xgpu);
351:     VecViennaCLRestoreArray(yin, &ygpu);
352:     PetscLogGpuFlops(2.0 * yin->map->n);
353:   }
354:   return 0;
355: }

357: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
358: {
359:   const ViennaCLVector *xgpu, *ygpu;
360:   ViennaCLVector       *wgpu;

362:   if (xin->map->n > 0) {
363:     VecViennaCLGetArrayRead(xin, &xgpu);
364:     VecViennaCLGetArrayRead(yin, &ygpu);
365:     VecViennaCLGetArrayWrite(win, &wgpu);
366:     PetscLogGpuTimeBegin();
367:     try {
368:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
369:       ViennaCLWaitForGPU();
370:     } catch (std::exception const &ex) {
371:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
372:     }
373:     PetscLogGpuTimeEnd();
374:     PetscLogGpuFlops(win->map->n);
375:     VecViennaCLRestoreArrayRead(xin, &xgpu);
376:     VecViennaCLRestoreArrayRead(yin, &ygpu);
377:     VecViennaCLRestoreArrayWrite(win, &wgpu);
378:   }
379:   return 0;
380: }

382: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
383: {
384:   const ViennaCLVector *xgpu, *ygpu;
385:   ViennaCLVector       *wgpu;

387:   if (alpha == 0.0 && xin->map->n > 0) {
388:     VecCopy_SeqViennaCL(yin, win);
389:   } else {
390:     VecViennaCLGetArrayRead(xin, &xgpu);
391:     VecViennaCLGetArrayRead(yin, &ygpu);
392:     VecViennaCLGetArrayWrite(win, &wgpu);
393:     PetscLogGpuTimeBegin();
394:     if (alpha == 1.0) {
395:       try {
396:         *wgpu = *ygpu + *xgpu;
397:       } catch (std::exception const &ex) {
398:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
399:       }
400:       PetscLogGpuFlops(win->map->n);
401:     } else if (alpha == -1.0) {
402:       try {
403:         *wgpu = *ygpu - *xgpu;
404:       } catch (std::exception const &ex) {
405:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
406:       }
407:       PetscLogGpuFlops(win->map->n);
408:     } else {
409:       try {
410:         *wgpu = *ygpu + alpha * *xgpu;
411:       } catch (std::exception const &ex) {
412:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
413:       }
414:       PetscLogGpuFlops(2 * win->map->n);
415:     }
416:     ViennaCLWaitForGPU();
417:     PetscLogGpuTimeEnd();
418:     VecViennaCLRestoreArrayRead(xin, &xgpu);
419:     VecViennaCLRestoreArrayRead(yin, &ygpu);
420:     VecViennaCLRestoreArrayWrite(win, &wgpu);
421:   }
422:   return 0;
423: }

425: /*
426:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
427:  *
428:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
429:  * hence there is an iterated application of these until the final result is obtained
430:  */
431: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
432: {
433:   PetscInt j;

435:   for (j = 0; j < nv; ++j) {
436:     if (j + 1 < nv) {
437:       VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]);
438:       ++j;
439:     } else {
440:       VecAXPY_SeqViennaCL(xin, alpha[j], y[j]);
441:     }
442:   }
443:   ViennaCLWaitForGPU();
444:   return 0;
445: }

447: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
448: {
449:   const ViennaCLVector *xgpu, *ygpu;

451:   if (xin->map->n > 0) {
452:     VecViennaCLGetArrayRead(xin, &xgpu);
453:     VecViennaCLGetArrayRead(yin, &ygpu);
454:     PetscLogGpuTimeBegin();
455:     try {
456:       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
457:     } catch (std::exception const &ex) {
458:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
459:     }
460:     ViennaCLWaitForGPU();
461:     PetscLogGpuTimeEnd();
462:     if (xin->map->n > 0) PetscLogGpuFlops(2.0 * xin->map->n - 1);
463:     VecViennaCLRestoreArrayRead(xin, &xgpu);
464:     VecViennaCLRestoreArrayRead(yin, &ygpu);
465:   } else *z = 0.0;
466:   return 0;
467: }

469: /*
470:  * Operation z[j] = dot(x, y[j])
471:  *
472:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
473:  */
474: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
475: {
476:   PetscInt                                                n = xin->map->n, i;
477:   const ViennaCLVector                                   *xgpu, *ygpu;
478:   Vec                                                    *yyin = (Vec *)yin;
479:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

481:   if (xin->map->n > 0) {
482:     VecViennaCLGetArrayRead(xin, &xgpu);
483:     for (i = 0; i < nv; i++) {
484:       VecViennaCLGetArrayRead(yyin[i], &ygpu);
485:       ygpu_array[i] = ygpu;
486:     }
487:     PetscLogGpuTimeBegin();
488:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
489:     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
490:     viennacl::copy(result.begin(), result.end(), z);
491:     for (i = 0; i < nv; i++) VecViennaCLRestoreArrayRead(yyin[i], &ygpu);
492:     ViennaCLWaitForGPU();
493:     PetscLogGpuTimeEnd();
494:     VecViennaCLRestoreArrayRead(xin, &xgpu);
495:     PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0));
496:   } else {
497:     for (i = 0; i < nv; i++) z[i] = 0.0;
498:   }
499:   return 0;
500: }

502: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
503: {
504:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
505:   VecMDot_SeqViennaCL(xin, nv, yin, z);
506:   ViennaCLWaitForGPU();
507:   return 0;
508: }

510: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
511: {
512:   ViennaCLVector *xgpu;

514:   if (xin->map->n > 0) {
515:     VecViennaCLGetArrayWrite(xin, &xgpu);
516:     PetscLogGpuTimeBegin();
517:     try {
518:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
519:       ViennaCLWaitForGPU();
520:     } catch (std::exception const &ex) {
521:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
522:     }
523:     PetscLogGpuTimeEnd();
524:     VecViennaCLRestoreArrayWrite(xin, &xgpu);
525:   }
526:   return 0;
527: }

529: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
530: {
531:   ViennaCLVector *xgpu;

533:   if (alpha == 0.0 && xin->map->n > 0) {
534:     VecSet_SeqViennaCL(xin, alpha);
535:     PetscLogGpuFlops(xin->map->n);
536:   } else if (alpha != 1.0 && xin->map->n > 0) {
537:     VecViennaCLGetArray(xin, &xgpu);
538:     PetscLogGpuTimeBegin();
539:     try {
540:       *xgpu *= alpha;
541:       ViennaCLWaitForGPU();
542:     } catch (std::exception const &ex) {
543:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
544:     }
545:     PetscLogGpuTimeEnd();
546:     VecViennaCLRestoreArray(xin, &xgpu);
547:     PetscLogGpuFlops(xin->map->n);
548:   }
549:   return 0;
550: }

552: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
553: {
554:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
555:   VecDot_SeqViennaCL(xin, yin, z);
556:   ViennaCLWaitForGPU();
557:   return 0;
558: }

560: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
561: {
562:   const ViennaCLVector *xgpu;
563:   ViennaCLVector       *ygpu;

565:   if (xin != yin && xin->map->n > 0) {
566:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
567:       VecViennaCLGetArrayRead(xin, &xgpu);
568:       VecViennaCLGetArrayWrite(yin, &ygpu);
569:       PetscLogGpuTimeBegin();
570:       try {
571:         *ygpu = *xgpu;
572:         ViennaCLWaitForGPU();
573:       } catch (std::exception const &ex) {
574:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
575:       }
576:       PetscLogGpuTimeEnd();
577:       VecViennaCLRestoreArrayRead(xin, &xgpu);
578:       VecViennaCLRestoreArrayWrite(yin, &ygpu);

580:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
581:       /* copy in CPU if we are on the CPU*/
582:       VecCopy_SeqViennaCL_Private(xin, yin);
583:       ViennaCLWaitForGPU();
584:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
585:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
586:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
587:         /* copy in CPU */
588:         VecCopy_SeqViennaCL_Private(xin, yin);
589:         ViennaCLWaitForGPU();
590:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
591:         /* copy in GPU */
592:         VecViennaCLGetArrayRead(xin, &xgpu);
593:         VecViennaCLGetArrayWrite(yin, &ygpu);
594:         PetscLogGpuTimeBegin();
595:         try {
596:           *ygpu = *xgpu;
597:           ViennaCLWaitForGPU();
598:         } catch (std::exception const &ex) {
599:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
600:         }
601:         PetscLogGpuTimeEnd();
602:         VecViennaCLRestoreArrayRead(xin, &xgpu);
603:         VecViennaCLRestoreArrayWrite(yin, &ygpu);
604:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
605:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
606:            default to copy in GPU (this is an arbitrary choice) */
607:         VecViennaCLGetArrayRead(xin, &xgpu);
608:         VecViennaCLGetArrayWrite(yin, &ygpu);
609:         PetscLogGpuTimeBegin();
610:         try {
611:           *ygpu = *xgpu;
612:           ViennaCLWaitForGPU();
613:         } catch (std::exception const &ex) {
614:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
615:         }
616:         PetscLogGpuTimeEnd();
617:         VecViennaCLRestoreArrayRead(xin, &xgpu);
618:         VecViennaCLRestoreArrayWrite(yin, &ygpu);
619:       } else {
620:         VecCopy_SeqViennaCL_Private(xin, yin);
621:         ViennaCLWaitForGPU();
622:       }
623:     }
624:   }
625:   return 0;
626: }

628: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
629: {
630:   ViennaCLVector *xgpu, *ygpu;

632:   if (xin != yin && xin->map->n > 0) {
633:     VecViennaCLGetArray(xin, &xgpu);
634:     VecViennaCLGetArray(yin, &ygpu);
635:     PetscLogGpuTimeBegin();
636:     try {
637:       viennacl::swap(*xgpu, *ygpu);
638:       ViennaCLWaitForGPU();
639:     } catch (std::exception const &ex) {
640:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
641:     }
642:     PetscLogGpuTimeEnd();
643:     VecViennaCLRestoreArray(xin, &xgpu);
644:     VecViennaCLRestoreArray(yin, &ygpu);
645:   }
646:   return 0;
647: }

649: // y = alpha * x + beta * y
650: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
651: {
652:   PetscScalar           a = alpha, b = beta;
653:   const ViennaCLVector *xgpu;
654:   ViennaCLVector       *ygpu;

656:   if (a == 0.0 && xin->map->n > 0) {
657:     VecScale_SeqViennaCL(yin, beta);
658:   } else if (b == 1.0 && xin->map->n > 0) {
659:     VecAXPY_SeqViennaCL(yin, alpha, xin);
660:   } else if (a == 1.0 && xin->map->n > 0) {
661:     VecAYPX_SeqViennaCL(yin, beta, xin);
662:   } else if (b == 0.0 && xin->map->n > 0) {
663:     VecViennaCLGetArrayRead(xin, &xgpu);
664:     VecViennaCLGetArray(yin, &ygpu);
665:     PetscLogGpuTimeBegin();
666:     try {
667:       *ygpu = *xgpu * alpha;
668:       ViennaCLWaitForGPU();
669:     } catch (std::exception const &ex) {
670:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
671:     }
672:     PetscLogGpuTimeEnd();
673:     PetscLogGpuFlops(xin->map->n);
674:     VecViennaCLRestoreArrayRead(xin, &xgpu);
675:     VecViennaCLRestoreArray(yin, &ygpu);
676:   } else if (xin->map->n > 0) {
677:     VecViennaCLGetArrayRead(xin, &xgpu);
678:     VecViennaCLGetArray(yin, &ygpu);
679:     PetscLogGpuTimeBegin();
680:     try {
681:       *ygpu = *xgpu * alpha + *ygpu * beta;
682:       ViennaCLWaitForGPU();
683:     } catch (std::exception const &ex) {
684:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
685:     }
686:     PetscLogGpuTimeEnd();
687:     VecViennaCLRestoreArrayRead(xin, &xgpu);
688:     VecViennaCLRestoreArray(yin, &ygpu);
689:     PetscLogGpuFlops(3.0 * xin->map->n);
690:   }
691:   return 0;
692: }

694: /* operation  z = alpha * x + beta *y + gamma *z*/
695: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
696: {
697:   PetscInt              n = zin->map->n;
698:   const ViennaCLVector *xgpu, *ygpu;
699:   ViennaCLVector       *zgpu;

701:   VecViennaCLGetArrayRead(xin, &xgpu);
702:   VecViennaCLGetArrayRead(yin, &ygpu);
703:   VecViennaCLGetArray(zin, &zgpu);
704:   if (alpha == 0.0 && xin->map->n > 0) {
705:     PetscLogGpuTimeBegin();
706:     try {
707:       if (beta == 0.0) {
708:         *zgpu = gamma * *zgpu;
709:         ViennaCLWaitForGPU();
710:         PetscLogGpuFlops(1.0 * n);
711:       } else if (gamma == 0.0) {
712:         *zgpu = beta * *ygpu;
713:         ViennaCLWaitForGPU();
714:         PetscLogGpuFlops(1.0 * n);
715:       } else {
716:         *zgpu = beta * *ygpu + gamma * *zgpu;
717:         ViennaCLWaitForGPU();
718:         PetscLogGpuFlops(3.0 * n);
719:       }
720:     } catch (std::exception const &ex) {
721:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
722:     }
723:     PetscLogGpuTimeEnd();
724:     PetscLogGpuFlops(3.0 * n);
725:   } else if (beta == 0.0 && xin->map->n > 0) {
726:     PetscLogGpuTimeBegin();
727:     try {
728:       if (gamma == 0.0) {
729:         *zgpu = alpha * *xgpu;
730:         ViennaCLWaitForGPU();
731:         PetscLogGpuFlops(1.0 * n);
732:       } else {
733:         *zgpu = alpha * *xgpu + gamma * *zgpu;
734:         ViennaCLWaitForGPU();
735:         PetscLogGpuFlops(3.0 * n);
736:       }
737:     } catch (std::exception const &ex) {
738:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
739:     }
740:     PetscLogGpuTimeEnd();
741:   } else if (gamma == 0.0 && xin->map->n > 0) {
742:     PetscLogGpuTimeBegin();
743:     try {
744:       *zgpu = alpha * *xgpu + beta * *ygpu;
745:       ViennaCLWaitForGPU();
746:     } catch (std::exception const &ex) {
747:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
748:     }
749:     PetscLogGpuTimeEnd();
750:     PetscLogGpuFlops(3.0 * n);
751:   } else if (xin->map->n > 0) {
752:     PetscLogGpuTimeBegin();
753:     try {
754:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
755:       if (gamma != 1.0) *zgpu *= gamma;
756:       *zgpu += alpha * *xgpu + beta * *ygpu;
757:       ViennaCLWaitForGPU();
758:     } catch (std::exception const &ex) {
759:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
760:     }
761:     PetscLogGpuTimeEnd();
762:     VecViennaCLRestoreArray(zin, &zgpu);
763:     VecViennaCLRestoreArrayRead(xin, &xgpu);
764:     VecViennaCLRestoreArrayRead(yin, &ygpu);
765:     PetscLogGpuFlops(5.0 * n);
766:   }
767:   return 0;
768: }

770: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
771: {
772:   PetscInt              n = win->map->n;
773:   const ViennaCLVector *xgpu, *ygpu;
774:   ViennaCLVector       *wgpu;

776:   if (xin->map->n > 0) {
777:     VecViennaCLGetArrayRead(xin, &xgpu);
778:     VecViennaCLGetArrayRead(yin, &ygpu);
779:     VecViennaCLGetArray(win, &wgpu);
780:     PetscLogGpuTimeBegin();
781:     try {
782:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
783:       ViennaCLWaitForGPU();
784:     } catch (std::exception const &ex) {
785:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
786:     }
787:     PetscLogGpuTimeEnd();
788:     VecViennaCLRestoreArrayRead(xin, &xgpu);
789:     VecViennaCLRestoreArrayRead(yin, &ygpu);
790:     VecViennaCLRestoreArray(win, &wgpu);
791:     PetscLogGpuFlops(n);
792:   }
793:   return 0;
794: }

796: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
797: {
798:   PetscInt              n = xin->map->n;
799:   PetscBLASInt          bn;
800:   const ViennaCLVector *xgpu;

802:   if (xin->map->n > 0) {
803:     PetscBLASIntCast(n, &bn);
804:     VecViennaCLGetArrayRead(xin, &xgpu);
805:     if (type == NORM_2 || type == NORM_FROBENIUS) {
806:       PetscLogGpuTimeBegin();
807:       try {
808:         *z = viennacl::linalg::norm_2(*xgpu);
809:         ViennaCLWaitForGPU();
810:       } catch (std::exception const &ex) {
811:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
812:       }
813:       PetscLogGpuTimeEnd();
814:       PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
815:     } else if (type == NORM_INFINITY) {
816:       PetscLogGpuTimeBegin();
817:       try {
818:         *z = viennacl::linalg::norm_inf(*xgpu);
819:         ViennaCLWaitForGPU();
820:       } catch (std::exception const &ex) {
821:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
822:       }
823:       PetscLogGpuTimeEnd();
824:     } else if (type == NORM_1) {
825:       PetscLogGpuTimeBegin();
826:       try {
827:         *z = viennacl::linalg::norm_1(*xgpu);
828:         ViennaCLWaitForGPU();
829:       } catch (std::exception const &ex) {
830:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
831:       }
832:       PetscLogGpuTimeEnd();
833:       PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
834:     } else if (type == NORM_1_AND_2) {
835:       PetscLogGpuTimeBegin();
836:       try {
837:         *z       = viennacl::linalg::norm_1(*xgpu);
838:         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
839:         ViennaCLWaitForGPU();
840:       } catch (std::exception const &ex) {
841:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
842:       }
843:       PetscLogGpuTimeEnd();
844:       PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
845:       PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
846:     }
847:     VecViennaCLRestoreArrayRead(xin, &xgpu);
848:   } else if (type == NORM_1_AND_2) {
849:     *z       = 0.0;
850:     *(z + 1) = 0.0;
851:   } else *z = 0.0;
852:   return 0;
853: }

855: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
856: {
857:   VecSetRandom_SeqViennaCL_Private(xin, r);
858:   xin->offloadmask = PETSC_OFFLOAD_CPU;
859:   return 0;
860: }

862: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
863: {
865:   VecViennaCLCopyFromGPU(vin);
866:   VecResetArray_SeqViennaCL_Private(vin);
867:   vin->offloadmask = PETSC_OFFLOAD_CPU;
868:   return 0;
869: }

871: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
872: {
874:   VecViennaCLCopyFromGPU(vin);
875:   VecPlaceArray_Seq(vin, a);
876:   vin->offloadmask = PETSC_OFFLOAD_CPU;
877:   return 0;
878: }

880: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
881: {
883:   VecViennaCLCopyFromGPU(vin);
884:   VecReplaceArray_Seq(vin, a);
885:   vin->offloadmask = PETSC_OFFLOAD_CPU;
886:   return 0;
887: }

889: /*@C
890:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

892:    Collective

894:    Input Parameters:
895: +  comm - the communicator, should be PETSC_COMM_SELF
896: -  n - the vector length

898:    Output Parameter:
899: .  V - the vector

901:    Notes:
902:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
903:    same type as an existing vector.

905:    Level: intermediate

907: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
908: @*/
909: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
910: {
911:   VecCreate(comm, v);
912:   VecSetSizes(*v, n, n);
913:   VecSetType(*v, VECSEQVIENNACL);
914:   return 0;
915: }

917: /*@C
918:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
919:    where the user provides the array space to store the vector values.

921:    Collective

923:    Input Parameters:
924: +  comm - the communicator, should be PETSC_COMM_SELF
925: .  bs - the block size
926: .  n - the vector length
927: -  array - viennacl array where the vector elements are to be stored.

929:    Output Parameter:
930: .  V - the vector

932:    Notes:
933:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
934:    same type as an existing vector.

936:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
937:    at a later stage to SET the array for storing the vector values.

939:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
940:    The user should not free the array until the vector is destroyed.

942:    Level: intermediate

944: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
945:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
946:           `VecCreateMPIWithArray()`
947: @*/
948: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
949: {
950:   PetscMPIInt size;

952:   VecCreate(comm, V);
953:   VecSetSizes(*V, n, n);
954:   VecSetBlockSize(*V, bs);
955:   MPI_Comm_size(comm, &size);
957:   VecCreate_SeqViennaCL_Private(*V, array);
958:   return 0;
959: }

961: /*@C
962:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
963:    the user provides the array space to store the vector values.

965:    Collective

967:    Input Parameters:
968: +  comm - the communicator, should be PETSC_COMM_SELF
969: .  bs - the block size
970: .  n - the vector length
971: -  cpuarray - CPU memory where the vector elements are to be stored.
972: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

974:    Output Parameter:
975: .  V - the vector

977:    Notes:
978:    If both cpuarray and viennaclvec are provided, the caller must ensure that
979:    the provided arrays have identical values.

981:    PETSc does NOT free the provided arrays when the vector is destroyed via
982:    VecDestroy(). The user should not free the array until the vector is
983:    destroyed.

985:    Level: intermediate

987: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
988:           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
989:           `VecViennaCLAllocateCheckHost()`
990: @*/
991: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
992: {
993:   PetscMPIInt size;


996:   MPI_Comm_size(comm, &size);

999:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1000:   VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V);

1002:   if (cpuarray && viennaclvec) {
1003:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1004:     s->array          = (PetscScalar *)cpuarray;
1005:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1006:   } else if (cpuarray) {
1007:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1008:     s->array          = (PetscScalar *)cpuarray;
1009:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1010:   } else if (viennaclvec) {
1011:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1012:   } else {
1013:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1014:   }

1016:   return 0;
1017: }

1019: /*@C
1020:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1021:    the one provided by the user. This is useful to avoid a copy.

1023:    Not Collective

1025:    Input Parameters:
1026: +  vec - the vector
1027: -  array - the ViennaCL vector

1029:    Notes:
1030:    You can return to the original viennacl vector with a call to
1031:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1032:    and VecPlaceArray() at the same time on the same vector.

1034:    Level: intermediate

1036: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1037:           `VecCUDAPlaceArray()`,

1039: @*/
1040: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1041: {
1043:   VecViennaCLCopyToGPU(vin);
1045:   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1046:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1047:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1048:   PetscObjectStateIncrease((PetscObject)vin);
1049:   return 0;
1050: }

1052: /*@C
1053:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1054:    after the use of VecViennaCLPlaceArray().

1056:    Not Collective

1058:    Input Parameters:
1059: .  vec - the vector

1061:    Level: developer

1063: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1064: @*/
1065: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1066: {
1068:   VecViennaCLCopyToGPU(vin);
1069:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1070:   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1071:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1072:   PetscObjectStateIncrease((PetscObject)vin);
1073:   return 0;
1074: }

1076: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1077:  *
1078:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1079:  */
1080: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1081: {
1082:   VecDot_SeqViennaCL(s, t, dp);
1083:   VecNorm_SeqViennaCL(t, NORM_2, nm);
1084:   *nm *= *nm; //squared norm required
1085:   return 0;
1086: }

1088: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1089: {
1090:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V);
1091:   PetscLayoutReference(win->map, &(*V)->map);
1092:   PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
1093:   PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
1094:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1095:   return 0;
1096: }

1098: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1099: {
1100:   try {
1101:     if (v->spptr) {
1102:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1103:       delete (Vec_ViennaCL *)v->spptr;
1104:     }
1105:   } catch (char *ex) {
1106:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1107:   }
1108:   VecDestroy_SeqViennaCL_Private(v);
1109:   return 0;
1110: }

1112: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1113: {
1114:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1115:     VecViennaCLCopyFromGPU(v);
1116:   } else {
1117:     VecViennaCLAllocateCheckHost(v);
1118:   }
1119:   *a = *((PetscScalar **)v->data);
1120:   return 0;
1121: }

1123: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1124: {
1125:   v->offloadmask = PETSC_OFFLOAD_CPU;
1126:   return 0;
1127: }

1129: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1130: {
1131:   VecViennaCLAllocateCheckHost(v);
1132:   *a = *((PetscScalar **)v->data);
1133:   return 0;
1134: }

1136: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1137: {
1138:   V->boundtocpu = flg;
1139:   if (flg) {
1140:     VecViennaCLCopyFromGPU(V);
1141:     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1142:     V->ops->dot             = VecDot_Seq;
1143:     V->ops->norm            = VecNorm_Seq;
1144:     V->ops->tdot            = VecTDot_Seq;
1145:     V->ops->scale           = VecScale_Seq;
1146:     V->ops->copy            = VecCopy_Seq;
1147:     V->ops->set             = VecSet_Seq;
1148:     V->ops->swap            = VecSwap_Seq;
1149:     V->ops->axpy            = VecAXPY_Seq;
1150:     V->ops->axpby           = VecAXPBY_Seq;
1151:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1152:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1153:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1154:     V->ops->setrandom       = VecSetRandom_Seq;
1155:     V->ops->dot_local       = VecDot_Seq;
1156:     V->ops->tdot_local      = VecTDot_Seq;
1157:     V->ops->norm_local      = VecNorm_Seq;
1158:     V->ops->mdot_local      = VecMDot_Seq;
1159:     V->ops->mtdot_local     = VecMTDot_Seq;
1160:     V->ops->maxpy           = VecMAXPY_Seq;
1161:     V->ops->mdot            = VecMDot_Seq;
1162:     V->ops->mtdot           = VecMTDot_Seq;
1163:     V->ops->aypx            = VecAYPX_Seq;
1164:     V->ops->waxpy           = VecWAXPY_Seq;
1165:     V->ops->dotnorm2        = NULL;
1166:     V->ops->placearray      = VecPlaceArray_Seq;
1167:     V->ops->replacearray    = VecReplaceArray_Seq;
1168:     V->ops->resetarray      = VecResetArray_Seq;
1169:     V->ops->duplicate       = VecDuplicate_Seq;
1170:   } else {
1171:     V->ops->dot             = VecDot_SeqViennaCL;
1172:     V->ops->norm            = VecNorm_SeqViennaCL;
1173:     V->ops->tdot            = VecTDot_SeqViennaCL;
1174:     V->ops->scale           = VecScale_SeqViennaCL;
1175:     V->ops->copy            = VecCopy_SeqViennaCL;
1176:     V->ops->set             = VecSet_SeqViennaCL;
1177:     V->ops->swap            = VecSwap_SeqViennaCL;
1178:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1179:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1180:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1181:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1182:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1183:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1184:     V->ops->dot_local       = VecDot_SeqViennaCL;
1185:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1186:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1187:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1188:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1189:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1190:     V->ops->mdot            = VecMDot_SeqViennaCL;
1191:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1192:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1193:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1194:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1195:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1196:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1197:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1198:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1199:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1200:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1201:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1202:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1203:   }
1204:   return 0;
1205: }

1207: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1208: {
1209:   PetscMPIInt size;

1211:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1213:   VecCreate_Seq_Private(V, 0);
1214:   PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);

1216:   VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1217:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1219:   VecViennaCLAllocateCheck(V);
1220:   VecSet_SeqViennaCL(V, 0.0);
1221:   return 0;
1222: }

1224: /*@C
1225:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1227:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1228:   invoking clReleaseContext().

1230:   Input Parameters:
1231: .  v    - the vector

1233:   Output Parameters:
1234: .  ctx - pointer to the underlying CL context

1236:   Level: intermediate

1238: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1239: @*/
1240: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1241: {
1242: #if !defined(PETSC_HAVE_OPENCL)
1243:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1244: #else

1247:   const ViennaCLVector *v_vcl;
1248:   VecViennaCLGetArrayRead(v, &v_vcl);
1249:   try {
1250:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1251:     const cl_context ocl_ctx = vcl_ctx.handle().get();
1252:     clRetainContext(ocl_ctx);
1253:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1254:   } catch (std::exception const &ex) {
1255:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1256:   }

1258:   return 0;
1259: #endif
1260: }

1262: /*@C
1263:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1264:   operations of the Vec are enqueued.

1266:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1267:   responsible for invoking clReleaseCommandQueue().

1269:   Input Parameters:
1270: .  v    - the vector

1272:   Output Parameters:
1273: .  ctx - pointer to the CL command queue

1275:   Level: intermediate

1277: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1278: @*/
1279: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1280: {
1281: #if !defined(PETSC_HAVE_OPENCL)
1282:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1283: #else
1285:   const ViennaCLVector *v_vcl;
1286:   VecViennaCLGetArrayRead(v, &v_vcl);
1287:   try {
1288:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1289:     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1290:     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1291:     clRetainCommandQueue(ocl_queue);
1292:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1293:   } catch (std::exception const &ex) {
1294:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1295:   }

1297:   return 0;
1298: #endif
1299: }

1301: /*@C
1302:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1304:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1305:   invoking clReleaseMemObject().

1307:   Input Parameters:
1308: .  v    - the vector

1310:   Output Parameters:
1311: .  mem - pointer to the device buffer

1313:   Level: intermediate

1315: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1316: @*/
1317: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1318: {
1319: #if !defined(PETSC_HAVE_OPENCL)
1320:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1321: #else
1323:   const ViennaCLVector *v_vcl;
1324:   VecViennaCLGetArrayRead(v, &v_vcl);
1325:   try {
1326:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1327:     clRetainMemObject(ocl_mem);
1328:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1329:   } catch (std::exception const &ex) {
1330:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1331:   }
1332:   return 0;
1333: #endif
1334: }

1336: /*@C
1337:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1339:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1340:   invoking clReleaseMemObject().

1342:   The device pointer has to be released by calling
1343:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1344:   the host will be marked as out of date.  A subsequent access of the host data
1345:   will thus incur a data transfer from the device to the host.

1347:   Input Parameters:
1348: .  v    - the vector

1350:   Output Parameters:
1351: .  mem - pointer to the device buffer

1353:   Level: intermediate

1355: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1356: @*/
1357: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1358: {
1359: #if !defined(PETSC_HAVE_OPENCL)
1360:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1361: #else
1363:   ViennaCLVector *v_vcl;
1364:   VecViennaCLGetArrayWrite(v, &v_vcl);
1365:   try {
1366:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1367:     clRetainMemObject(ocl_mem);
1368:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1369:   } catch (std::exception const &ex) {
1370:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1371:   }

1373:   return 0;
1374: #endif
1375: }

1377: /*@C
1378:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1379:   acquired with VecViennaCLGetCLMemWrite().

1381:    This marks the host data as out of date.  Subsequent access to the
1382:    vector data on the host side with for instance VecGetArray() incurs a
1383:    data transfer.

1385:   Input Parameters:
1386: .  v    - the vector

1388:   Level: intermediate

1390: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1391: @*/
1392: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1393: {
1394: #if !defined(PETSC_HAVE_OPENCL)
1395:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1396: #else
1398:   VecViennaCLRestoreArrayWrite(v, PETSC_NULL);

1400:   return 0;
1401: #endif
1402: }

1404: /*@C
1405:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1407:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1408:   invoking clReleaseMemObject().

1410:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1411:   Upon restoring the vector data the data on the host will be marked as out of
1412:   date.  A subsequent access of the host data will thus incur a data transfer
1413:   from the device to the host.

1415:   Input Parameters:
1416: .  v    - the vector

1418:   Output Parameters:
1419: .  mem - pointer to the device buffer

1421:   Level: intermediate

1423: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1424: @*/
1425: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1426: {
1427: #if !defined(PETSC_HAVE_OPENCL)
1428:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1429: #else
1431:   ViennaCLVector *v_vcl;
1432:   VecViennaCLGetArray(v, &v_vcl);
1433:   try {
1434:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1435:     clRetainMemObject(ocl_mem);
1436:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1437:   } catch (std::exception const &ex) {
1438:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1439:   }

1441:   return 0;
1442: #endif
1443: }

1445: /*@C
1446:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1447:   acquired with VecViennaCLGetCLMem().

1449:    This marks the host data as out of date. Subsequent access to the vector
1450:    data on the host side with for instance VecGetArray() incurs a data
1451:    transfer.

1453:   Input Parameters:
1454: .  v    - the vector

1456:   Level: intermediate

1458: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1459: @*/
1460: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1461: {
1462: #if !defined(PETSC_HAVE_OPENCL)
1463:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1464: #else
1466:   VecViennaCLRestoreArray(v, PETSC_NULL);

1468:   return 0;
1469: #endif
1470: }

1472: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1473: {
1474:   Vec_ViennaCL *vecviennacl;
1475:   PetscMPIInt   size;

1477:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1479:   VecCreate_Seq_Private(V, 0);
1480:   PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1481:   VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1482:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1484:   if (array) {
1485:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1486:     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1487:     vecviennacl->GPUarray_allocated = 0;
1488:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1489:     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1490:   }

1492:   return 0;
1493: }