Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <h2opusconf.h>
5: /* Use GPU only if H2OPUS is configured for GPU */
6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7: #define PETSC_H2OPUS_USE_GPU
8: #endif
10: typedef struct {
11: Mat A;
12: Mat M;
13: PetscScalar s0;
15: /* sampler for Newton-Schultz */
16: Mat S;
17: PetscInt hyperorder;
18: Vec wns[4];
19: Mat wnsmat[4];
21: /* convergence testing */
22: Mat T;
23: Vec w;
24: PetscBool testMA;
26: /* Support for PCSetCoordinates */
27: PetscInt sdim;
28: PetscInt nlocc;
29: PetscReal *coords;
31: /* Newton-Schultz customization */
32: PetscInt maxits;
33: PetscReal rtol,atol;
34: PetscBool monitor;
35: PetscBool useapproximatenorms;
36: NormType normtype;
38: /* Used when pmat != MATH2OPUS */
39: PetscReal eta;
40: PetscInt leafsize;
41: PetscInt max_rank;
42: PetscInt bs;
43: PetscReal mrtol;
45: PetscBool boundtocpu;
46: } PC_H2OPUS;
48: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);
50: static PetscErrorCode PCReset_H2OPUS(PC pc)
51: {
52: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
54: pch2opus->sdim = 0;
55: pch2opus->nlocc = 0;
56: PetscFree(pch2opus->coords);
57: MatDestroy(&pch2opus->A);
58: MatDestroy(&pch2opus->M);
59: MatDestroy(&pch2opus->T);
60: VecDestroy(&pch2opus->w);
61: MatDestroy(&pch2opus->S);
62: VecDestroy(&pch2opus->wns[0]);
63: VecDestroy(&pch2opus->wns[1]);
64: VecDestroy(&pch2opus->wns[2]);
65: VecDestroy(&pch2opus->wns[3]);
66: MatDestroy(&pch2opus->wnsmat[0]);
67: MatDestroy(&pch2opus->wnsmat[1]);
68: MatDestroy(&pch2opus->wnsmat[2]);
69: MatDestroy(&pch2opus->wnsmat[3]);
70: return 0;
71: }
73: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
74: {
75: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
76: PetscBool reset = PETSC_TRUE;
78: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
79: PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);
80: reset = (PetscBool)!reset;
81: }
82: MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
83: if (reset) {
84: PCReset_H2OPUS(pc);
85: PetscMalloc1(sdim*nlocc,&pch2opus->coords);
86: PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);
87: pch2opus->sdim = sdim;
88: pch2opus->nlocc = nlocc;
89: }
90: return 0;
91: }
93: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
94: {
95: PCReset_H2OPUS(pc);
96: PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);
97: PetscFree(pc->data);
98: return 0;
99: }
101: static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
102: {
103: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
105: PetscOptionsHead(PetscOptionsObject,"H2OPUS options");
106: PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);
107: PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);
108: PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);
109: PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);
110: PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);
111: PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);
112: PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);
113: PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);
114: PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);
115: PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);
116: PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);
117: PetscOptionsTail();
118: return 0;
119: }
121: typedef struct {
122: Mat A;
123: Mat M;
124: Vec w;
125: } AAtCtx;
127: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
128: {
129: AAtCtx *aat;
131: MatShellGetContext(A,(void*)&aat);
132: /* MatMultTranspose(aat->M,x,aat->w); */
133: MatMultTranspose(aat->A,x,aat->w);
134: MatMult(aat->A,aat->w,y);
135: return 0;
136: }
138: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
139: {
140: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
141: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
142: PetscInt M,m;
143: VecType vtype;
144: PetscReal n;
145: AAtCtx aat;
147: aat.A = A;
148: aat.M = pch2opus->M; /* unused so far */
149: MatCreateVecs(A,NULL,&aat.w);
150: MatGetSize(A,&M,NULL);
151: MatGetLocalSize(A,&m,NULL);
152: MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);
153: MatBindToCPU(AAt,pch2opus->boundtocpu);
154: MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);
155: MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);
156: MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
157: MatGetVecType(A,&vtype);
158: MatShellSetVecType(AAt,vtype);
159: MatNorm(AAt,NORM_1,&n);
160: pch2opus->s0 = 1./n;
161: VecDestroy(&aat.w);
162: MatDestroy(&AAt);
163: return 0;
164: }
166: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
167: {
168: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
170: if (t) MatMultTranspose(pch2opus->M,x,y);
171: else MatMult(pch2opus->M,x,y);
172: return 0;
173: }
175: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
176: {
177: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
179: if (t) MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
180: else MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
181: return 0;
182: }
184: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
185: {
186: PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);
187: return 0;
188: }
190: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
191: {
192: PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);
193: return 0;
194: }
196: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
197: {
198: PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);
199: return 0;
200: }
202: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
203: {
204: PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);
205: return 0;
206: }
208: /* used to test the norm of (M^-1 A - I) */
209: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
210: {
211: PC pc;
212: Mat A;
213: PC_H2OPUS *pch2opus;
214: PetscBool sideleft = PETSC_TRUE;
216: MatShellGetContext(M,(void*)&pc);
217: pch2opus = (PC_H2OPUS*)pc->data;
218: if (!pch2opus->w) MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);
219: A = pch2opus->A;
220: VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);
221: if (t) {
222: if (sideleft) {
223: PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);
224: MatMultTranspose(A,pch2opus->w,y);
225: } else {
226: MatMultTranspose(A,x,pch2opus->w);
227: PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);
228: }
229: } else {
230: if (sideleft) {
231: MatMult(A,x,pch2opus->w);
232: PCApply_H2OPUS(pc,pch2opus->w,y);
233: } else {
234: PCApply_H2OPUS(pc,x,pch2opus->w);
235: MatMult(A,pch2opus->w,y);
236: }
237: }
238: if (!pch2opus->testMA) VecAXPY(y,-1.0,x);
239: return 0;
240: }
242: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
243: {
244: MatMultKernel_MAmI(A,x,y,PETSC_FALSE);
245: return 0;
246: }
248: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
249: {
250: MatMultKernel_MAmI(A,x,y,PETSC_TRUE);
251: return 0;
252: }
254: /* HyperPower kernel:
255: Y = R = x
256: for i = 1 . . . l - 1 do
257: R = (I - A * Xk) * R
258: Y = Y + R
259: Y = Xk * Y
260: */
261: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
262: {
263: PC pc;
264: Mat A;
265: PC_H2OPUS *pch2opus;
267: MatShellGetContext(M,(void*)&pc);
268: pch2opus = (PC_H2OPUS*)pc->data;
269: A = pch2opus->A;
270: MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
271: MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
272: VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
273: VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
274: VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);
275: VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);
276: VecCopy(x,pch2opus->wns[0]);
277: VecCopy(x,pch2opus->wns[3]);
278: if (t) {
279: for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
280: MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);
281: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);
282: VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
283: VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
284: }
285: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);
286: } else {
287: for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
288: PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
289: MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);
290: VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
291: VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
292: }
293: PCApply_H2OPUS(pc,pch2opus->wns[3],y);
294: }
295: return 0;
296: }
298: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
299: {
300: MatMultKernel_Hyper(M,x,y,PETSC_FALSE);
301: return 0;
302: }
304: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
305: {
306: MatMultKernel_Hyper(M,x,y,PETSC_TRUE);
307: return 0;
308: }
310: /* Hyper power kernel, MatMat version */
311: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
312: {
313: PC pc;
314: Mat A;
315: PC_H2OPUS *pch2opus;
316: PetscInt i;
318: MatShellGetContext(M,(void*)&pc);
319: pch2opus = (PC_H2OPUS*)pc->data;
320: A = pch2opus->A;
321: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
322: MatDestroy(&pch2opus->wnsmat[0]);
323: MatDestroy(&pch2opus->wnsmat[1]);
324: }
325: if (!pch2opus->wnsmat[0]) {
326: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
327: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
328: }
329: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
330: MatDestroy(&pch2opus->wnsmat[2]);
331: MatDestroy(&pch2opus->wnsmat[3]);
332: }
333: if (!pch2opus->wnsmat[2]) {
334: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);
335: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);
336: }
337: MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
338: MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);
339: if (t) {
340: for (i=0;i<pch2opus->hyperorder-1;i++) {
341: MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
342: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);
343: MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
344: MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
345: }
346: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
347: } else {
348: for (i=0;i<pch2opus->hyperorder-1;i++) {
349: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
350: MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);
351: MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
352: MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
353: }
354: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
355: }
356: return 0;
357: }
359: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
360: {
361: MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);
362: return 0;
363: }
365: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
366: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
367: {
368: PC pc;
369: Mat A;
370: PC_H2OPUS *pch2opus;
372: MatShellGetContext(M,(void*)&pc);
373: pch2opus = (PC_H2OPUS*)pc->data;
374: A = pch2opus->A;
375: MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
376: VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
377: VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
378: if (t) {
379: PCApplyTranspose_H2OPUS(pc,x,y);
380: MatMultTranspose(A,y,pch2opus->wns[1]);
381: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);
382: VecAXPBY(y,-1.,2.,pch2opus->wns[0]);
383: } else {
384: PCApply_H2OPUS(pc,x,y);
385: MatMult(A,y,pch2opus->wns[0]);
386: PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
387: VecAXPBY(y,-1.,2.,pch2opus->wns[1]);
388: }
389: return 0;
390: }
392: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
393: {
394: MatMultKernel_NS(M,x,y,PETSC_FALSE);
395: return 0;
396: }
398: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
399: {
400: MatMultKernel_NS(M,x,y,PETSC_TRUE);
401: return 0;
402: }
404: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
405: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
406: {
407: PC pc;
408: Mat A;
409: PC_H2OPUS *pch2opus;
411: MatShellGetContext(M,(void*)&pc);
412: pch2opus = (PC_H2OPUS*)pc->data;
413: A = pch2opus->A;
414: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
415: MatDestroy(&pch2opus->wnsmat[0]);
416: MatDestroy(&pch2opus->wnsmat[1]);
417: }
418: if (!pch2opus->wnsmat[0]) {
419: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
420: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
421: }
422: if (t) {
423: PCApplyTransposeMat_H2OPUS(pc,X,Y);
424: MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
425: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);
426: MatScale(Y,2.);
427: MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
428: } else {
429: PCApplyMat_H2OPUS(pc,X,Y);
430: MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);
431: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
432: MatScale(Y,2.);
433: MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);
434: }
435: return 0;
436: }
438: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
439: {
440: MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);
441: return 0;
442: }
444: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
445: {
446: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
447: Mat A = pc->useAmat ? pc->mat : pc->pmat;
449: if (!pch2opus->S) {
450: PetscInt M,N,m,n;
452: MatGetSize(A,&M,&N);
453: MatGetLocalSize(A,&m,&n);
454: MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);
455: MatSetBlockSizesFromMats(pch2opus->S,A,A);
456: #if defined(PETSC_H2OPUS_USE_GPU)
457: MatShellSetVecType(pch2opus->S,VECCUDA);
458: #endif
459: }
460: if (pch2opus->hyperorder >= 2) {
461: MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);
462: MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);
463: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);
464: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);
465: } else {
466: MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);
467: MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);
468: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);
469: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);
470: }
471: MatPropagateSymmetryOptions(A,pch2opus->S);
472: MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);
473: /* XXX */
474: MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);
475: return 0;
476: }
478: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
479: {
480: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
481: Mat A = pc->useAmat ? pc->mat : pc->pmat;
482: NormType norm = pch2opus->normtype;
483: PetscReal initerr = 0.0,err;
484: PetscReal initerrMA = 0.0,errMA;
485: PetscBool ish2opus;
487: if (!pch2opus->T) {
488: PetscInt M,N,m,n;
489: const char *prefix;
491: PCGetOptionsPrefix(pc,&prefix);
492: MatGetSize(A,&M,&N);
493: MatGetLocalSize(A,&m,&n);
494: MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);
495: MatSetBlockSizesFromMats(pch2opus->T,A,A);
496: MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);
497: MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);
498: MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
499: #if defined(PETSC_H2OPUS_USE_GPU)
500: MatShellSetVecType(pch2opus->T,VECCUDA);
501: #endif
502: PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);
503: MatSetOptionsPrefix(pch2opus->T,prefix);
504: MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");
505: }
506: MatDestroy(&pch2opus->A);
507: PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);
508: if (ish2opus) {
509: PetscObjectReference((PetscObject)A);
510: pch2opus->A = A;
511: } else {
512: const char *prefix;
513: MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);
514: /* XXX */
515: MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
516: PCGetOptionsPrefix(pc,&prefix);
517: MatSetOptionsPrefix(pch2opus->A,prefix);
518: MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");
519: MatSetFromOptions(pch2opus->A);
520: MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);
521: MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);
522: /* XXX */
523: MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
524: }
525: #if defined(PETSC_H2OPUS_USE_GPU)
526: pch2opus->boundtocpu = pch2opus->A->boundtocpu;
527: #endif
528: MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);
529: if (pch2opus->M) { /* see if we can reuse M as initial guess */
530: PetscReal reuse;
532: MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);
533: MatNorm(pch2opus->T,norm,&reuse);
534: if (reuse >= 1.0) MatDestroy(&pch2opus->M);
535: }
536: if (!pch2opus->M) {
537: const char *prefix;
538: MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);
539: PCGetOptionsPrefix(pc,&prefix);
540: MatSetOptionsPrefix(pch2opus->M,prefix);
541: MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");
542: MatSetFromOptions(pch2opus->M);
543: PCH2OpusSetUpInit(pc);
544: MatScale(pch2opus->M,pch2opus->s0);
545: }
546: /* A and M have the same h2 matrix structure, save on reordering routines */
547: MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);
548: MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);
549: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
550: MatNorm(pch2opus->T,norm,&initerr);
551: pch2opus->testMA = PETSC_TRUE;
552: MatNorm(pch2opus->T,norm,&initerrMA);
553: pch2opus->testMA = PETSC_FALSE;
554: }
555: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
556: err = initerr;
557: errMA = initerrMA;
558: if (pch2opus->monitor) {
559: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));
560: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
561: }
562: if (initerr > pch2opus->atol && !pc->failedreason) {
563: PetscInt i;
565: PCH2OpusSetUpSampler_Private(pc);
566: for (i = 0; i < pch2opus->maxits; i++) {
567: Mat M;
568: const char* prefix;
570: MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);
571: MatGetOptionsPrefix(pch2opus->M,&prefix);
572: MatSetOptionsPrefix(M,prefix);
573: MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);
574: MatSetFromOptions(M);
575: MatH2OpusSetNativeMult(M,PETSC_TRUE);
576: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
577: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
579: MatDestroy(&pch2opus->M);
580: pch2opus->M = M;
581: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
582: MatNorm(pch2opus->T,norm,&err);
583: pch2opus->testMA = PETSC_TRUE;
584: MatNorm(pch2opus->T,norm,&errMA);
585: pch2opus->testMA = PETSC_FALSE;
586: }
587: if (pch2opus->monitor) {
588: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));
589: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
590: }
591: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
592: if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
593: }
594: }
595: /* cleanup setup workspace */
596: MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);
597: MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);
598: VecDestroy(&pch2opus->wns[0]);
599: VecDestroy(&pch2opus->wns[1]);
600: VecDestroy(&pch2opus->wns[2]);
601: VecDestroy(&pch2opus->wns[3]);
602: MatDestroy(&pch2opus->wnsmat[0]);
603: MatDestroy(&pch2opus->wnsmat[1]);
604: MatDestroy(&pch2opus->wnsmat[2]);
605: MatDestroy(&pch2opus->wnsmat[3]);
606: return 0;
607: }
609: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
610: {
611: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
612: PetscBool isascii;
614: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
615: if (isascii) {
616: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
617: PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");
618: PetscViewerASCIIPushTab(viewer);
619: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
620: MatView(pch2opus->A,viewer);
621: PetscViewerPopFormat(viewer);
622: PetscViewerASCIIPopTab(viewer);
623: }
624: if (pch2opus->M) {
625: PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");
626: PetscViewerASCIIPushTab(viewer);
627: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
628: MatView(pch2opus->M,viewer);
629: PetscViewerPopFormat(viewer);
630: PetscViewerASCIIPopTab(viewer);
631: }
632: PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);
633: }
634: return 0;
635: }
637: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
638: {
639: PC_H2OPUS *pch2opus;
641: PetscNewLog(pc,&pch2opus);
642: pc->data = (void*)pch2opus;
644: pch2opus->atol = 1.e-2;
645: pch2opus->rtol = 1.e-6;
646: pch2opus->maxits = 50;
647: pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
648: pch2opus->normtype = NORM_2;
650: /* these are needed when we are sampling the pmat */
651: pch2opus->eta = PETSC_DECIDE;
652: pch2opus->leafsize = PETSC_DECIDE;
653: pch2opus->max_rank = PETSC_DECIDE;
654: pch2opus->bs = PETSC_DECIDE;
655: pch2opus->mrtol = PETSC_DECIDE;
656: #if defined(PETSC_H2OPUS_USE_GPU)
657: pch2opus->boundtocpu = PETSC_FALSE;
658: #else
659: pch2opus->boundtocpu = PETSC_TRUE;
660: #endif
661: pc->ops->destroy = PCDestroy_H2OPUS;
662: pc->ops->setup = PCSetUp_H2OPUS;
663: pc->ops->apply = PCApply_H2OPUS;
664: pc->ops->matapply = PCApplyMat_H2OPUS;
665: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
666: pc->ops->reset = PCReset_H2OPUS;
667: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
668: pc->ops->view = PCView_H2OPUS;
670: PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);
671: return 0;
672: }