Actual source code: fdda.c


  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>
  4: #include <petscbt.h>

  6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 11: /*
 12:    For ghost i that may be negative or greater than the upper bound this
 13:   maps it into the 0:m-1 range using periodicity
 14: */
 15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

 17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 18: {
 19:   PetscInt i, j, nz, *fill;

 21:   if (!dfill) return 0;

 23:   /* count number nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscMalloc1(nz + w + 1, &fill);
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 33:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   return 0;
 48: }

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   if (!dfillsparse) return 0;

 56:   /* Determine number of non-zeros */
 57:   nz = (dfillsparse[w] - w - 1);

 59:   /* Allocate space for our copy of the given sparse matrix representation. */
 60:   PetscMalloc1(nz + w + 1, rfill);
 61:   PetscArraycpy(*rfill, dfillsparse, nz + w + 1);
 62:   return 0;
 63: }

 65: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 66: {
 67:   PetscInt i, k, cnt = 1;


 70:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 71:    columns to the left with any nonzeros in them plus 1 */
 72:   PetscCalloc1(dd->w, &dd->ofillcols);
 73:   for (i = 0; i < dd->w; i++) {
 74:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 75:   }
 76:   for (i = 0; i < dd->w; i++) {
 77:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 78:   }
 79:   return 0;
 80: }

 82: /*@
 83:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 84:     of the matrix returned by DMCreateMatrix().

 86:     Logically Collective on da

 88:     Input Parameters:
 89: +   da - the distributed array
 90: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
 91: -   ofill - the fill pattern in the off-diagonal blocks

 93:     Level: developer

 95:     Notes:
 96:     This only makes sense when you are doing multicomponent problems but using the
 97:        MPIAIJ matrix format

 99:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100:        representing coupling and 0 entries for missing coupling. For example
101: $             dfill[9] = {1, 0, 0,
102: $                         1, 1, 0,
103: $                         0, 1, 1}
104:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106:        diagonal block).

108:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109:      can be represented in the dfill, ofill format

111:    Contributed by Glenn Hammond

113: .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

115: @*/
116: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
117: {
118:   DM_DA *dd = (DM_DA *)da->data;

120:   /* save the given dfill and ofill information */
121:   DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill);
122:   DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill);

124:   /* count nonzeros in ofill columns */
125:   DMDASetBlockFills_Private2(dd);
126:   return 0;
127: }

129: /*@
130:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
131:     of the matrix returned by DMCreateMatrix(), using sparse representations
132:     of fill patterns.

134:     Logically Collective on da

136:     Input Parameters:
137: +   da - the distributed array
138: .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
139: -   ofill - the sparse fill pattern in the off-diagonal blocks

141:     Level: developer

143:     Notes: This only makes sense when you are doing multicomponent problems but using the
144:        MPIAIJ matrix format

146:            The format for dfill and ofill is a sparse representation of a
147:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
148:            for missing coupling.  The sparse representation is a 1 dimensional
149:            array of length nz + dof + 1, where nz is the number of non-zeros in
150:            the matrix.  The first dof entries in the array give the
151:            starting array indices of each row's items in the rest of the array,
152:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
153:            and the remaining nz items give the column indices of each of
154:            the 1s within the logical 2D matrix.  Each row's items within
155:            the array are the column indices of the 1s within that row
156:            of the 2D matrix.  PETSc developers may recognize that this is the
157:            same format as that computed by the DMDASetBlockFills_Private()
158:            function from a dense 2D matrix representation.

160:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
161:      can be represented in the dfill, ofill format

163:    Contributed by Philip C. Roth

165: .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

167: @*/
168: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
169: {
170:   DM_DA *dd = (DM_DA *)da->data;

172:   /* save the given dfill and ofill information */
173:   DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill);
174:   DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill);

176:   /* count nonzeros in ofill columns */
177:   DMDASetBlockFills_Private2(dd);
178:   return 0;
179: }

181: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
182: {
183:   PetscInt       dim, m, n, p, nc;
184:   DMBoundaryType bx, by, bz;
185:   MPI_Comm       comm;
186:   PetscMPIInt    size;
187:   PetscBool      isBAIJ;
188:   DM_DA         *dd = (DM_DA *)da->data;

190:   /*
191:                                   m
192:           ------------------------------------------------------
193:          |                                                     |
194:          |                                                     |
195:          |               ----------------------                |
196:          |               |                    |                |
197:       n  |           yn  |                    |                |
198:          |               |                    |                |
199:          |               .---------------------                |
200:          |             (xs,ys)     xn                          |
201:          |            .                                        |
202:          |         (gxs,gys)                                   |
203:          |                                                     |
204:           -----------------------------------------------------
205:   */

207:   /*
208:          nc - number of components per grid point
209:          col - number of colors needed in one direction for single component problem

211:   */
212:   DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL);

214:   PetscObjectGetComm((PetscObject)da, &comm);
215:   MPI_Comm_size(comm, &size);
216:   if (ctype == IS_COLORING_LOCAL) {
217:     if (size == 1) {
218:       ctype = IS_COLORING_GLOBAL;
219:     } else if (dim > 1) {
220:       if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
221:         SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
222:       }
223:     }
224:   }

226:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
227:      matrices is for the blocks, not the individual matrix elements  */
228:   PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ);
229:   if (!isBAIJ) PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ);
230:   if (!isBAIJ) PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ);
231:   if (isBAIJ) {
232:     dd->w  = 1;
233:     dd->xs = dd->xs / nc;
234:     dd->xe = dd->xe / nc;
235:     dd->Xs = dd->Xs / nc;
236:     dd->Xe = dd->Xe / nc;
237:   }

239:   /*
240:      We do not provide a getcoloring function in the DMDA operations because
241:    the basic DMDA does not know about matrices. We think of DMDA as being
242:    more low-level then matrices.
243:   */
244:   if (dim == 1) DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring);
245:   else if (dim == 2) DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring);
246:   else if (dim == 3) DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring);
247:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
248:   if (isBAIJ) {
249:     dd->w  = nc;
250:     dd->xs = dd->xs * nc;
251:     dd->xe = dd->xe * nc;
252:     dd->Xs = dd->Xs * nc;
253:     dd->Xe = dd->Xe * nc;
254:   }
255:   return 0;
256: }

258: /* ---------------------------------------------------------------------------------*/

260: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
261: {
262:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
263:   PetscInt         ncolors;
264:   MPI_Comm         comm;
265:   DMBoundaryType   bx, by;
266:   DMDAStencilType  st;
267:   ISColoringValue *colors;
268:   DM_DA           *dd = (DM_DA *)da->data;

270:   /*
271:          nc - number of components per grid point
272:          col - number of colors needed in one direction for single component problem

274:   */
275:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
276:   col = 2 * s + 1;
277:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
278:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
279:   PetscObjectGetComm((PetscObject)da, &comm);

281:   /* special case as taught to us by Paul Hovland */
282:   if (st == DMDA_STENCIL_STAR && s == 1) {
283:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring);
284:   } else {
285:     if (ctype == IS_COLORING_GLOBAL) {
286:       if (!dd->localcoloring) {
287:         PetscMalloc1(nc * nx * ny, &colors);
288:         ii = 0;
289:         for (j = ys; j < ys + ny; j++) {
290:           for (i = xs; i < xs + nx; i++) {
291:             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
292:           }
293:         }
294:         ncolors = nc + nc * (col - 1 + col * (col - 1));
295:         ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
296:       }
297:       *coloring = dd->localcoloring;
298:     } else if (ctype == IS_COLORING_LOCAL) {
299:       if (!dd->ghostedcoloring) {
300:         PetscMalloc1(nc * gnx * gny, &colors);
301:         ii = 0;
302:         for (j = gys; j < gys + gny; j++) {
303:           for (i = gxs; i < gxs + gnx; i++) {
304:             for (k = 0; k < nc; k++) {
305:               /* the complicated stuff is to handle periodic boundaries */
306:               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
307:             }
308:           }
309:         }
310:         ncolors = nc + nc * (col - 1 + col * (col - 1));
311:         ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
312:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

314:         ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
315:       }
316:       *coloring = dd->ghostedcoloring;
317:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318:   }
319:   ISColoringReference(*coloring);
320:   return 0;
321: }

323: /* ---------------------------------------------------------------------------------*/

325: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
326: {
327:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
328:   PetscInt         ncolors;
329:   MPI_Comm         comm;
330:   DMBoundaryType   bx, by, bz;
331:   DMDAStencilType  st;
332:   ISColoringValue *colors;
333:   DM_DA           *dd = (DM_DA *)da->data;

335:   /*
336:          nc - number of components per grid point
337:          col - number of colors needed in one direction for single component problem

339:   */
340:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
341:   col = 2 * s + 1;
342:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
343:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
344:   PetscObjectGetComm((PetscObject)da, &comm);

346:   /* create the coloring */
347:   if (ctype == IS_COLORING_GLOBAL) {
348:     if (!dd->localcoloring) {
349:       PetscMalloc1(nc * nx * ny * nz, &colors);
350:       ii = 0;
351:       for (k = zs; k < zs + nz; k++) {
352:         for (j = ys; j < ys + ny; j++) {
353:           for (i = xs; i < xs + nx; i++) {
354:             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
355:           }
356:         }
357:       }
358:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
359:       ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring);
360:     }
361:     *coloring = dd->localcoloring;
362:   } else if (ctype == IS_COLORING_LOCAL) {
363:     if (!dd->ghostedcoloring) {
364:       PetscMalloc1(nc * gnx * gny * gnz, &colors);
365:       ii = 0;
366:       for (k = gzs; k < gzs + gnz; k++) {
367:         for (j = gys; j < gys + gny; j++) {
368:           for (i = gxs; i < gxs + gnx; i++) {
369:             for (l = 0; l < nc; l++) {
370:               /* the complicated stuff is to handle periodic boundaries */
371:               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
372:             }
373:           }
374:         }
375:       }
376:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
377:       ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
378:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
379:     }
380:     *coloring = dd->ghostedcoloring;
381:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
382:   ISColoringReference(*coloring);
383:   return 0;
384: }

386: /* ---------------------------------------------------------------------------------*/

388: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389: {
390:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391:   PetscInt         ncolors;
392:   MPI_Comm         comm;
393:   DMBoundaryType   bx;
394:   ISColoringValue *colors;
395:   DM_DA           *dd = (DM_DA *)da->data;

397:   /*
398:          nc - number of components per grid point
399:          col - number of colors needed in one direction for single component problem

401:   */
402:   DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
403:   col = 2 * s + 1;
404:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
405:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
406:   PetscObjectGetComm((PetscObject)da, &comm);

408:   /* create the coloring */
409:   if (ctype == IS_COLORING_GLOBAL) {
410:     if (!dd->localcoloring) {
411:       PetscMalloc1(nc * nx, &colors);
412:       if (dd->ofillcols) {
413:         PetscInt tc = 0;
414:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
415:         i1 = 0;
416:         for (i = xs; i < xs + nx; i++) {
417:           for (l = 0; l < nc; l++) {
418:             if (dd->ofillcols[l] && (i % col)) {
419:               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
420:             } else {
421:               colors[i1++] = l;
422:             }
423:           }
424:         }
425:         ncolors = nc + 2 * s * tc;
426:       } else {
427:         i1 = 0;
428:         for (i = xs; i < xs + nx; i++) {
429:           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
430:         }
431:         ncolors = nc + nc * (col - 1);
432:       }
433:       ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring);
434:     }
435:     *coloring = dd->localcoloring;
436:   } else if (ctype == IS_COLORING_LOCAL) {
437:     if (!dd->ghostedcoloring) {
438:       PetscMalloc1(nc * gnx, &colors);
439:       i1 = 0;
440:       for (i = gxs; i < gxs + gnx; i++) {
441:         for (l = 0; l < nc; l++) {
442:           /* the complicated stuff is to handle periodic boundaries */
443:           colors[i1++] = l + nc * (SetInRange(i, m) % col);
444:         }
445:       }
446:       ncolors = nc + nc * (col - 1);
447:       ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
448:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
449:     }
450:     *coloring = dd->ghostedcoloring;
451:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
452:   ISColoringReference(*coloring);
453:   return 0;
454: }

456: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
457: {
458:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
459:   PetscInt         ncolors;
460:   MPI_Comm         comm;
461:   DMBoundaryType   bx, by;
462:   ISColoringValue *colors;
463:   DM_DA           *dd = (DM_DA *)da->data;

465:   /*
466:          nc - number of components per grid point
467:          col - number of colors needed in one direction for single component problem

469:   */
470:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL);
471:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
472:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
473:   PetscObjectGetComm((PetscObject)da, &comm);
474:   /* create the coloring */
475:   if (ctype == IS_COLORING_GLOBAL) {
476:     if (!dd->localcoloring) {
477:       PetscMalloc1(nc * nx * ny, &colors);
478:       ii = 0;
479:       for (j = ys; j < ys + ny; j++) {
480:         for (i = xs; i < xs + nx; i++) {
481:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
482:         }
483:       }
484:       ncolors = 5 * nc;
485:       ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
486:     }
487:     *coloring = dd->localcoloring;
488:   } else if (ctype == IS_COLORING_LOCAL) {
489:     if (!dd->ghostedcoloring) {
490:       PetscMalloc1(nc * gnx * gny, &colors);
491:       ii = 0;
492:       for (j = gys; j < gys + gny; j++) {
493:         for (i = gxs; i < gxs + gnx; i++) {
494:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
495:         }
496:       }
497:       ncolors = 5 * nc;
498:       ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
499:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
500:     }
501:     *coloring = dd->ghostedcoloring;
502:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
503:   return 0;
504: }

506: /* =========================================================================== */
507: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

522: /*@C
523:    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

525:    Logically Collective on mat

527:    Input Parameters:
528: +  mat - the matrix
529: -  da - the da

531:    Level: intermediate

533: @*/
534: PetscErrorCode MatSetupDM(Mat mat, DM da)
535: {
538:   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
539:   return 0;
540: }

542: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
543: {
544:   DM                da;
545:   const char       *prefix;
546:   Mat               Anatural;
547:   AO                ao;
548:   PetscInt          rstart, rend, *petsc, i;
549:   IS                is;
550:   MPI_Comm          comm;
551:   PetscViewerFormat format;

553:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
554:   PetscViewerGetFormat(viewer, &format);
555:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;

557:   PetscObjectGetComm((PetscObject)A, &comm);
558:   MatGetDM(A, &da);

561:   DMDAGetAO(da, &ao);
562:   MatGetOwnershipRange(A, &rstart, &rend);
563:   PetscMalloc1(rend - rstart, &petsc);
564:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
565:   AOApplicationToPetsc(ao, rend - rstart, petsc);
566:   ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is);

568:   /* call viewer on natural ordering */
569:   MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural);
570:   ISDestroy(&is);
571:   PetscObjectGetOptionsPrefix((PetscObject)A, &prefix);
572:   PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix);
573:   PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name);
574:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
575:   MatView(Anatural, viewer);
576:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
577:   MatDestroy(&Anatural);
578:   return 0;
579: }

581: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
582: {
583:   DM       da;
584:   Mat      Anatural, Aapp;
585:   AO       ao;
586:   PetscInt rstart, rend, *app, i, m, n, M, N;
587:   IS       is;
588:   MPI_Comm comm;

590:   PetscObjectGetComm((PetscObject)A, &comm);
591:   MatGetDM(A, &da);

594:   /* Load the matrix in natural ordering */
595:   MatCreate(PetscObjectComm((PetscObject)A), &Anatural);
596:   MatSetType(Anatural, ((PetscObject)A)->type_name);
597:   MatGetSize(A, &M, &N);
598:   MatGetLocalSize(A, &m, &n);
599:   MatSetSizes(Anatural, m, n, M, N);
600:   MatLoad(Anatural, viewer);

602:   /* Map natural ordering to application ordering and create IS */
603:   DMDAGetAO(da, &ao);
604:   MatGetOwnershipRange(Anatural, &rstart, &rend);
605:   PetscMalloc1(rend - rstart, &app);
606:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
607:   AOPetscToApplication(ao, rend - rstart, app);
608:   ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is);

610:   /* Do permutation and replace header */
611:   MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp);
612:   MatHeaderReplace(A, &Aapp);
613:   ISDestroy(&is);
614:   MatDestroy(&Anatural);
615:   return 0;
616: }

618: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
619: {
620:   PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
621:   Mat      A;
622:   MPI_Comm comm;
623:   MatType  Atype;
624:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
625:   MatType     mtype;
626:   PetscMPIInt size;
627:   DM_DA      *dd = (DM_DA *)da->data;

629:   MatInitializePackage();
630:   mtype = da->mattype;

632:   /*
633:                                   m
634:           ------------------------------------------------------
635:          |                                                     |
636:          |                                                     |
637:          |               ----------------------                |
638:          |               |                    |                |
639:       n  |           ny  |                    |                |
640:          |               |                    |                |
641:          |               .---------------------                |
642:          |             (xs,ys)     nx                          |
643:          |            .                                        |
644:          |         (gxs,gys)                                   |
645:          |                                                     |
646:           -----------------------------------------------------
647:   */

649:   /*
650:          nc - number of components per grid point
651:          col - number of colors needed in one direction for single component problem

653:   */
654:   M   = dd->M;
655:   N   = dd->N;
656:   P   = dd->P;
657:   dim = da->dim;
658:   dof = dd->w;
659:   /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
660:   DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz);
661:   PetscObjectGetComm((PetscObject)da, &comm);
662:   MatCreate(comm, &A);
663:   MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P);
664:   MatSetType(A, mtype);
665:   MatSetFromOptions(A);
666:   if (dof * nx * ny * nz < da->bind_below) {
667:     MatSetBindingPropagates(A, PETSC_TRUE);
668:     MatBindToCPU(A, PETSC_TRUE);
669:   }
670:   MatSetDM(A, da);
671:   if (da->structure_only) MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE);
672:   MatGetType(A, &Atype);
673:   /*
674:      We do not provide a getmatrix function in the DMDA operations because
675:    the basic DMDA does not know about matrices. We think of DMDA as being more
676:    more low-level than matrices. This is kind of cheating but, cause sometimes
677:    we think of DMDA has higher level than matrices.

679:      We could switch based on Atype (or mtype), but we do not since the
680:    specialized setting routines depend only on the particular preallocation
681:    details of the matrix, not the type itself.
682:   */
683:   PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij);
684:   if (!aij) PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij);
685:   if (!aij) {
686:     PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij);
687:     if (!baij) PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij);
688:     if (!baij) {
689:       PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij);
690:       if (!sbaij) PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij);
691:       if (!sbaij) {
692:         PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell);
693:         if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell);
694:       }
695:       if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is);
696:     }
697:   }
698:   if (aij) {
699:     if (dim == 1) {
700:       if (dd->ofill) {
701:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A);
702:       } else {
703:         DMBoundaryType bx;
704:         PetscMPIInt    size;
705:         DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL);
706:         MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);
707:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
708:           DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A);
709:         } else {
710:           DMCreateMatrix_DA_1d_MPIAIJ(da, A);
711:         }
712:       }
713:     } else if (dim == 2) {
714:       if (dd->ofill) {
715:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A);
716:       } else {
717:         DMCreateMatrix_DA_2d_MPIAIJ(da, A);
718:       }
719:     } else if (dim == 3) {
720:       if (dd->ofill) {
721:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A);
722:       } else {
723:         DMCreateMatrix_DA_3d_MPIAIJ(da, A);
724:       }
725:     }
726:   } else if (baij) {
727:     if (dim == 2) {
728:       DMCreateMatrix_DA_2d_MPIBAIJ(da, A);
729:     } else if (dim == 3) {
730:       DMCreateMatrix_DA_3d_MPIBAIJ(da, A);
731:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
732:   } else if (sbaij) {
733:     if (dim == 2) {
734:       DMCreateMatrix_DA_2d_MPISBAIJ(da, A);
735:     } else if (dim == 3) {
736:       DMCreateMatrix_DA_3d_MPISBAIJ(da, A);
737:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
738:   } else if (sell) {
739:     if (dim == 2) {
740:       DMCreateMatrix_DA_2d_MPISELL(da, A);
741:     } else if (dim == 3) {
742:       DMCreateMatrix_DA_3d_MPISELL(da, A);
743:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
744:   } else if (is) {
745:     DMCreateMatrix_DA_IS(da, A);
746:   } else {
747:     ISLocalToGlobalMapping ltog;

749:     MatSetBlockSize(A, dof);
750:     MatSetUp(A);
751:     DMGetLocalToGlobalMapping(da, &ltog);
752:     MatSetLocalToGlobalMapping(A, ltog, ltog);
753:   }
754:   DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]);
755:   MatSetStencil(A, dim, dims, starts, dof);
756:   MatSetDM(A, da);
757:   MPI_Comm_size(comm, &size);
758:   if (size > 1) {
759:     /* change viewer to display matrix in natural ordering */
760:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
761:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
762:   }
763:   *J = A;
764:   return 0;
765: }

767: /* ---------------------------------------------------------------------------------*/
768: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

770: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
771: {
772:   DM_DA                 *da = (DM_DA *)dm->data;
773:   Mat                    lJ, P;
774:   ISLocalToGlobalMapping ltog;
775:   IS                     is;
776:   PetscBT                bt;
777:   const PetscInt        *e_loc, *idx;
778:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

780:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
781:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
782:   dof = da->w;
783:   MatSetBlockSize(J, dof);
784:   DMGetLocalToGlobalMapping(dm, &ltog);

786:   /* flag local elements indices in local DMDA numbering */
787:   ISLocalToGlobalMappingGetSize(ltog, &nv);
788:   PetscBTCreate(nv / dof, &bt);
789:   DMDAGetElements(dm, &nel, &nen, &e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
790:   for (i = 0; i < nel * nen; i++) PetscBTSet(bt, e_loc[i]);

792:   /* filter out (set to -1) the global indices not used by the local elements */
793:   PetscMalloc1(nv / dof, &gidx);
794:   ISLocalToGlobalMappingGetBlockIndices(ltog, &idx);
795:   PetscArraycpy(gidx, idx, nv / dof);
796:   ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx);
797:   for (i = 0; i < nv / dof; i++)
798:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
799:   PetscBTDestroy(&bt);
800:   ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is);
801:   ISLocalToGlobalMappingCreateIS(is, &ltog);
802:   MatSetLocalToGlobalMapping(J, ltog, ltog);
803:   ISLocalToGlobalMappingDestroy(&ltog);
804:   ISDestroy(&is);

806:   /* Preallocation */
807:   if (dm->prealloc_skip) {
808:     MatSetUp(J);
809:   } else {
810:     MatISGetLocalMat(J, &lJ);
811:     MatGetLocalToGlobalMapping(lJ, &ltog, NULL);
812:     MatCreate(PetscObjectComm((PetscObject)lJ), &P);
813:     MatSetType(P, MATPREALLOCATOR);
814:     MatSetLocalToGlobalMapping(P, ltog, ltog);
815:     MatGetSize(lJ, &N, NULL);
816:     MatGetLocalSize(lJ, &n, NULL);
817:     MatSetSizes(P, n, n, N, N);
818:     MatSetBlockSize(P, dof);
819:     MatSetUp(P);
820:     for (i = 0; i < nel; i++) MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES);
821:     MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ);
822:     MatISRestoreLocalMat(J, &lJ);
823:     DMDARestoreElements(dm, &nel, &nen, &e_loc);
824:     MatDestroy(&P);

826:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
827:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
828:   }
829:   return 0;
830: }

832: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
833: {
834:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
835:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
836:   MPI_Comm               comm;
837:   PetscScalar           *values;
838:   DMBoundaryType         bx, by;
839:   ISLocalToGlobalMapping ltog;
840:   DMDAStencilType        st;

842:   /*
843:          nc - number of components per grid point
844:          col - number of colors needed in one direction for single component problem

846:   */
847:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
848:   col = 2 * s + 1;
849:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
850:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
851:   PetscObjectGetComm((PetscObject)da, &comm);

853:   PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
854:   DMGetLocalToGlobalMapping(da, &ltog);

856:   MatSetBlockSize(J, nc);
857:   /* determine the matrix preallocation information */
858:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
859:   for (i = xs; i < xs + nx; i++) {
860:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
861:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

863:     for (j = ys; j < ys + ny; j++) {
864:       slot = i - gxs + gnx * (j - gys);

866:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
867:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

869:       cnt = 0;
870:       for (k = 0; k < nc; k++) {
871:         for (l = lstart; l < lend + 1; l++) {
872:           for (p = pstart; p < pend + 1; p++) {
873:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
874:               cols[cnt++] = k + nc * (slot + gnx * l + p);
875:             }
876:           }
877:         }
878:         rows[k] = k + nc * (slot);
879:       }
880:       MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
881:     }
882:   }
883:   MatSetBlockSize(J, nc);
884:   MatSeqSELLSetPreallocation(J, 0, dnz);
885:   MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
886:   MatPreallocateEnd(dnz, onz);

888:   MatSetLocalToGlobalMapping(J, ltog, ltog);

890:   /*
891:     For each node in the grid: we get the neighbors in the local (on processor ordering
892:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
893:     PETSc ordering.
894:   */
895:   if (!da->prealloc_only) {
896:     PetscCalloc1(col * col * nc * nc, &values);
897:     for (i = xs; i < xs + nx; i++) {
898:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
899:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

901:       for (j = ys; j < ys + ny; j++) {
902:         slot = i - gxs + gnx * (j - gys);

904:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
905:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

907:         cnt = 0;
908:         for (k = 0; k < nc; k++) {
909:           for (l = lstart; l < lend + 1; l++) {
910:             for (p = pstart; p < pend + 1; p++) {
911:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
912:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
913:               }
914:             }
915:           }
916:           rows[k] = k + nc * (slot);
917:         }
918:         MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
919:       }
920:     }
921:     PetscFree(values);
922:     /* do not copy values to GPU since they are all zero and not yet needed there */
923:     MatBindToCPU(J, PETSC_TRUE);
924:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
925:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
926:     MatBindToCPU(J, PETSC_FALSE);
927:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
928:   }
929:   PetscFree2(rows, cols);
930:   return 0;
931: }

933: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
934: {
935:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
936:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
937:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
938:   MPI_Comm               comm;
939:   PetscScalar           *values;
940:   DMBoundaryType         bx, by, bz;
941:   ISLocalToGlobalMapping ltog;
942:   DMDAStencilType        st;

944:   /*
945:          nc - number of components per grid point
946:          col - number of colors needed in one direction for single component problem

948:   */
949:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
950:   col = 2 * s + 1;
951:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
952:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
953:   PetscObjectGetComm((PetscObject)da, &comm);

955:   PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
956:   DMGetLocalToGlobalMapping(da, &ltog);

958:   MatSetBlockSize(J, nc);
959:   /* determine the matrix preallocation information */
960:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
961:   for (i = xs; i < xs + nx; i++) {
962:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
963:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
964:     for (j = ys; j < ys + ny; j++) {
965:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
966:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
967:       for (k = zs; k < zs + nz; k++) {
968:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
969:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

971:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

973:         cnt = 0;
974:         for (l = 0; l < nc; l++) {
975:           for (ii = istart; ii < iend + 1; ii++) {
976:             for (jj = jstart; jj < jend + 1; jj++) {
977:               for (kk = kstart; kk < kend + 1; kk++) {
978:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
979:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
980:                 }
981:               }
982:             }
983:           }
984:           rows[l] = l + nc * (slot);
985:         }
986:         MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
987:       }
988:     }
989:   }
990:   MatSetBlockSize(J, nc);
991:   MatSeqSELLSetPreallocation(J, 0, dnz);
992:   MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
993:   MatPreallocateEnd(dnz, onz);
994:   MatSetLocalToGlobalMapping(J, ltog, ltog);

996:   /*
997:     For each node in the grid: we get the neighbors in the local (on processor ordering
998:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
999:     PETSc ordering.
1000:   */
1001:   if (!da->prealloc_only) {
1002:     PetscCalloc1(col * col * col * nc * nc * nc, &values);
1003:     for (i = xs; i < xs + nx; i++) {
1004:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1005:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1006:       for (j = ys; j < ys + ny; j++) {
1007:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1008:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1009:         for (k = zs; k < zs + nz; k++) {
1010:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1011:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1013:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1015:           cnt = 0;
1016:           for (l = 0; l < nc; l++) {
1017:             for (ii = istart; ii < iend + 1; ii++) {
1018:               for (jj = jstart; jj < jend + 1; jj++) {
1019:                 for (kk = kstart; kk < kend + 1; kk++) {
1020:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1021:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1022:                   }
1023:                 }
1024:               }
1025:             }
1026:             rows[l] = l + nc * (slot);
1027:           }
1028:           MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
1029:         }
1030:       }
1031:     }
1032:     PetscFree(values);
1033:     /* do not copy values to GPU since they are all zero and not yet needed there */
1034:     MatBindToCPU(J, PETSC_TRUE);
1035:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1036:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1037:     MatBindToCPU(J, PETSC_FALSE);
1038:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1039:   }
1040:   PetscFree2(rows, cols);
1041:   return 0;
1042: }

1044: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1045: {
1046:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1047:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1048:   MPI_Comm               comm;
1049:   DMBoundaryType         bx, by;
1050:   ISLocalToGlobalMapping ltog, mltog;
1051:   DMDAStencilType        st;
1052:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1054:   /*
1055:          nc - number of components per grid point
1056:          col - number of colors needed in one direction for single component problem

1058:   */
1059:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1060:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1061:   col = 2 * s + 1;
1062:   /*
1063:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1064:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1065:   */
1066:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1067:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1068:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1069:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1070:   PetscObjectGetComm((PetscObject)da, &comm);

1072:   PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
1073:   DMGetLocalToGlobalMapping(da, &ltog);

1075:   MatSetBlockSize(J, nc);
1076:   /* determine the matrix preallocation information */
1077:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1078:   for (i = xs; i < xs + nx; i++) {
1079:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1080:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1082:     for (j = ys; j < ys + ny; j++) {
1083:       slot = i - gxs + gnx * (j - gys);

1085:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1086:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1088:       cnt = 0;
1089:       for (k = 0; k < nc; k++) {
1090:         for (l = lstart; l < lend + 1; l++) {
1091:           for (p = pstart; p < pend + 1; p++) {
1092:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1093:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1094:             }
1095:           }
1096:         }
1097:         rows[k] = k + nc * (slot);
1098:       }
1099:       if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1100:       else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1101:     }
1102:   }
1103:   MatSetBlockSize(J, nc);
1104:   MatSeqAIJSetPreallocation(J, 0, dnz);
1105:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1106:   MatPreallocateEnd(dnz, onz);
1107:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1108:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1110:   /*
1111:     For each node in the grid: we get the neighbors in the local (on processor ordering
1112:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1113:     PETSc ordering.
1114:   */
1115:   if (!da->prealloc_only) {
1116:     for (i = xs; i < xs + nx; i++) {
1117:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1118:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1120:       for (j = ys; j < ys + ny; j++) {
1121:         slot = i - gxs + gnx * (j - gys);

1123:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1124:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1126:         cnt = 0;
1127:         for (l = lstart; l < lend + 1; l++) {
1128:           for (p = pstart; p < pend + 1; p++) {
1129:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1130:               cols[cnt++] = nc * (slot + gnx * l + p);
1131:               for (k = 1; k < nc; k++) {
1132:                 cols[cnt] = 1 + cols[cnt - 1];
1133:                 cnt++;
1134:               }
1135:             }
1136:           }
1137:         }
1138:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1139:         MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1140:       }
1141:     }
1142:     /* do not copy values to GPU since they are all zero and not yet needed there */
1143:     MatBoundToCPU(J, &alreadyboundtocpu);
1144:     MatBindToCPU(J, PETSC_TRUE);
1145:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1146:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1147:     if (!alreadyboundtocpu) MatBindToCPU(J, PETSC_FALSE);
1148:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1149:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1150:   }
1151:   PetscFree2(rows, cols);
1152:   return 0;
1153: }

1155: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1156: {
1157:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1158:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1159:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1160:   DM_DA                 *dd = (DM_DA *)da->data;
1161:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1162:   MPI_Comm               comm;
1163:   DMBoundaryType         bx, by;
1164:   ISLocalToGlobalMapping ltog;
1165:   DMDAStencilType        st;
1166:   PetscBool              removedups = PETSC_FALSE;

1168:   /*
1169:          nc - number of components per grid point
1170:          col - number of colors needed in one direction for single component problem

1172:   */
1173:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1174:   col = 2 * s + 1;
1175:   /*
1176:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1177:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1178:   */
1179:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1180:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1181:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1182:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1183:   PetscObjectGetComm((PetscObject)da, &comm);

1185:   PetscMalloc1(col * col * nc, &cols);
1186:   DMGetLocalToGlobalMapping(da, &ltog);

1188:   MatSetBlockSize(J, nc);
1189:   /* determine the matrix preallocation information */
1190:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1191:   for (i = xs; i < xs + nx; i++) {
1192:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1193:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1195:     for (j = ys; j < ys + ny; j++) {
1196:       slot = i - gxs + gnx * (j - gys);

1198:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1199:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1201:       for (k = 0; k < nc; k++) {
1202:         cnt = 0;
1203:         for (l = lstart; l < lend + 1; l++) {
1204:           for (p = pstart; p < pend + 1; p++) {
1205:             if (l || p) {
1206:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1207:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1208:               }
1209:             } else {
1210:               if (dfill) {
1211:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1212:               } else {
1213:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1214:               }
1215:             }
1216:           }
1217:         }
1218:         row    = k + nc * (slot);
1219:         maxcnt = PetscMax(maxcnt, cnt);
1220:         if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1221:         else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1222:       }
1223:     }
1224:   }
1225:   MatSeqAIJSetPreallocation(J, 0, dnz);
1226:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1227:   MatPreallocateEnd(dnz, onz);
1228:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1230:   /*
1231:     For each node in the grid: we get the neighbors in the local (on processor ordering
1232:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1233:     PETSc ordering.
1234:   */
1235:   if (!da->prealloc_only) {
1236:     for (i = xs; i < xs + nx; i++) {
1237:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1238:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1240:       for (j = ys; j < ys + ny; j++) {
1241:         slot = i - gxs + gnx * (j - gys);

1243:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1244:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1246:         for (k = 0; k < nc; k++) {
1247:           cnt = 0;
1248:           for (l = lstart; l < lend + 1; l++) {
1249:             for (p = pstart; p < pend + 1; p++) {
1250:               if (l || p) {
1251:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1252:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1253:                 }
1254:               } else {
1255:                 if (dfill) {
1256:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1257:                 } else {
1258:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1259:                 }
1260:               }
1261:             }
1262:           }
1263:           row = k + nc * (slot);
1264:           MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1265:         }
1266:       }
1267:     }
1268:     /* do not copy values to GPU since they are all zero and not yet needed there */
1269:     MatBindToCPU(J, PETSC_TRUE);
1270:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1271:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1272:     MatBindToCPU(J, PETSC_FALSE);
1273:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1274:   }
1275:   PetscFree(cols);
1276:   return 0;
1277: }

1279: /* ---------------------------------------------------------------------------------*/

1281: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1282: {
1283:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1284:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1285:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1286:   MPI_Comm               comm;
1287:   DMBoundaryType         bx, by, bz;
1288:   ISLocalToGlobalMapping ltog, mltog;
1289:   DMDAStencilType        st;
1290:   PetscBool              removedups = PETSC_FALSE;

1292:   /*
1293:          nc - number of components per grid point
1294:          col - number of colors needed in one direction for single component problem

1296:   */
1297:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
1298:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1299:   col = 2 * s + 1;

1301:   /*
1302:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1303:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1304:   */
1305:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1306:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1307:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1309:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1310:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1311:   PetscObjectGetComm((PetscObject)da, &comm);

1313:   PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
1314:   DMGetLocalToGlobalMapping(da, &ltog);

1316:   MatSetBlockSize(J, nc);
1317:   /* determine the matrix preallocation information */
1318:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1319:   for (i = xs; i < xs + nx; i++) {
1320:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1321:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1322:     for (j = ys; j < ys + ny; j++) {
1323:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1324:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1325:       for (k = zs; k < zs + nz; k++) {
1326:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1327:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1329:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1331:         cnt = 0;
1332:         for (l = 0; l < nc; l++) {
1333:           for (ii = istart; ii < iend + 1; ii++) {
1334:             for (jj = jstart; jj < jend + 1; jj++) {
1335:               for (kk = kstart; kk < kend + 1; kk++) {
1336:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1337:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1338:                 }
1339:               }
1340:             }
1341:           }
1342:           rows[l] = l + nc * (slot);
1343:         }
1344:         if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1345:         else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1346:       }
1347:     }
1348:   }
1349:   MatSetBlockSize(J, nc);
1350:   MatSeqAIJSetPreallocation(J, 0, dnz);
1351:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1352:   MatPreallocateEnd(dnz, onz);
1353:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1354:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1356:   /*
1357:     For each node in the grid: we get the neighbors in the local (on processor ordering
1358:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1359:     PETSc ordering.
1360:   */
1361:   if (!da->prealloc_only) {
1362:     for (i = xs; i < xs + nx; i++) {
1363:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1364:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1365:       for (j = ys; j < ys + ny; j++) {
1366:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1367:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1368:         for (k = zs; k < zs + nz; k++) {
1369:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1370:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1372:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1374:           cnt = 0;
1375:           for (kk = kstart; kk < kend + 1; kk++) {
1376:             for (jj = jstart; jj < jend + 1; jj++) {
1377:               for (ii = istart; ii < iend + 1; ii++) {
1378:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1379:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1380:                   for (l = 1; l < nc; l++) {
1381:                     cols[cnt] = 1 + cols[cnt - 1];
1382:                     cnt++;
1383:                   }
1384:                 }
1385:               }
1386:             }
1387:           }
1388:           rows[0] = nc * (slot);
1389:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1390:           MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1391:         }
1392:       }
1393:     }
1394:     /* do not copy values to GPU since they are all zero and not yet needed there */
1395:     MatBindToCPU(J, PETSC_TRUE);
1396:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1397:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1398:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1399:     MatBindToCPU(J, PETSC_FALSE);
1400:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1401:   }
1402:   PetscFree2(rows, cols);
1403:   return 0;
1404: }

1406: /* ---------------------------------------------------------------------------------*/

1408: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1409: {
1410:   DM_DA                 *dd = (DM_DA *)da->data;
1411:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1412:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1413:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1414:   DMBoundaryType         bx;
1415:   ISLocalToGlobalMapping ltog;
1416:   PetscMPIInt            rank, size;

1418:   MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank);
1419:   MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);

1421:   /*
1422:          nc - number of components per grid point

1424:   */
1425:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1427:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1428:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1430:   MatSetBlockSize(J, nc);
1431:   PetscCalloc2(nx * nc, &cols, nx * nc, &ocols);

1433:   /*
1434:         note should be smaller for first and last process with no periodic
1435:         does not handle dfill
1436:   */
1437:   cnt = 0;
1438:   /* coupling with process to the left */
1439:   for (i = 0; i < s; i++) {
1440:     for (j = 0; j < nc; j++) {
1441:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1442:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1443:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1444:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1445:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1446:       }
1447:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1448:       cnt++;
1449:     }
1450:   }
1451:   for (i = s; i < nx - s; i++) {
1452:     for (j = 0; j < nc; j++) {
1453:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1454:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455:       cnt++;
1456:     }
1457:   }
1458:   /* coupling with process to the right */
1459:   for (i = nx - s; i < nx; i++) {
1460:     for (j = 0; j < nc; j++) {
1461:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1462:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1463:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1464:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1465:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1466:       }
1467:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1468:       cnt++;
1469:     }
1470:   }

1472:   MatSeqAIJSetPreallocation(J, 0, cols);
1473:   MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols);
1474:   PetscFree2(cols, ocols);

1476:   DMGetLocalToGlobalMapping(da, &ltog);
1477:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1479:   /*
1480:     For each node in the grid: we get the neighbors in the local (on processor ordering
1481:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1482:     PETSc ordering.
1483:   */
1484:   if (!da->prealloc_only) {
1485:     PetscMalloc1(maxcnt, &cols);
1486:     row = xs * nc;
1487:     /* coupling with process to the left */
1488:     for (i = xs; i < xs + s; i++) {
1489:       for (j = 0; j < nc; j++) {
1490:         cnt = 0;
1491:         if (rank) {
1492:           for (l = 0; l < s; l++) {
1493:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1494:           }
1495:         }
1496:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1497:           for (l = 0; l < s; l++) {
1498:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1499:           }
1500:         }
1501:         if (dfill) {
1502:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1503:         } else {
1504:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1505:         }
1506:         for (l = 0; l < s; l++) {
1507:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1508:         }
1509:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1510:         row++;
1511:       }
1512:     }
1513:     for (i = xs + s; i < xs + nx - s; i++) {
1514:       for (j = 0; j < nc; j++) {
1515:         cnt = 0;
1516:         for (l = 0; l < s; l++) {
1517:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1518:         }
1519:         if (dfill) {
1520:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1521:         } else {
1522:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1523:         }
1524:         for (l = 0; l < s; l++) {
1525:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1526:         }
1527:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1528:         row++;
1529:       }
1530:     }
1531:     /* coupling with process to the right */
1532:     for (i = xs + nx - s; i < xs + nx; i++) {
1533:       for (j = 0; j < nc; j++) {
1534:         cnt = 0;
1535:         for (l = 0; l < s; l++) {
1536:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1537:         }
1538:         if (dfill) {
1539:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1540:         } else {
1541:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1542:         }
1543:         if (rank < size - 1) {
1544:           for (l = 0; l < s; l++) {
1545:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1546:           }
1547:         }
1548:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1549:           for (l = 0; l < s; l++) {
1550:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1551:           }
1552:         }
1553:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1554:         row++;
1555:       }
1556:     }
1557:     PetscFree(cols);
1558:     /* do not copy values to GPU since they are all zero and not yet needed there */
1559:     MatBindToCPU(J, PETSC_TRUE);
1560:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1561:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1562:     MatBindToCPU(J, PETSC_FALSE);
1563:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1564:   }
1565:   return 0;
1566: }

1568: /* ---------------------------------------------------------------------------------*/

1570: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1571: {
1572:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1573:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1574:   PetscInt               istart, iend;
1575:   DMBoundaryType         bx;
1576:   ISLocalToGlobalMapping ltog, mltog;

1578:   /*
1579:          nc - number of components per grid point
1580:          col - number of colors needed in one direction for single component problem

1582:   */
1583:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1584:   if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1585:   col = 2 * s + 1;

1587:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1588:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1590:   MatSetBlockSize(J, nc);
1591:   MatSeqAIJSetPreallocation(J, col * nc, NULL);
1592:   MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL);

1594:   DMGetLocalToGlobalMapping(da, &ltog);
1595:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1596:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1598:   /*
1599:     For each node in the grid: we get the neighbors in the local (on processor ordering
1600:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1601:     PETSc ordering.
1602:   */
1603:   if (!da->prealloc_only) {
1604:     PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1605:     for (i = xs; i < xs + nx; i++) {
1606:       istart = PetscMax(-s, gxs - i);
1607:       iend   = PetscMin(s, gxs + gnx - i - 1);
1608:       slot   = i - gxs;

1610:       cnt = 0;
1611:       for (i1 = istart; i1 < iend + 1; i1++) {
1612:         cols[cnt++] = nc * (slot + i1);
1613:         for (l = 1; l < nc; l++) {
1614:           cols[cnt] = 1 + cols[cnt - 1];
1615:           cnt++;
1616:         }
1617:       }
1618:       rows[0] = nc * (slot);
1619:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1620:       MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1621:     }
1622:     /* do not copy values to GPU since they are all zero and not yet needed there */
1623:     MatBindToCPU(J, PETSC_TRUE);
1624:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1625:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1626:     if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1627:     MatBindToCPU(J, PETSC_FALSE);
1628:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1629:     PetscFree2(rows, cols);
1630:   }
1631:   return 0;
1632: }

1634: /* ---------------------------------------------------------------------------------*/

1636: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1637: {
1638:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1639:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1640:   PetscInt               istart, iend;
1641:   DMBoundaryType         bx;
1642:   ISLocalToGlobalMapping ltog, mltog;

1644:   /*
1645:          nc - number of components per grid point
1646:          col - number of colors needed in one direction for single component problem
1647:   */
1648:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1649:   col = 2 * s + 1;

1651:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1652:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1654:   MatSetBlockSize(J, nc);
1655:   MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc);

1657:   DMGetLocalToGlobalMapping(da, &ltog);
1658:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1659:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1661:   /*
1662:     For each node in the grid: we get the neighbors in the local (on processor ordering
1663:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1664:     PETSc ordering.
1665:   */
1666:   if (!da->prealloc_only) {
1667:     PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1668:     for (i = xs; i < xs + nx; i++) {
1669:       istart = PetscMax(-s, gxs - i);
1670:       iend   = PetscMin(s, gxs + gnx - i - 1);
1671:       slot   = i - gxs;

1673:       cnt = 0;
1674:       for (i1 = istart; i1 < iend + 1; i1++) {
1675:         cols[cnt++] = nc * (slot + i1);
1676:         for (l = 1; l < nc; l++) {
1677:           cols[cnt] = 1 + cols[cnt - 1];
1678:           cnt++;
1679:         }
1680:       }
1681:       rows[0] = nc * (slot);
1682:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1683:       MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1684:     }
1685:     /* do not copy values to GPU since they are all zero and not yet needed there */
1686:     MatBindToCPU(J, PETSC_TRUE);
1687:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1688:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1689:     if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1690:     MatBindToCPU(J, PETSC_FALSE);
1691:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1692:     PetscFree2(rows, cols);
1693:   }
1694:   MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1695:   return 0;
1696: }

1698: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1699: {
1700:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1701:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1702:   PetscInt               istart, iend, jstart, jend, ii, jj;
1703:   MPI_Comm               comm;
1704:   PetscScalar           *values;
1705:   DMBoundaryType         bx, by;
1706:   DMDAStencilType        st;
1707:   ISLocalToGlobalMapping ltog;

1709:   /*
1710:      nc - number of components per grid point
1711:      col - number of colors needed in one direction for single component problem
1712:   */
1713:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1714:   col = 2 * s + 1;

1716:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1717:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1718:   PetscObjectGetComm((PetscObject)da, &comm);

1720:   PetscMalloc1(col * col * nc * nc, &cols);

1722:   DMGetLocalToGlobalMapping(da, &ltog);

1724:   /* determine the matrix preallocation information */
1725:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1726:   for (i = xs; i < xs + nx; i++) {
1727:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1728:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1729:     for (j = ys; j < ys + ny; j++) {
1730:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1731:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1732:       slot   = i - gxs + gnx * (j - gys);

1734:       /* Find block columns in block row */
1735:       cnt = 0;
1736:       for (ii = istart; ii < iend + 1; ii++) {
1737:         for (jj = jstart; jj < jend + 1; jj++) {
1738:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1739:             cols[cnt++] = slot + ii + gnx * jj;
1740:           }
1741:         }
1742:       }
1743:       MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1744:     }
1745:   }
1746:   MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1747:   MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1748:   MatPreallocateEnd(dnz, onz);

1750:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1752:   /*
1753:     For each node in the grid: we get the neighbors in the local (on processor ordering
1754:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1755:     PETSc ordering.
1756:   */
1757:   if (!da->prealloc_only) {
1758:     PetscCalloc1(col * col * nc * nc, &values);
1759:     for (i = xs; i < xs + nx; i++) {
1760:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1761:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1762:       for (j = ys; j < ys + ny; j++) {
1763:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1764:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1765:         slot   = i - gxs + gnx * (j - gys);
1766:         cnt    = 0;
1767:         for (ii = istart; ii < iend + 1; ii++) {
1768:           for (jj = jstart; jj < jend + 1; jj++) {
1769:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1770:               cols[cnt++] = slot + ii + gnx * jj;
1771:             }
1772:           }
1773:         }
1774:         MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1775:       }
1776:     }
1777:     PetscFree(values);
1778:     /* do not copy values to GPU since they are all zero and not yet needed there */
1779:     MatBindToCPU(J, PETSC_TRUE);
1780:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1781:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1782:     MatBindToCPU(J, PETSC_FALSE);
1783:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1784:   }
1785:   PetscFree(cols);
1786:   return 0;
1787: }

1789: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1790: {
1791:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1792:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1793:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1794:   MPI_Comm               comm;
1795:   PetscScalar           *values;
1796:   DMBoundaryType         bx, by, bz;
1797:   DMDAStencilType        st;
1798:   ISLocalToGlobalMapping ltog;

1800:   /*
1801:          nc - number of components per grid point
1802:          col - number of colors needed in one direction for single component problem

1804:   */
1805:   DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
1806:   col = 2 * s + 1;

1808:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1809:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1810:   PetscObjectGetComm((PetscObject)da, &comm);

1812:   PetscMalloc1(col * col * col, &cols);

1814:   DMGetLocalToGlobalMapping(da, &ltog);

1816:   /* determine the matrix preallocation information */
1817:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1818:   for (i = xs; i < xs + nx; i++) {
1819:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1820:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1821:     for (j = ys; j < ys + ny; j++) {
1822:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1823:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1824:       for (k = zs; k < zs + nz; k++) {
1825:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1826:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1828:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1830:         /* Find block columns in block row */
1831:         cnt = 0;
1832:         for (ii = istart; ii < iend + 1; ii++) {
1833:           for (jj = jstart; jj < jend + 1; jj++) {
1834:             for (kk = kstart; kk < kend + 1; kk++) {
1835:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1836:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1837:               }
1838:             }
1839:           }
1840:         }
1841:         MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1842:       }
1843:     }
1844:   }
1845:   MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1846:   MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1847:   MatPreallocateEnd(dnz, onz);

1849:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1851:   /*
1852:     For each node in the grid: we get the neighbors in the local (on processor ordering
1853:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1854:     PETSc ordering.
1855:   */
1856:   if (!da->prealloc_only) {
1857:     PetscCalloc1(col * col * col * nc * nc, &values);
1858:     for (i = xs; i < xs + nx; i++) {
1859:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1860:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1861:       for (j = ys; j < ys + ny; j++) {
1862:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1863:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1864:         for (k = zs; k < zs + nz; k++) {
1865:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1866:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1868:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1870:           cnt = 0;
1871:           for (ii = istart; ii < iend + 1; ii++) {
1872:             for (jj = jstart; jj < jend + 1; jj++) {
1873:               for (kk = kstart; kk < kend + 1; kk++) {
1874:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1875:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1876:                 }
1877:               }
1878:             }
1879:           }
1880:           MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1881:         }
1882:       }
1883:     }
1884:     PetscFree(values);
1885:     /* do not copy values to GPU since they are all zero and not yet needed there */
1886:     MatBindToCPU(J, PETSC_TRUE);
1887:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1888:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1889:     MatBindToCPU(J, PETSC_FALSE);
1890:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1891:   }
1892:   PetscFree(cols);
1893:   return 0;
1894: }

1896: /*
1897:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1898:   identify in the local ordering with periodic domain.
1899: */
1900: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1901: {
1902:   PetscInt i, n;

1904:   ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row);
1905:   ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col);
1906:   for (i = 0, n = 0; i < *cnt; i++) {
1907:     if (col[i] >= *row) col[n++] = col[i];
1908:   }
1909:   *cnt = n;
1910:   return 0;
1911: }

1913: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1914: {
1915:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1916:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1917:   PetscInt               istart, iend, jstart, jend, ii, jj;
1918:   MPI_Comm               comm;
1919:   PetscScalar           *values;
1920:   DMBoundaryType         bx, by;
1921:   DMDAStencilType        st;
1922:   ISLocalToGlobalMapping ltog;

1924:   /*
1925:      nc - number of components per grid point
1926:      col - number of colors needed in one direction for single component problem
1927:   */
1928:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1929:   col = 2 * s + 1;

1931:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1932:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1933:   PetscObjectGetComm((PetscObject)da, &comm);

1935:   PetscMalloc1(col * col * nc * nc, &cols);

1937:   DMGetLocalToGlobalMapping(da, &ltog);

1939:   /* determine the matrix preallocation information */
1940:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1941:   for (i = xs; i < xs + nx; i++) {
1942:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1943:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1944:     for (j = ys; j < ys + ny; j++) {
1945:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1946:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1947:       slot   = i - gxs + gnx * (j - gys);

1949:       /* Find block columns in block row */
1950:       cnt = 0;
1951:       for (ii = istart; ii < iend + 1; ii++) {
1952:         for (jj = jstart; jj < jend + 1; jj++) {
1953:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1954:         }
1955:       }
1956:       L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1957:       MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
1958:     }
1959:   }
1960:   MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
1961:   MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1962:   MatPreallocateEnd(dnz, onz);

1964:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1966:   /*
1967:     For each node in the grid: we get the neighbors in the local (on processor ordering
1968:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1969:     PETSc ordering.
1970:   */
1971:   if (!da->prealloc_only) {
1972:     PetscCalloc1(col * col * nc * nc, &values);
1973:     for (i = xs; i < xs + nx; i++) {
1974:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1975:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1976:       for (j = ys; j < ys + ny; j++) {
1977:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1978:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1979:         slot   = i - gxs + gnx * (j - gys);

1981:         /* Find block columns in block row */
1982:         cnt = 0;
1983:         for (ii = istart; ii < iend + 1; ii++) {
1984:           for (jj = jstart; jj < jend + 1; jj++) {
1985:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1986:           }
1987:         }
1988:         L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1989:         MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1990:       }
1991:     }
1992:     PetscFree(values);
1993:     /* do not copy values to GPU since they are all zero and not yet needed there */
1994:     MatBindToCPU(J, PETSC_TRUE);
1995:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1996:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1997:     MatBindToCPU(J, PETSC_FALSE);
1998:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1999:   }
2000:   PetscFree(cols);
2001:   return 0;
2002: }

2004: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2005: {
2006:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2007:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2008:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2009:   MPI_Comm               comm;
2010:   PetscScalar           *values;
2011:   DMBoundaryType         bx, by, bz;
2012:   DMDAStencilType        st;
2013:   ISLocalToGlobalMapping ltog;

2015:   /*
2016:      nc - number of components per grid point
2017:      col - number of colors needed in one direction for single component problem
2018:   */
2019:   DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
2020:   col = 2 * s + 1;

2022:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2023:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2024:   PetscObjectGetComm((PetscObject)da, &comm);

2026:   /* create the matrix */
2027:   PetscMalloc1(col * col * col, &cols);

2029:   DMGetLocalToGlobalMapping(da, &ltog);

2031:   /* determine the matrix preallocation information */
2032:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2033:   for (i = xs; i < xs + nx; i++) {
2034:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2035:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2036:     for (j = ys; j < ys + ny; j++) {
2037:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2038:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2039:       for (k = zs; k < zs + nz; k++) {
2040:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2041:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2043:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2045:         /* Find block columns in block row */
2046:         cnt = 0;
2047:         for (ii = istart; ii < iend + 1; ii++) {
2048:           for (jj = jstart; jj < jend + 1; jj++) {
2049:             for (kk = kstart; kk < kend + 1; kk++) {
2050:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2051:             }
2052:           }
2053:         }
2054:         L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2055:         MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
2056:       }
2057:     }
2058:   }
2059:   MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
2060:   MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
2061:   MatPreallocateEnd(dnz, onz);

2063:   MatSetLocalToGlobalMapping(J, ltog, ltog);

2065:   /*
2066:     For each node in the grid: we get the neighbors in the local (on processor ordering
2067:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2068:     PETSc ordering.
2069:   */
2070:   if (!da->prealloc_only) {
2071:     PetscCalloc1(col * col * col * nc * nc, &values);
2072:     for (i = xs; i < xs + nx; i++) {
2073:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2074:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2075:       for (j = ys; j < ys + ny; j++) {
2076:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2077:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2078:         for (k = zs; k < zs + nz; k++) {
2079:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2080:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2082:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2084:           cnt = 0;
2085:           for (ii = istart; ii < iend + 1; ii++) {
2086:             for (jj = jstart; jj < jend + 1; jj++) {
2087:               for (kk = kstart; kk < kend + 1; kk++) {
2088:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2089:               }
2090:             }
2091:           }
2092:           L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2093:           MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
2094:         }
2095:       }
2096:     }
2097:     PetscFree(values);
2098:     /* do not copy values to GPU since they are all zero and not yet needed there */
2099:     MatBindToCPU(J, PETSC_TRUE);
2100:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2101:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2102:     MatBindToCPU(J, PETSC_FALSE);
2103:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2104:   }
2105:   PetscFree(cols);
2106:   return 0;
2107: }

2109: /* ---------------------------------------------------------------------------------*/

2111: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2112: {
2113:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2114:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2115:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2116:   DM_DA                 *dd = (DM_DA *)da->data;
2117:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2118:   MPI_Comm               comm;
2119:   PetscScalar           *values;
2120:   DMBoundaryType         bx, by, bz;
2121:   ISLocalToGlobalMapping ltog;
2122:   DMDAStencilType        st;
2123:   PetscBool              removedups = PETSC_FALSE;

2125:   /*
2126:          nc - number of components per grid point
2127:          col - number of colors needed in one direction for single component problem

2129:   */
2130:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
2131:   col = 2 * s + 1;
2133:                  by 2*stencil_width + 1\n");
2135:                  by 2*stencil_width + 1\n");
2137:                  by 2*stencil_width + 1\n");

2139:   /*
2140:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2141:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2142:   */
2143:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2147:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2148:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2149:   PetscObjectGetComm((PetscObject)da, &comm);

2151:   PetscMalloc1(col * col * col * nc, &cols);
2152:   DMGetLocalToGlobalMapping(da, &ltog);

2154:   /* determine the matrix preallocation information */
2155:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2157:   MatSetBlockSize(J, nc);
2158:   for (i = xs; i < xs + nx; i++) {
2159:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161:     for (j = ys; j < ys + ny; j++) {
2162:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164:       for (k = zs; k < zs + nz; k++) {
2165:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2168:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2170:         for (l = 0; l < nc; l++) {
2171:           cnt = 0;
2172:           for (ii = istart; ii < iend + 1; ii++) {
2173:             for (jj = jstart; jj < jend + 1; jj++) {
2174:               for (kk = kstart; kk < kend + 1; kk++) {
2175:                 if (ii || jj || kk) {
2176:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178:                   }
2179:                 } else {
2180:                   if (dfill) {
2181:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182:                   } else {
2183:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184:                   }
2185:                 }
2186:               }
2187:             }
2188:           }
2189:           row    = l + nc * (slot);
2190:           maxcnt = PetscMax(maxcnt, cnt);
2191:           if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2192:           else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2193:         }
2194:       }
2195:     }
2196:   }
2197:   MatSeqAIJSetPreallocation(J, 0, dnz);
2198:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
2199:   MatPreallocateEnd(dnz, onz);
2200:   MatSetLocalToGlobalMapping(J, ltog, ltog);

2202:   /*
2203:     For each node in the grid: we get the neighbors in the local (on processor ordering
2204:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2205:     PETSc ordering.
2206:   */
2207:   if (!da->prealloc_only) {
2208:     PetscCalloc1(maxcnt, &values);
2209:     for (i = xs; i < xs + nx; i++) {
2210:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212:       for (j = ys; j < ys + ny; j++) {
2213:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215:         for (k = zs; k < zs + nz; k++) {
2216:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2219:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2221:           for (l = 0; l < nc; l++) {
2222:             cnt = 0;
2223:             for (ii = istart; ii < iend + 1; ii++) {
2224:               for (jj = jstart; jj < jend + 1; jj++) {
2225:                 for (kk = kstart; kk < kend + 1; kk++) {
2226:                   if (ii || jj || kk) {
2227:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229:                     }
2230:                   } else {
2231:                     if (dfill) {
2232:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233:                     } else {
2234:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235:                     }
2236:                   }
2237:                 }
2238:               }
2239:             }
2240:             row = l + nc * (slot);
2241:             MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES);
2242:           }
2243:         }
2244:       }
2245:     }
2246:     PetscFree(values);
2247:     /* do not copy values to GPU since they are all zero and not yet needed there */
2248:     MatBindToCPU(J, PETSC_TRUE);
2249:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2250:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2251:     MatBindToCPU(J, PETSC_FALSE);
2252:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2253:   }
2254:   PetscFree(cols);
2255:   return 0;
2256: }