Actual source code: fdda.c
2: #include <petsc/private/dmdaimpl.h>
3: #include <petscmat.h>
4: #include <petscbt.h>
6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
11: /*
12: For ghost i that may be negative or greater than the upper bound this
13: maps it into the 0:m-1 range using periodicity
14: */
15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18: {
19: PetscInt i, j, nz, *fill;
21: if (!dfill) return 0;
23: /* count number nonzeros */
24: nz = 0;
25: for (i = 0; i < w; i++) {
26: for (j = 0; j < w; j++) {
27: if (dfill[w * i + j]) nz++;
28: }
29: }
30: PetscMalloc1(nz + w + 1, &fill);
31: /* construct modified CSR storage of nonzero structure */
32: /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
33: so fill[1] - fill[0] gives number of nonzeros in first row etc */
34: nz = w + 1;
35: for (i = 0; i < w; i++) {
36: fill[i] = nz;
37: for (j = 0; j < w; j++) {
38: if (dfill[w * i + j]) {
39: fill[nz] = j;
40: nz++;
41: }
42: }
43: }
44: fill[w] = nz;
46: *rfill = fill;
47: return 0;
48: }
50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51: {
52: PetscInt nz;
54: if (!dfillsparse) return 0;
56: /* Determine number of non-zeros */
57: nz = (dfillsparse[w] - w - 1);
59: /* Allocate space for our copy of the given sparse matrix representation. */
60: PetscMalloc1(nz + w + 1, rfill);
61: PetscArraycpy(*rfill, dfillsparse, nz + w + 1);
62: return 0;
63: }
65: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
66: {
67: PetscInt i, k, cnt = 1;
70: /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
71: columns to the left with any nonzeros in them plus 1 */
72: PetscCalloc1(dd->w, &dd->ofillcols);
73: for (i = 0; i < dd->w; i++) {
74: for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
75: }
76: for (i = 0; i < dd->w; i++) {
77: if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
78: }
79: return 0;
80: }
82: /*@
83: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
84: of the matrix returned by DMCreateMatrix().
86: Logically Collective on da
88: Input Parameters:
89: + da - the distributed array
90: . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
91: - ofill - the fill pattern in the off-diagonal blocks
93: Level: developer
95: Notes:
96: This only makes sense when you are doing multicomponent problems but using the
97: MPIAIJ matrix format
99: The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100: representing coupling and 0 entries for missing coupling. For example
101: $ dfill[9] = {1, 0, 0,
102: $ 1, 1, 0,
103: $ 0, 1, 1}
104: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106: diagonal block).
108: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109: can be represented in the dfill, ofill format
111: Contributed by Glenn Hammond
113: .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
115: @*/
116: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
117: {
118: DM_DA *dd = (DM_DA *)da->data;
120: /* save the given dfill and ofill information */
121: DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill);
122: DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill);
124: /* count nonzeros in ofill columns */
125: DMDASetBlockFills_Private2(dd);
126: return 0;
127: }
129: /*@
130: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
131: of the matrix returned by DMCreateMatrix(), using sparse representations
132: of fill patterns.
134: Logically Collective on da
136: Input Parameters:
137: + da - the distributed array
138: . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
139: - ofill - the sparse fill pattern in the off-diagonal blocks
141: Level: developer
143: Notes: This only makes sense when you are doing multicomponent problems but using the
144: MPIAIJ matrix format
146: The format for dfill and ofill is a sparse representation of a
147: dof-by-dof matrix with 1 entries representing coupling and 0 entries
148: for missing coupling. The sparse representation is a 1 dimensional
149: array of length nz + dof + 1, where nz is the number of non-zeros in
150: the matrix. The first dof entries in the array give the
151: starting array indices of each row's items in the rest of the array,
152: the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
153: and the remaining nz items give the column indices of each of
154: the 1s within the logical 2D matrix. Each row's items within
155: the array are the column indices of the 1s within that row
156: of the 2D matrix. PETSc developers may recognize that this is the
157: same format as that computed by the DMDASetBlockFills_Private()
158: function from a dense 2D matrix representation.
160: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
161: can be represented in the dfill, ofill format
163: Contributed by Philip C. Roth
165: .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
167: @*/
168: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
169: {
170: DM_DA *dd = (DM_DA *)da->data;
172: /* save the given dfill and ofill information */
173: DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill);
174: DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill);
176: /* count nonzeros in ofill columns */
177: DMDASetBlockFills_Private2(dd);
178: return 0;
179: }
181: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
182: {
183: PetscInt dim, m, n, p, nc;
184: DMBoundaryType bx, by, bz;
185: MPI_Comm comm;
186: PetscMPIInt size;
187: PetscBool isBAIJ;
188: DM_DA *dd = (DM_DA *)da->data;
190: /*
191: m
192: ------------------------------------------------------
193: | |
194: | |
195: | ---------------------- |
196: | | | |
197: n | yn | | |
198: | | | |
199: | .--------------------- |
200: | (xs,ys) xn |
201: | . |
202: | (gxs,gys) |
203: | |
204: -----------------------------------------------------
205: */
207: /*
208: nc - number of components per grid point
209: col - number of colors needed in one direction for single component problem
211: */
212: DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL);
214: PetscObjectGetComm((PetscObject)da, &comm);
215: MPI_Comm_size(comm, &size);
216: if (ctype == IS_COLORING_LOCAL) {
217: if (size == 1) {
218: ctype = IS_COLORING_GLOBAL;
219: } else if (dim > 1) {
220: if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
221: SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
222: }
223: }
224: }
226: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
227: matrices is for the blocks, not the individual matrix elements */
228: PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ);
229: if (!isBAIJ) PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ);
230: if (!isBAIJ) PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ);
231: if (isBAIJ) {
232: dd->w = 1;
233: dd->xs = dd->xs / nc;
234: dd->xe = dd->xe / nc;
235: dd->Xs = dd->Xs / nc;
236: dd->Xe = dd->Xe / nc;
237: }
239: /*
240: We do not provide a getcoloring function in the DMDA operations because
241: the basic DMDA does not know about matrices. We think of DMDA as being
242: more low-level then matrices.
243: */
244: if (dim == 1) DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring);
245: else if (dim == 2) DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring);
246: else if (dim == 3) DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring);
247: else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
248: if (isBAIJ) {
249: dd->w = nc;
250: dd->xs = dd->xs * nc;
251: dd->xe = dd->xe * nc;
252: dd->Xs = dd->Xs * nc;
253: dd->Xe = dd->Xe * nc;
254: }
255: return 0;
256: }
258: /* ---------------------------------------------------------------------------------*/
260: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
261: {
262: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
263: PetscInt ncolors;
264: MPI_Comm comm;
265: DMBoundaryType bx, by;
266: DMDAStencilType st;
267: ISColoringValue *colors;
268: DM_DA *dd = (DM_DA *)da->data;
270: /*
271: nc - number of components per grid point
272: col - number of colors needed in one direction for single component problem
274: */
275: DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
276: col = 2 * s + 1;
277: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
278: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
279: PetscObjectGetComm((PetscObject)da, &comm);
281: /* special case as taught to us by Paul Hovland */
282: if (st == DMDA_STENCIL_STAR && s == 1) {
283: DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring);
284: } else {
285: if (ctype == IS_COLORING_GLOBAL) {
286: if (!dd->localcoloring) {
287: PetscMalloc1(nc * nx * ny, &colors);
288: ii = 0;
289: for (j = ys; j < ys + ny; j++) {
290: for (i = xs; i < xs + nx; i++) {
291: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
292: }
293: }
294: ncolors = nc + nc * (col - 1 + col * (col - 1));
295: ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
296: }
297: *coloring = dd->localcoloring;
298: } else if (ctype == IS_COLORING_LOCAL) {
299: if (!dd->ghostedcoloring) {
300: PetscMalloc1(nc * gnx * gny, &colors);
301: ii = 0;
302: for (j = gys; j < gys + gny; j++) {
303: for (i = gxs; i < gxs + gnx; i++) {
304: for (k = 0; k < nc; k++) {
305: /* the complicated stuff is to handle periodic boundaries */
306: colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
307: }
308: }
309: }
310: ncolors = nc + nc * (col - 1 + col * (col - 1));
311: ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
312: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
314: ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
315: }
316: *coloring = dd->ghostedcoloring;
317: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318: }
319: ISColoringReference(*coloring);
320: return 0;
321: }
323: /* ---------------------------------------------------------------------------------*/
325: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
326: {
327: PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
328: PetscInt ncolors;
329: MPI_Comm comm;
330: DMBoundaryType bx, by, bz;
331: DMDAStencilType st;
332: ISColoringValue *colors;
333: DM_DA *dd = (DM_DA *)da->data;
335: /*
336: nc - number of components per grid point
337: col - number of colors needed in one direction for single component problem
339: */
340: DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
341: col = 2 * s + 1;
342: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
343: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
344: PetscObjectGetComm((PetscObject)da, &comm);
346: /* create the coloring */
347: if (ctype == IS_COLORING_GLOBAL) {
348: if (!dd->localcoloring) {
349: PetscMalloc1(nc * nx * ny * nz, &colors);
350: ii = 0;
351: for (k = zs; k < zs + nz; k++) {
352: for (j = ys; j < ys + ny; j++) {
353: for (i = xs; i < xs + nx; i++) {
354: for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
355: }
356: }
357: }
358: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
359: ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring);
360: }
361: *coloring = dd->localcoloring;
362: } else if (ctype == IS_COLORING_LOCAL) {
363: if (!dd->ghostedcoloring) {
364: PetscMalloc1(nc * gnx * gny * gnz, &colors);
365: ii = 0;
366: for (k = gzs; k < gzs + gnz; k++) {
367: for (j = gys; j < gys + gny; j++) {
368: for (i = gxs; i < gxs + gnx; i++) {
369: for (l = 0; l < nc; l++) {
370: /* the complicated stuff is to handle periodic boundaries */
371: colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
372: }
373: }
374: }
375: }
376: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
377: ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
378: ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
379: }
380: *coloring = dd->ghostedcoloring;
381: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
382: ISColoringReference(*coloring);
383: return 0;
384: }
386: /* ---------------------------------------------------------------------------------*/
388: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389: {
390: PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391: PetscInt ncolors;
392: MPI_Comm comm;
393: DMBoundaryType bx;
394: ISColoringValue *colors;
395: DM_DA *dd = (DM_DA *)da->data;
397: /*
398: nc - number of components per grid point
399: col - number of colors needed in one direction for single component problem
401: */
402: DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
403: col = 2 * s + 1;
404: DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
405: DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
406: PetscObjectGetComm((PetscObject)da, &comm);
408: /* create the coloring */
409: if (ctype == IS_COLORING_GLOBAL) {
410: if (!dd->localcoloring) {
411: PetscMalloc1(nc * nx, &colors);
412: if (dd->ofillcols) {
413: PetscInt tc = 0;
414: for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
415: i1 = 0;
416: for (i = xs; i < xs + nx; i++) {
417: for (l = 0; l < nc; l++) {
418: if (dd->ofillcols[l] && (i % col)) {
419: colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
420: } else {
421: colors[i1++] = l;
422: }
423: }
424: }
425: ncolors = nc + 2 * s * tc;
426: } else {
427: i1 = 0;
428: for (i = xs; i < xs + nx; i++) {
429: for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
430: }
431: ncolors = nc + nc * (col - 1);
432: }
433: ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring);
434: }
435: *coloring = dd->localcoloring;
436: } else if (ctype == IS_COLORING_LOCAL) {
437: if (!dd->ghostedcoloring) {
438: PetscMalloc1(nc * gnx, &colors);
439: i1 = 0;
440: for (i = gxs; i < gxs + gnx; i++) {
441: for (l = 0; l < nc; l++) {
442: /* the complicated stuff is to handle periodic boundaries */
443: colors[i1++] = l + nc * (SetInRange(i, m) % col);
444: }
445: }
446: ncolors = nc + nc * (col - 1);
447: ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
448: ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
449: }
450: *coloring = dd->ghostedcoloring;
451: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
452: ISColoringReference(*coloring);
453: return 0;
454: }
456: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
457: {
458: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
459: PetscInt ncolors;
460: MPI_Comm comm;
461: DMBoundaryType bx, by;
462: ISColoringValue *colors;
463: DM_DA *dd = (DM_DA *)da->data;
465: /*
466: nc - number of components per grid point
467: col - number of colors needed in one direction for single component problem
469: */
470: DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL);
471: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
472: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
473: PetscObjectGetComm((PetscObject)da, &comm);
474: /* create the coloring */
475: if (ctype == IS_COLORING_GLOBAL) {
476: if (!dd->localcoloring) {
477: PetscMalloc1(nc * nx * ny, &colors);
478: ii = 0;
479: for (j = ys; j < ys + ny; j++) {
480: for (i = xs; i < xs + nx; i++) {
481: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
482: }
483: }
484: ncolors = 5 * nc;
485: ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
486: }
487: *coloring = dd->localcoloring;
488: } else if (ctype == IS_COLORING_LOCAL) {
489: if (!dd->ghostedcoloring) {
490: PetscMalloc1(nc * gnx * gny, &colors);
491: ii = 0;
492: for (j = gys; j < gys + gny; j++) {
493: for (i = gxs; i < gxs + gnx; i++) {
494: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
495: }
496: }
497: ncolors = 5 * nc;
498: ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
499: ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
500: }
501: *coloring = dd->ghostedcoloring;
502: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
503: return 0;
504: }
506: /* =========================================================================== */
507: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
522: /*@C
523: MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
525: Logically Collective on mat
527: Input Parameters:
528: + mat - the matrix
529: - da - the da
531: Level: intermediate
533: @*/
534: PetscErrorCode MatSetupDM(Mat mat, DM da)
535: {
538: PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
539: return 0;
540: }
542: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
543: {
544: DM da;
545: const char *prefix;
546: Mat Anatural;
547: AO ao;
548: PetscInt rstart, rend, *petsc, i;
549: IS is;
550: MPI_Comm comm;
551: PetscViewerFormat format;
553: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
554: PetscViewerGetFormat(viewer, &format);
555: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;
557: PetscObjectGetComm((PetscObject)A, &comm);
558: MatGetDM(A, &da);
561: DMDAGetAO(da, &ao);
562: MatGetOwnershipRange(A, &rstart, &rend);
563: PetscMalloc1(rend - rstart, &petsc);
564: for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
565: AOApplicationToPetsc(ao, rend - rstart, petsc);
566: ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is);
568: /* call viewer on natural ordering */
569: MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural);
570: ISDestroy(&is);
571: PetscObjectGetOptionsPrefix((PetscObject)A, &prefix);
572: PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix);
573: PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name);
574: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
575: MatView(Anatural, viewer);
576: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
577: MatDestroy(&Anatural);
578: return 0;
579: }
581: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
582: {
583: DM da;
584: Mat Anatural, Aapp;
585: AO ao;
586: PetscInt rstart, rend, *app, i, m, n, M, N;
587: IS is;
588: MPI_Comm comm;
590: PetscObjectGetComm((PetscObject)A, &comm);
591: MatGetDM(A, &da);
594: /* Load the matrix in natural ordering */
595: MatCreate(PetscObjectComm((PetscObject)A), &Anatural);
596: MatSetType(Anatural, ((PetscObject)A)->type_name);
597: MatGetSize(A, &M, &N);
598: MatGetLocalSize(A, &m, &n);
599: MatSetSizes(Anatural, m, n, M, N);
600: MatLoad(Anatural, viewer);
602: /* Map natural ordering to application ordering and create IS */
603: DMDAGetAO(da, &ao);
604: MatGetOwnershipRange(Anatural, &rstart, &rend);
605: PetscMalloc1(rend - rstart, &app);
606: for (i = rstart; i < rend; i++) app[i - rstart] = i;
607: AOPetscToApplication(ao, rend - rstart, app);
608: ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is);
610: /* Do permutation and replace header */
611: MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp);
612: MatHeaderReplace(A, &Aapp);
613: ISDestroy(&is);
614: MatDestroy(&Anatural);
615: return 0;
616: }
618: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
619: {
620: PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
621: Mat A;
622: MPI_Comm comm;
623: MatType Atype;
624: void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
625: MatType mtype;
626: PetscMPIInt size;
627: DM_DA *dd = (DM_DA *)da->data;
629: MatInitializePackage();
630: mtype = da->mattype;
632: /*
633: m
634: ------------------------------------------------------
635: | |
636: | |
637: | ---------------------- |
638: | | | |
639: n | ny | | |
640: | | | |
641: | .--------------------- |
642: | (xs,ys) nx |
643: | . |
644: | (gxs,gys) |
645: | |
646: -----------------------------------------------------
647: */
649: /*
650: nc - number of components per grid point
651: col - number of colors needed in one direction for single component problem
653: */
654: M = dd->M;
655: N = dd->N;
656: P = dd->P;
657: dim = da->dim;
658: dof = dd->w;
659: /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
660: DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz);
661: PetscObjectGetComm((PetscObject)da, &comm);
662: MatCreate(comm, &A);
663: MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P);
664: MatSetType(A, mtype);
665: MatSetFromOptions(A);
666: if (dof * nx * ny * nz < da->bind_below) {
667: MatSetBindingPropagates(A, PETSC_TRUE);
668: MatBindToCPU(A, PETSC_TRUE);
669: }
670: MatSetDM(A, da);
671: if (da->structure_only) MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE);
672: MatGetType(A, &Atype);
673: /*
674: We do not provide a getmatrix function in the DMDA operations because
675: the basic DMDA does not know about matrices. We think of DMDA as being more
676: more low-level than matrices. This is kind of cheating but, cause sometimes
677: we think of DMDA has higher level than matrices.
679: We could switch based on Atype (or mtype), but we do not since the
680: specialized setting routines depend only on the particular preallocation
681: details of the matrix, not the type itself.
682: */
683: PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij);
684: if (!aij) PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij);
685: if (!aij) {
686: PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij);
687: if (!baij) PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij);
688: if (!baij) {
689: PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij);
690: if (!sbaij) PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij);
691: if (!sbaij) {
692: PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell);
693: if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell);
694: }
695: if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is);
696: }
697: }
698: if (aij) {
699: if (dim == 1) {
700: if (dd->ofill) {
701: DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A);
702: } else {
703: DMBoundaryType bx;
704: PetscMPIInt size;
705: DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL);
706: MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);
707: if (size == 1 && bx == DM_BOUNDARY_NONE) {
708: DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A);
709: } else {
710: DMCreateMatrix_DA_1d_MPIAIJ(da, A);
711: }
712: }
713: } else if (dim == 2) {
714: if (dd->ofill) {
715: DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A);
716: } else {
717: DMCreateMatrix_DA_2d_MPIAIJ(da, A);
718: }
719: } else if (dim == 3) {
720: if (dd->ofill) {
721: DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A);
722: } else {
723: DMCreateMatrix_DA_3d_MPIAIJ(da, A);
724: }
725: }
726: } else if (baij) {
727: if (dim == 2) {
728: DMCreateMatrix_DA_2d_MPIBAIJ(da, A);
729: } else if (dim == 3) {
730: DMCreateMatrix_DA_3d_MPIBAIJ(da, A);
731: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
732: } else if (sbaij) {
733: if (dim == 2) {
734: DMCreateMatrix_DA_2d_MPISBAIJ(da, A);
735: } else if (dim == 3) {
736: DMCreateMatrix_DA_3d_MPISBAIJ(da, A);
737: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
738: } else if (sell) {
739: if (dim == 2) {
740: DMCreateMatrix_DA_2d_MPISELL(da, A);
741: } else if (dim == 3) {
742: DMCreateMatrix_DA_3d_MPISELL(da, A);
743: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
744: } else if (is) {
745: DMCreateMatrix_DA_IS(da, A);
746: } else {
747: ISLocalToGlobalMapping ltog;
749: MatSetBlockSize(A, dof);
750: MatSetUp(A);
751: DMGetLocalToGlobalMapping(da, <og);
752: MatSetLocalToGlobalMapping(A, ltog, ltog);
753: }
754: DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]);
755: MatSetStencil(A, dim, dims, starts, dof);
756: MatSetDM(A, da);
757: MPI_Comm_size(comm, &size);
758: if (size > 1) {
759: /* change viewer to display matrix in natural ordering */
760: MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
761: MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
762: }
763: *J = A;
764: return 0;
765: }
767: /* ---------------------------------------------------------------------------------*/
768: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
770: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
771: {
772: DM_DA *da = (DM_DA *)dm->data;
773: Mat lJ, P;
774: ISLocalToGlobalMapping ltog;
775: IS is;
776: PetscBT bt;
777: const PetscInt *e_loc, *idx;
778: PetscInt i, nel, nen, nv, dof, *gidx, n, N;
780: /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
781: We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
782: dof = da->w;
783: MatSetBlockSize(J, dof);
784: DMGetLocalToGlobalMapping(dm, <og);
786: /* flag local elements indices in local DMDA numbering */
787: ISLocalToGlobalMappingGetSize(ltog, &nv);
788: PetscBTCreate(nv / dof, &bt);
789: DMDAGetElements(dm, &nel, &nen, &e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
790: for (i = 0; i < nel * nen; i++) PetscBTSet(bt, e_loc[i]);
792: /* filter out (set to -1) the global indices not used by the local elements */
793: PetscMalloc1(nv / dof, &gidx);
794: ISLocalToGlobalMappingGetBlockIndices(ltog, &idx);
795: PetscArraycpy(gidx, idx, nv / dof);
796: ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx);
797: for (i = 0; i < nv / dof; i++)
798: if (!PetscBTLookup(bt, i)) gidx[i] = -1;
799: PetscBTDestroy(&bt);
800: ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is);
801: ISLocalToGlobalMappingCreateIS(is, <og);
802: MatSetLocalToGlobalMapping(J, ltog, ltog);
803: ISLocalToGlobalMappingDestroy(<og);
804: ISDestroy(&is);
806: /* Preallocation */
807: if (dm->prealloc_skip) {
808: MatSetUp(J);
809: } else {
810: MatISGetLocalMat(J, &lJ);
811: MatGetLocalToGlobalMapping(lJ, <og, NULL);
812: MatCreate(PetscObjectComm((PetscObject)lJ), &P);
813: MatSetType(P, MATPREALLOCATOR);
814: MatSetLocalToGlobalMapping(P, ltog, ltog);
815: MatGetSize(lJ, &N, NULL);
816: MatGetLocalSize(lJ, &n, NULL);
817: MatSetSizes(P, n, n, N, N);
818: MatSetBlockSize(P, dof);
819: MatSetUp(P);
820: for (i = 0; i < nel; i++) MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES);
821: MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ);
822: MatISRestoreLocalMat(J, &lJ);
823: DMDARestoreElements(dm, &nel, &nen, &e_loc);
824: MatDestroy(&P);
826: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
827: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
828: }
829: return 0;
830: }
832: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
833: {
834: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
835: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
836: MPI_Comm comm;
837: PetscScalar *values;
838: DMBoundaryType bx, by;
839: ISLocalToGlobalMapping ltog;
840: DMDAStencilType st;
842: /*
843: nc - number of components per grid point
844: col - number of colors needed in one direction for single component problem
846: */
847: DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
848: col = 2 * s + 1;
849: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
850: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
851: PetscObjectGetComm((PetscObject)da, &comm);
853: PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
854: DMGetLocalToGlobalMapping(da, <og);
856: MatSetBlockSize(J, nc);
857: /* determine the matrix preallocation information */
858: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
859: for (i = xs; i < xs + nx; i++) {
860: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
861: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
863: for (j = ys; j < ys + ny; j++) {
864: slot = i - gxs + gnx * (j - gys);
866: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
867: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
869: cnt = 0;
870: for (k = 0; k < nc; k++) {
871: for (l = lstart; l < lend + 1; l++) {
872: for (p = pstart; p < pend + 1; p++) {
873: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
874: cols[cnt++] = k + nc * (slot + gnx * l + p);
875: }
876: }
877: }
878: rows[k] = k + nc * (slot);
879: }
880: MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
881: }
882: }
883: MatSetBlockSize(J, nc);
884: MatSeqSELLSetPreallocation(J, 0, dnz);
885: MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
886: MatPreallocateEnd(dnz, onz);
888: MatSetLocalToGlobalMapping(J, ltog, ltog);
890: /*
891: For each node in the grid: we get the neighbors in the local (on processor ordering
892: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
893: PETSc ordering.
894: */
895: if (!da->prealloc_only) {
896: PetscCalloc1(col * col * nc * nc, &values);
897: for (i = xs; i < xs + nx; i++) {
898: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
899: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
901: for (j = ys; j < ys + ny; j++) {
902: slot = i - gxs + gnx * (j - gys);
904: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
905: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
907: cnt = 0;
908: for (k = 0; k < nc; k++) {
909: for (l = lstart; l < lend + 1; l++) {
910: for (p = pstart; p < pend + 1; p++) {
911: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
912: cols[cnt++] = k + nc * (slot + gnx * l + p);
913: }
914: }
915: }
916: rows[k] = k + nc * (slot);
917: }
918: MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
919: }
920: }
921: PetscFree(values);
922: /* do not copy values to GPU since they are all zero and not yet needed there */
923: MatBindToCPU(J, PETSC_TRUE);
924: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
925: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
926: MatBindToCPU(J, PETSC_FALSE);
927: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
928: }
929: PetscFree2(rows, cols);
930: return 0;
931: }
933: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
934: {
935: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
936: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
937: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
938: MPI_Comm comm;
939: PetscScalar *values;
940: DMBoundaryType bx, by, bz;
941: ISLocalToGlobalMapping ltog;
942: DMDAStencilType st;
944: /*
945: nc - number of components per grid point
946: col - number of colors needed in one direction for single component problem
948: */
949: DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
950: col = 2 * s + 1;
951: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
952: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
953: PetscObjectGetComm((PetscObject)da, &comm);
955: PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
956: DMGetLocalToGlobalMapping(da, <og);
958: MatSetBlockSize(J, nc);
959: /* determine the matrix preallocation information */
960: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
961: for (i = xs; i < xs + nx; i++) {
962: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
963: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
964: for (j = ys; j < ys + ny; j++) {
965: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
966: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
967: for (k = zs; k < zs + nz; k++) {
968: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
969: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
971: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
973: cnt = 0;
974: for (l = 0; l < nc; l++) {
975: for (ii = istart; ii < iend + 1; ii++) {
976: for (jj = jstart; jj < jend + 1; jj++) {
977: for (kk = kstart; kk < kend + 1; kk++) {
978: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
979: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
980: }
981: }
982: }
983: }
984: rows[l] = l + nc * (slot);
985: }
986: MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
987: }
988: }
989: }
990: MatSetBlockSize(J, nc);
991: MatSeqSELLSetPreallocation(J, 0, dnz);
992: MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
993: MatPreallocateEnd(dnz, onz);
994: MatSetLocalToGlobalMapping(J, ltog, ltog);
996: /*
997: For each node in the grid: we get the neighbors in the local (on processor ordering
998: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
999: PETSc ordering.
1000: */
1001: if (!da->prealloc_only) {
1002: PetscCalloc1(col * col * col * nc * nc * nc, &values);
1003: for (i = xs; i < xs + nx; i++) {
1004: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1005: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1006: for (j = ys; j < ys + ny; j++) {
1007: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1008: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1009: for (k = zs; k < zs + nz; k++) {
1010: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1011: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1013: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1015: cnt = 0;
1016: for (l = 0; l < nc; l++) {
1017: for (ii = istart; ii < iend + 1; ii++) {
1018: for (jj = jstart; jj < jend + 1; jj++) {
1019: for (kk = kstart; kk < kend + 1; kk++) {
1020: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1021: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1022: }
1023: }
1024: }
1025: }
1026: rows[l] = l + nc * (slot);
1027: }
1028: MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
1029: }
1030: }
1031: }
1032: PetscFree(values);
1033: /* do not copy values to GPU since they are all zero and not yet needed there */
1034: MatBindToCPU(J, PETSC_TRUE);
1035: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1036: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1037: MatBindToCPU(J, PETSC_FALSE);
1038: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1039: }
1040: PetscFree2(rows, cols);
1041: return 0;
1042: }
1044: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1045: {
1046: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1047: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1048: MPI_Comm comm;
1049: DMBoundaryType bx, by;
1050: ISLocalToGlobalMapping ltog, mltog;
1051: DMDAStencilType st;
1052: PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1054: /*
1055: nc - number of components per grid point
1056: col - number of colors needed in one direction for single component problem
1058: */
1059: DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1060: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1061: col = 2 * s + 1;
1062: /*
1063: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1064: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1065: */
1066: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1067: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1068: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1069: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1070: PetscObjectGetComm((PetscObject)da, &comm);
1072: PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
1073: DMGetLocalToGlobalMapping(da, <og);
1075: MatSetBlockSize(J, nc);
1076: /* determine the matrix preallocation information */
1077: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1078: for (i = xs; i < xs + nx; i++) {
1079: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1080: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1082: for (j = ys; j < ys + ny; j++) {
1083: slot = i - gxs + gnx * (j - gys);
1085: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1086: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1088: cnt = 0;
1089: for (k = 0; k < nc; k++) {
1090: for (l = lstart; l < lend + 1; l++) {
1091: for (p = pstart; p < pend + 1; p++) {
1092: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1093: cols[cnt++] = k + nc * (slot + gnx * l + p);
1094: }
1095: }
1096: }
1097: rows[k] = k + nc * (slot);
1098: }
1099: if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1100: else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1101: }
1102: }
1103: MatSetBlockSize(J, nc);
1104: MatSeqAIJSetPreallocation(J, 0, dnz);
1105: MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1106: MatPreallocateEnd(dnz, onz);
1107: MatGetLocalToGlobalMapping(J, &mltog, NULL);
1108: if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);
1110: /*
1111: For each node in the grid: we get the neighbors in the local (on processor ordering
1112: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1113: PETSc ordering.
1114: */
1115: if (!da->prealloc_only) {
1116: for (i = xs; i < xs + nx; i++) {
1117: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1118: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1120: for (j = ys; j < ys + ny; j++) {
1121: slot = i - gxs + gnx * (j - gys);
1123: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1124: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1126: cnt = 0;
1127: for (l = lstart; l < lend + 1; l++) {
1128: for (p = pstart; p < pend + 1; p++) {
1129: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1130: cols[cnt++] = nc * (slot + gnx * l + p);
1131: for (k = 1; k < nc; k++) {
1132: cols[cnt] = 1 + cols[cnt - 1];
1133: cnt++;
1134: }
1135: }
1136: }
1137: }
1138: for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1139: MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1140: }
1141: }
1142: /* do not copy values to GPU since they are all zero and not yet needed there */
1143: MatBoundToCPU(J, &alreadyboundtocpu);
1144: MatBindToCPU(J, PETSC_TRUE);
1145: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1146: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1147: if (!alreadyboundtocpu) MatBindToCPU(J, PETSC_FALSE);
1148: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1149: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1150: }
1151: PetscFree2(rows, cols);
1152: return 0;
1153: }
1155: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1156: {
1157: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1158: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1159: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1160: DM_DA *dd = (DM_DA *)da->data;
1161: PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1162: MPI_Comm comm;
1163: DMBoundaryType bx, by;
1164: ISLocalToGlobalMapping ltog;
1165: DMDAStencilType st;
1166: PetscBool removedups = PETSC_FALSE;
1168: /*
1169: nc - number of components per grid point
1170: col - number of colors needed in one direction for single component problem
1172: */
1173: DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1174: col = 2 * s + 1;
1175: /*
1176: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1177: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1178: */
1179: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1180: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1181: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1182: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1183: PetscObjectGetComm((PetscObject)da, &comm);
1185: PetscMalloc1(col * col * nc, &cols);
1186: DMGetLocalToGlobalMapping(da, <og);
1188: MatSetBlockSize(J, nc);
1189: /* determine the matrix preallocation information */
1190: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1191: for (i = xs; i < xs + nx; i++) {
1192: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1193: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1195: for (j = ys; j < ys + ny; j++) {
1196: slot = i - gxs + gnx * (j - gys);
1198: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1199: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1201: for (k = 0; k < nc; k++) {
1202: cnt = 0;
1203: for (l = lstart; l < lend + 1; l++) {
1204: for (p = pstart; p < pend + 1; p++) {
1205: if (l || p) {
1206: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1207: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1208: }
1209: } else {
1210: if (dfill) {
1211: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1212: } else {
1213: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1214: }
1215: }
1216: }
1217: }
1218: row = k + nc * (slot);
1219: maxcnt = PetscMax(maxcnt, cnt);
1220: if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1221: else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1222: }
1223: }
1224: }
1225: MatSeqAIJSetPreallocation(J, 0, dnz);
1226: MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1227: MatPreallocateEnd(dnz, onz);
1228: MatSetLocalToGlobalMapping(J, ltog, ltog);
1230: /*
1231: For each node in the grid: we get the neighbors in the local (on processor ordering
1232: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1233: PETSc ordering.
1234: */
1235: if (!da->prealloc_only) {
1236: for (i = xs; i < xs + nx; i++) {
1237: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1238: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1240: for (j = ys; j < ys + ny; j++) {
1241: slot = i - gxs + gnx * (j - gys);
1243: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1244: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1246: for (k = 0; k < nc; k++) {
1247: cnt = 0;
1248: for (l = lstart; l < lend + 1; l++) {
1249: for (p = pstart; p < pend + 1; p++) {
1250: if (l || p) {
1251: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1252: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1253: }
1254: } else {
1255: if (dfill) {
1256: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1257: } else {
1258: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1259: }
1260: }
1261: }
1262: }
1263: row = k + nc * (slot);
1264: MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1265: }
1266: }
1267: }
1268: /* do not copy values to GPU since they are all zero and not yet needed there */
1269: MatBindToCPU(J, PETSC_TRUE);
1270: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1271: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1272: MatBindToCPU(J, PETSC_FALSE);
1273: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1274: }
1275: PetscFree(cols);
1276: return 0;
1277: }
1279: /* ---------------------------------------------------------------------------------*/
1281: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1282: {
1283: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1284: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1285: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1286: MPI_Comm comm;
1287: DMBoundaryType bx, by, bz;
1288: ISLocalToGlobalMapping ltog, mltog;
1289: DMDAStencilType st;
1290: PetscBool removedups = PETSC_FALSE;
1292: /*
1293: nc - number of components per grid point
1294: col - number of colors needed in one direction for single component problem
1296: */
1297: DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
1298: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1299: col = 2 * s + 1;
1301: /*
1302: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1303: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1304: */
1305: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1306: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1307: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1309: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1310: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1311: PetscObjectGetComm((PetscObject)da, &comm);
1313: PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
1314: DMGetLocalToGlobalMapping(da, <og);
1316: MatSetBlockSize(J, nc);
1317: /* determine the matrix preallocation information */
1318: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1319: for (i = xs; i < xs + nx; i++) {
1320: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1321: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1322: for (j = ys; j < ys + ny; j++) {
1323: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1324: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1325: for (k = zs; k < zs + nz; k++) {
1326: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1327: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1329: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1331: cnt = 0;
1332: for (l = 0; l < nc; l++) {
1333: for (ii = istart; ii < iend + 1; ii++) {
1334: for (jj = jstart; jj < jend + 1; jj++) {
1335: for (kk = kstart; kk < kend + 1; kk++) {
1336: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1337: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1338: }
1339: }
1340: }
1341: }
1342: rows[l] = l + nc * (slot);
1343: }
1344: if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1345: else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1346: }
1347: }
1348: }
1349: MatSetBlockSize(J, nc);
1350: MatSeqAIJSetPreallocation(J, 0, dnz);
1351: MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1352: MatPreallocateEnd(dnz, onz);
1353: MatGetLocalToGlobalMapping(J, &mltog, NULL);
1354: if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);
1356: /*
1357: For each node in the grid: we get the neighbors in the local (on processor ordering
1358: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1359: PETSc ordering.
1360: */
1361: if (!da->prealloc_only) {
1362: for (i = xs; i < xs + nx; i++) {
1363: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1364: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1365: for (j = ys; j < ys + ny; j++) {
1366: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1367: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1368: for (k = zs; k < zs + nz; k++) {
1369: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1370: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1372: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1374: cnt = 0;
1375: for (kk = kstart; kk < kend + 1; kk++) {
1376: for (jj = jstart; jj < jend + 1; jj++) {
1377: for (ii = istart; ii < iend + 1; ii++) {
1378: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1379: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1380: for (l = 1; l < nc; l++) {
1381: cols[cnt] = 1 + cols[cnt - 1];
1382: cnt++;
1383: }
1384: }
1385: }
1386: }
1387: }
1388: rows[0] = nc * (slot);
1389: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1390: MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1391: }
1392: }
1393: }
1394: /* do not copy values to GPU since they are all zero and not yet needed there */
1395: MatBindToCPU(J, PETSC_TRUE);
1396: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1397: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1398: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1399: MatBindToCPU(J, PETSC_FALSE);
1400: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1401: }
1402: PetscFree2(rows, cols);
1403: return 0;
1404: }
1406: /* ---------------------------------------------------------------------------------*/
1408: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1409: {
1410: DM_DA *dd = (DM_DA *)da->data;
1411: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1412: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1413: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1414: DMBoundaryType bx;
1415: ISLocalToGlobalMapping ltog;
1416: PetscMPIInt rank, size;
1418: MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank);
1419: MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);
1421: /*
1422: nc - number of components per grid point
1424: */
1425: DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1427: DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1428: DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
1430: MatSetBlockSize(J, nc);
1431: PetscCalloc2(nx * nc, &cols, nx * nc, &ocols);
1433: /*
1434: note should be smaller for first and last process with no periodic
1435: does not handle dfill
1436: */
1437: cnt = 0;
1438: /* coupling with process to the left */
1439: for (i = 0; i < s; i++) {
1440: for (j = 0; j < nc; j++) {
1441: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1442: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1443: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1444: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1445: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1446: }
1447: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1448: cnt++;
1449: }
1450: }
1451: for (i = s; i < nx - s; i++) {
1452: for (j = 0; j < nc; j++) {
1453: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1454: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455: cnt++;
1456: }
1457: }
1458: /* coupling with process to the right */
1459: for (i = nx - s; i < nx; i++) {
1460: for (j = 0; j < nc; j++) {
1461: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1462: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1463: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1464: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1465: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1466: }
1467: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1468: cnt++;
1469: }
1470: }
1472: MatSeqAIJSetPreallocation(J, 0, cols);
1473: MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols);
1474: PetscFree2(cols, ocols);
1476: DMGetLocalToGlobalMapping(da, <og);
1477: MatSetLocalToGlobalMapping(J, ltog, ltog);
1479: /*
1480: For each node in the grid: we get the neighbors in the local (on processor ordering
1481: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1482: PETSc ordering.
1483: */
1484: if (!da->prealloc_only) {
1485: PetscMalloc1(maxcnt, &cols);
1486: row = xs * nc;
1487: /* coupling with process to the left */
1488: for (i = xs; i < xs + s; i++) {
1489: for (j = 0; j < nc; j++) {
1490: cnt = 0;
1491: if (rank) {
1492: for (l = 0; l < s; l++) {
1493: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1494: }
1495: }
1496: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1497: for (l = 0; l < s; l++) {
1498: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1499: }
1500: }
1501: if (dfill) {
1502: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1503: } else {
1504: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1505: }
1506: for (l = 0; l < s; l++) {
1507: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1508: }
1509: MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1510: row++;
1511: }
1512: }
1513: for (i = xs + s; i < xs + nx - s; i++) {
1514: for (j = 0; j < nc; j++) {
1515: cnt = 0;
1516: for (l = 0; l < s; l++) {
1517: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1518: }
1519: if (dfill) {
1520: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1521: } else {
1522: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1523: }
1524: for (l = 0; l < s; l++) {
1525: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1526: }
1527: MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1528: row++;
1529: }
1530: }
1531: /* coupling with process to the right */
1532: for (i = xs + nx - s; i < xs + nx; i++) {
1533: for (j = 0; j < nc; j++) {
1534: cnt = 0;
1535: for (l = 0; l < s; l++) {
1536: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1537: }
1538: if (dfill) {
1539: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1540: } else {
1541: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1542: }
1543: if (rank < size - 1) {
1544: for (l = 0; l < s; l++) {
1545: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1546: }
1547: }
1548: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1549: for (l = 0; l < s; l++) {
1550: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1551: }
1552: }
1553: MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1554: row++;
1555: }
1556: }
1557: PetscFree(cols);
1558: /* do not copy values to GPU since they are all zero and not yet needed there */
1559: MatBindToCPU(J, PETSC_TRUE);
1560: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1561: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1562: MatBindToCPU(J, PETSC_FALSE);
1563: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1564: }
1565: return 0;
1566: }
1568: /* ---------------------------------------------------------------------------------*/
1570: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1571: {
1572: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1573: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1574: PetscInt istart, iend;
1575: DMBoundaryType bx;
1576: ISLocalToGlobalMapping ltog, mltog;
1578: /*
1579: nc - number of components per grid point
1580: col - number of colors needed in one direction for single component problem
1582: */
1583: DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1584: if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1585: col = 2 * s + 1;
1587: DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1588: DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
1590: MatSetBlockSize(J, nc);
1591: MatSeqAIJSetPreallocation(J, col * nc, NULL);
1592: MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL);
1594: DMGetLocalToGlobalMapping(da, <og);
1595: MatGetLocalToGlobalMapping(J, &mltog, NULL);
1596: if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);
1598: /*
1599: For each node in the grid: we get the neighbors in the local (on processor ordering
1600: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1601: PETSc ordering.
1602: */
1603: if (!da->prealloc_only) {
1604: PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1605: for (i = xs; i < xs + nx; i++) {
1606: istart = PetscMax(-s, gxs - i);
1607: iend = PetscMin(s, gxs + gnx - i - 1);
1608: slot = i - gxs;
1610: cnt = 0;
1611: for (i1 = istart; i1 < iend + 1; i1++) {
1612: cols[cnt++] = nc * (slot + i1);
1613: for (l = 1; l < nc; l++) {
1614: cols[cnt] = 1 + cols[cnt - 1];
1615: cnt++;
1616: }
1617: }
1618: rows[0] = nc * (slot);
1619: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1620: MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1621: }
1622: /* do not copy values to GPU since they are all zero and not yet needed there */
1623: MatBindToCPU(J, PETSC_TRUE);
1624: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1625: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1626: if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1627: MatBindToCPU(J, PETSC_FALSE);
1628: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1629: PetscFree2(rows, cols);
1630: }
1631: return 0;
1632: }
1634: /* ---------------------------------------------------------------------------------*/
1636: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1637: {
1638: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1639: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1640: PetscInt istart, iend;
1641: DMBoundaryType bx;
1642: ISLocalToGlobalMapping ltog, mltog;
1644: /*
1645: nc - number of components per grid point
1646: col - number of colors needed in one direction for single component problem
1647: */
1648: DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1649: col = 2 * s + 1;
1651: DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1652: DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
1654: MatSetBlockSize(J, nc);
1655: MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc);
1657: DMGetLocalToGlobalMapping(da, <og);
1658: MatGetLocalToGlobalMapping(J, &mltog, NULL);
1659: if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);
1661: /*
1662: For each node in the grid: we get the neighbors in the local (on processor ordering
1663: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1664: PETSc ordering.
1665: */
1666: if (!da->prealloc_only) {
1667: PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1668: for (i = xs; i < xs + nx; i++) {
1669: istart = PetscMax(-s, gxs - i);
1670: iend = PetscMin(s, gxs + gnx - i - 1);
1671: slot = i - gxs;
1673: cnt = 0;
1674: for (i1 = istart; i1 < iend + 1; i1++) {
1675: cols[cnt++] = nc * (slot + i1);
1676: for (l = 1; l < nc; l++) {
1677: cols[cnt] = 1 + cols[cnt - 1];
1678: cnt++;
1679: }
1680: }
1681: rows[0] = nc * (slot);
1682: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1683: MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1684: }
1685: /* do not copy values to GPU since they are all zero and not yet needed there */
1686: MatBindToCPU(J, PETSC_TRUE);
1687: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1688: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1689: if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1690: MatBindToCPU(J, PETSC_FALSE);
1691: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1692: PetscFree2(rows, cols);
1693: }
1694: MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1695: return 0;
1696: }
1698: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1699: {
1700: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1701: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1702: PetscInt istart, iend, jstart, jend, ii, jj;
1703: MPI_Comm comm;
1704: PetscScalar *values;
1705: DMBoundaryType bx, by;
1706: DMDAStencilType st;
1707: ISLocalToGlobalMapping ltog;
1709: /*
1710: nc - number of components per grid point
1711: col - number of colors needed in one direction for single component problem
1712: */
1713: DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1714: col = 2 * s + 1;
1716: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1717: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1718: PetscObjectGetComm((PetscObject)da, &comm);
1720: PetscMalloc1(col * col * nc * nc, &cols);
1722: DMGetLocalToGlobalMapping(da, <og);
1724: /* determine the matrix preallocation information */
1725: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1726: for (i = xs; i < xs + nx; i++) {
1727: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1728: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1729: for (j = ys; j < ys + ny; j++) {
1730: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1731: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1732: slot = i - gxs + gnx * (j - gys);
1734: /* Find block columns in block row */
1735: cnt = 0;
1736: for (ii = istart; ii < iend + 1; ii++) {
1737: for (jj = jstart; jj < jend + 1; jj++) {
1738: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1739: cols[cnt++] = slot + ii + gnx * jj;
1740: }
1741: }
1742: }
1743: MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1744: }
1745: }
1746: MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1747: MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1748: MatPreallocateEnd(dnz, onz);
1750: MatSetLocalToGlobalMapping(J, ltog, ltog);
1752: /*
1753: For each node in the grid: we get the neighbors in the local (on processor ordering
1754: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1755: PETSc ordering.
1756: */
1757: if (!da->prealloc_only) {
1758: PetscCalloc1(col * col * nc * nc, &values);
1759: for (i = xs; i < xs + nx; i++) {
1760: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1761: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1762: for (j = ys; j < ys + ny; j++) {
1763: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1764: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1765: slot = i - gxs + gnx * (j - gys);
1766: cnt = 0;
1767: for (ii = istart; ii < iend + 1; ii++) {
1768: for (jj = jstart; jj < jend + 1; jj++) {
1769: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1770: cols[cnt++] = slot + ii + gnx * jj;
1771: }
1772: }
1773: }
1774: MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1775: }
1776: }
1777: PetscFree(values);
1778: /* do not copy values to GPU since they are all zero and not yet needed there */
1779: MatBindToCPU(J, PETSC_TRUE);
1780: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1781: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1782: MatBindToCPU(J, PETSC_FALSE);
1783: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1784: }
1785: PetscFree(cols);
1786: return 0;
1787: }
1789: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1790: {
1791: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1792: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1793: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1794: MPI_Comm comm;
1795: PetscScalar *values;
1796: DMBoundaryType bx, by, bz;
1797: DMDAStencilType st;
1798: ISLocalToGlobalMapping ltog;
1800: /*
1801: nc - number of components per grid point
1802: col - number of colors needed in one direction for single component problem
1804: */
1805: DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
1806: col = 2 * s + 1;
1808: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1809: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1810: PetscObjectGetComm((PetscObject)da, &comm);
1812: PetscMalloc1(col * col * col, &cols);
1814: DMGetLocalToGlobalMapping(da, <og);
1816: /* determine the matrix preallocation information */
1817: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1818: for (i = xs; i < xs + nx; i++) {
1819: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1820: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1821: for (j = ys; j < ys + ny; j++) {
1822: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1823: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1824: for (k = zs; k < zs + nz; k++) {
1825: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1826: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1828: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1830: /* Find block columns in block row */
1831: cnt = 0;
1832: for (ii = istart; ii < iend + 1; ii++) {
1833: for (jj = jstart; jj < jend + 1; jj++) {
1834: for (kk = kstart; kk < kend + 1; kk++) {
1835: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1836: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1837: }
1838: }
1839: }
1840: }
1841: MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1842: }
1843: }
1844: }
1845: MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1846: MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1847: MatPreallocateEnd(dnz, onz);
1849: MatSetLocalToGlobalMapping(J, ltog, ltog);
1851: /*
1852: For each node in the grid: we get the neighbors in the local (on processor ordering
1853: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1854: PETSc ordering.
1855: */
1856: if (!da->prealloc_only) {
1857: PetscCalloc1(col * col * col * nc * nc, &values);
1858: for (i = xs; i < xs + nx; i++) {
1859: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1860: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1861: for (j = ys; j < ys + ny; j++) {
1862: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1863: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1864: for (k = zs; k < zs + nz; k++) {
1865: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1866: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1868: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1870: cnt = 0;
1871: for (ii = istart; ii < iend + 1; ii++) {
1872: for (jj = jstart; jj < jend + 1; jj++) {
1873: for (kk = kstart; kk < kend + 1; kk++) {
1874: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1875: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1876: }
1877: }
1878: }
1879: }
1880: MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1881: }
1882: }
1883: }
1884: PetscFree(values);
1885: /* do not copy values to GPU since they are all zero and not yet needed there */
1886: MatBindToCPU(J, PETSC_TRUE);
1887: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1888: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1889: MatBindToCPU(J, PETSC_FALSE);
1890: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1891: }
1892: PetscFree(cols);
1893: return 0;
1894: }
1896: /*
1897: This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1898: identify in the local ordering with periodic domain.
1899: */
1900: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1901: {
1902: PetscInt i, n;
1904: ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row);
1905: ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col);
1906: for (i = 0, n = 0; i < *cnt; i++) {
1907: if (col[i] >= *row) col[n++] = col[i];
1908: }
1909: *cnt = n;
1910: return 0;
1911: }
1913: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1914: {
1915: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1916: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1917: PetscInt istart, iend, jstart, jend, ii, jj;
1918: MPI_Comm comm;
1919: PetscScalar *values;
1920: DMBoundaryType bx, by;
1921: DMDAStencilType st;
1922: ISLocalToGlobalMapping ltog;
1924: /*
1925: nc - number of components per grid point
1926: col - number of colors needed in one direction for single component problem
1927: */
1928: DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1929: col = 2 * s + 1;
1931: DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1932: DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1933: PetscObjectGetComm((PetscObject)da, &comm);
1935: PetscMalloc1(col * col * nc * nc, &cols);
1937: DMGetLocalToGlobalMapping(da, <og);
1939: /* determine the matrix preallocation information */
1940: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1941: for (i = xs; i < xs + nx; i++) {
1942: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1943: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1944: for (j = ys; j < ys + ny; j++) {
1945: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1946: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1947: slot = i - gxs + gnx * (j - gys);
1949: /* Find block columns in block row */
1950: cnt = 0;
1951: for (ii = istart; ii < iend + 1; ii++) {
1952: for (jj = jstart; jj < jend + 1; jj++) {
1953: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1954: }
1955: }
1956: L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1957: MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
1958: }
1959: }
1960: MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
1961: MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1962: MatPreallocateEnd(dnz, onz);
1964: MatSetLocalToGlobalMapping(J, ltog, ltog);
1966: /*
1967: For each node in the grid: we get the neighbors in the local (on processor ordering
1968: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1969: PETSc ordering.
1970: */
1971: if (!da->prealloc_only) {
1972: PetscCalloc1(col * col * nc * nc, &values);
1973: for (i = xs; i < xs + nx; i++) {
1974: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1975: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1976: for (j = ys; j < ys + ny; j++) {
1977: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1978: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1979: slot = i - gxs + gnx * (j - gys);
1981: /* Find block columns in block row */
1982: cnt = 0;
1983: for (ii = istart; ii < iend + 1; ii++) {
1984: for (jj = jstart; jj < jend + 1; jj++) {
1985: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1986: }
1987: }
1988: L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1989: MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1990: }
1991: }
1992: PetscFree(values);
1993: /* do not copy values to GPU since they are all zero and not yet needed there */
1994: MatBindToCPU(J, PETSC_TRUE);
1995: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1996: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1997: MatBindToCPU(J, PETSC_FALSE);
1998: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1999: }
2000: PetscFree(cols);
2001: return 0;
2002: }
2004: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2005: {
2006: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2007: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2008: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2009: MPI_Comm comm;
2010: PetscScalar *values;
2011: DMBoundaryType bx, by, bz;
2012: DMDAStencilType st;
2013: ISLocalToGlobalMapping ltog;
2015: /*
2016: nc - number of components per grid point
2017: col - number of colors needed in one direction for single component problem
2018: */
2019: DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
2020: col = 2 * s + 1;
2022: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2023: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2024: PetscObjectGetComm((PetscObject)da, &comm);
2026: /* create the matrix */
2027: PetscMalloc1(col * col * col, &cols);
2029: DMGetLocalToGlobalMapping(da, <og);
2031: /* determine the matrix preallocation information */
2032: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2033: for (i = xs; i < xs + nx; i++) {
2034: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2035: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2036: for (j = ys; j < ys + ny; j++) {
2037: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2038: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2039: for (k = zs; k < zs + nz; k++) {
2040: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2041: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2043: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2045: /* Find block columns in block row */
2046: cnt = 0;
2047: for (ii = istart; ii < iend + 1; ii++) {
2048: for (jj = jstart; jj < jend + 1; jj++) {
2049: for (kk = kstart; kk < kend + 1; kk++) {
2050: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2051: }
2052: }
2053: }
2054: L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2055: MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
2056: }
2057: }
2058: }
2059: MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
2060: MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
2061: MatPreallocateEnd(dnz, onz);
2063: MatSetLocalToGlobalMapping(J, ltog, ltog);
2065: /*
2066: For each node in the grid: we get the neighbors in the local (on processor ordering
2067: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2068: PETSc ordering.
2069: */
2070: if (!da->prealloc_only) {
2071: PetscCalloc1(col * col * col * nc * nc, &values);
2072: for (i = xs; i < xs + nx; i++) {
2073: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2074: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2075: for (j = ys; j < ys + ny; j++) {
2076: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2077: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2078: for (k = zs; k < zs + nz; k++) {
2079: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2080: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2082: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2084: cnt = 0;
2085: for (ii = istart; ii < iend + 1; ii++) {
2086: for (jj = jstart; jj < jend + 1; jj++) {
2087: for (kk = kstart; kk < kend + 1; kk++) {
2088: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2089: }
2090: }
2091: }
2092: L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2093: MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
2094: }
2095: }
2096: }
2097: PetscFree(values);
2098: /* do not copy values to GPU since they are all zero and not yet needed there */
2099: MatBindToCPU(J, PETSC_TRUE);
2100: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2101: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2102: MatBindToCPU(J, PETSC_FALSE);
2103: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2104: }
2105: PetscFree(cols);
2106: return 0;
2107: }
2109: /* ---------------------------------------------------------------------------------*/
2111: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2112: {
2113: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2114: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2115: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2116: DM_DA *dd = (DM_DA *)da->data;
2117: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2118: MPI_Comm comm;
2119: PetscScalar *values;
2120: DMBoundaryType bx, by, bz;
2121: ISLocalToGlobalMapping ltog;
2122: DMDAStencilType st;
2123: PetscBool removedups = PETSC_FALSE;
2125: /*
2126: nc - number of components per grid point
2127: col - number of colors needed in one direction for single component problem
2129: */
2130: DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
2131: col = 2 * s + 1;
2133: by 2*stencil_width + 1\n");
2135: by 2*stencil_width + 1\n");
2137: by 2*stencil_width + 1\n");
2139: /*
2140: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2141: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2142: */
2143: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2147: DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2148: DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2149: PetscObjectGetComm((PetscObject)da, &comm);
2151: PetscMalloc1(col * col * col * nc, &cols);
2152: DMGetLocalToGlobalMapping(da, <og);
2154: /* determine the matrix preallocation information */
2155: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2157: MatSetBlockSize(J, nc);
2158: for (i = xs; i < xs + nx; i++) {
2159: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161: for (j = ys; j < ys + ny; j++) {
2162: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164: for (k = zs; k < zs + nz; k++) {
2165: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2168: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2170: for (l = 0; l < nc; l++) {
2171: cnt = 0;
2172: for (ii = istart; ii < iend + 1; ii++) {
2173: for (jj = jstart; jj < jend + 1; jj++) {
2174: for (kk = kstart; kk < kend + 1; kk++) {
2175: if (ii || jj || kk) {
2176: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178: }
2179: } else {
2180: if (dfill) {
2181: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182: } else {
2183: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184: }
2185: }
2186: }
2187: }
2188: }
2189: row = l + nc * (slot);
2190: maxcnt = PetscMax(maxcnt, cnt);
2191: if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2192: else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2193: }
2194: }
2195: }
2196: }
2197: MatSeqAIJSetPreallocation(J, 0, dnz);
2198: MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
2199: MatPreallocateEnd(dnz, onz);
2200: MatSetLocalToGlobalMapping(J, ltog, ltog);
2202: /*
2203: For each node in the grid: we get the neighbors in the local (on processor ordering
2204: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2205: PETSc ordering.
2206: */
2207: if (!da->prealloc_only) {
2208: PetscCalloc1(maxcnt, &values);
2209: for (i = xs; i < xs + nx; i++) {
2210: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212: for (j = ys; j < ys + ny; j++) {
2213: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215: for (k = zs; k < zs + nz; k++) {
2216: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2219: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2221: for (l = 0; l < nc; l++) {
2222: cnt = 0;
2223: for (ii = istart; ii < iend + 1; ii++) {
2224: for (jj = jstart; jj < jend + 1; jj++) {
2225: for (kk = kstart; kk < kend + 1; kk++) {
2226: if (ii || jj || kk) {
2227: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229: }
2230: } else {
2231: if (dfill) {
2232: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233: } else {
2234: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235: }
2236: }
2237: }
2238: }
2239: }
2240: row = l + nc * (slot);
2241: MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES);
2242: }
2243: }
2244: }
2245: }
2246: PetscFree(values);
2247: /* do not copy values to GPU since they are all zero and not yet needed there */
2248: MatBindToCPU(J, PETSC_TRUE);
2249: MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2250: MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2251: MatBindToCPU(J, PETSC_FALSE);
2252: MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2253: }
2254: PetscFree(cols);
2255: return 0;
2256: }