Actual source code: ex96.c


  2: static char help[] ="Tests sequential and parallel DMCreateMatrix(), MatMatMult() and MatPtAP()\n\
  3:   -Mx <xg>, where <xg> = number of coarse grid points in the x-direction\n\
  4:   -My <yg>, where <yg> = number of coarse grid points in the y-direction\n\
  5:   -Mz <zg>, where <zg> = number of coarse grid points in the z-direction\n\
  6:   -Npx <npx>, where <npx> = number of processors in the x-direction\n\
  7:   -Npy <npy>, where <npy> = number of processors in the y-direction\n\
  8:   -Npz <npz>, where <npz> = number of processors in the z-direction\n\n";

 10: /*
 11:     This test is modified from ~src/ksp/tests/ex19.c.
 12:     Example of usage: mpiexec -n 3 ./ex96 -Mx 10 -My 10 -Mz 10
 13: */

 15: #include <petscdm.h>
 16: #include <petscdmda.h>

 18: /* User-defined application contexts */
 19: typedef struct {
 20:   PetscInt mx,my,mz;            /* number grid points in x, y and z direction */
 21:   Vec      localX,localF;       /* local vectors with ghost region */
 22:   DM       da;
 23:   Vec      x,b,r;               /* global vectors */
 24:   Mat      J;                   /* Jacobian on grid */
 25: } GridCtx;
 26: typedef struct {
 27:   GridCtx  fine;
 28:   GridCtx  coarse;
 29:   PetscInt ratio;
 30:   Mat      Ii;                  /* interpolation from coarse to fine */
 31: } AppCtx;

 33: #define COARSE_LEVEL 0
 34: #define FINE_LEVEL   1

 36: /*
 37:       Mm_ratio - ration of grid lines between fine and coarse grids.
 38: */
 39: int main(int argc,char **argv)
 40: {
 41:   AppCtx         user;
 42:   PetscInt       Npx=PETSC_DECIDE,Npy=PETSC_DECIDE,Npz=PETSC_DECIDE;
 43:   PetscMPIInt    size,rank;
 44:   PetscInt       m,n,M,N,i,nrows;
 45:   PetscScalar    one = 1.0;
 46:   PetscReal      fill=2.0;
 47:   Mat            A,A_tmp,P,C,C1,C2;
 48:   PetscScalar    *array,none = -1.0,alpha;
 49:   Vec            x,v1,v2,v3,v4;
 50:   PetscReal      norm,norm_tmp,norm_tmp1,tol=100.*PETSC_MACHINE_EPSILON;
 51:   PetscRandom    rdm;
 52:   PetscBool      Test_MatMatMult=PETSC_TRUE,Test_MatPtAP=PETSC_TRUE,Test_3D=PETSC_TRUE,flg;
 53:   const PetscInt *ia,*ja;

 55:   PetscInitialize(&argc,&argv,NULL,help);
 56:   PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);

 58:   user.ratio     = 2;
 59:   user.coarse.mx = 20; user.coarse.my = 20; user.coarse.mz = 20;

 61:   PetscOptionsGetInt(NULL,NULL,"-Mx",&user.coarse.mx,NULL);
 62:   PetscOptionsGetInt(NULL,NULL,"-My",&user.coarse.my,NULL);
 63:   PetscOptionsGetInt(NULL,NULL,"-Mz",&user.coarse.mz,NULL);
 64:   PetscOptionsGetInt(NULL,NULL,"-ratio",&user.ratio,NULL);

 66:   if (user.coarse.mz) Test_3D = PETSC_TRUE;

 68:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 69:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 70:   PetscOptionsGetInt(NULL,NULL,"-Npx",&Npx,NULL);
 71:   PetscOptionsGetInt(NULL,NULL,"-Npy",&Npy,NULL);
 72:   PetscOptionsGetInt(NULL,NULL,"-Npz",&Npz,NULL);

 74:   /* Set up distributed array for fine grid */
 75:   if (!Test_3D) {
 76:     DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.coarse.mx,user.coarse.my,Npx,Npy,1,1,NULL,NULL,&user.coarse.da);
 77:   } else {
 78:     DMDACreate3d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.coarse.mx,user.coarse.my,user.coarse.mz,Npx,Npy,Npz,1,1,NULL,NULL,NULL,&user.coarse.da);
 79:   }
 80:   DMSetFromOptions(user.coarse.da);
 81:   DMSetUp(user.coarse.da);

 83:   /* This makes sure the coarse DMDA has the same partition as the fine DMDA */
 84:   DMRefine(user.coarse.da,PetscObjectComm((PetscObject)user.coarse.da),&user.fine.da);

 86:   /* Test DMCreateMatrix()                                         */
 87:   /*------------------------------------------------------------*/
 88:   DMSetMatType(user.fine.da,MATAIJ);
 89:   DMCreateMatrix(user.fine.da,&A);
 90:   DMSetMatType(user.fine.da,MATBAIJ);
 91:   DMCreateMatrix(user.fine.da,&C);

 93:   MatConvert(C,MATAIJ,MAT_INITIAL_MATRIX,&A_tmp); /* not work for mpisbaij matrix! */
 94:   MatEqual(A,A_tmp,&flg);
 96:   MatDestroy(&C);
 97:   MatDestroy(&A_tmp);

 99:   /*------------------------------------------------------------*/

101:   MatGetLocalSize(A,&m,&n);
102:   MatGetSize(A,&M,&N);
103:   /* if (rank == 0) printf("A %d, %d\n",M,N); */

105:   /* set val=one to A */
106:   if (size == 1) {
107:     MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
108:     if (flg) {
109:       MatSeqAIJGetArray(A,&array);
110:       for (i=0; i<ia[nrows]; i++) array[i] = one;
111:       MatSeqAIJRestoreArray(A,&array);
112:     }
113:     MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
114:   } else {
115:     Mat AA,AB;
116:     MatMPIAIJGetSeqAIJ(A,&AA,&AB,NULL);
117:     MatGetRowIJ(AA,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
118:     if (flg) {
119:       MatSeqAIJGetArray(AA,&array);
120:       for (i=0; i<ia[nrows]; i++) array[i] = one;
121:       MatSeqAIJRestoreArray(AA,&array);
122:     }
123:     MatRestoreRowIJ(AA,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
124:     MatGetRowIJ(AB,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
125:     if (flg) {
126:       MatSeqAIJGetArray(AB,&array);
127:       for (i=0; i<ia[nrows]; i++) array[i] = one;
128:       MatSeqAIJRestoreArray(AB,&array);
129:     }
130:     MatRestoreRowIJ(AB,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);
131:   }
132:   /* MatView(A, PETSC_VIEWER_STDOUT_WORLD); */

134:   /* Create interpolation between the fine and coarse grids */
135:   DMCreateInterpolation(user.coarse.da,user.fine.da,&P,NULL);
136:   MatGetLocalSize(P,&m,&n);
137:   MatGetSize(P,&M,&N);
138:   /* if (rank == 0) printf("P %d, %d\n",M,N); */

140:   /* Create vectors v1 and v2 that are compatible with A */
141:   VecCreate(PETSC_COMM_WORLD,&v1);
142:   MatGetLocalSize(A,&m,NULL);
143:   VecSetSizes(v1,m,PETSC_DECIDE);
144:   VecSetFromOptions(v1);
145:   VecDuplicate(v1,&v2);
146:   PetscRandomCreate(PETSC_COMM_WORLD,&rdm);
147:   PetscRandomSetFromOptions(rdm);

149:   /* Test MatMatMult(): C = A*P */
150:   /*----------------------------*/
151:   if (Test_MatMatMult) {
152:     MatDuplicate(A,MAT_COPY_VALUES,&A_tmp);
153:     MatMatMult(A_tmp,P,MAT_INITIAL_MATRIX,fill,&C);

155:     /* Test MAT_REUSE_MATRIX - reuse symbolic C */
156:     alpha=1.0;
157:     for (i=0; i<2; i++) {
158:       alpha -= 0.1;
159:       MatScale(A_tmp,alpha);
160:       MatMatMult(A_tmp,P,MAT_REUSE_MATRIX,fill,&C);
161:     }
162:     /* Free intermediate data structures created for reuse of C=Pt*A*P */
163:     MatProductClear(C);

165:     /* Test MatDuplicate()        */
166:     /*----------------------------*/
167:     MatDuplicate(C,MAT_COPY_VALUES,&C1);
168:     MatDuplicate(C1,MAT_COPY_VALUES,&C2);
169:     MatDestroy(&C1);
170:     MatDestroy(&C2);

172:     /* Create vector x that is compatible with P */
173:     VecCreate(PETSC_COMM_WORLD,&x);
174:     MatGetLocalSize(P,NULL,&n);
175:     VecSetSizes(x,n,PETSC_DECIDE);
176:     VecSetFromOptions(x);

178:     norm = 0.0;
179:     for (i=0; i<10; i++) {
180:       VecSetRandom(x,rdm);
181:       MatMult(P,x,v1);
182:       MatMult(A_tmp,v1,v2); /* v2 = A*P*x */
183:       MatMult(C,x,v1);  /* v1 = C*x   */
184:       VecAXPY(v1,none,v2);
185:       VecNorm(v1,NORM_1,&norm_tmp);
186:       VecNorm(v2,NORM_1,&norm_tmp1);
187:       norm_tmp /= norm_tmp1;
188:       if (norm_tmp > norm) norm = norm_tmp;
189:     }
190:     if (norm >= tol && rank == 0) {
191:       PetscPrintf(PETSC_COMM_SELF,"Error: MatMatMult(), |v1 - v2|/|v2|: %g\n",(double)norm);
192:     }

194:     VecDestroy(&x);
195:     MatDestroy(&C);
196:     MatDestroy(&A_tmp);
197:   }

199:   /* Test P^T * A * P - MatPtAP() */
200:   /*------------------------------*/
201:   if (Test_MatPtAP) {
202:     MatPtAP(A,P,MAT_INITIAL_MATRIX,fill,&C);
203:     MatGetLocalSize(C,&m,&n);

205:     /* Test MAT_REUSE_MATRIX - reuse symbolic C */
206:     alpha=1.0;
207:     for (i=0; i<1; i++) {
208:       alpha -= 0.1;
209:       MatScale(A,alpha);
210:       MatPtAP(A,P,MAT_REUSE_MATRIX,fill,&C);
211:     }

213:     /* Free intermediate data structures created for reuse of C=Pt*A*P */
214:     MatProductClear(C);

216:     /* Test MatDuplicate()        */
217:     /*----------------------------*/
218:     MatDuplicate(C,MAT_COPY_VALUES,&C1);
219:     MatDuplicate(C1,MAT_COPY_VALUES,&C2);
220:     MatDestroy(&C1);
221:     MatDestroy(&C2);

223:     /* Create vector x that is compatible with P */
224:     VecCreate(PETSC_COMM_WORLD,&x);
225:     MatGetLocalSize(P,&m,&n);
226:     VecSetSizes(x,n,PETSC_DECIDE);
227:     VecSetFromOptions(x);

229:     VecCreate(PETSC_COMM_WORLD,&v3);
230:     VecSetSizes(v3,n,PETSC_DECIDE);
231:     VecSetFromOptions(v3);
232:     VecDuplicate(v3,&v4);

234:     norm = 0.0;
235:     for (i=0; i<10; i++) {
236:       VecSetRandom(x,rdm);
237:       MatMult(P,x,v1);
238:       MatMult(A,v1,v2);  /* v2 = A*P*x */

240:       MatMultTranspose(P,v2,v3); /* v3 = Pt*A*P*x */
241:       MatMult(C,x,v4);           /* v3 = C*x   */
242:       VecAXPY(v4,none,v3);
243:       VecNorm(v4,NORM_1,&norm_tmp);
244:       VecNorm(v3,NORM_1,&norm_tmp1);

246:       norm_tmp /= norm_tmp1;
247:       if (norm_tmp > norm) norm = norm_tmp;
248:     }
249:     if (norm >= tol && rank == 0) {
250:       PetscPrintf(PETSC_COMM_SELF,"Error: MatPtAP(), |v3 - v4|/|v3|: %g\n",(double)norm);
251:     }
252:     MatDestroy(&C);
253:     VecDestroy(&v3);
254:     VecDestroy(&v4);
255:     VecDestroy(&x);
256:   }

258:   /* Clean up */
259:   MatDestroy(&A);
260:   PetscRandomDestroy(&rdm);
261:   VecDestroy(&v1);
262:   VecDestroy(&v2);
263:   DMDestroy(&user.fine.da);
264:   DMDestroy(&user.coarse.da);
265:   MatDestroy(&P);
266:   PetscFinalize();
267:   return 0;
268: }

270: /*TEST

272:    test:
273:       args: -Mx 10 -My 5 -Mz 10
274:       output_file: output/ex96_1.out

276:    test:
277:       suffix: nonscalable
278:       nsize: 3
279:       args: -Mx 10 -My 5 -Mz 10
280:       output_file: output/ex96_1.out

282:    test:
283:       suffix: scalable
284:       nsize: 3
285:       args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable
286:       output_file: output/ex96_1.out

288:    test:
289:      suffix: seq_scalable
290:      nsize: 3
291:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm scalable -inner_offdiag_mat_product_algorithm scalable
292:      output_file: output/ex96_1.out

294:    test:
295:      suffix: seq_sorted
296:      nsize: 3
297:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm sorted -inner_offdiag_mat_product_algorithm sorted
298:      output_file: output/ex96_1.out

300:    test:
301:      suffix: seq_scalable_fast
302:      nsize: 3
303:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm scalable_fast -inner_offdiag_mat_product_algorithm scalable_fast
304:      output_file: output/ex96_1.out

306:    test:
307:      suffix: seq_heap
308:      nsize: 3
309:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm heap -inner_offdiag_mat_product_algorithm heap
310:      output_file: output/ex96_1.out

312:    test:
313:      suffix: seq_btheap
314:      nsize: 3
315:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm btheap -inner_offdiag_mat_product_algorithm btheap
316:      output_file: output/ex96_1.out

318:    test:
319:      suffix: seq_llcondensed
320:      nsize: 3
321:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm llcondensed -inner_offdiag_mat_product_algorithm llcondensed
322:      output_file: output/ex96_1.out

324:    test:
325:      suffix: seq_rowmerge
326:      nsize: 3
327:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm rowmerge -inner_offdiag_mat_product_algorithm rowmerge
328:      output_file: output/ex96_1.out

330:    test:
331:      suffix: allatonce
332:      nsize: 3
333:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via allatonce
334:      output_file: output/ex96_1.out

336:    test:
337:      suffix: allatonce_merged
338:      nsize: 3
339:      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via allatonce_merged
340:      output_file: output/ex96_1.out

342: TEST*/