Actual source code: ex33.c
static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";
/*
  Example:
    mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
    mpiexec -n 2 ./ex33 -test_mpiderivedtype
*/
#include <petsc.h>
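
/*
  Report the current memory usage: reduce each rank's value onto rank 0 and
  print the maximum and minimum across all processes, converted to megabytes.
*/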
PetscErrorCode Print_memory(PetscLogDouble mem)
{
  double max_mem, min_mem;

  MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  max_mem = max_mem / 1024.0 / 1024.0;
  min_mem = min_mem / 1024.0 / 1024.0;
  PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f MB, %.4f MB.\n", max_mem, min_mem);
  return 0;
}

/*
  Illustrate how to use MPI derived data types. Doing so can save
  significant memory. See MatMPIDenseScatter().
*/
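/*
  Layout used below: buffer is a 4 x 6 array stored column-major on each rank.
  Rank 0 sends rows 0 and 2 of every column: an indexed-block type picks the
  two entries within one column, and resizing its extent to one full column
  lets a send count of 6 walk across the 6 columns. Rank 1 receives the same
  12 values into rows 1 and 2 of every column, one contiguous block of 2
  entries per column.
*/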
PetscErrorCode TestMPIDerivedDataType(void)
{
  MPI_Datatype type1, type2, rtype1, rtype2;
  PetscInt     i, j;
  PetscScalar  buffer[24]; /* a 4-row, 6-column array, stored column-major */
  MPI_Status   status;
  PetscMPIInt  rank, size, disp[2];

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if (rank == 0) {
    /* proc[0] sends 2 rows to proc[1] */
    for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i;

    disp[0] = 0;
    disp[1] = 2;
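    /* type1 selects the entries at offsets 0 and 2 within one column, i.e. rows 0 and 2 */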
    MPI_Type_create_indexed_block(2, 1, disp, MPIU_SCALAR, &type1);
    /* resize the extent to one full column (4 entries) so consecutive uses of the type step one column at a time */
    MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2);
    MPI_Type_commit(&type2);
    MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD);
  } else if (rank == 1) {
    /* proc[1] receives the 2 rows from proc[0] and puts them into contiguous rows, starting at row 1 (disp[0]) */
    PetscMPIInt blen = 2;

    for (i = 0; i < 24; i++) buffer[i] = 0.0;
    disp[0] = 1;
    MPI_Type_create_indexed_block(1, blen, disp, MPIU_SCALAR, &rtype1);
    MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2);
    MPI_Type_commit(&rtype2);
    MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status);
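    /* print the 4 x 6 array: rows 1 and 2 now hold the received values, rows 0 and 3 remain zero */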
    for (i = 0; i < 4; i++) {
      for (j = 0; j < 6; j++) PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4]));
      PetscPrintf(MPI_COMM_SELF, "\n");
    }
  }

  if (rank == 0) {
    MPI_Type_free(&type1);
    MPI_Type_free(&type2);
  } else if (rank == 1) {
    MPI_Type_free(&rtype1);
    MPI_Type_free(&rtype2);
  }
  MPI_Barrier(MPI_COMM_WORLD);
  return 0;
}

int main(int argc, char **args)
{
  PetscInt       mA = 2700, nX = 80, nz = 40;
  /* for a small test use: mA = 6, nX = 5, nz = 2 */
  PetscLogDouble mem;
  Mat            A, X, Y;
  PetscBool      flg = PETSC_FALSE;

  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL);
  if (flg) {
    TestMPIDerivedDataType();
    PetscFinalize();
    return 0;
  }

  PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL);
  PetscMemoryGetCurrentUsage(&mem);
  if (flg) {
    PetscPrintf(MPI_COMM_WORLD, "Before start,");
    Print_memory(mem);
  }
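
  /* A is a random mA x mA AIJ matrix preallocated with at most nz nonzeros per
     row in both the diagonal and off-diagonal blocks; X is a random dense mA x nX matrix */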
  MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A);
  MatSetRandom(A, NULL);
  PetscMemoryGetCurrentUsage(&mem);
  if (flg) {
    PetscPrintf(MPI_COMM_WORLD, "After creating A,");
    Print_memory(mem);
  }

  MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X);
  MatSetRandom(X, NULL);
  PetscMemoryGetCurrentUsage(&mem);
  if (flg) {
    PetscPrintf(MPI_COMM_WORLD, "After creating X,");
    Print_memory(mem);
  }
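
  /* Y = A*X. With -matmatmult_Bbn <Bbn>, the MPIAIJ-by-MPIDense product is
     formed over blocks of Bbn columns of X at a time (see MatMPIDenseScatter()),
     which bounds the size of the communication buffers */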
  MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y);
  PetscMemoryGetCurrentUsage(&mem);
  if (flg) {
    PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,");
    Print_memory(mem);
  }

  /* Test reuse */
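  /* with MAT_REUSE_MATRIX, Y's existing storage is reused, so memory usage should stay essentially flat */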
  MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
  PetscMemoryGetCurrentUsage(&mem);
  if (flg) {
    PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,");
    Print_memory(mem);
  }

  /* Check accuracy */
  MatMatMultEqual(A, X, Y, 10, &flg);
  if (!flg) PetscPrintf(MPI_COMM_WORLD, "Error: MatMatMult() is incorrect\n");

  MatDestroy(&A);
  MatDestroy(&X);
  MatDestroy(&Y);

  PetscFinalize();
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 4
      output_file: output/ex33.out

   test:
      suffix: 2
      nsize: 8
      output_file: output/ex33.out

   test:
      suffix: 3
      nsize: 2
      args: -test_mpiderivedtype
      output_file: output/ex33_3.out

TEST*/