Actual source code: flow.c


  2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
  3: originally written by W. K. Anderson of NASA Langley, \n\
  4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";

  6: #include <petscsnes.h>
  7: #include <petsctime.h>
  8: #include <petscao.h>
  9: #include "user.h"
 10: #if defined(_OPENMP)
 11: #include "omp.h"
 12: #if !defined(HAVE_REDUNDANT_WORK)
 13: #include "metis.h"
 14: #endif
 15: #endif

 17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
 18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
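     /* The two macros above forward to PetscMalloc1() and clamp the requested size to at
        least 1, so zero-length requests still return a valid pointer */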

 20: typedef struct {
 21:   Vec    qnew,qold,func;
 22:   double fnorm_ini,dt_ini,cfl_ini;
 23:   double ptime;
 24:   double cfl_max,max_time;
 25:   double fnorm,dt,cfl;
 26:   double fnorm_ratio;
 27:   int    ires,iramp,itstep;
 28:   int    max_steps,print_freq;
 29:   int    LocalTimeStepping;
 30: } TstepCtx;

 32: typedef struct {                               /*============================*/
 33:   GRID      *grid;                                 /* Pointer to Grid info       */
 34:   TstepCtx  *tsCtx;                                /* Pointer to Time Stepping Context */
 35:   PetscBool PreLoading;
 36: } AppCtx;                                      /*============================*/

 38: extern int  FormJacobian(SNES,Vec,Mat,Mat,void*),
 39:             FormFunction(SNES,Vec,Vec,void*),
 40:             FormInitialGuess(SNES,GRID*),
 41:             Update(SNES,void*),
 42:             ComputeTimeStep(SNES,int,void*),
 43:             GetLocalOrdering(GRID*),
 44:             SetPetscDS(GRID *,TstepCtx*);
 45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
 46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
 47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
 48: #endif
 49: /* Global Variables */

 51:                                                /*============================*/
 52: CINFO  *c_info;                                /* Pointer to COMMON INFO     */
 53: CRUNGE *c_runge;                               /* Pointer to COMMON RUNGE    */
 54: CGMCOM *c_gmcom;                               /* Pointer to COMMON GMCOM    */
 55:                                                /*============================*/
 56: int  rank,size,rstart;
 57: REAL memSize = 0.0,grad_time = 0.0;
 58: #if defined(_OPENMP)
 59: int max_threads = 2,tot_threads,my_thread_id;
 60: #endif

 62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
 63: int       event0,event1;
 64: Scalar    time_counters;
 65: long long counter0,counter1;
 66: #endif
 67: int  ntran[max_nbtran];        /* transition stuff put here to make global */
 68: REAL dxtran[max_nbtran];

 70: /* ======================== MAIN ROUTINE =================================== */
 71: /*                                                                           */
 72: /* Finite volume flux split solver for general polygons                      */
 73: /*                                                                           */
 74: /*===========================================================================*/

 76: int main(int argc,char **args)
 77: {
 78:   AppCtx      user;
 79:   GRID        f_pntr;
 80:   TstepCtx    tsCtx;
 81:   SNES        snes;                    /* nonlinear solver context */
 82:   Mat         Jpc;                     /* Jacobian and Preconditioner matrices */
 83:   PetscScalar *qnode;
 84:   int         ierr;
 85:   PetscBool   flg,write_pvtu,pvtu_base64;
 86:   MPI_Comm    comm;
 87:   PetscInt    maxfails                       = 10000;
 88:   char        pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";

 90:   PetscInitialize(&argc,&args,NULL,help);
 91:   PetscInitializeFortran();
 92:   PetscOptionsInsertFile(PETSC_COMM_WORLD,NULL,"petsc.opt",PETSC_FALSE);

 94:   comm = PETSC_COMM_WORLD;
 95:   f77FORLINK();                               /* Link FORTRAN and C COMMONS */

 97:   MPI_Comm_rank(comm,&rank);
 98:   MPI_Comm_size(comm,&size);

100:   flg  = PETSC_FALSE;
101:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102:   if (flg) PetscMemorySetGetMaximumUsage();

104:   /*======================================================================*/
105:   /* Initialize the time-stepping parameters */
106:   /*======================================================================*/
107:   tsCtx.fnorm_ini         = 0.0;  tsCtx.cfl_ini     = 50.0;    tsCtx.cfl_max = 1.0e+05;
108:   tsCtx.max_steps         = 50;   tsCtx.max_time    = 1.0e+12; tsCtx.iramp   = -50;
109:   tsCtx.dt                = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110:   tsCtx.LocalTimeStepping = 1;
111:   PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
112:   PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
113:   PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
114:   PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115:   tsCtx.print_freq        = tsCtx.max_steps;
116:   PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117:   PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118:   pvtu_base64             = PETSC_FALSE;
119:   PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);

121:   c_info->alpha = 3.0;
122:   c_info->beta  = 15.0;
123:   c_info->ivisc = 0;

125:   c_gmcom->ilu0  = 1;
126:   c_gmcom->nsrch = 10;

128:   c_runge->nitfo = 0;

130:   PetscMemzero(&f_pntr,sizeof(f_pntr));
131:   f_pntr.jvisc  = c_info->ivisc;
132:   f_pntr.ileast = 4;
133:   PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
134:   PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);

136:   /*======================================================================*/

138:   /* Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140:   PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141:   omp_set_num_threads(max_threads);
142:   PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif

145:   /* Get the grid information into local ordering */
146:   GetLocalOrdering(&f_pntr);

148:   /* Allocate Memory for Some Other Grid Arrays */
149:   set_up_grid(&f_pntr);

151:   /* If using least squares for the gradients, calculate the r's */
152:   if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);

154:   user.grid  = &f_pntr;
155:   user.tsCtx = &tsCtx;

157:   /* SAWs Stuff */

159:   /*
160:     Preload the executable to get accurate timings. This runs the following chunk of
161:     code twice, first to get the executable pages into memory and the second time for
162:     accurate timings.
163:   */
164:   PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165:   user.PreLoading = PetscPreLoading;

167:   /* Create nonlinear solver */
168:   SetPetscDS(&f_pntr,&tsCtx);
169:   SNESCreate(comm,&snes);
170:   SNESSetType(snes,"newtonls");

172:   /* Set various routines and options */
173:   SNESSetFunction(snes,user.grid->res,FormFunction,&user);
174:   flg  = PETSC_FALSE;
175:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
176:   if (flg) {
177:     /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
178:     MatCreateSNESMF(snes,&Jpc);
179:     SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
180:   } else {
181:     /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
182:     SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
183:   }

185:   SNESSetMaxLinearSolveFailures(snes,maxfails);
186:   SNESSetFromOptions(snes);

188:   /* Initialize the flowfield */
189:   FormInitialGuess(snes,user.grid);

191:   /* Solve nonlinear system */
192:   Update(snes,&user);

194:   /* Write restart file */
195:   VecGetArray(user.grid->qnode,&qnode);
196:   /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/

198:   /* Write Tecplot solution file */
199: #if 0
200:   if (rank == 0)
201:     f77TECFLO(&user.grid->nnodes,
202:               &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
203:               &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
204:               &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
205:               c_info->title,
206:               user.grid->x,       user.grid->y,       user.grid->z,
207:               qnode,
208:               user.grid->nnpts,   user.grid->nntet,   user.grid->nvpts,
209:               user.grid->nvtet,   user.grid->nfpts,   user.grid->nftet,
210:               user.grid->f2ntn,   user.grid->f2ntv,   user.grid->f2ntf,
211:               user.grid->isnode,  user.grid->ivnode,  user.grid->ifnode,
212:               &rank);
213: #endif
214:   if (write_pvtu) WritePVTU(&user,pvtu_fname,pvtu_base64);

216:   /* Write residual, lift, drag, and moment history file */
217:   /*
218:     if (rank == 0) f77PLLAN(&user.grid->nnodes,&rank);
219:   */

221:   VecRestoreArray(user.grid->qnode,&qnode);
222:   flg  = PETSC_FALSE;
223:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
224:   if (flg) {
225:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
226:   }

228:   VecDestroy(&user.grid->qnode);
229:   VecDestroy(&user.grid->qnodeLoc);
230:   VecDestroy(&user.tsCtx->qold);
231:   VecDestroy(&user.tsCtx->func);
232:   VecDestroy(&user.grid->res);
233:   VecDestroy(&user.grid->grad);
234:   VecDestroy(&user.grid->gradLoc);
235:   MatDestroy(&user.grid->A);
236:   flg  = PETSC_FALSE;
237:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
238:   if (flg) MatDestroy(&Jpc);
239:   SNESDestroy(&snes);
240:   VecScatterDestroy(&user.grid->scatter);
241:   VecScatterDestroy(&user.grid->gradScatter);
242:   flg  = PETSC_FALSE;
243:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
244:   if (flg) {
245:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
246:   }
247:   PetscPreLoadEnd();

249:   /* allocated in set_up_grid() */
250:   PetscFree(user.grid->isface);
251:   PetscFree(user.grid->ivface);
252:   PetscFree(user.grid->ifface);
253:   PetscFree(user.grid->us);
254:   PetscFree(user.grid->vs);
255:   PetscFree(user.grid->as);

257:   /* Allocated in GetLocalOrdering() */
258:   PetscFree(user.grid->eptr);
259:   PetscFree(user.grid->ia);
260:   PetscFree(user.grid->ja);
261:   PetscFree(user.grid->loc2glo);
262:   PetscFree(user.grid->loc2pet);
263:   PetscFree(user.grid->xyzn);
264: #if defined(_OPENMP)
265: #  if defined(HAVE_REDUNDANT_WORK)
266:   PetscFree(user.grid->resd);
267: #  else
268:   PetscFree(user.grid->part_thr);
269:   PetscFree(user.grid->nedge_thr);
270:   PetscFree(user.grid->edge_thr);
271:   PetscFree(user.grid->xyzn_thr);
272: #  endif
273: #endif
274:   PetscFree(user.grid->xyz);
275:   PetscFree(user.grid->area);

277:   PetscFree(user.grid->nntet);
278:   PetscFree(user.grid->nnpts);
279:   PetscFree(user.grid->f2ntn);
280:   PetscFree(user.grid->isnode);
281:   PetscFree(user.grid->sxn);
282:   PetscFree(user.grid->syn);
283:   PetscFree(user.grid->szn);
284:   PetscFree(user.grid->sa);
285:   PetscFree(user.grid->sface_bit);

287:   PetscFree(user.grid->nvtet);
288:   PetscFree(user.grid->nvpts);
289:   PetscFree(user.grid->f2ntv);
290:   PetscFree(user.grid->ivnode);
291:   PetscFree(user.grid->vxn);
292:   PetscFree(user.grid->vyn);
293:   PetscFree(user.grid->vzn);
294:   PetscFree(user.grid->va);
295:   PetscFree(user.grid->vface_bit);

297:   PetscFree(user.grid->nftet);
298:   PetscFree(user.grid->nfpts);
299:   PetscFree(user.grid->f2ntf);
300:   PetscFree(user.grid->ifnode);
301:   PetscFree(user.grid->fxn);
302:   PetscFree(user.grid->fyn);
303:   PetscFree(user.grid->fzn);
304:   PetscFree(user.grid->fa);
305:   PetscFree(user.grid->cdt);
306:   PetscFree(user.grid->phi);
307:   PetscFree(user.grid->rxy);

309:   PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);

311:   PetscFinalize();
312:   return 0;
313: }

315: /*---------------------------------------------------------------------*/
316: /* ---------------------  Form initial approximation ----------------- */
317: int FormInitialGuess(SNES snes,GRID *grid)
318: /*---------------------------------------------------------------------*/
319: {
320:   int         ierr;
321:   PetscScalar *qnode;

323:   VecGetArray(grid->qnode,&qnode);
324:   f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
325:   VecRestoreArray(grid->qnode,&qnode);
326:   return 0;
327: }

329: /*---------------------------------------------------------------------*/
330: /* ---------------------  Evaluate Function F(x) --------------------- */
331: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
332: /*---------------------------------------------------------------------*/
333: {
334:   AppCtx      *user  = (AppCtx*) dummy;
335:   GRID        *grid  = user->grid;
336:   TstepCtx    *tsCtx = user->tsCtx;
337:   PetscScalar *qnode,*res,*qold;
338:   PetscScalar *grad;
339:   PetscScalar temp;
340:   VecScatter  scatter     = grid->scatter;
341:   VecScatter  gradScatter = grid->gradScatter;
342:   Vec         localX      = grid->qnodeLoc;
343:   Vec         localGrad   = grid->gradLoc;
344:   int         i,j,in,ierr;
345:   int         nbface,ires;
346:   PetscScalar time_ini,time_fin;

348:   /* Get X into the local work vector */
349:   VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
350:   VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
351:   /* VecCopy(x,localX); */
352:   /* access the local work f,grad,and input */
353:   VecGetArray(f,&res);
354:   VecGetArray(grid->grad,&grad);
355:   VecGetArray(localX,&qnode);
356:   ires = tsCtx->ires;

358:   PetscTime(&time_ini);
359:   f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
360:            &rank,&grid->nvertices);
361:   PetscTime(&time_fin);
362:   grad_time += time_fin - time_ini;
363:   VecRestoreArray(grid->grad,&grad);

365:   VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
366:   VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
367:   /*VecCopy(grid->grad,localGrad);*/

369:   VecGetArray(localGrad,&grad);
370:   nbface = grid->nsface + grid->nvface + grid->nfface;
371:   f77GETRES(&grid->nnodesLoc,&grid->ncell,  &grid->nedgeLoc,  &grid->nsface,
372:             &grid->nvface,&grid->nfface, &nbface,
373:             &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
374:             grid->isface, grid->ivface,  grid->ifface, &grid->ileast,
375:             grid->isnode, grid->ivnode,  grid->ifnode,
376:             &grid->nnfacetLoc,grid->f2ntn,  &grid->nnbound,
377:             &grid->nvfacetLoc,grid->f2ntv,  &grid->nvbound,
378:             &grid->nffacetLoc,grid->f2ntf,  &grid->nfbound,
379:             grid->eptr,
380:             grid->sxn,    grid->syn,     grid->szn,
381:             grid->vxn,    grid->vyn,     grid->vzn,
382:             grid->fxn,    grid->fyn,     grid->fzn,
383:             grid->xyzn,
384:             qnode,        grid->cdt,
385:             grid->xyz,    grid->area,
386:             grad, res,
387:             grid->turbre,
388:             grid->slen,   grid->c2n,
389:             grid->c2e,
390:             grid->us,     grid->vs,      grid->as,
391:             grid->phi,
392:             grid->amut,   &ires,
393: #if defined(_OPENMP)
394:             &max_threads,
395: #if defined(HAVE_EDGE_COLORING)
396:             &grid->ncolor, grid->ncount,
397: #elif defined(HAVE_REDUNDANT_WORK)
398:             grid->resd,
399: #else
400:             &grid->nedgeAllThr,
401:             grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
402: #endif
403: #endif
404:             &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);

406: /* Add the contribution due to time stepping */
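     /* Pseudo-transient continuation: for each node i a diagonal term
        area[i]/(CFL*cdt[i]) * (qnode - qold) is added to the steady-state residual.
        With INTERLACING the four unknowns of node i are stored contiguously at 4*i..4*i+3;
        otherwise each unknown occupies its own block of length nnodesLoc. */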
407:   if (ires == 1) {
408:     VecGetArray(tsCtx->qold,&qold);
409: #if defined(INTERLACING)
410:     for (i = 0; i < grid->nnodesLoc; i++) {
411:       temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
412:       for (j = 0; j < 4; j++) {
413:         in       = 4*i + j;
414:         res[in] += temp*(qnode[in] - qold[in]);
415:       }
416:     }
417: #else
418:     for (j = 0; j < 4; j++) {
419:       for (i = 0; i < grid->nnodesLoc; i++) {
420:         temp     = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
421:         in       = grid->nnodesLoc*j + i;
422:         res[in] += temp*(qnode[in] - qold[in]);
423:       }
424:     }
425: #endif
426:     VecRestoreArray(tsCtx->qold,&qold);
427:   }
428:   VecRestoreArray(localX,&qnode);
429:   VecRestoreArray(f,&res);
430:   VecRestoreArray(localGrad,&grad);
431:   return 0;
432: }

434: /*---------------------------------------------------------------------*/
435: /* --------------------  Evaluate Jacobian F'(x) -------------------- */

437: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
438: /*---------------------------------------------------------------------*/
439: {
440:   AppCtx      *user  = (AppCtx*) dummy;
441:   GRID        *grid  = user->grid;
442:   TstepCtx    *tsCtx = user->tsCtx;
443:   Vec         localX = grid->qnodeLoc;
444:   PetscScalar *qnode;
445:   int         ierr;

447:   /*  VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
448:       VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
449:   /* VecCopy(x,localX); */
450:   MatSetUnfactored(pc_mat);

452:   VecGetArray(localX,&qnode);
453:   f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
454:            &grid->nsface,
455:             grid->isface,grid->fxn,grid->fyn,grid->fzn,
456:             grid->sxn,grid->syn,grid->szn,
457:            &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
458:             grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
459:             grid->area,grid->xyzn,&tsCtx->cfl,
460:            &rank,&grid->nvertices);
461:   VecRestoreArray(localX,&qnode);
462:   MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
463:   MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
464: #if defined(MATRIX_VIEW)
465:   if ((tsCtx->itstep != 0) &&(tsCtx->itstep % tsCtx->print_freq) == 0) {
466:     PetscViewer viewer;
467:     char mat_file[PETSC_MAX_PATH_LEN];
468:     sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
469:     PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
470:     MatView(pc_mat,viewer);
471:     PetscViewerDestroy(&viewer);
472:   }
473: #endif
474:   return 0;
475: }

477: /*---------------------------------------------------------------------*/
478: int Update(SNES snes,void *ctx)
479: /*---------------------------------------------------------------------*/
480: {
481:   AppCtx      *user   = (AppCtx*) ctx;
482:   GRID        *grid   = user->grid;
483:   TstepCtx    *tsCtx  = user->tsCtx;
484:   VecScatter  scatter = grid->scatter;
485:   Vec         localX  = grid->qnodeLoc;
486:   PetscScalar *qnode,*res;
487:   PetscScalar clift,cdrag,cmom;
488:   int         ierr,its;
489:   PetscScalar fratio;
490:   PetscScalar time1,time2,cpuloc,cpuglo;
491:   int         max_steps;
492:   PetscBool   print_flag = PETSC_FALSE;
493:   FILE        *fptr      = 0;
494:   int         nfailsCum  = 0,nfails = 0;
495:   /*Scalar         cpu_ini,cpu_fin,cpu_time;*/
496:   /*int            event0 = 14,event1 = 25,gen_start,gen_read;
497:   PetscScalar    time_start_counters,time_read_counters;
498:   long long      counter0,counter1;*/

500:   PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
501:   if (print_flag) {
502:     PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
503:     PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
504:   }
505:   if (user->PreLoading) max_steps = 1;
506:   else max_steps = tsCtx->max_steps;
507:   fratio = 1.0;
508:   /*tsCtx->ptime = 0.0;*/
509:   VecCopy(grid->qnode,tsCtx->qold);
510:   PetscTime(&time1);
511: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
512:   /* if (!user->PreLoading) {
513:     PetscBool  flg = PETSC_FALSE;
514:     PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
515:     PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
516:     PetscTime(&time_start_counters);
517:     if ((gen_start = start_counters(event0,event1)) < 0)
518:     SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters");
519:   }*/
520: #endif
521:   /*cpu_ini = PetscGetCPUTime();*/
522:   for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
523:         (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
524:     ComputeTimeStep(snes,tsCtx->itstep,user);
525:     /*tsCtx->ptime +=  tsCtx->dt;*/

527:     SNESSolve(snes,NULL,grid->qnode);
528:     SNESGetIterationNumber(snes,&its);

530:     SNESGetNonlinearStepFailures(snes,&nfails);
531:     nfailsCum += nfails; nfails = 0;
533:     if (print_flag) {
534:       PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
535:                          tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
536:     }
537:     VecCopy(grid->qnode,tsCtx->qold);

539:     c_info->ntt = tsCtx->itstep+1;
540:     PetscTime(&time2);
541:     cpuloc      = time2-time1;
542:     cpuglo      = 0.0;
543:     MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
544:     c_info->tot = cpuglo;    /* Total CPU time used up to this time step */

546:     VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
547:     VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
548:     /* VecCopy(grid->qnode,localX); */

550:     VecGetArray(grid->res,&res);
551:     VecGetArray(localX,&qnode);

553:     f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
554:               grid->isnode, grid->ivnode,
555:              &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
556:              &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
557:               grid->eptr,   qnode,
558:               grid->xyz,
559:               grid->sface_bit,grid->vface_bit,
560:               &clift,&cdrag,&cmom,&rank,&grid->nvertices);
561:     if (print_flag) {
562:       PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
563:                         tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
564:       PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
565:                         cpuglo,tsCtx->itstep);
566:       PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
567:                           tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
568:     }
569:     VecRestoreArray(localX,&qnode);
570:     VecRestoreArray(grid->res,&res);
571:     fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
572:     MPI_Barrier(PETSC_COMM_WORLD);

574:   } /* End of time step loop */

576: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
577:   if (!user->PreLoading) {
578:     int  eve0,eve1;
579:     FILE *cfp0,*cfp1;
580:     char str[256];
581:     /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
582:     SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter");
583:     PetscTime(&time_read_counters);
584:     if (gen_read != gen_start) {
585:     SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...");
586:     }*/
587:     /*sprintf(str,"counters%d_and_%d",event0,event1);
588:     cfp0 = fopen(str,"a");*/
589:     /*print_counters(event0,counter0,event1,counter1);*/
590:     /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
591:                   time_counters);
592:     fclose(cfp0);*/
593:   }
594: #endif
595:   PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
596:                      cpuglo,tsCtx->itstep);
597:   PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
598:   PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);

600:   if (rank == 0 && print_flag) fclose(fptr);
601:   if (user->PreLoading) {
602:     tsCtx->fnorm_ini = 0.0;
603:     PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
604:   }
605:   return 0;
606: }

608: /*---------------------------------------------------------------------*/
609: int ComputeTimeStep(SNES snes,int iter,void *ctx)
610: /*---------------------------------------------------------------------*/
611: {
612:   AppCtx      *user  = (AppCtx*) ctx;
613:   TstepCtx    *tsCtx = user->tsCtx;
614:   Vec         func   = tsCtx->func;
615:   PetscScalar inc    = 1.1;
616:   PetscScalar newcfl;
617:   int         ierr;
618:   /*int       iramp = tsCtx->iramp;*/

620:   tsCtx->ires = 0;
621:   FormFunction(snes,tsCtx->qold,func,user);
622:   tsCtx->ires = 1;
623:   VecNorm(func,NORM_2,&tsCtx->fnorm);
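       /* CFL ramping: after the first step the CFL number grows in proportion to the overall
          residual reduction, newcfl = 1.1*cfl_ini*fnorm_ini/fnorm, capped at cfl_max
          (essentially a switched evolution relaxation (SER) strategy). */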
624:   /* first time through so compute initial function norm */
625:   if (tsCtx->fnorm_ini == 0.0) {
626:     tsCtx->fnorm_ini = tsCtx->fnorm;
627:     tsCtx->cfl       = tsCtx->cfl_ini;
628:   } else {
629:     newcfl     = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
630:     tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
631:   }

633:   /* if (iramp < 0) {
634:    newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
635:   } else {
636:    if (tsCtx->dt < 0 && iramp > 0)
637:     if (iter > iramp) newcfl = tsCtx->cfl_max;
638:     else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
639:                                 (double) iter/(double) iramp;
640:   }
641:   tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
642:   /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
643:   /*VecDestroy(&func);*/
644:   return 0;
645: }

647: /*---------------------------------------------------------------------*/
648: int GetLocalOrdering(GRID *grid)
649: /*---------------------------------------------------------------------*/
650: {
651:   int         ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
652:   int         nnodes,nedge,nnz,jstart,jend;
653:   int         nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
654:   int         nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
655:   int         nnfacet,nvfacet,nffacet;
656:   int         nnfacetLoc,nvfacetLoc,nffacetLoc;
657:   int         nsnode,nvnode,nfnode;
658:   int         nsnodeLoc,nvnodeLoc,nfnodeLoc;
659:   int         nnbound,nvbound,nfbound;
660:   int         bs = 4;
661:   int         fdes = 0;
662:   off_t       currentPos  = 0,newPos = 0;
663:   int         grid_param  = 13;
664:   int         cross_edges = 0;
665:   int         *edge_bit,*pordering;
666:   int         *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
667:   int         *tmp,*tmp1,*tmp2;
668:   PetscScalar time_ini,time_fin;
669:   PetscScalar *ftmp,*ftmp1;
670:   char        mesh_file[PETSC_MAX_PATH_LEN] = "";
671:   AO          ao;
672:   FILE        *fptr,*fptr1;
673:   PetscBool   flg;
674:   MPI_Comm    comm = PETSC_COMM_WORLD;

676:   /* Read the integer grid parameters */
677:   ICALLOC(grid_param,&tmp);
678:   if (rank == 0) {
679:     PetscBool exists;
680:     PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
681:     PetscTestFile(mesh_file,'r',&exists);
682:     if (!exists) { /* try uns3d.msh as the file name */
683:       PetscStrcpy(mesh_file,"uns3d.msh");
684:     }
685:     PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
686:   }
687:   PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
688:   grid->ncell   = tmp[0];
689:   grid->nnodes  = tmp[1];
690:   grid->nedge   = tmp[2];
691:   grid->nnbound = tmp[3];
692:   grid->nvbound = tmp[4];
693:   grid->nfbound = tmp[5];
694:   grid->nnfacet = tmp[6];
695:   grid->nvfacet = tmp[7];
696:   grid->nffacet = tmp[8];
697:   grid->nsnode  = tmp[9];
698:   grid->nvnode  = tmp[10];
699:   grid->nfnode  = tmp[11];
700:   grid->ntte    = tmp[12];
701:   grid->nsface  = 0;
702:   grid->nvface  = 0;
703:   grid->nfface  = 0;
704:   PetscFree(tmp);
705:   PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
706:                               grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);

708:   nnodes  = grid->nnodes;
709:   nedge   = grid->nedge;
710:   nnfacet = grid->nnfacet;
711:   nvfacet = grid->nvfacet;
712:   nffacet = grid->nffacet;
713:   nnbound = grid->nnbound;
714:   nvbound = grid->nvbound;
715:   nfbound = grid->nfbound;
716:   nsnode  = grid->nsnode;
717:   nvnode  = grid->nvnode;
718:   nfnode  = grid->nfnode;

720:   /* Read the partitioning vector generated by MeTiS */
721:   ICALLOC(nnodes,&l2a);
722:   ICALLOC(nnodes,&v2p);
723:   ICALLOC(nnodes,&a2l);
724:   nnodesLoc = 0;

726:   for (i = 0; i < nnodes; i++) a2l[i] = -1;
727:   PetscTime(&time_ini);

729:   if (rank == 0) {
730:     if (size == 1) {
731:       PetscMemzero(v2p,nnodes*sizeof(int));
732:     } else {
733:       char      spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
734:       PetscBool exists;

736:       PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
737:       PetscTestFile(spart_file,'r',&exists);
738:       if (!exists) { /* try appending the number of processors */
739:         sprintf(part_file,"part_vec.part.%d",size);
740:         PetscStrcpy(spart_file,part_file);
741:       }
742:       fptr = fopen(spart_file,"r");
744:       for (inode = 0; inode < nnodes; inode++) {
745:         fscanf(fptr,"%d\n",&node1);
746:         v2p[inode] = node1;
747:       }
748:       fclose(fptr);
749:     }
750:   }
751:   MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
752:   for (inode = 0; inode < nnodes; inode++) {
753:     if (v2p[inode] == rank) {
754:       l2a[nnodesLoc] = inode;
755:       a2l[inode]     = nnodesLoc;
756:       nnodesLoc++;
757:     }
758:   }

760:   PetscTime(&time_fin);
761:   time_fin -= time_ini;
762:   PetscPrintf(comm,"Partition Vector read successfully\n");
763:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

765:   MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
766:   rstart -= nnodesLoc;
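       /* MPI_Scan followed by the subtraction gives rstart = number of nodes owned by lower
          ranks, i.e. the first PETSc index owned by this process; the AO created below maps
          the application (file) node numbering in l2a to this contiguous PETSc numbering. */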
767:   ICALLOC(nnodesLoc,&pordering);
768:   for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
769:   AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
770:   PetscFree(pordering);

772:   /* Now count the local number of edges, including edges with
773:    ghost nodes; edges between two ghost nodes are NOT counted */
774:   nedgeLoc  = 0;
775:   nvertices = nnodesLoc;
776:   /* Choose an estimated number of local edges. The choice
777:    nedgeLocEst = 1000000 looks reasonable as it will read
778:    the edge and edge normal arrays in 8 MB chunks */
779:   /*nedgeLocEst = nedge/size;*/
780:   nedgeLocEst = PetscMin(nedge,1000000);
781:   remEdges    = nedge;
782:   ICALLOC(2*nedgeLocEst,&tmp);
783:   PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
784:   PetscTime(&time_ini);
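       /* The edge list is stored in the file as two consecutive blocks of nedge integers
          (all first endpoints, then all second endpoints).  Each pass of the loop below reads
          one chunk of each block, using the seeks to jump between the two blocks. */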
785:   while (remEdges > 0) {
786:     readEdges = PetscMin(remEdges,nedgeLocEst);
787:     /*time_ini = PetscTime();*/
788:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
789:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
790:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
791:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
792:     /*time_fin += PetscTime()-time_ini;*/
793:     for (j = 0; j < readEdges; j++) {
794:       node1 = tmp[j]-1;
795:       node2 = tmp[j+readEdges]-1;
796:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
797:         nedgeLoc++;
798:         if (a2l[node1] == -1) {
799:           l2a[nvertices] = node1;
800:           a2l[node1]     = nvertices;
801:           nvertices++;
802:         }
803:         if (a2l[node2] == -1) {
804:           l2a[nvertices] = node2;
805:           a2l[node2]     = nvertices;
806:           nvertices++;
807:         }
808:       }
809:     }
810:     remEdges = remEdges - readEdges;
811:     MPI_Barrier(comm);
812:   }
813:   PetscTime(&time_fin);
814:   time_fin -= time_ini;
815:   PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
816:   PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
817:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

819:   /* Now store the local edges */
820:   ICALLOC(2*nedgeLoc,&grid->eptr);
821:   ICALLOC(nedgeLoc,&edge_bit);
822:   ICALLOC(nedgeLoc,&eperm);
823:   i = 0; j = 0; k = 0;
824:   remEdges   = nedge;
825:   PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
826:   currentPos = newPos;

828:   PetscTime(&time_ini);
829:   while (remEdges > 0) {
830:     readEdges = PetscMin(remEdges,nedgeLocEst);
831:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
832:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
833:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
834:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
835:     for (j = 0; j < readEdges; j++) {
836:       node1 = tmp[j]-1;
837:       node2 = tmp[j+readEdges]-1;
838:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
839:         grid->eptr[k]          = a2l[node1];
840:         grid->eptr[k+nedgeLoc] = a2l[node2];
841:         edge_bit[k]            = i; /* Record global file index of the edge */
842:         eperm[k]               = k;
843:         k++;
844:       }
845:       i++;
846:     }
847:     remEdges = remEdges - readEdges;
848:     MPI_Barrier(comm);
849:   }
850:   PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
851:   PetscTime(&time_fin);
852:   time_fin -= time_ini;
853:   PetscPrintf(comm,"Local edges stored\n");
854:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

856:   PetscFree(tmp);
857:   ICALLOC(2*nedgeLoc,&tmp);
858:   PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
859: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
860:   EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
861: #else
862:   /* Now reorder the edges for better cache locality */
863:   /*
864:   tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
865:   PetscSortIntWithPermutation(6,tmp,eperm);
866:   for (i=0; i<6; i++)
867:    printf("%d %d %d\n",i,tmp[i],eperm[i]);
868:   */
869:   flg  = PETSC_FALSE;
870:   PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
871:   if (!flg) {
872:     PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
873:   }
874: #endif
875:   PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
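       /* The loop below rebuilds eptr in the (possibly reordered) edge order using 1-based node
          indices and, apparently as a rough locality diagnostic, counts the edges whose endpoints
          straddle the midpoint of the locally owned node range (cross_node = nnodesLoc/2). */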
876:   k    = 0;
877:   for (i = 0; i < nedgeLoc; i++) {
878:     int cross_node=nnodesLoc/2;
879:     node1 = tmp[eperm[i]] + 1;
880:     node2 = tmp[nedgeLoc+eperm[i]] + 1;
881: #if defined(INTERLACING)
882:     grid->eptr[k++] = node1;
883:     grid->eptr[k++] = node2;
884: #else
885:     grid->eptr[i]          = node1;
886:     grid->eptr[nedgeLoc+i] = node2;
887: #endif
888:     /* if (node1 > node2)
889:      printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
890:             rank,i,node1,node2);*/
891:     if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
892:   }
893:   PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
894:   PetscFree(tmp);
895: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
896:   /* Now make the local 'ia' and 'ja' arrays */
897:   ICALLOC(nvertices+1,&grid->ia);
898:   /* Use tmp for a work array */
899:   ICALLOC(nvertices,&tmp);
900:   f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
901:   nnz = grid->ia[nvertices] - 1;
902:   ICALLOC(nnz,&grid->ja);
903:   f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
904:   PetscFree(tmp);
905: #else
906:   /* Now make the local 'ia' and 'ja' arrays */
907:   ICALLOC(nnodesLoc+1,&grid->ia);
908:   /* Use tmp for a work array */
909:   ICALLOC(nnodesLoc,&tmp);
910:   f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
911:   nnz = grid->ia[nnodesLoc] - 1;
912: #if defined(BLOCKING)
913:   PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
914: #else
915:   PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
916: #endif
917:   ICALLOC(nnz,&grid->ja);
918:   f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
919:   PetscFree(tmp);
920: #endif
921:   ICALLOC(nvertices,&grid->loc2glo);
922:   PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
923:   PetscFree(l2a);
924:   l2a  = grid->loc2glo;
925:   ICALLOC(nvertices,&grid->loc2pet);
926:   l2p  = grid->loc2pet;
927:   PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
928:   AOApplicationToPetsc(ao,nvertices,l2p);

930:   /* Renumber the unit normals of the dual faces (oriented from node1 to node2)
931:       and the areas of the dual mesh faces */
932:   FCALLOC(nedgeLocEst,&ftmp);
933:   FCALLOC(nedgeLoc,&ftmp1);
934:   FCALLOC(4*nedgeLoc,&grid->xyzn);
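       /* With INTERLACING the x,y,z components of the dual-face normal and the face area for
          edge i are stored contiguously in xyzn[4*i..4*i+3]; otherwise they occupy four
          separate blocks of length nedgeLoc. */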
935:   /* Do the x-component */
936:   i = 0; k = 0;
937:   remEdges = nedge;
938:   PetscTime(&time_ini);
939:   while (remEdges > 0) {
940:     readEdges = PetscMin(remEdges,nedgeLocEst);
941:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
942:     for (j = 0; j < readEdges; j++)
943:       if (edge_bit[k] == (i+j)) {
944:         ftmp1[k] = ftmp[j];
945:         k++;
946:       }
947:     i       += readEdges;
948:     remEdges = remEdges - readEdges;
949:     MPI_Barrier(comm);
950:   }
951:   for (i = 0; i < nedgeLoc; i++)
952: #if defined(INTERLACING)
953:     grid->xyzn[4*i] = ftmp1[eperm[i]];
954: #else
955:     grid->xyzn[i] = ftmp1[eperm[i]];
956: #endif
957:   /* Do the y-component */
958:   i = 0; k = 0;
959:   remEdges = nedge;
960:   while (remEdges > 0) {
961:     readEdges = PetscMin(remEdges,nedgeLocEst);
962:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
963:     for (j = 0; j < readEdges; j++)
964:       if (edge_bit[k] == (i+j)) {
965:         ftmp1[k] = ftmp[j];
966:         k++;
967:       }
968:     i       += readEdges;
969:     remEdges = remEdges - readEdges;
970:     MPI_Barrier(comm);
971:   }
972:   for (i = 0; i < nedgeLoc; i++)
973: #if defined(INTERLACING)
974:     grid->xyzn[4*i+1] = ftmp1[eperm[i]];
975: #else
976:     grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
977: #endif
978:   /* Do the z-component */
979:   i = 0; k = 0;
980:   remEdges = nedge;
981:   while (remEdges > 0) {
982:     readEdges = PetscMin(remEdges,nedgeLocEst);
983:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
984:     for (j = 0; j < readEdges; j++)
985:       if (edge_bit[k] == (i+j)) {
986:         ftmp1[k] = ftmp[j];
987:         k++;
988:       }
989:     i       += readEdges;
990:     remEdges = remEdges - readEdges;
991:     MPI_Barrier(comm);
992:   }
993:   for (i = 0; i < nedgeLoc; i++)
994: #if defined(INTERLACING)
995:     grid->xyzn[4*i+2] = ftmp1[eperm[i]];
996: #else
997:     grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
998: #endif
999:   /* Do the area */
1000:   i = 0; k = 0;
1001:   remEdges = nedge;
1002:   while (remEdges > 0) {
1003:     readEdges = PetscMin(remEdges,nedgeLocEst);
1004:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1005:     for (j = 0; j < readEdges; j++)
1006:       if (edge_bit[k] == (i+j)) {
1007:         ftmp1[k] = ftmp[j];
1008:         k++;
1009:       }
1010:     i       += readEdges;
1011:     remEdges = remEdges - readEdges;
1012:     MPI_Barrier(comm);
1013:   }
1014:   for (i = 0; i < nedgeLoc; i++)
1015: #if defined(INTERLACING)
1016:     grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1017: #else
1018:     grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1019: #endif

1021:   PetscFree(edge_bit);
1022:   PetscFree(eperm);
1023:   PetscFree(ftmp);
1024:   PetscFree(ftmp1);
1025:   PetscTime(&time_fin);
1026:   time_fin -= time_ini;
1027:   PetscPrintf(comm,"Edge normals partitioned\n");
1028:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1029: #if defined(_OPENMP)
1030:   /* Arrange for the division of work among threads */
1031: #if defined(HAVE_EDGE_COLORING)
1032: #elif defined(HAVE_REDUNDANT_WORK)
1033:   FCALLOC(4*nnodesLoc,   &grid->resd);
1034: #else
1035:   {
1036:     /* Get the local adjacency structure of the graph for partitioning the local
1037:       graph into max_threads pieces */
1038:     int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1039:     int numflag = 0, wgtflag = 0, edgecut;
1040:     int thr1,thr2,nedgeAllThreads,ned1,ned2;
1041:     ICALLOC((nvertices+1),&ia);
1042:     ICALLOC((2*nedgeLoc),&ja);
1043:     ia[0] = 0;
1044:     for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1045:     for (i = 0; i < nvertices; i++) {
1046:       int jstart,jend;
1047:       jstart = grid->ia[i]-1;
1048:       jend   = grid->ia[i+1]-1;
1049:       k      = ia[i];
1050:       for (j=jstart; j < jend; j++) {
1051:         inode = grid->ja[j]-1;
1052:         if (inode != i) ja[k++] = inode;
1053:       }
1054:     }
1055:     ICALLOC(nvertices,&grid->part_thr);
1056:     PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1057:     options[0] = 0;
1058:     /* Call the METIS graph-partitioning routine */
1059:     if (max_threads > 1)
1060:       METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1061:                                &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1062:     PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1063:     /* Write the partition vector to disk */
1064:     flg  = PETSC_FALSE;
1065:     PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1066:     if (flg) {
1067:       int  *partv_loc, *partv_glo;
1068:       int  *disp,*counts,*loc2glo_glo;
1069:       char part_file[PETSC_MAX_PATH_LEN];
1070:       FILE *fp;

1072:       ICALLOC(nnodes, &partv_glo);
1073:       ICALLOC(nnodesLoc, &partv_loc);
1074:       for (i = 0; i < nnodesLoc; i++)
1075:         /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1076:         partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1077:       ICALLOC(size,&disp);
1078:       ICALLOC(size,&counts);
1079:       MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1080:       disp[0] = 0;
1081:       for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1082:       ICALLOC(nnodes, &loc2glo_glo);
1083:       MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1084:       MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1085:       if (rank == 0) {
1086:         PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1087:         sprintf(part_file,"hyb_part_vec.%d",2*size);
1088:         fp = fopen(part_file,"w");
1089:         for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1090:         fclose(fp);
1091:       }
1092:       PetscFree(partv_loc);
1093:       PetscFree(partv_glo);
1094:       PetscFree(disp);
1095:       PetscFree(counts);
1096:       PetscFree(loc2glo_glo);
1097:     }

1099:     /* Divide the work among threads */
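          /* First count the edges assigned to each thread (a cut edge, whose endpoints belong to
             different threads, is counted for both), then convert nedge_thr into 1-based
             CSR-style offsets; edge_thr and xyzn_thr below hold per-thread copies of the
             edge list and edge normals. */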
1100:     k = 0;
1101:     ICALLOC((max_threads+1),&grid->nedge_thr);
1102:     PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1103:     cross_edges = 0;
1104:     for (i = 0; i < nedgeLoc; i++) {
1105:       node1 = grid->eptr[k++]-1;
1106:       node2 = grid->eptr[k++]-1;
1107:       thr1  = grid->part_thr[node1];
1108:       thr2  = grid->part_thr[node2];
1109:       grid->nedge_thr[thr1]+=1;
1110:       if (thr1 != thr2) {
1111:         grid->nedge_thr[thr2]+=1;
1112:         cross_edges++;
1113:       }
1114:     }
1115:     PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
1116:     ned1 = grid->nedge_thr[0];
1117:     grid->nedge_thr[0] = 1;
1118:     for (i = 1; i <= max_threads; i++) {
1119:       ned2 = grid->nedge_thr[i];
1120:       grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1121:       ned1 = ned2;
1122:     }
1123:     /* Allocate a shared edge array. Note that a cut edge is evaluated
1124:         by both threads, but updates are done only for the locally
1125:         owned node */
1126:     grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1127:     ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1128:     ICALLOC(max_threads,&tmp);
1129:     FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1130:     for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1131:     k = 0;
1132:     for (i = 0; i < nedgeLoc; i++) {
1133:       int ie1,ie2,ie3;
1134:       node1 = grid->eptr[k++];
1135:       node2 = grid->eptr[k++];
1136:       thr1  = grid->part_thr[node1-1];
1137:       thr2  = grid->part_thr[node2-1];
1138:       ie1   = 2*tmp[thr1];
1139:       ie2   = 4*tmp[thr1];
1140:       ie3   = 4*i;

1142:       grid->edge_thr[ie1]   = node1;
1143:       grid->edge_thr[ie1+1] = node2;
1144:       grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1145:       grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1146:       grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1147:       grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1149:       tmp[thr1]+=1;
1150:       if (thr1 != thr2) {
1151:         ie1 = 2*tmp[thr2];
1152:         ie2 = 4*tmp[thr2];

1154:         grid->edge_thr[ie1]   = node1;
1155:         grid->edge_thr[ie1+1] = node2;
1156:         grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1157:         grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1158:         grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1159:         grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1161:         tmp[thr2]+=1;
1162:       }
1163:     }
1164:   }
1165: #endif
1166: #endif

1168:   /* Remap coordinates */
1169:   /*nnodesLocEst = nnodes/size;*/
1170:   nnodesLocEst = PetscMin(nnodes,500000);
1171:   FCALLOC(nnodesLocEst,&ftmp);
1172:   FCALLOC(3*nvertices,&grid->xyz);
1173:   remNodes = nnodes;
1174:   i        = 0;
1175:   PetscTime(&time_ini);
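        /* Coordinates are stored in the file as all x values, then all y, then all z, so the
           three loops below make one pass each.  Nodes with a2l[] == -1 are neither locally
           owned nor ghosted here and are simply skipped. */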
1176:   while (remNodes > 0) {
1177:     readNodes = PetscMin(remNodes,nnodesLocEst);
1178:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1179:     for (j = 0; j < readNodes; j++) {
1180:       if (a2l[i+j] >= 0) {
1181: #if defined(INTERLACING)
1182:         grid->xyz[3*a2l[i+j]] = ftmp[j];
1183: #else
1184:         grid->xyz[a2l[i+j]] = ftmp[j];
1185: #endif
1186:       }
1187:     }
1188:     i        += nnodesLocEst;
1189:     remNodes -= nnodesLocEst;
1190:     MPI_Barrier(comm);
1191:   }

1193:   remNodes = nnodes;
1194:   i = 0;
1195:   while (remNodes > 0) {
1196:     readNodes = PetscMin(remNodes,nnodesLocEst);
1197:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1198:     for (j = 0; j < readNodes; j++) {
1199:       if (a2l[i+j] >= 0) {
1200: #if defined(INTERLACING)
1201:         grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1202: #else
1203:         grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1204: #endif
1205:       }
1206:     }
1207:     i        += nnodesLocEst;
1208:     remNodes -= nnodesLocEst;
1209:     MPI_Barrier(comm);
1210:   }

1212:   remNodes = nnodes;
1213:   i        = 0;
1214:   while (remNodes > 0) {
1215:     readNodes = PetscMin(remNodes,nnodesLocEst);
1216:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1217:     for (j = 0; j < readNodes; j++) {
1218:       if (a2l[i+j] >= 0) {
1219: #if defined(INTERLACING)
1220:         grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1221: #else
1222:         grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1223: #endif
1224:       }
1225:     }
1226:     i        += nnodesLocEst;
1227:     remNodes -= nnodesLocEst;
1228:     MPI_Barrier(comm);
1229:   }

1231:   /* Renumber dual volume "area" */
1232:   FCALLOC(nvertices,&grid->area);
1233:   remNodes = nnodes;
1234:   i        = 0;
1235:   while (remNodes > 0) {
1236:     readNodes = PetscMin(remNodes,nnodesLocEst);
1237:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1238:     for (j = 0; j < readNodes; j++)
1239:       if (a2l[i+j] >= 0)
1240:         grid->area[a2l[i+j]] = ftmp[j];
1241:     i        += nnodesLocEst;
1242:     remNodes -= nnodesLocEst;
1243:     MPI_Barrier(comm);
1244:   }

1246:   PetscFree(ftmp);
1247:   PetscTime(&time_fin);
1248:   time_fin -= time_ini;
1249:   PetscPrintf(comm,"Coordinates remapped\n");
1250:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

1252: /* Now handle all the solid boundaries - things to be done:
1253:  * 1. Identify the nodes belonging to the solid
1254:  *    boundaries and count them.
1255:  * 2. Put proper indices into the f2ntn array, after making it
1256:  *    of suitable size.
1257:  * 3. Remap the normals and areas of solid faces (the sxn, syn, szn,
1258:  *    and sa arrays).
1259:  */
1260:   ICALLOC(nnbound,  &grid->nntet);
1261:   ICALLOC(nnbound,  &grid->nnpts);
1262:   ICALLOC(4*nnfacet,&grid->f2ntn);
1263:   ICALLOC(nsnode,&grid->isnode);
1264:   FCALLOC(nsnode,&grid->sxn);
1265:   FCALLOC(nsnode,&grid->syn);
1266:   FCALLOC(nsnode,&grid->szn);
1267:   FCALLOC(nsnode,&grid->sa);
1268:   PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1269:   PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1270:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1271:   PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1272:   PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1273:   PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1274:   PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);

1276:   isurf      = 0;
1277:   nsnodeLoc  = 0;
1278:   nnfacetLoc = 0;
1279:   nb         = 0;
1280:   nte        = 0;
1281:   ICALLOC(3*nnfacet,&tmp);
1282:   ICALLOC(nsnode,&tmp1);
1283:   ICALLOC(nnodes,&tmp2);
1284:   FCALLOC(4*nsnode,&ftmp);
1285:   PetscMemzero(tmp,3*nnfacet*sizeof(int));
1286:   PetscMemzero(tmp1,nsnode*sizeof(int));
1287:   PetscMemzero(tmp2,nnodes*sizeof(int));

1289:   j = 0;
1290:   for (i = 0; i < nsnode; i++) {
1291:     node1 = a2l[grid->isnode[i] - 1];
1292:     if (node1 >= 0) {
1293:       tmp1[nsnodeLoc] = node1;
1294:       tmp2[node1]     = nsnodeLoc;
1295:       ftmp[j++]       = grid->sxn[i];
1296:       ftmp[j++]       = grid->syn[i];
1297:       ftmp[j++]       = grid->szn[i];
1298:       ftmp[j++]       = grid->sa[i];
1299:       nsnodeLoc++;
1300:     }
1301:   }
1302:   for (i = 0; i < nnbound; i++) {
1303:     for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1304:       node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1305:       node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1306:       node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];

1308:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1309:         nnfacetLoc++;
1310:         nte++;
1311:         tmp[nb++] = tmp2[node1];
1312:         tmp[nb++] = tmp2[node2];
1313:         tmp[nb++] = tmp2[node3];
1314:       }
1315:     }
1316:     isurf += grid->nntet[i];
1317:     /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1318:     grid->nntet[i] = nte;
1319:     /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1320:     nte = 0;
1321:   }
1322:   PetscFree(grid->f2ntn);
1323:   PetscFree(grid->isnode);
1324:   PetscFree(grid->sxn);
1325:   PetscFree(grid->syn);
1326:   PetscFree(grid->szn);
1327:   PetscFree(grid->sa);
1328:   ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1329:   ICALLOC(nsnodeLoc,&grid->isnode);
1330:   FCALLOC(nsnodeLoc,&grid->sxn);
1331:   FCALLOC(nsnodeLoc,&grid->syn);
1332:   FCALLOC(nsnodeLoc,&grid->szn);
1333:   FCALLOC(nsnodeLoc,&grid->sa);
1334:   j = 0;
1335:   for (i = 0; i < nsnodeLoc; i++) {
1336:     grid->isnode[i] = tmp1[i] + 1;
1337:     grid->sxn[i]    = ftmp[j++];
1338:     grid->syn[i]    = ftmp[j++];
1339:     grid->szn[i]    = ftmp[j++];
1340:     grid->sa[i]     = ftmp[j++];
1341:   }
1342:   j = 0;
1343:   for (i = 0; i < nnfacetLoc; i++) {
1344:     grid->f2ntn[i]              = tmp[j++] + 1;
1345:     grid->f2ntn[nnfacetLoc+i]   = tmp[j++] + 1;
1346:     grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1347:   }
1348:   PetscFree(tmp);
1349:   PetscFree(tmp1);
1350:   PetscFree(tmp2);
1351:   PetscFree(ftmp);

1353: /* Now identify the triangles on which the current processor
1354:    would perform force calculation */
1355:   ICALLOC(nnfacetLoc,&grid->sface_bit);
1356:   PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
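        /* A boundary triangle is flagged (sface_bit = 1) only on the lowest-ranked process that
           owns at least one of its three nodes, i.e. all node owners are >= rank and at least
           one equals rank, so the force on each face is computed exactly once. */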
1357:   for (i = 0; i < nnfacetLoc; i++) {
1358:     node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1359:     node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1360:     node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1361:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1362:          && (v2p[node3] >= rank)) &&
1363:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1364:          || (v2p[node3] == rank)))
1365:       grid->sface_bit[i] = 1;
1366:   }
1367:   /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1368:   PetscPrintf(comm,"Solid boundaries partitioned\n");

1370: /* Now handle all the viscous boundaries - things to be done:
1371:  * 1. Identify the nodes belonging to the viscous
1372:  *    boundaries and count them.
1373:  * 2. Put proper indices into the f2ntv array, after making it
1374:  *    of suitable size.
1375:  * 3. Remap the normals and areas of viscous faces (the vxn, vyn, vzn,
1376:  *    and va arrays).
1377:  */
1378:   ICALLOC(nvbound,  &grid->nvtet);
1379:   ICALLOC(nvbound,  &grid->nvpts);
1380:   ICALLOC(4*nvfacet,&grid->f2ntv);
1381:   ICALLOC(nvnode,&grid->ivnode);
1382:   FCALLOC(nvnode,&grid->vxn);
1383:   FCALLOC(nvnode,&grid->vyn);
1384:   FCALLOC(nvnode,&grid->vzn);
1385:   FCALLOC(nvnode,&grid->va);
1386:   PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1387:   PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1388:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1389:   PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1390:   PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1391:   PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1392:   PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);

1394:   isurf      = 0;
1395:   nvnodeLoc  = 0;
1396:   nvfacetLoc = 0;
1397:   nb         = 0;
1398:   nte        = 0;
1399:   ICALLOC(3*nvfacet,&tmp);
1400:   ICALLOC(nvnode,&tmp1);
1401:   ICALLOC(nnodes,&tmp2);
1402:   FCALLOC(4*nvnode,&ftmp);
1403:   PetscMemzero(tmp,3*nvfacet*sizeof(int));
1404:   PetscMemzero(tmp1,nvnode*sizeof(int));
1405:   PetscMemzero(tmp2,nnodes*sizeof(int));

1407:   j = 0;
1408:   for (i = 0; i < nvnode; i++) {
1409:     node1 = a2l[grid->ivnode[i] - 1];
1410:     if (node1 >= 0) {
1411:       tmp1[nvnodeLoc] = node1;
1412:       tmp2[node1]     = nvnodeLoc;
1413:       ftmp[j++]       = grid->vxn[i];
1414:       ftmp[j++]       = grid->vyn[i];
1415:       ftmp[j++]       = grid->vzn[i];
1416:       ftmp[j++]       = grid->va[i];
1417:       nvnodeLoc++;
1418:     }
1419:   }
1420:   for (i = 0; i < nvbound; i++) {
1421:     for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1422:       node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1423:       node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1424:       node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1425:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1426:         nvfacetLoc++;
1427:         nte++;
1428:         tmp[nb++] = tmp2[node1];
1429:         tmp[nb++] = tmp2[node2];
1430:         tmp[nb++] = tmp2[node3];
1431:       }
1432:     }
1433:     isurf         += grid->nvtet[i];
1434:     grid->nvtet[i] = nte;
1435:     nte            = 0;
1436:   }
1437:   PetscFree(grid->f2ntv);
1438:   PetscFree(grid->ivnode);
1439:   PetscFree(grid->vxn);
1440:   PetscFree(grid->vyn);
1441:   PetscFree(grid->vzn);
1442:   PetscFree(grid->va);
1443:   ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1444:   ICALLOC(nvnodeLoc,&grid->ivnode);
1445:   FCALLOC(nvnodeLoc,&grid->vxn);
1446:   FCALLOC(nvnodeLoc,&grid->vyn);
1447:   FCALLOC(nvnodeLoc,&grid->vzn);
1448:   FCALLOC(nvnodeLoc,&grid->va);
1449:   j = 0;
1450:   for (i = 0; i < nvnodeLoc; i++) {
1451:     grid->ivnode[i] = tmp1[i] + 1;
1452:     grid->vxn[i]    = ftmp[j++];
1453:     grid->vyn[i]    = ftmp[j++];
1454:     grid->vzn[i]    = ftmp[j++];
1455:     grid->va[i]     = ftmp[j++];
1456:   }
1457:   j = 0;
1458:   for (i = 0; i < nvfacetLoc; i++) {
1459:     grid->f2ntv[i]              = tmp[j++] + 1;
1460:     grid->f2ntv[nvfacetLoc+i]   = tmp[j++] + 1;
1461:     grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1462:   }
1463:   PetscFree(tmp);
1464:   PetscFree(tmp1);
1465:   PetscFree(tmp2);
1466:   PetscFree(ftmp);

1468: /* Now identify the triangles on which the current processor
1469:    would perform force calculation */
1470:   ICALLOC(nvfacetLoc,&grid->vface_bit);
1471:   PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
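/* Ownership rule for the force accumulation implemented by the test below: a viscous face is
 * flagged on this process only if none of its three nodes belongs to a lower-ranked process and at
 * least one of them is owned here, so each face is accumulated by the lowest rank that owns any of
 * its nodes. */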
1472:   for (i = 0; i < nvfacetLoc; i++) {
1473:     node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1474:     node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1475:     node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1476:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1477:          && (v2p[node3] >= rank)) &&
1478:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1479:         || (v2p[node3] == rank))) {
1480:          grid->vface_bit[i] = 1;
1481:     }
1482:   }
1483:   PetscFree(v2p);
1484:   PetscPrintf(comm,"Viscous boundaries partitioned\n");

1486: /* Now, handle all the free boundaries - things to be done:
1487:  * 1. Identify the nodes belonging to the free
1488:  *    boundaries and count them.
1489:  * 2. Put proper indices into the f2ntf array, after making it
1490:  *    of suitable size.
1491:  * 3. Remap the normals and areas of free boundary faces (fxn,fyn,fzn,
1492:  *    and fa arrays).
1493:  */

1495:   ICALLOC(nfbound,  &grid->nftet);
1496:   ICALLOC(nfbound,  &grid->nfpts);
1497:   ICALLOC(4*nffacet,&grid->f2ntf);
1498:   ICALLOC(nfnode,&grid->ifnode);
1499:   FCALLOC(nfnode,&grid->fxn);
1500:   FCALLOC(nfnode,&grid->fyn);
1501:   FCALLOC(nfnode,&grid->fzn);
1502:   FCALLOC(nfnode,&grid->fa);
1503:   PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1504:   PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1505:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1506:   PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1507:   PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1508:   PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1509:   PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);

1511:   isurf      = 0;
1512:   nfnodeLoc  = 0;
1513:   nffacetLoc = 0;
1514:   nb         = 0;
1515:   nte        = 0;
1516:   ICALLOC(3*nffacet,&tmp);
1517:   ICALLOC(nfnode,&tmp1);
1518:   ICALLOC(nnodes,&tmp2);
1519:   FCALLOC(4*nfnode,&ftmp);
1520:   PetscMemzero(tmp,3*nffacet*sizeof(int));
1521:   PetscMemzero(tmp1,nfnode*sizeof(int));
1522:   PetscMemzero(tmp2,nnodes*sizeof(int));

1524:   j = 0;
1525:   for (i = 0; i < nfnode; i++) {
1526:     node1 = a2l[grid->ifnode[i] - 1];
1527:     if (node1 >= 0) {
1528:       tmp1[nfnodeLoc] = node1;
1529:       tmp2[node1]     = nfnodeLoc;
1530:       ftmp[j++]       = grid->fxn[i];
1531:       ftmp[j++]       = grid->fyn[i];
1532:       ftmp[j++]       = grid->fzn[i];
1533:       ftmp[j++]       = grid->fa[i];
1534:       nfnodeLoc++;
1535:     }
1536:   }
1537:   for (i = 0; i < nfbound; i++) {
1538:     for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1539:       node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1540:       node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1541:       node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1542:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1543:         nffacetLoc++;
1544:         nte++;
1545:         tmp[nb++] = tmp2[node1];
1546:         tmp[nb++] = tmp2[node2];
1547:         tmp[nb++] = tmp2[node3];
1548:       }
1549:     }
1550:     isurf         += grid->nftet[i];
1551:     grid->nftet[i] = nte;
1552:     nte            = 0;
1553:   }
1554:   PetscFree(grid->f2ntf);
1555:   PetscFree(grid->ifnode);
1556:   PetscFree(grid->fxn);
1557:   PetscFree(grid->fyn);
1558:   PetscFree(grid->fzn);
1559:   PetscFree(grid->fa);
1560:   ICALLOC(4*nffacetLoc,&grid->f2ntf);
1561:   ICALLOC(nfnodeLoc,&grid->ifnode);
1562:   FCALLOC(nfnodeLoc,&grid->fxn);
1563:   FCALLOC(nfnodeLoc,&grid->fyn);
1564:   FCALLOC(nfnodeLoc,&grid->fzn);
1565:   FCALLOC(nfnodeLoc,&grid->fa);
1566:   j = 0;
1567:   for (i = 0; i < nfnodeLoc; i++) {
1568:     grid->ifnode[i] = tmp1[i] + 1;
1569:     grid->fxn[i]    = ftmp[j++];
1570:     grid->fyn[i]    = ftmp[j++];
1571:     grid->fzn[i]    = ftmp[j++];
1572:     grid->fa[i]     = ftmp[j++];
1573:   }
1574:   j = 0;
1575:   for (i = 0; i < nffacetLoc; i++) {
1576:     grid->f2ntf[i]              = tmp[j++] + 1;
1577:     grid->f2ntf[nffacetLoc+i]   = tmp[j++] + 1;
1578:     grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1579:   }

1581:   PetscFree(tmp);
1582:   PetscFree(tmp1);
1583:   PetscFree(tmp2);
1584:   PetscFree(ftmp);
1585:   PetscPrintf(comm,"Free boundaries partitioned\n");

1587:   flg  = PETSC_FALSE;
1588:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
1589:   if (flg) {
1590:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1591:   }

1593:   /* Put different mappings and other info into grid */
1594:   /* ICALLOC(nvertices,&grid->loc2pet);
1595:    ICALLOC(nvertices,&grid->loc2glo);
1596:    PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1597:    PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1598:    PetscFree(l2a);
1599:    PetscFree(l2p);*/

1601:   grid->nnodesLoc  = nnodesLoc;
1602:   grid->nedgeLoc   = nedgeLoc;
1603:   grid->nvertices  = nvertices;
1604:   grid->nsnodeLoc  = nsnodeLoc;
1605:   grid->nvnodeLoc  = nvnodeLoc;
1606:   grid->nfnodeLoc  = nfnodeLoc;
1607:   grid->nnfacetLoc = nnfacetLoc;
1608:   grid->nvfacetLoc = nvfacetLoc;
1609:   grid->nffacetLoc = nffacetLoc;
1610: /*
1611:  * FCALLOC(nvertices*4, &grid->gradx);
1612:  * FCALLOC(nvertices*4, &grid->grady);
1613:  * FCALLOC(nvertices*4, &grid->gradz);
1614:  */
1615:   FCALLOC(nvertices,   &grid->cdt);
1616:   FCALLOC(nvertices*4, &grid->phi);
1617: /*
1618:    FCALLOC(nvertices,   &grid->r11);
1619:    FCALLOC(nvertices,   &grid->r12);
1620:    FCALLOC(nvertices,   &grid->r13);
1621:    FCALLOC(nvertices,   &grid->r22);
1622:    FCALLOC(nvertices,   &grid->r23);
1623:    FCALLOC(nvertices,   &grid->r33);
1624: */
1625:   FCALLOC(7*nnodesLoc,   &grid->rxy);

1627: /* Map the 'ja' array in petsc ordering */
1628:   for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1629:   AOApplicationToPetsc(ao,nnz,grid->ja);
1630:   AODestroy(&ao);

1632: /* Print the different mappings
1633:  *
1634:  */
1635:   {
1636:     int partLoc[7],partMax[7],partMin[7],partSum[7];
1637:     partLoc[0] = nnodesLoc;
1638:     partLoc[1] = nvertices;
1639:     partLoc[2] = nedgeLoc;
1640:     partLoc[3] = nnfacetLoc;
1641:     partLoc[4] = nffacetLoc;
1642:     partLoc[5] = nsnodeLoc;
1643:     partLoc[6] = nfnodeLoc;
1644:     for (i = 0; i < 7; i++) {
1645:       partMin[i] = 0;
1646:       partMax[i] = 0;
1647:       partSum[i] = 0;
1648:     }

1650:     MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1651:     MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1652:     MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1653:     PetscPrintf(comm,"==============================\n");
1654:     PetscPrintf(comm,"Partitioning quality info ....\n");
1655:     PetscPrintf(comm,"==============================\n");
1656:     PetscPrintf(comm,"------------------------------------------------------------\n");
1657:     PetscPrintf(comm,"Item                    Min        Max    Average      Total\n");
1658:     PetscPrintf(comm,"------------------------------------------------------------\n");
1659:     PetscPrintf(comm,"Local Nodes       %9d  %9d  %9d  %9d\n",
1660:                        partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1661:     PetscPrintf(comm,"Local+Ghost Nodes %9d  %9d  %9d  %9d\n",
1662:                        partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1663:     PetscPrintf(comm,"Local Edges       %9d  %9d  %9d  %9d\n",
1664:                        partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1665:     PetscPrintf(comm,"Local solid faces %9d  %9d  %9d  %9d\n",
1666:                        partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1667:     PetscPrintf(comm,"Local free faces  %9d  %9d  %9d  %9d\n",
1668:                        partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1669:     PetscPrintf(comm,"Local solid nodes %9d  %9d  %9d  %9d\n",
1670:                        partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1671:     PetscPrintf(comm,"Local free nodes  %9d  %9d  %9d  %9d\n",
1672:                        partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1673:     PetscPrintf(comm,"------------------------------------------------------------\n");
1674:   }
1675:   flg  = PETSC_FALSE;
1676:   PetscOptionsGetBool(NULL,NULL,"-partition_info",&flg,NULL);
1677:   if (flg) {
1678:     char part_file[PETSC_MAX_PATH_LEN];
1679:     sprintf(part_file,"output.%d",rank);
1680:     fptr1 = fopen(part_file,"w");

1682:     fprintf(fptr1,"---------------------------------------------\n");
1683:     fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1684:     fprintf(fptr1,"---------------------------------------------\n");
1685:     fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1686:     fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1687:     fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1688:     fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1689:     fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1690:     fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1691:     fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1692:     fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1693:     fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1694:     fprintf(fptr1,"\n");
1695:     fprintf(fptr1,"nvertices = %d\n",nvertices);
1696:     fprintf(fptr1,"nnbound = %d\n",nnbound);
1697:     fprintf(fptr1,"nvbound = %d\n",nvbound);
1698:     fprintf(fptr1,"nfbound = %d\n",nfbound);
1699:     fprintf(fptr1,"\n");

1701:     fprintf(fptr1,"---------------------------------------------\n");
1702:     fprintf(fptr1,"Different Orderings\n");
1703:     fprintf(fptr1,"---------------------------------------------\n");
1704:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1705:     fprintf(fptr1,"---------------------------------------------\n");
1706:     for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1707:     fprintf(fptr1,"\n");

1709:     fprintf(fptr1,"---------------------------------------------\n");
1710:     fprintf(fptr1,"Solid Boundary Nodes\n");
1711:     fprintf(fptr1,"---------------------------------------------\n");
1712:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1713:     fprintf(fptr1,"---------------------------------------------\n");
1714:     for (i = 0; i < nsnodeLoc; i++) {
1715:       j = grid->isnode[i]-1;
1716:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1717:     }
1718:     fprintf(fptr1,"\n");
1719:     fprintf(fptr1,"---------------------------------------------\n");
1720:     fprintf(fptr1,"f2ntn array\n");
1721:     fprintf(fptr1,"---------------------------------------------\n");
1722:     for (i = 0; i < nnfacetLoc; i++) {
1723:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1724:               grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1725:     }
1726:     fprintf(fptr1,"\n");

1728:     fprintf(fptr1,"---------------------------------------------\n");
1729:     fprintf(fptr1,"Viscous Boundary Nodes\n");
1730:     fprintf(fptr1,"---------------------------------------------\n");
1731:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1732:     fprintf(fptr1,"---------------------------------------------\n");
1733:     for (i = 0; i < nvnodeLoc; i++) {
1734:       j = grid->ivnode[i]-1;
1735:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1736:     }
1737:     fprintf(fptr1,"\n");
1738:     fprintf(fptr1,"---------------------------------------------\n");
1739:     fprintf(fptr1,"f2ntv array\n");
1740:     fprintf(fptr1,"---------------------------------------------\n");
1741:     for (i = 0; i < nvfacetLoc; i++) {
1742:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1743:               grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1744:     }
1745:     fprintf(fptr1,"\n");

1747:     fprintf(fptr1,"---------------------------------------------\n");
1748:     fprintf(fptr1,"Free Boundary Nodes\n");
1749:     fprintf(fptr1,"---------------------------------------------\n");
1750:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1751:     fprintf(fptr1,"---------------------------------------------\n");
1752:     for (i = 0; i < nfnodeLoc; i++) {
1753:       j = grid->ifnode[i]-1;
1754:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1755:     }
1756:     fprintf(fptr1,"\n");
1757:     fprintf(fptr1,"---------------------------------------------\n");
1758:     fprintf(fptr1,"f2ntf array\n");
1759:     fprintf(fptr1,"---------------------------------------------\n");
1760:     for (i = 0; i < nffacetLoc; i++) {
1761:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1762:               grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1763:     }
1764:     fprintf(fptr1,"\n");

1766:     fprintf(fptr1,"---------------------------------------------\n");
1767:     fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1768:     fprintf(fptr1,"---------------------------------------------\n");
1769:     ICALLOC(nnodes,&p2l);
1770:     for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1771:     for (i = 0; i < nnodesLoc; i++) {
1772:       jstart = grid->ia[grid->loc2glo[i]] - 1;
1773:       jend   = grid->ia[grid->loc2glo[i]+1] - 1;
1774:       fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1775:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1776:       fprintf(fptr1,"\n");

1778:       fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1779:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1780:       fprintf(fptr1,"\n");

1782:       fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1783:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1784:       fprintf(fptr1,"\n");

1786:     }
1787:     fprintf(fptr1,"\n");
1788:     PetscFree(p2l);
1789:     fclose(fptr1);
1790:   }

1792: /* Free the temporary arrays */
1793:   PetscFree(a2l);
1794:   MPI_Barrier(comm);
1795:   return 0;
1796: }

1798: /*
1799:   encode 3 8-bit binary bytes as 4 '6-bit' characters, len is the number of bytes remaining, at most 3 are used
1800: */
1801: void *base64_encodeblock(void *vout,const void *vin,int len)
1802: {
1803:   unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1804:   /* Translation Table as described in RFC1113 */
1805:   static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1806:   memcpy(in,vin,PetscMin(len,3));
1807:   out[0] = cb64[in[0] >> 2];
1808:   out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1809:   out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1810:   out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1811:   return (void*)(out+4);
1812: }
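/* Worked example (standard base64, not specific to this file): the three bytes "Man"
 * (0x4d,0x61,0x6e) encode to "TWFu"; with len==1 only the first input byte is consumed and the
 * output is padded with '=', e.g. "M" encodes to "TQ==". */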

1814: /* Write binary data, does not do byte swapping. */
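/* On any rank other than 0 this routine is a no-op.  In raw mode it emits a 4-byte length header
 * followed by the bytes of `data`; in base64 mode it emits base64(header) immediately followed by
 * base64(payload).  This is intended to match the framing of the VTK <AppendedData> section
 * written by WritePVTU below (an assumption about the consumer, not something checked here). */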
1815: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1816: {
1817:   PetscMPIInt    rank;

1820:   if (!n) return 0;
1821:   MPI_Comm_rank(comm,&rank);
1822:   if (rank == 0) {
1823:     size_t count;
1824:     int    bytes,size;
1825:     switch (dtype) {
1826:     case PETSC_DOUBLE:
1827:       size = sizeof(double);
1828:       break;
1829:     case PETSC_FLOAT:
1830:       size = sizeof(float);
1831:       break;
1832:     case PETSC_INT:
1833:       size = sizeof(PetscInt);
1834:       break;
1835:     case PETSC_CHAR:
1836:       size = sizeof(char);
1837:       break;
1838:     default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1839:     }
1840:     bytes = size*n;
1841:     if (base64) {
1842:       unsigned char *buf,*ptr;
1843:       int           i;
1844:       size_t        b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1845:       PetscMalloc(b64alloc,&buf);
1846:       ptr  = buf;
1847:       ptr  = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1848:       ptr  = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1849:       for (i=0; i<bytes; i+=3) {
1850:         int left = bytes - i;
1851:         ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1852:       }
1853:       *ptr++ = '\n';
1854:       /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1855:       count = fwrite(buf,1,ptr-buf,fp);
1856:       if (count < (size_t)(ptr-buf)) {
1857:         perror("");
1858:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %" PetscInt_FMT " of %" PetscInt_FMT " bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1859:       }
1860:       PetscFree(buf);
1861:     } else {
1862:       count = fwrite(&bytes,sizeof(int),1,fp);
1863:       if (count != 1) {
1864:         perror("");
1865:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1866:       }
1867:       count = fwrite(data,size,(size_t)n,fp);
1868:       if ((int)count != n) {
1869:         perror("");
1870:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %" PetscInt_FMT "/%" PetscInt_FMT " array members of size %" PetscInt_FMT,(PetscInt)count,n,(PetscInt)size);
1871:       }
1872:     }
1873:   }
1874:   return 0;
1875: }

1877: static void SortInt2(PetscInt *a,PetscInt *b)
1878: {
1879:   if (*b < *a) {
1880:     PetscInt c = *b;
1881:     *b = *a;
1882:     *a = c;
1883:   }
1884: }

1886: /* b = intersection(a,b) */
1887: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1888: {
1889:   PetscInt i,n,j;

1891:   j = 0;
1892:   n = 0;
1893:   for (i=0; i<*nb; i++) {
1894:     while (j<na && a[j]<b[i]) j++;
1895:     if (j<na && a[j]==b[i]) {
1896:       b[n++] = b[i];
1897:       j++;
1898:     }
1899:   }
1900:   *nb = n;
1901:   return 0;
1902: }
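/* Both input arrays are assumed sorted in increasing order; the scan is O(na + *nb).
 * Example: a = {1,3,5}, b = {2,3,5,7} leaves b = {3,5} with *nb == 2. */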

1904: /*
1905:   This function currently has a semantic bug: it only produces cells whose edges are all stored locally.  Since the local mesh
1906:   does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1907:   be constructed. This causes visualization artifacts.

1909:   This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1910:   layer of edges in this function.
1911: */
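/* Sketch of the search below: an upper-triangular adjacency structure (ui,uj) is assembled so that
 * row i lists the neighbors of node i with larger index, each row sorted.  A tetrahedron is then a
 * chain node0 < node1 < node2 < node3 in which every later node is adjacent to all earlier ones;
 * the candidate sets are narrowed by successive sorted intersections (IntersectInt), so each cell
 * is emitted exactly once, in increasing node order. */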
1912: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1913: {
1914:   PetscInt       ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1915:   PetscInt       *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1916: #if defined(INTERLACING)
1917: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1918: #else
1919: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1920: #endif

1922:   *incell = -1;
1923:   *iconn  = NULL;
1924:   acell   = 100000;              /* allocate for this many cells */
1925:   PetscMalloc1(acell,&conn);
1926:   PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1927:   PetscCalloc1(nnodes,&utmp);
1928:   /* count the number of edges in the upper-triangular matrix u */
1929:   for (i=0; i<nedge; i++) {     /* count number of nonzeros in upper triangular matrix */
1930:     GetEdge(eptr,i,node0,node1);
1931:     utmp[PetscMin(node0,node1)]++;
1932:   }
1933:   rowmax = 0;
1934:   ui[0]  = 0;
1935:   for (i=0; i<nnodes; i++) {
1936:     rowmax  = PetscMax(rowmax,utmp[i]);
1937:     ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1938:     utmp[i] = 0;
1939:   }
1940:   for (i=0; i<nedge; i++) {     /* assemble upper triangular matrix U */
1941:     GetEdge(eptr,i,node0,node1);
1942:     SortInt2(&node0,&node1);
1943:     uj[ui[node0] + utmp[node0]++] = node1;
1944:   }
1945:   PetscFree(utmp);
1946:   for (i=0; i<nnodes; i++) {    /* sort every row */
1947:     PetscInt n = ui[i+1] - ui[i];
1948:     PetscSortInt(n,&uj[ui[i]]);
1949:   }

1951:   /* Infer cells */
1952:   ncell = 0;
1953:   PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1954:   for (i=0; i<nnodes; i++) {
1955:     node0 = i;
1956:     ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1957:     PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1958:     for (j=0; j<ntmp1; j++) {
1959:       node1 = tmp1[j];
1962:       ntmp2 = ui[node1+1] - ui[node1];
1963:       PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1964:       IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1965:       for (k=0; k<ntmp2; k++) {
1966:         node2 = tmp2[k];
1969:         ntmp3 = ui[node2+1] - ui[node2];
1970:         PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1971:         IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1972:         for (l=0; l<ntmp3; l++) {
1973:           node3 = tmp3[l];
1977:           if (ntmp3 < 3) continue;
1978:           conn[ncell][0] = node0;
1979:           conn[ncell][1] = node1;
1980:           conn[ncell][2] = node2;
1981:           conn[ncell][3] = node3;
1982:           if (0) {
1983:             PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
1984:             PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
1985:             PetscIntView(ntmp1,tmp1,viewer);
1986:             PetscIntView(ntmp2,tmp2,viewer);
1987:             PetscIntView(ntmp3,tmp3,viewer);
1988:             /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
1989:              * can tell, that means it is an invalid mesh. I don't know what the intent was. */
1991:           }
1992:           ncell++;
1993:         }
1994:       }
1995:     }
1996:   }
1997:   PetscFree3(tmp1,tmp2,tmp3);
1998:   PetscFree2(ui,uj);

2000:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %" PetscInt_FMT " cells with nnodes=%" PetscInt_FMT " nedge=%" PetscInt_FMT "\n",ncell,nnodes,nedge);
2001:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2002:   *incell = ncell;
2003:   *iconn  = (PetscInt*)conn;
2004:   return 0;
2005: }

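/* GridCompleteOverlap builds an edge list covering all owned+ghosted vertices.  Each rank first
 * counts and packs, per owned node, the PETSc indices of its neighbors into a parallel Vec; one
 * VecScatter brings the (count,offset) pair for every local vertex, and a second scatter brings
 * the neighbor lists themselves.  Each edge is then kept once (smaller local index first);
 * neighbors outside the local owned+ghosted set map to -1 and are dropped.  With the default
 * -complete_overlap behavior the overlapped list is returned, otherwise a copy of grid->eptr. */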
2007: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2008: {
2009:   PetscInt       nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2010:   PetscInt       *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2011:   Vec            VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2012:   PetscScalar    *vne,*vnei;
2013:   IS             isglobal,isedgeOv;
2014:   VecScatter     nescat,neiscat;
2015:   PetscBool      flg;

2017:   nnodes    = grid->nnodes;     /* Total number of global nodes */
2018:   nnodesLoc = grid->nnodesLoc;  /* Number of owned nodes */
2019:   nvertices = grid->nvertices;  /* Number of owned+ghosted nodes */
2020:   nedgeLoc  = grid->nedgeLoc;   /* Number of edges connected to owned nodes */

2022:   /* Count the number of neighbors of each owned node */
2023:   MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2024:   rstart -= nnodesLoc;
2025:   PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2026:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2027:   for (i=0; i<nedgeLoc; i++) {
2028:     GetEdge(grid->eptr,i,node0,node1);
2029:     node0p = grid->loc2pet[node0];
2030:     node1p = grid->loc2pet[node1];
2031:     if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2032:     if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2033:   }
2034:   /* Get the offset in the node-based edge array */
2035:   nodeEdgeOffset[0] = 0;
2036:   for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2037:   nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];

2039:   /* Pack a Vec by node of all the edges for that node. The nodes are stored by global index */
2040:   VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2041:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2042:   VecGetArray(VNodeEdge,&vne);
2043:   for (i=0; i<nedgeLoc; i++) {
2044:     GetEdge(grid->eptr,i,node0,node1);
2045:     node0p = grid->loc2pet[node0];
2046:     node1p = grid->loc2pet[node1];
2047:     if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2048:     if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2049:   }
2050:   VecRestoreArray(VNodeEdge,&vne);
2051:   VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);

2053:   /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2054:   VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2055:   VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2056:   VecSetBlockSize(VNodeEdgeInfo,2);
2057:   VecSetType(VNodeEdgeInfo,VECMPI);

2059:   VecGetArray(VNodeEdgeInfo,&vnei);
2060:   for (i=0; i<nnodesLoc; i++) {
2061:     vnei[i*2+0] = nodeEdgeCount[i];                   /* Total number of edges from this vertex */
2062:     vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2063:   }
2064:   VecRestoreArray(VNodeEdgeInfo,&vnei);
2065:   PetscFree2(nodeEdgeCount,nodeEdgeOffset);

2067:   /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2068:   VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2069:   VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2070:   VecSetBlockSize(VNodeEdgeInfoOv,2);
2071:   VecSetType(VNodeEdgeInfoOv,VECSEQ);

2073:   ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2074:   VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2075:   VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2076:   VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2077:   VecScatterDestroy(&neiscat);
2078:   VecDestroy(&VNodeEdgeInfo);
2079:   ISDestroy(&isglobal);

2081:   /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2082:   nedgeOv = 0;                  /* First count the number of edges in the complete overlap */
2083:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2084:   for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2085:   /* Allocate for the global indices in VNodeEdge of the edges to receive */
2086:   PetscMalloc1(nedgeOv,&eIdxOv);
2087:   for (i=0,cnt=0; i<nvertices; i++) {
2088:     for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2089:   }
2090:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2091:   ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2092:   VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2093:   VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2094:   VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2095:   VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2096:   VecScatterDestroy(&nescat);
2097:   VecDestroy(&VNodeEdge);
2098:   ISDestroy(&isedgeOv);
2099:   PetscFree(eIdxOv);

2101:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %" PetscInt_FMT ", half=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);

2103:   /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2104:   PetscMalloc1(nnodes,&p2l);
2105:   for (i=0; i<nnodes; i++) p2l[i] = -1;
2106:   for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2107:   if (1) {
2108:     PetscInt m = 0;
2109:     for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2110:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %" PetscInt_FMT "; nvertices=%" PetscInt_FMT " nnodesLoc=%" PetscInt_FMT " nnodes=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2111:   }

2113:   /* Log each edge connecting nodes in owned+ghosted exactly once */
2114:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2115:   VecGetArray(VNodeEdgeOv,&vne);
2116:   /* First count the number of edges to keep */
2117:   nedgeOv = 0;
2118:   for (i=0,cnt=0; i<nvertices; i++) {
2119:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2120:     node0 = i;
2121:     for (j=0; j<n; j++) {
2122:       node1p = vne[cnt++];
2123:       node1  = p2l[node1p];
2124:       if (node0 < node1) nedgeOv++;
2125:     }
2126:   }
2127:   /* Array of edges to keep */
2128:   PetscMalloc1(2*nedgeOv,&eptrOv);
2129:   nedgeOv = 0;
2130:   for (i=0,cnt=0; i<nvertices; i++) {
2131:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2132:     node0 = i;
2133:     for (j=0; j<n; j++) {
2134:       node1p = vne[cnt++];
2135:       node1  = p2l[node1p];
2136:       if (node0 < node1) {
2137:         eptrOv[2*nedgeOv+0] = node0;
2138:         eptrOv[2*nedgeOv+1] = node1;
2139:         nedgeOv++;
2140:       }
2141:     }
2142:   }
2143:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2144:   VecRestoreArray(VNodeEdgeOv,&vne);
2145:   VecDestroy(&VNodeEdgeInfoOv);
2146:   VecDestroy(&VNodeEdgeOv);
2147:   PetscFree(p2l);

2149:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%" PetscInt_FMT " nedgeOv=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2150:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

2152:   flg  = PETSC_TRUE;
2153:   PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2154:   if (flg) {
2155:     *invertices = grid->nvertices; /* We did not change the number of vertices */
2156:     *inedgeOv   = nedgeOv;
2157:     *ieptrOv    = eptrOv;
2158:   } else {
2159:     *invertices = grid->nvertices;
2160:     *inedgeOv   = nedgeLoc;
2161:     PetscFree(eptrOv);
2162:     PetscMalloc1(2*nedgeLoc,&eptrOv);
2163:     PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2164:     *ieptrOv    = eptrOv;
2165:   }
2166:   return 0;
2167: }

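/* WritePVTU: rank 0 writes the <fname>-<step>.pvtu index referencing one .vtu piece per rank;
 * every rank then writes its own piece with pressure (scalar) and velocity (3-vector) point data,
 * a per-cell Rank field, the vertex coordinates, and tetrahedral connectivity inferred from the
 * local edge graph.  The heavy data goes into the appended section, raw or base64 according to
 * the base64 argument. */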
2169: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2170: {
2171:   GRID              *grid  = user->grid;
2172:   TstepCtx          *tsCtx = user->tsCtx;
2173:   FILE              *vtu,*pvtu;
2174:   char              pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2175:   MPI_Comm          comm;
2176:   PetscMPIInt       rank,size;
2177:   PetscInt          i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2178:   PetscErrorCode    ierr;
2179:   Vec               Xloc,Xploc,Xuloc;
2180:   unsigned char     *celltype;
2181:   int               *celloffset,*conn,*cellrank;
2182:   const PetscScalar *x;
2183:   PetscScalar       *xu,*xp;
2184:   const char        *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";

2186:   GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2187:   comm = PETSC_COMM_WORLD;
2188:   MPI_Comm_rank(comm,&rank);
2189:   MPI_Comm_size(comm,&size);
2190: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2191:   SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2192: #endif
2193:   PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%" PetscInt_FMT ".pvtu",fname,tsCtx->itstep);
2194:   PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%" PetscInt_FMT "-%" PetscInt_FMT ".vtu",fname,tsCtx->itstep,rank);
2195:   PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2196:   PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2197:   PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2198:   PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2199:   PetscFPrintf(comm,pvtu,"  <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2200:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2201:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2202:   PetscFPrintf(comm,pvtu,"  </PPointData>\n");
2203:   PetscFPrintf(comm,pvtu,"  <PCellData>\n");
2204:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2205:   PetscFPrintf(comm,pvtu,"  </PCellData>\n");
2206:   PetscFPrintf(comm,pvtu,"  <PPoints>\n");
2207:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2208:   PetscFPrintf(comm,pvtu,"  </PPoints>\n");
2209:   PetscFPrintf(comm,pvtu,"  <PCells>\n");
2210:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2211:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" />\n");
2212:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" />\n");
2213:   PetscFPrintf(comm,pvtu,"  </PCells>\n");
2214:   for (i=0; i<size; i++) {
2215:     PetscFPrintf(comm,pvtu,"  <Piece Source=\"%s-%" PetscInt_FMT "-%" PetscInt_FMT ".vtu\" />\n",fname,tsCtx->itstep,i);
2216:   }
2217:   PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2218:   PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2219:   PetscFClose(comm,pvtu);

2221:   Xloc = grid->qnodeLoc;
2222:   VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2223:   VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2224:   VecGetBlockSize(Xloc,&bs);
2226:   VecGetSize(Xloc,&nloc);
2229:   VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);

2231:   VecCreate(PETSC_COMM_SELF,&Xuloc);
2232:   VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2233:   VecSetBlockSize(Xuloc,3);
2234:   VecSetType(Xuloc,VECSEQ);

2236:   VecGetArrayRead(Xloc,&x);
2237:   VecGetArray(Xploc,&xp);
2238:   VecGetArray(Xuloc,&xu);
2239:   for (i=0; i<nvertices; i++) {
2240:     xp[i]     = x[i*4+0];
2241:     xu[i*3+0] = x[i*4+1];
2242:     xu[i*3+1] = x[i*4+2];
2243:     xu[i*3+2] = x[i*4+3];
2244:   }
2245:   VecRestoreArrayRead(Xloc,&x);

2247:   InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);

2249:   PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2250:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2251:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2252:   PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2253:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  <Piece NumberOfPoints=\"%" PetscInt_FMT "\" NumberOfCells=\"%" PetscInt_FMT "\">\n",nvertices,ncells);

2255:   /* Solution fields */
2256:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2257:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2258:   boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2259:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2260:   boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2261:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </PointData>\n");
2262:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <CellData>\n");
2263:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2264:   boffset += ncells*sizeof(int) + sizeof(int);
2265:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </CellData>\n");
2266:   /* Coordinate positions */
2267:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Points>\n");
2268:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2269:   boffset += nvertices*3*sizeof(double) + sizeof(int);
2270:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Points>\n");
2271:   /* Cell connectivity */
2272:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Cells>\n");
2273:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2274:   boffset += ncells*4*sizeof(int) + sizeof(int);
2275:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2276:   boffset += ncells*sizeof(int) + sizeof(int);
2277:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2278:   boffset += ncells*sizeof(unsigned char) + sizeof(int);
2279:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Cells>\n");
2280:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  </Piece>\n");
2281:   PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2282:   PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2283:   PetscFPrintf(PETSC_COMM_SELF,vtu,"_");

2285:   /* Write pressure */
2286:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);

2288:   /* Write velocity */
2289:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);

2291:   /* Label cell rank, not a measure of computation because nothing is actually computed at cells.  This is written
2292:    * primarily to aid in debugging. The partition for computation should label vertices. */
2293:   PetscMalloc1(ncells,&cellrank);
2294:   for (i=0; i<ncells; i++) cellrank[i] = rank;
2295:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2296:   PetscFree(cellrank);

2298:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2299:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2300:   PetscFree(conn);

2302:   PetscMalloc1(ncells,&celloffset);
2303:   for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2304:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2305:   PetscFree(celloffset);

2307:   PetscMalloc1(ncells,&celltype);
2308:   for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2309:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2310:   PetscFree(celltype);

2312:   PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2313:   PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2314:   PetscFClose(PETSC_COMM_SELF,vtu);

2316:   VecRestoreArray(Xploc,&xp);
2317:   VecRestoreArray(Xuloc,&xu);
2318:   VecDestroy(&Xploc);
2319:   VecDestroy(&Xuloc);
2320:   PetscFree(eptr);
2321:   return 0;
2322: }

2324: /*---------------------------------------------------------------------*/
2325: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2326: /*---------------------------------------------------------------------*/
2327: {
2328:   int                    ierr,i,j,bs;
2329:   int                    nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2330:   int                    nnodesLoc,nvertices;
2331:   int                    *val_diag,*val_offd,*svertices,*loc2pet;
2332:   IS                     isglobal,islocal;
2333:   ISLocalToGlobalMapping isl2g;
2334:   PetscBool              flg;
2335:   MPI_Comm               comm = PETSC_COMM_WORLD;

2337:   nnodes    = grid->nnodes;
2338:   nnodesLoc = grid->nnodesLoc;
2339:   nvertices = grid->nvertices;
2340:   loc2pet   = grid->loc2pet;
2341:   bs        = 4;

2343:   /* Set up the PETSc datastructures */

2345:   VecCreate(comm,&grid->qnode);
2346:   VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2347:   VecSetBlockSize(grid->qnode,bs);
2348:   VecSetType(grid->qnode,VECMPI);

2350:   VecDuplicate(grid->qnode,&grid->res);
2351:   VecDuplicate(grid->qnode,&tsCtx->qold);
2352:   VecDuplicate(grid->qnode,&tsCtx->func);

2354:   VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2355:   VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2356:   VecSetBlockSize(grid->qnodeLoc,bs);
2357:   VecSetType(grid->qnodeLoc,VECSEQ);

2359:   VecCreate(comm,&grid->grad);
2360:   VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2361:   VecSetBlockSize(grid->grad,3*bs);
2362:   VecSetType(grid->grad,VECMPI);

2364:   VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2365:   VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2366:   VecSetBlockSize(grid->gradLoc,3*bs);
2367:   VecSetType(grid->gradLoc,VECSEQ);

2369: /* Create Scatter between the local and global vectors */
2370: /* First create scatter for qnode */
2371:   ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2372: #if defined(INTERLACING)
2373: #if defined(BLOCKING)
2374:   ICALLOC(nvertices,&svertices);
2375:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2376:   ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2377: #else
2378:   ICALLOC(bs*nvertices,&svertices);
2379:   for (i = 0; i < nvertices; i++)
2380:     for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2381:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2382: #endif
2383: #else
2384:   ICALLOC(bs*nvertices,&svertices);
2385:   for (j = 0; j < bs; j++)
2386:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2387:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2388: #endif
2389:   PetscFree(svertices);
2390:   VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2391:   ISDestroy(&isglobal);
2392:   ISDestroy(&islocal);

2394: /* Now create scatter for gradient vector of qnode */
2395:   ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2396: #if defined(INTERLACING)
2397: #if defined(BLOCKING)
2398:   ICALLOC(nvertices,&svertices);
2399:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2400:   ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2401: #else
2402:   ICALLOC(3*bs*nvertices,&svertices);
2403:   for (i = 0; i < nvertices; i++)
2404:     for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2405:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2406: #endif
2407: #else
2408:   ICALLOC(3*bs*nvertices,&svertices);
2409:   for (j = 0; j < 3*bs; j++)
2410:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2411:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2412: #endif
2413:   PetscFree(svertices);
2414:   VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2415:   ISDestroy(&isglobal);
2416:   ISDestroy(&islocal);

2418: /* Store the number of non-zeroes per row */
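/* Preallocation sketch: for each locally owned row, a neighbor whose PETSc index lies in
 * [rstart, rstart+nnodesLoc) contributes to the diagonal block, anything else to the off-diagonal
 * block; the resulting per-row (or per-block-row) counts are passed to MatCreateBAIJ/MatCreateAIJ
 * below so the Jacobian can be assembled without additional mallocs. */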
2419: #if defined(INTERLACING)
2420: #if defined(BLOCKING)
2421:   ICALLOC(nnodesLoc,&val_diag);
2422:   ICALLOC(nnodesLoc,&val_offd);
2423:   for (i = 0; i < nnodesLoc; i++) {
2424:     jstart    = grid->ia[i] - 1;
2425:     jend      = grid->ia[i+1] - 1;
2426:     nbrs_diag = 0;
2427:     nbrs_offd = 0;
2428:     for (j = jstart; j < jend; j++) {
2429:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2430:       else nbrs_offd++;
2431:     }
2432:     val_diag[i] = nbrs_diag;
2433:     val_offd[i] = nbrs_offd;
2434:   }
2435:   MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2436:                        bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2437:                        PETSC_DEFAULT,val_offd,&grid->A);
2438: #else
2439:   ICALLOC(nnodesLoc*4,&val_diag);
2440:   ICALLOC(nnodesLoc*4,&val_offd);
2441:   for (i = 0; i < nnodesLoc; i++) {
2442:     jstart    = grid->ia[i] - 1;
2443:     jend      = grid->ia[i+1] - 1;
2444:     nbrs_diag = 0;
2445:     nbrs_offd = 0;
2446:     for (j = jstart; j < jend; j++) {
2447:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2448:       else nbrs_offd++;
2449:     }
2450:     for (j = 0; j < 4; j++) {
2451:       int row       = 4*i + j;
2452:       val_diag[row] = nbrs_diag*4;
2453:       val_offd[row] = nbrs_offd*4;
2454:     }
2455:   }
2456:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2457:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2458:                       NULL,val_offd,&grid->A);
2459: #endif
2460:   PetscFree(val_diag);
2461:   PetscFree(val_offd);

2463: #else
2465:   ICALLOC(nnodes*4,&val_diag);
2466:   ICALLOC(nnodes*4,&val_offd);
2467:   for (j = 0; j < 4; j++)
2468:     for (i = 0; i < nnodes; i++) {
2469:       int row;
2470:       row           = i + j*nnodes;
2471:       jstart        = grid->ia[i] - 1;
2472:       jend          = grid->ia[i+1] - 1;
2473:       nbrs_diag     = jend - jstart;
2474:       val_diag[row] = nbrs_diag*4;
2475:       val_offd[row] = 0;
2476:     }
2477:   /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2478:                         val,&grid->A);*/
2479:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2480:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2481:                       NULL,val_offd,&grid->A);
2482:   MatSetBlockSize(grid->A,bs);
2483:   PetscFree(val_diag);
2484:   PetscFree(val_offd);
2485: #endif

2487:   flg  = PETSC_FALSE;
2488:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
2489:   if (flg) {
2490:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2491:   }

2493: /* Set local to global mapping for setting the matrix elements in
2494:    local ordering : first set row by row mapping
2495: */
2496:   ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2497:   MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2498:   ISLocalToGlobalMappingDestroy(&isl2g);
2499:   return 0;
2500: }

2502: /*================================= CLINK ===================================*/
2503: /*                                                                           */
2504: /* Used in establishing the links between FORTRAN common blocks and C        */
2505: /*                                                                           */
2506: /*===========================================================================*/
2507: EXTERN_C_BEGIN
2508: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2509: {
2510:   c_info  = p1;
2511:   c_runge = p2;
2512:   c_gmcom = p3;
2513: }
2514: EXTERN_C_END

2516: /*========================== SET_UP_GRID====================================*/
2517: /*                                                                          */
2518: /* Allocates the memory for the fine grid                                   */
2519: /*                                                                          */
2520: /*==========================================================================*/
2521: int set_up_grid(GRID *grid)
2522: {
2523:   int nnodes,nedge;
2524:   int nsface,nvface,nfface,nbface;
2525:   int tnode,ierr;
2526:   /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2527:   /*int nsnode,nvnode,nfnode; */
2528:   /*int mgzero=0;*/ /* Variable so we dont allocate memory for multigrid */
2529:   /*int jalloc;*/  /* If jalloc=1 allocate space for dfp and dfm */
2530:   /*
2531:   * stuff to read in dave's grids
2532:   */
2533:   /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2534:   /* end of stuff */

2536:   nnodes = grid->nnodes;
2537:   tnode  = grid->nnodes;
2538:   nedge  = grid->nedge;
2539:   nsface = grid->nsface;
2540:   nvface = grid->nvface;
2541:   nfface = grid->nfface;
2542:   nbface = nsface + nvface + nfface;

2544:   /*ncell  = grid->ncell;
2545:   vface  = grid->nedge;
2546:   lnodes = grid->nnodes;
2547:   nsnode = grid->nsnode;
2548:   nvnode = grid->nvnode;
2549:   nfnode = grid->nfnode;
2550:   nsrch  = c_gmcom->nsrch;
2551:   ilu0   = c_gmcom->ilu0;
2552:   ileast = grid->ileast;
2553:   ifcn   = c_gmcom->ifcn;
2554:   jalloc = 0;
2555:   kvisc  = grid->jvisc;*/

2557:   /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/

2559:   /*
2560:   * stuff to read in dave's grids
2561:   */
2562:   /*nnbound = grid->nnbound;
2563:   nvbound = grid->nvbound;
2564:   nfbound = grid->nfbound;
2565:   nnfacet = grid->nnfacet;
2566:   nvfacet = grid->nvfacet;
2567:   nffacet = grid->nffacet;
2568:   ntte    = grid->ntte;*/
2569:   /* end of stuff */

2571:   /* if (!ileast) lnodes = 1;
2572:     printf("In set_up_grid->jvisc = %d\n",grid->jvisc);

2574:   if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2575:   printf(" vface = %d \n",vface);
2576:   if (grid->jvisc < 3) tnode = 1;
2577:   valloc = 1;
2578:   if (grid->jvisc ==  0)valloc = 0;*/

2580:   /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2581:   /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2582:   PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2583:   /* Now allocate memory for the other grid arrays */
2584:   /* ICALLOC(nedge*2,  &grid->eptr); */
2585:   ICALLOC(nsface,   &grid->isface);
2586:   ICALLOC(nvface,   &grid->ivface);
2587:   ICALLOC(nfface,   &grid->ifface);
2588:   /* ICALLOC(nsnode,   &grid->isnode);
2589:     ICALLOC(nvnode,   &grid->ivnode);
2590:     ICALLOC(nfnode,   &grid->ifnode);*/
2591:   /*ICALLOC(nnodes,   &grid->clist);
2592:   ICALLOC(nnodes,   &grid->iupdate);
2593:   ICALLOC(nsface*2, &grid->sface);
2594:   ICALLOC(nvface*2, &grid->vface);
2595:   ICALLOC(nfface*2, &grid->fface);
2596:   ICALLOC(lnodes,   &grid->icount);*/
  /*FCALLOC(nnodes, &grid->x);
  FCALLOC(nnodes, &grid->y);
  FCALLOC(nnodes, &grid->z);
  FCALLOC(nnodes, &grid->area);*/
/*
 * FCALLOC(nnodes*4, &grid->gradx);
 * FCALLOC(nnodes*4, &grid->grady);
 * FCALLOC(nnodes*4, &grid->gradz);
 * FCALLOC(nnodes,   &grid->cdt);
 */
/*
 * FCALLOC(nnodes*4, &grid->qnode);
 * FCALLOC(nnodes*4, &grid->dq);
 * FCALLOC(nnodes*4, &grid->res);
 * FCALLOC(jalloc*nnodes*4*4,&grid->A);
 * FCALLOC(nnodes*4, &grid->B);
 * FCALLOC(jalloc*nedge*4*4,&grid->dfp);
 * FCALLOC(jalloc*nedge*4*4,&grid->dfm);
 */
  /*FCALLOC(nsnode, &grid->sxn);
  FCALLOC(nsnode, &grid->syn);
  FCALLOC(nsnode, &grid->szn);
  FCALLOC(nsnode, &grid->sa);
  FCALLOC(nvnode, &grid->vxn);
  FCALLOC(nvnode, &grid->vyn);
  FCALLOC(nvnode, &grid->vzn);
  FCALLOC(nvnode, &grid->va);
  FCALLOC(nfnode, &grid->fxn);
  FCALLOC(nfnode, &grid->fyn);
  FCALLOC(nfnode, &grid->fzn);
  FCALLOC(nfnode, &grid->fa);
  FCALLOC(nedge, &grid->xn);
  FCALLOC(nedge, &grid->yn);
  FCALLOC(nedge, &grid->zn);
  FCALLOC(nedge, &grid->rl);*/

2633:   FCALLOC(nbface*15,&grid->us);
2634:   FCALLOC(nbface*15,&grid->vs);
2635:   FCALLOC(nbface*15,&grid->as);
2636: /*
2637:  * FCALLOC(nnodes*4, &grid->phi);
2638:  * FCALLOC(nnodes,   &grid->r11);
2639:  * FCALLOC(nnodes,   &grid->r12);
2640:  * FCALLOC(nnodes,   &grid->r13);
2641:  * FCALLOC(nnodes,   &grid->r22);
2642:  * FCALLOC(nnodes,   &grid->r23);
2643:  * FCALLOC(nnodes,   &grid->r33);
2644:  */
2645: /*
2646:  * Allocate memory for viscous length scale if turbulent
2647:  */
2648:   if (grid->jvisc >= 3) {
2649:     FCALLOC(tnode,  &grid->slen);
2650:     FCALLOC(nnodes, &grid->turbre);
2651:     FCALLOC(nnodes, &grid->amut);
2652:     FCALLOC(tnode,  &grid->turbres);
2653:     FCALLOC(nedge,  &grid->dft1);
2654:     FCALLOC(nedge,  &grid->dft2);
2655:   }
2656: /*
2657:  * Allocate memory for MG transfer
2658:  */
2659: /*
2660:   ICALLOC(mgzero*nsface, &grid->isford);
2661:   ICALLOC(mgzero*nvface, &grid->ivford);
2662:   ICALLOC(mgzero*nfface, &grid->ifford);
2663:   ICALLOC(mgzero*nnodes, &grid->nflag);
2664:   ICALLOC(mgzero*nnodes, &grid->nnext);
2665:   ICALLOC(mgzero*nnodes, &grid->nneigh);
2666:   ICALLOC(mgzero*ncell, &grid->ctag);
2667:   ICALLOC(mgzero*ncell, &grid->csearch);
2668:   ICALLOC(valloc*ncell*4, &grid->c2n);
2669:   ICALLOC(valloc*ncell*6, &grid->c2e);
2670:   grid->c2c = (int*)grid->dfp;
2671:   ICALLOC(ncell*4, &grid->c2c);
2672:   ICALLOC(nnodes, &grid->cenc);
2673:   if (grid->iup == 1) {
2674:     ICALLOC(mgzero*nnodes*3, &grid->icoefup);
2675:     FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
2676:   }
2677:   if (grid->idown == 1) {
2678:     ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
2679:     FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
2680:   }
2681:   FCALLOC(nnodes*4, &grid->ff);
2682:   FCALLOC(tnode, &grid->turbff);
2683: */
2684: /*
2685:  * If using GMRES (nsrch>0) allocate memory
2686:  */
2687:   /* NoEq = 0;
2688:   if (nsrch > 0)NoEq = 4*nnodes;
2689:   if (nsrch < 0)NoEq = nnodes;
2690:   FCALLOC(NoEq, &grid->AP);
2691:   FCALLOC(NoEq, &grid->Xgm);
2692:   FCALLOC(NoEq, &grid->temr);
2693:   FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
2694:   */
2695: /*
2696:  * stuff to read in dave's grids
2697:  */
2698:   /*
2699:   ICALLOC(nnbound, &grid->ncolorn);
2700:   ICALLOC(nnbound*100,&grid->countn);
2701:   ICALLOC(nvbound, &grid->ncolorv);
2702:   ICALLOC(nvbound*100,&grid->countv);
2703:   ICALLOC(nfbound, &grid->ncolorf);
2704:   ICALLOC(nfbound*100,&grid->countf);
2705:   */
2706:   /*ICALLOC(nnbound, &grid->nntet);
2707:   ICALLOC(nnbound, &grid->nnpts);
2708:   ICALLOC(nvbound, &grid->nvtet);
2709:   ICALLOC(nvbound, &grid->nvpts);
2710:   ICALLOC(nfbound, &grid->nftet);
2711:   ICALLOC(nfbound, &grid->nfpts);
2712:   ICALLOC(nnfacet*4,&grid->f2ntn);
2713:   ICALLOC(nvfacet*4,&grid->f2ntv);
2714:   ICALLOC(nffacet*4,&grid->f2ntf);*/
2715:   return 0;
2716: }

2718: /*========================== WRITE_FINE_GRID ================================*/
2719: /*                                                                           */
2720: /* Write memory locations and other information for the fine grid           */
2721: /*                                                                           */
2722: /*===========================================================================*/
2723: int write_fine_grid(GRID *grid)
2724: {
2725:   FILE *output;

2727: /* open file for output      */
2728: /* call the output frame.out */

2730:   if (!(output = fopen("frame.out","a"))) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"can't open frame.out");
2731:   fprintf(output,"information for fine grid\n");
2732:   fprintf(output,"\n");
2733:   fprintf(output," address of fine grid = %p\n",(void*)grid);

2735:   fprintf(output,"grid.nnodes = %d\n",grid->nnodes);
2736:   fprintf(output,"grid.ncell  = %d\n",grid->ncell);
2737:   fprintf(output,"grid.nedge  = %d\n",grid->nedge);
2738:   fprintf(output,"grid.nsface = %d\n",grid->nsface);
2739:   fprintf(output,"grid.nvface = %d\n",grid->nvface);
2740:   fprintf(output,"grid.nfface = %d\n",grid->nfface);
2741:   fprintf(output,"grid.nsnode = %d\n",grid->nsnode);
2742:   fprintf(output,"grid.nvnode = %d\n",grid->nvnode);
2743:   fprintf(output,"grid.nfnode = %d\n",grid->nfnode);
2744: /*
2745:   fprintf(output,"grid.eptr = %p\n",grid->eptr);
2746:   fprintf(output,"grid.isface = %p\n",grid->isface);
2747:   fprintf(output,"grid.ivface = %p\n",grid->ivface);
2748:   fprintf(output,"grid.ifface = %p\n",grid->ifface);
2749:   fprintf(output,"grid.isnode = %p\n",grid->isnode);
2750:   fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
2751:   fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
2752:   fprintf(output,"grid.c2n = %p\n",grid->c2n);
2753:   fprintf(output,"grid.c2e = %p\n",grid->c2e);
2754:   fprintf(output,"grid.xyz = %p\n",grid->xyz);
2755: */
2756:   /*fprintf(output,"grid.y = %p\n",grid->xyz);
2757:   fprintf(output,"grid.z = %p\n",grid->z);*/
2758: /*
2759:   fprintf(output,"grid.area = %p\n",grid->area);
2760:   fprintf(output,"grid.qnode = %p\n",grid->qnode);
2761: */
2762: /*
2763:   fprintf(output,"grid.gradx = %p\n",grid->gradx);
2764:   fprintf(output,"grid.grady = %p\n",grid->grady);
2765:   fprintf(output,"grid.gradz = %p\n",grid->gradz);
2766: */
2767: /*
2768:   fprintf(output,"grid.cdt = %p\n",grid->cdt);
2769:   fprintf(output,"grid.sxn = %p\n",grid->sxn);
2770:   fprintf(output,"grid.syn = %p\n",grid->syn);
2771:   fprintf(output,"grid.szn = %p\n",grid->szn);
2772:   fprintf(output,"grid.vxn = %p\n",grid->vxn);
2773:   fprintf(output,"grid.vyn = %p\n",grid->vyn);
2774:   fprintf(output,"grid.vzn = %p\n",grid->vzn);
2775:   fprintf(output,"grid.fxn = %p\n",grid->fxn);
2776:   fprintf(output,"grid.fyn = %p\n",grid->fyn);
2777:   fprintf(output,"grid.fzn = %p\n",grid->fzn);
2778:   fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
2779: */
2780:   /*fprintf(output,"grid.yn = %p\n",grid->yn);
2781:   fprintf(output,"grid.zn = %p\n",grid->zn);
2782:   fprintf(output,"grid.rl = %p\n",grid->rl);*/
2783:   fclose(output);
2784:   return 0;
2785: }

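/* Greedy edge coloring (used only with OpenMP + HAVE_EDGE_COLORING): repeatedly sweep the not yet
 * colored tail of the edge list and move every edge whose two endpoints are still untagged into
 * the current color, swapping it toward the front and permuting eperm the same way; counte[c]
 * records how many edges received color c, so edges of one color can later be processed
 * concurrently without two threads updating the same node. */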
2787: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
2788: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte)
2789: {
2790:   int ncolore = *ncle;
2791:   int iedg = 0,ib = 0,ie = nedge,tagcount;
2792:   int i,n1,n2;
2793:   int *tag;
2794:   ICALLOC(nnodes,&tag);
2795:   while (ib < ie) {
2796:     for (i = 0; i < nnodes; i++) tag[i] = 0;
2797:     counte[ncolore] = 0;
2798:     for (i = ib; i < ie; i++) {
2799:       n1       = e2n[i];
2800:       n2       = e2n[i+nedge];
2801:       tagcount = tag[n1]+tag[n2];
2802:       /* If tagcount = 0 then this edge belongs in this color */
2803:       if (!tagcount) {
2804:         tag[n1]         = 1;
2805:         tag[n2]         = 1;
2806:         e2n[i]          = e2n[iedg];
2807:         e2n[i+nedge]    = e2n[iedg+nedge];
2808:         e2n[iedg]       = n1;
2809:         e2n[iedg+nedge] = n2;
2810:         n1              = eperm[i];
2811:         eperm[i]        = eperm[iedg];
2812:         eperm[iedg]     = n1;
2813:         iedg++;
2814:         counte[ncolore]++;
2815:       }
2816:     }
2817:     ib = iedg;
2818:     ncolore++;
2819:   }
2820:   *ncle = ncolore;
2821:   return 0;
2822: }
2823: #endif
2824: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
2825: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters)
2826: {
2827:   if ((*gen_start = start_counters(event0,event1)) < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in start_counters");
2828:   PetscTime(&time_start_counters);
2829:   return 0;
2830: }

2832: int EventCountersEnd(int gen_start,PetscScalar time_start_counters)
2833: {
2834:   int         gen_read;
2835:   PetscScalar time_read_counters;
2836:   long long   _counter0,_counter1;

2838:   if ((gen_read = read_counters(event0,&_counter0,event1,&_counter1)) < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in read_counters");
2839:   PetscTime(&time_read_counters);
2840:   if (gen_read != gen_start) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Lost hardware counters");
2841:   counter0      += _counter0;
2842:   counter1      += _counter1;
2843:   time_counters += time_read_counters-time_start_counters;
2844:   return 0;
2845: }
2846: #endif