Actual source code: plog.c


  2: /*
  3:       PETSc code to log object creation and destruction and PETSc events.

  5:       This provides the public API used by the rest of PETSc and by users.

  7:       These routines use a private API that is not used elsewhere in PETSc and is not
  8:       accessible to users. The private API is defined in logimpl.h and the utils directory.

 10: */
 11: #include <petsc/private/logimpl.h>
 12: #include <petsctime.h>
 13: #include <petscviewer.h>

 15: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;

 17: #if defined(PETSC_USE_LOG)
 18:   #include <petscmachineinfo.h>
 19:   #include <petscconfiginfo.h>

 21: /* used in the MPI_XXX() count macros in petsclog.h */

 23: /* Action and object logging variables */
 24: Action   *petsc_actions    = NULL;
 25: Object   *petsc_objects    = NULL;
 26: PetscBool petsc_logActions = PETSC_FALSE;
 27: PetscBool petsc_logObjects = PETSC_FALSE;
 28: int       petsc_numActions = 0, petsc_maxActions = 100;
 29: int       petsc_numObjects = 0, petsc_maxObjects = 100;
 30: int       petsc_numObjectsDestroyed = 0;

 32: /* Global counters */
 33: PetscLogDouble petsc_BaseTime        = 0.0;
 34: PetscLogDouble petsc_TotalFlops      = 0.0; /* The number of flops */
 35: PetscLogDouble petsc_tmp_flops       = 0.0; /* The incremental number of flops */
 36: PetscLogDouble petsc_send_ct         = 0.0; /* The number of sends */
 37: PetscLogDouble petsc_recv_ct         = 0.0; /* The number of receives */
 38: PetscLogDouble petsc_send_len        = 0.0; /* The total length of all sent messages */
 39: PetscLogDouble petsc_recv_len        = 0.0; /* The total length of all received messages */
 40: PetscLogDouble petsc_isend_ct        = 0.0; /* The number of immediate sends */
 41: PetscLogDouble petsc_irecv_ct        = 0.0; /* The number of immediate receives */
 42: PetscLogDouble petsc_isend_len       = 0.0; /* The total length of all immediate send messages */
 43: PetscLogDouble petsc_irecv_len       = 0.0; /* The total length of all immediate receive messages */
 44: PetscLogDouble petsc_wait_ct         = 0.0; /* The number of waits */
 45: PetscLogDouble petsc_wait_any_ct     = 0.0; /* The number of anywaits */
 46: PetscLogDouble petsc_wait_all_ct     = 0.0; /* The number of waitalls */
 47: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
 48: PetscLogDouble petsc_allreduce_ct    = 0.0; /* The number of reductions */
 49: PetscLogDouble petsc_gather_ct       = 0.0; /* The number of gathers and gathervs */
 50: PetscLogDouble petsc_scatter_ct      = 0.0; /* The number of scatters and scattervs */
 51:   #if defined(PETSC_HAVE_DEVICE)
 52: PetscLogDouble petsc_ctog_ct        = 0.0; /* The total number of CPU to GPU copies */
 53: PetscLogDouble petsc_gtoc_ct        = 0.0; /* The total number of GPU to CPU copies */
 54: PetscLogDouble petsc_ctog_sz        = 0.0; /* The total size of CPU to GPU copies */
 55: PetscLogDouble petsc_gtoc_sz        = 0.0; /* The total size of GPU to CPU copies */
 56: PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */
 57: PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */
 58: PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */
 59: PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */
 60: PetscLogDouble petsc_gflops         = 0.0; /* The flops done on a GPU */
 61: PetscLogDouble petsc_gtime          = 0.0; /* The time spent on a GPU */
 62:   #endif

 64: /* Logging functions */
 65: PetscErrorCode (*PetscLogPHC)(PetscObject)                                                            = NULL;
 66: PetscErrorCode (*PetscLogPHD)(PetscObject)                                                            = NULL;
 67: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
 68: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;

 70: /* Tracing event logging variables */
 71: FILE            *petsc_tracefile          = NULL;
 72: int              petsc_tracelevel         = 0;
 73: const char      *petsc_traceblanks        = "                                                                                                    ";
 74: char             petsc_tracespace[128]    = " ";
 75: PetscLogDouble   petsc_tracetime          = 0.0;
 76: static PetscBool PetscLogInitializeCalled = PETSC_FALSE;

 78: static PetscIntStack current_log_event_stack = NULL;

 80: PETSC_INTERN PetscErrorCode PetscLogInitialize(void)
 81: {
 82:   int       stage;
 83:   PetscBool opt;

 85:   if (PetscLogInitializeCalled) return 0;
 86:   PetscLogInitializeCalled = PETSC_TRUE;

 88:   PetscIntStackCreate(&current_log_event_stack);
 89:   PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt);
 90:   if (opt) petsc_logActions = PETSC_FALSE;
 91:   PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt);
 92:   if (opt) petsc_logObjects = PETSC_FALSE;
 93:   if (petsc_logActions) PetscMalloc1(petsc_maxActions, &petsc_actions);
 94:   if (petsc_logObjects) PetscMalloc1(petsc_maxObjects, &petsc_objects);
 95:   PetscLogPHC = PetscLogObjCreateDefault;
 96:   PetscLogPHD = PetscLogObjDestroyDefault;
 97:   /* Setup default logging structures */
 98:   PetscStageLogCreate(&petsc_stageLog);
 99:   PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage);

101:   /* All processors sync here for more consistent logging */
102:   MPI_Barrier(PETSC_COMM_WORLD);
103:   PetscTime(&petsc_BaseTime);
104:   PetscLogStagePush(stage);
105:   return 0;
106: }

108: PETSC_INTERN PetscErrorCode PetscLogFinalize(void)
109: {
110:   PetscStageLog stageLog;

112:   PetscFree(petsc_actions);
113:   PetscFree(petsc_objects);
114:   PetscLogNestedEnd();
115:   PetscLogSet(NULL, NULL);

117:   /* Resetting phase */
118:   PetscLogGetStageLog(&stageLog);
119:   PetscStageLogDestroy(stageLog);
120:   PetscIntStackDestroy(current_log_event_stack);
121:   current_log_event_stack = NULL;

123:   petsc_TotalFlops          = 0.0;
124:   petsc_numActions          = 0;
125:   petsc_numObjects          = 0;
126:   petsc_numObjectsDestroyed = 0;
127:   petsc_maxActions          = 100;
128:   petsc_maxObjects          = 100;
129:   petsc_actions             = NULL;
130:   petsc_objects             = NULL;
131:   petsc_logActions          = PETSC_FALSE;
132:   petsc_logObjects          = PETSC_FALSE;
133:   petsc_BaseTime            = 0.0;
134:   petsc_TotalFlops          = 0.0;
135:   petsc_tmp_flops           = 0.0;
136:   petsc_send_ct             = 0.0;
137:   petsc_recv_ct             = 0.0;
138:   petsc_send_len            = 0.0;
139:   petsc_recv_len            = 0.0;
140:   petsc_isend_ct            = 0.0;
141:   petsc_irecv_ct            = 0.0;
142:   petsc_isend_len           = 0.0;
143:   petsc_irecv_len           = 0.0;
144:   petsc_wait_ct             = 0.0;
145:   petsc_wait_any_ct         = 0.0;
146:   petsc_wait_all_ct         = 0.0;
147:   petsc_sum_of_waits_ct     = 0.0;
148:   petsc_allreduce_ct        = 0.0;
149:   petsc_gather_ct           = 0.0;
150:   petsc_scatter_ct          = 0.0;
151:   #if defined(PETSC_HAVE_DEVICE)
152:   petsc_ctog_ct = 0.0;
153:   petsc_gtoc_ct = 0.0;
154:   petsc_ctog_sz = 0.0;
155:   petsc_gtoc_sz = 0.0;
156:   petsc_gflops  = 0.0;
157:   petsc_gtime   = 0.0;
158:   #endif
159:   PETSC_LARGEST_EVENT      = PETSC_EVENT;
160:   PetscLogPHC              = NULL;
161:   PetscLogPHD              = NULL;
162:   petsc_tracefile          = NULL;
163:   petsc_tracelevel         = 0;
164:   petsc_traceblanks        = "                                                                                                    ";
165:   petsc_tracespace[0]      = ' ';
166:   petsc_tracespace[1]      = 0;
167:   petsc_tracetime          = 0.0;
168:   PETSC_LARGEST_CLASSID    = PETSC_SMALLEST_CLASSID;
169:   PETSC_OBJECT_CLASSID     = 0;
170:   petsc_stageLog           = NULL;
171:   PetscLogInitializeCalled = PETSC_FALSE;
172:   return 0;
173: }

175: /*@C
176:   PetscLogSet - Sets the logging functions called at the beginning and ending of every event.

178:   Not Collective

180:   Input Parameters:
181: + b - The function called at beginning of event
182: - e - The function called at end of event

184:   Level: developer

186:   Developer Note:
187:   The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`.

189: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()`
190: @*/
191: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
192: {
193:   PetscLogPLB = b;
194:   PetscLogPLE = e;
195:   return 0;
196: }

198: /*@C
199:   PetscLogIsActive - Check if logging is currently in progress.

201:   Not Collective

203:   Output Parameter:
204: . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise

206:   Level: beginner

208: .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()`
209: @*/
210: PetscErrorCode PetscLogIsActive(PetscBool *isActive)
211: {
212:   *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE;
213:   return 0;
214: }

216: /*@C
217:   PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop
218:   rates and object creation and should not slow programs down too much.
219:   This routine may be called more than once.

221:   Logically Collective over `PETSC_COMM_WORLD`

223:   Options Database Key:
224: . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the
225:                   screen (for code configured with --with-log=1 (which is the default))

227:   Usage:
228: .vb
229:       PetscInitialize(...);
230:       PetscLogDefaultBegin();
231:        ... code ...
232:       PetscLogView(viewer); or PetscLogDump();
233:       PetscFinalize();
234: .ve

236:   Note:
237:   `PetscLogView()` or `PetscLogDump()` actually cause the printing of
238:   the logging information.

240:   Level: advanced

242: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`
243: @*/
244: PetscErrorCode PetscLogDefaultBegin(void)
245: {
246:   PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
247:   return 0;
248: }

250: /*@C
251:   PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
252:   all events. This creates large log files and slows the program down.

254:   Logically Collective on `PETSC_COMM_WORLD`

256:   Options Database Key:
257: . -log_all - Prints extensive log information

259:   Usage:
260: .vb
261:      PetscInitialize(...);
262:      PetscLogAllBegin();
263:      ... code ...
264:      PetscLogDump(filename);
265:      PetscFinalize();
266: .ve

268:   Note:
269:   A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
270:   intended for production runs since it logs only flop rates and object
271:   creation (and shouldn't significantly slow the programs).

273:   Level: advanced

275: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()`
276: @*/
277: PetscErrorCode PetscLogAllBegin(void)
278: {
279:   PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
280:   return 0;
281: }

283: /*@C
284:   PetscLogTraceBegin - Activates trace logging.  Every time a PETSc event
285:   begins or ends, the event name is printed.

287:   Logically Collective on `PETSC_COMM_WORLD`

289:   Input Parameter:
290: . file - The file to print trace in (e.g. stdout)

292:   Options Database Key:
293: . -log_trace [filename] - Activates `PetscLogTraceBegin()`

295:   Notes:
296:   `PetscLogTraceBegin()` prints the processor number, the execution time (sec),
297:   then "Event begin:" or "Event end:" followed by the event name.

299:   `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful
300:   to determine where a program is hanging without running in the
301:   debugger.  Can be used in conjunction with the -info option.

303:   Level: intermediate

305: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()`
306: @*/
307: PetscErrorCode PetscLogTraceBegin(FILE *file)
308: {
309:   petsc_tracefile = file;

311:   PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
312:   return 0;
313: }

315: /*@
316:   PetscLogActions - Determines whether actions are logged for the graphical viewer.

318:   Not Collective

320:   Input Parameter:
321: . flag - `PETSC_TRUE` if actions are to be logged

323:   Options Database Key:
324: . -log_exclude_actions - Turns off actions logging

326:   Level: intermediate

328:   Note:
329:   Logging of actions continues to consume more memory as the program
330:   runs. Long running programs should consider turning this feature off.
331: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
332: @*/
333: PetscErrorCode PetscLogActions(PetscBool flag)
334: {
335:   petsc_logActions = flag;
336:   return 0;
337: }

339: /*@
340:   PetscLogObjects - Determines whether objects are logged for the graphical viewer.

342:   Not Collective

344:   Input Parameter:
345: . flag - `PETSC_TRUE` if objects are to be logged

347:   Options Database Key:
348: . -log_exclude_objects - Turns off objects logging

350:   Level: intermediate

352:   Note:
353:   Logging of objects continues to consume more memory as the program
354:   runs. Long running programs should consider turning this feature off.

356: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
357: @*/
358: PetscErrorCode PetscLogObjects(PetscBool flag)
359: {
360:   petsc_logObjects = flag;
361:   return 0;
362: }

364: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
365: /*@C
366:   PetscLogStageRegister - Attaches a character string name to a logging stage.

368:   Not Collective

370:   Input Parameter:
371: . sname - The name to associate with that stage

373:   Output Parameter:
374: . stage - The stage number

376:   Level: intermediate

378: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
379: @*/
380: PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage)
381: {
382:   PetscStageLog stageLog;
383:   PetscLogEvent event;

385:   PetscLogGetStageLog(&stageLog);
386:   PetscStageLogRegister(stageLog, sname, stage);
387:   /* Copy events already changed in the main stage, this sucks */
388:   PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
389:   for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
390:   PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
391:   return 0;
392: }

394: /*@C
395:   PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage

397:   Not Collective

399:   Input Parameter:
400: . stage - The stage on which to log

402:   Usage:
403:   If the option -log_view is used to run the program containing the
404:   following code, then 2 sets of summary data will be printed during
405:   PetscFinalize().
406: .vb
407:       PetscInitialize(int *argc,char ***args,0,0);
408:       [stage 0 of code]
409:       PetscLogStagePush(1);
410:       [stage 1 of code]
411:       PetscLogStagePop();
412:       PetscBarrier(...);
413:       [more stage 0 of code]
414:       PetscFinalize();
415: .ve

417:   Note:
418:   Use `PetscLogStageRegister()` to register a stage.

420:   Level: intermediate

422: .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()`
423: @*/
424: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
425: {
426:   PetscStageLog stageLog;

428:   PetscLogGetStageLog(&stageLog);
429:   PetscStageLogPush(stageLog, stage);
430:   return 0;
431: }

433: /*@C
434:   PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()`

436:   Not Collective

438:   Usage:
439:   If the option -log_view is used to run the program containing the
440:   following code, then 2 sets of summary data will be printed during
441:   PetscFinalize().
442: .vb
443:       PetscInitialize(int *argc,char ***args,0,0);
444:       [stage 0 of code]
445:       PetscLogStagePush(1);
446:       [stage 1 of code]
447:       PetscLogStagePop();
448:       PetscBarrier(...);
449:       [more stage 0 of code]
450:       PetscFinalize();
451: .ve

453:   Level: intermediate

455: .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()`
456: @*/
457: PetscErrorCode PetscLogStagePop(void)
458: {
459:   PetscStageLog stageLog;

461:   PetscLogGetStageLog(&stageLog);
462:   PetscStageLogPop(stageLog);
463:   return 0;
464: }

466: /*@
467:   PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.

469:   Not Collective

471:   Input Parameters:
472: + stage    - The stage
473: - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

475:   Level: intermediate

477:   Note:
478:   If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist

480: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
481: @*/
482: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
483: {
484:   PetscStageLog stageLog;

486:   PetscLogGetStageLog(&stageLog);
487:   PetscStageLogSetActive(stageLog, stage, isActive);
488:   return 0;
489: }

491: /*@
492:   PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.

494:   Not Collective

496:   Input Parameter:
497: . stage    - The stage

499:   Output Parameter:
500: . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

502:   Level: intermediate

504: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
505: @*/
506: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
507: {
508:   PetscStageLog stageLog;

510:   PetscLogGetStageLog(&stageLog);
511:   PetscStageLogGetActive(stageLog, stage, isActive);
512:   return 0;
513: }

515: /*@
516:   PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()`

518:   Not Collective

520:   Input Parameters:
521: + stage     - The stage
522: - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

524:   Level: intermediate

526:   Developer Note:
527:   What does visible mean, needs to be documented.

529: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
530: @*/
531: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
532: {
533:   PetscStageLog stageLog;

535:   PetscLogGetStageLog(&stageLog);
536:   PetscStageLogSetVisible(stageLog, stage, isVisible);
537:   return 0;
538: }

540: /*@
541:   PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()`

543:   Not Collective

545:   Input Parameter:
546: . stage     - The stage

548:   Output Parameter:
549: . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

551:   Level: intermediate

553: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
554: @*/
555: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
556: {
557:   PetscStageLog stageLog;

559:   PetscLogGetStageLog(&stageLog);
560:   PetscStageLogGetVisible(stageLog, stage, isVisible);
561:   return 0;
562: }

564: /*@C
565:   PetscLogStageGetId - Returns the stage id when given the stage name.

567:   Not Collective

569:   Input Parameter:
570: . name  - The stage name

572:   Output Parameter:
573: . stage - The stage, , or -1 if no stage with that name exists

575:   Level: intermediate

577: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
578: @*/
579: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
580: {
581:   PetscStageLog stageLog;

583:   PetscLogGetStageLog(&stageLog);
584:   PetscStageLogGetStage(stageLog, name, stage);
585:   return 0;
586: }

588: /*------------------------------------------------ Event Functions --------------------------------------------------*/

590: /*@C
591:   PetscLogEventRegister - Registers an event name for logging operations

593:   Not Collective

595:   Input Parameters:
596: + name   - The name associated with the event
597: - classid - The classid associated to the class for this event, obtain either with
598:            `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones
599:            are only available in C code

601:   Output Parameter:
602: . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`.

604:   Example of Usage:
605: .vb
606:       PetscLogEvent USER_EVENT;
607:       PetscClassId classid;
608:       PetscLogDouble user_event_flops;
609:       PetscClassIdRegister("class name",&classid);
610:       PetscLogEventRegister("User event name",classid,&USER_EVENT);
611:       PetscLogEventBegin(USER_EVENT,0,0,0,0);
612:          [code segment to monitor]
613:          PetscLogFlops(user_event_flops);
614:       PetscLogEventEnd(USER_EVENT,0,0,0,0);
615: .ve

617:   Notes:
618:   PETSc automatically logs library events if the code has been
619:   configured with --with-log (which is the default) and
620:   -log_view or -log_all is specified.  `PetscLogEventRegister()` is
621:   intended for logging user events to supplement this PETSc
622:   information.

624:   PETSc can gather data for use with the utilities Jumpshot
625:   (part of the MPICH distribution).  If PETSc has been compiled
626:   with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
627:   MPICH), the user can employ another command line option, -log_mpe,
628:   to create a logfile, "mpe.log", which can be visualized
629:   Jumpshot.

631:   The classid is associated with each event so that classes of events
632:   can be disabled simultaneously, such as all matrix events. The user
633:   can either use an existing classid, such as `MAT_CLASSID`, or create
634:   their own as shown in the example.

636:   If an existing event with the same name exists, its event handle is
637:   returned instead of creating a new event.

639:   Level: intermediate

641: .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`,
642:           `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()`
643: @*/
644: PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event)
645: {
646:   PetscStageLog stageLog;
647:   int           stage;

649:   *event = PETSC_DECIDE;
650:   PetscLogGetStageLog(&stageLog);
651:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
652:   if (*event > 0) return 0;
653:   PetscEventRegLogRegister(stageLog->eventLog, name, classid, event);
654:   for (stage = 0; stage < stageLog->numStages; stage++) {
655:     PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
656:     PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
657:   }
658:   return 0;
659: }

661: /*@
662:   PetscLogEventSetCollective - Indicates that a particular event is collective.

664:   Not Collective

666:   Input Parameters:
667: + event - The event id
668: - collective - Bolean flag indicating whether a particular event is collective

670:   Notes:
671:   New events returned from `PetscLogEventRegister()` are collective by default.

673:   Collective events are handled specially if the -log_sync is used. In that case the logging saves information about
674:   two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication
675:   to be performed. This option is useful to debug imbalance within the computations or communications

677:   Level: developer

679: .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()`
680: @*/
681: PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective)
682: {
683:   PetscStageLog    stageLog;
684:   PetscEventRegLog eventRegLog;

686:   PetscLogGetStageLog(&stageLog);
687:   PetscStageLogGetEventRegLog(stageLog, &eventRegLog);
689:   eventRegLog->eventInfo[event].collective = collective;
690:   return 0;
691: }

693: /*@
694:   PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage.

696:   Not Collective

698:   Input Parameter:
699: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

701:   Level: developer

703: .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
704: @*/
705: PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid)
706: {
707:   PetscStageLog stageLog;
708:   int           stage;

710:   PetscLogGetStageLog(&stageLog);
711:   for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
712:   return 0;
713: }

715: /*@
716:   PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage.

718:   Not Collective

720:   Input Parameter:
721: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

723:   Level: developer

725:   Note:
726:   If a class is excluded then events associated with that class are not logged.

728: .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()`
729: @*/
730: PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid)
731: {
732:   PetscStageLog stageLog;
733:   int           stage;

735:   PetscLogGetStageLog(&stageLog);
736:   for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
737:   return 0;
738: }

740: /*@
741:   PetscLogEventActivate - Indicates that a particular event should be logged.

743:   Not Collective

745:   Input Parameter:
746: . event - The event id

748:   Usage:
749: .vb
750:       PetscLogEventDeactivate(VEC_SetValues);
751:         [code where you do not want to log VecSetValues()]
752:       PetscLogEventActivate(VEC_SetValues);
753:         [code where you do want to log VecSetValues()]
754: .ve

756:   Note:
757:   The event may be either a pre-defined PETSc event (found in include/petsclog.h)
758:   or an event number obtained with `PetscLogEventRegister()`.

760:   Level: advanced

762: .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
763: @*/
764: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
765: {
766:   PetscStageLog stageLog;
767:   int           stage;

769:   PetscLogGetStageLog(&stageLog);
770:   PetscStageLogGetCurrent(stageLog, &stage);
771:   PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
772:   return 0;
773: }

775: /*@
776:   PetscLogEventDeactivate - Indicates that a particular event should not be logged.

778:   Not Collective

780:   Input Parameter:
781: . event - The event id

783:   Usage:
784: .vb
785:       PetscLogEventDeactivate(VEC_SetValues);
786:         [code where you do not want to log VecSetValues()]
787:       PetscLogEventActivate(VEC_SetValues);
788:         [code where you do want to log VecSetValues()]
789: .ve

791:   Note:
792:   The event may be either a pre-defined PETSc event (found in
793:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

795:   Level: advanced

797: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
798: @*/
799: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
800: {
801:   PetscStageLog stageLog;
802:   int           stage;

804:   PetscLogGetStageLog(&stageLog);
805:   PetscStageLogGetCurrent(stageLog, &stage);
806:   PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
807:   return 0;
808: }

810: /*@
811:   PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called

813:   Not Collective

815:   Input Parameter:
816: . event - The event id

818:   Usage:
819: .vb
820:       PetscLogEventDeactivatePush(VEC_SetValues);
821:         [code where you do not want to log VecSetValues()]
822:       PetscLogEventDeactivatePop(VEC_SetValues);
823:         [code where you do want to log VecSetValues()]
824: .ve

826:   Note:
827:   The event may be either a pre-defined PETSc event (found in
828:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

830:   Level: advanced

832: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()`
833: @*/
834: PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event)
835: {
836:   PetscStageLog stageLog;
837:   int           stage;

839:   PetscLogGetStageLog(&stageLog);
840:   PetscStageLogGetCurrent(stageLog, &stage);
841:   PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event);
842:   return 0;
843: }

845: /*@
846:   PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()`

848:   Not Collective

850:   Input Parameter:
851: . event - The event id

853:   Usage:
854: .vb
855:       PetscLogEventDeactivatePush(VEC_SetValues);
856:         [code where you do not want to log VecSetValues()]
857:       PetscLogEventDeactivatePop(VEC_SetValues);
858:         [code where you do want to log VecSetValues()]
859: .ve

861:   Note:
862:   The event may be either a pre-defined PETSc event (found in
863:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

865:   Level: advanced

867: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`
868: @*/
869: PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event)
870: {
871:   PetscStageLog stageLog;
872:   int           stage;

874:   PetscLogGetStageLog(&stageLog);
875:   PetscStageLogGetCurrent(stageLog, &stage);
876:   PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event);
877:   return 0;
878: }

880: /*@
881:   PetscLogEventSetActiveAll - Turns on logging of all events

883:   Not Collective

885:   Input Parameters:
886: + event    - The event id
887: - isActive - The activity flag determining whether the event is logged

889:   Level: advanced

891: .seealso: `PlogEventActivate()`, `PlogEventDeactivate()`
892: @*/
893: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
894: {
895:   PetscStageLog stageLog;
896:   int           stage;

898:   PetscLogGetStageLog(&stageLog);
899:   for (stage = 0; stage < stageLog->numStages; stage++) {
900:     if (isActive) {
901:       PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
902:     } else {
903:       PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
904:     }
905:   }
906:   return 0;
907: }

909: /*@
910:   PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage

912:   Not Collective

914:   Input Parameter:
915: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

917:   Level: developer

919: .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
920: @*/
921: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
922: {
923:   PetscStageLog stageLog;
924:   int           stage;

926:   PetscLogGetStageLog(&stageLog);
927:   PetscStageLogGetCurrent(stageLog, &stage);
928:   PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
929:   return 0;
930: }

932: /*@
933:   PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage

935:   Not Collective

937:   Input Parameter:
938: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

940:   Level: developer

942: .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
943: @*/
944: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
945: {
946:   PetscStageLog stageLog;
947:   int           stage;

949:   PetscLogGetStageLog(&stageLog);
950:   PetscStageLogGetCurrent(stageLog, &stage);
951:   PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
952:   return 0;
953: }

955: /*MC
956:    PetscLogEventSync - Synchronizes the beginning of a user event.

958:    Synopsis:
959: #include <petsclog.h>
960:    PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm)

962:    Collective

964:    Input Parameters:
965: +  e - integer associated with the event obtained from PetscLogEventRegister()
966: -  comm - an MPI communicator

968:    Usage:
969: .vb
970:      PetscLogEvent USER_EVENT;
971:      PetscLogEventRegister("User event",0,&USER_EVENT);
972:      PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD);
973:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
974:         [code segment to monitor]
975:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
976: .ve

978:    Note:
979:    This routine should be called only if there is not a
980:    `PetscObject` available to pass to `PetscLogEventBegin()`.

982:    Level: developer

984: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`
985: M*/

987: /*MC
988:    PetscLogEventBegin - Logs the beginning of a user event.

990:    Synopsis:
991: #include <petsclog.h>
992:    PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

994:    Not Collective

996:    Input Parameters:
997: +  e - integer associated with the event obtained from PetscLogEventRegister()
998: -  o1,o2,o3,o4 - objects associated with the event, or 0

1000:    Fortran Synopsis:
1001:    void PetscLogEventBegin(int e,PetscErrorCode ierr)

1003:    Usage:
1004: .vb
1005:      PetscLogEvent USER_EVENT;
1006:      PetscLogDouble user_event_flops;
1007:      PetscLogEventRegister("User event",0,&USER_EVENT);
1008:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1009:         [code segment to monitor]
1010:         PetscLogFlops(user_event_flops);
1011:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1012: .ve

1014:    Developer Note:
1015:      `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the
1016:      errors that occur in the macro directly because other packages that use this macros have used them in their
1017:      own functions or methods that do not return error codes and it would be disruptive to change the current
1018:      behavior.

1020:    Level: intermediate

1022: .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()`
1023: M*/

1025: /*MC
1026:    PetscLogEventEnd - Log the end of a user event.

1028:    Synopsis:
1029: #include <petsclog.h>
1030:    PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

1032:    Not Collective

1034:    Input Parameters:
1035: +  e - integer associated with the event obtained with PetscLogEventRegister()
1036: -  o1,o2,o3,o4 - objects associated with the event, or 0

1038:    Fortran Synopsis:
1039:    void PetscLogEventEnd(int e,PetscErrorCode ierr)

1041:    Usage:
1042: .vb
1043:      PetscLogEvent USER_EVENT;
1044:      PetscLogDouble user_event_flops;
1045:      PetscLogEventRegister("User event",0,&USER_EVENT,);
1046:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1047:         [code segment to monitor]
1048:         PetscLogFlops(user_event_flops);
1049:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1050: .ve

1052:    Level: intermediate

1054: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()`
1055: M*/

1057: /*@C
1058:   PetscLogEventGetId - Returns the event id when given the event name.

1060:   Not Collective

1062:   Input Parameter:
1063: . name  - The event name

1065:   Output Parameter:
1066: . event - The event, or -1 if no event with that name exists

1068:   Level: intermediate

1070: .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()`
1071: @*/
1072: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1073: {
1074:   PetscStageLog stageLog;

1076:   PetscLogGetStageLog(&stageLog);
1077:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
1078:   return 0;
1079: }

1081: PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event)
1082: {
1083:   PetscIntStackPush(current_log_event_stack, event);
1084:   return 0;
1085: }

1087: PetscErrorCode PetscLogPopCurrentEvent_Internal(void)
1088: {
1089:   PetscIntStackPop(current_log_event_stack, NULL);
1090:   return 0;
1091: }

1093: PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event)
1094: {
1095:   PetscBool empty;

1098:   *event = PETSC_DECIDE;
1099:   PetscIntStackEmpty(current_log_event_stack, &empty);
1100:   if (!empty) PetscIntStackTop(current_log_event_stack, event);
1101:   return 0;
1102: }

1104: PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event)
1105: {
1106:   if (event != PETSC_DECIDE) PetscLogEventEnd(event, NULL, NULL, NULL, NULL);
1107:   return 0;
1108: }

1110: PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event)
1111: {
1112:   PetscStageLog     stageLog;
1113:   PetscEventPerfLog eventLog;
1114:   int               stage;

1116:   if (event == PETSC_DECIDE) return 0;
1117:   PetscLogEventBegin(event, NULL, NULL, NULL, NULL);
1118:   PetscLogGetStageLog(&stageLog);
1119:   PetscStageLogGetCurrent(stageLog, &stage);
1120:   PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog);
1121:   eventLog->eventInfo[event].count--;
1122:   return 0;
1123: }

1125: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1126: /*@C
1127:   PetscLogDump - Dumps logs of objects to a file. This file is intended to
1128:   be read by bin/petscview. This program no longer exists.

1130:   Collective on `PETSC_COMM_WORLD`

1132:   Input Parameter:
1133: . name - an optional file name

1135:   Usage:
1136: .vb
1137:      PetscInitialize(...);
1138:      PetscLogDefaultBegin(); or PetscLogAllBegin();
1139:      ... code ...
1140:      PetscLogDump(filename);
1141:      PetscFinalize();
1142: .ve

1144:   Note:
1145:   The default file name is
1146: $    Log.<rank>
1147:   where <rank> is the processor number. If no name is specified,
1148:   this file will be used.

1150:   Level: advanced

1152: .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()`
1153: @*/
1154: PetscErrorCode PetscLogDump(const char sname[])
1155: {
1156:   PetscStageLog       stageLog;
1157:   PetscEventPerfInfo *eventInfo;
1158:   FILE               *fd;
1159:   char                file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1160:   PetscLogDouble      flops, _TotalTime;
1161:   PetscMPIInt         rank;
1162:   int                 action, object, curStage;
1163:   PetscLogEvent       event;

1165:   /* Calculate the total elapsed time */
1166:   PetscTime(&_TotalTime);
1167:   _TotalTime -= petsc_BaseTime;
1168:   /* Open log file */
1169:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1170:   if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank);
1171:   else sprintf(file, "Log.%d", rank);
1172:   PetscFixFilename(file, fname);
1173:   PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1175:   /* Output totals */
1176:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime);
1177:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1178:   /* Output actions */
1179:   if (petsc_logActions) {
1180:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions);
1181:     for (action = 0; action < petsc_numActions; action++) {
1182:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1183:                              petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem));
1184:     }
1185:   }
1186:   /* Output objects */
1187:   if (petsc_logObjects) {
1188:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed);
1189:     for (object = 0; object < petsc_numObjects; object++) {
1190:       PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem);
1191:       if (!petsc_objects[object].name[0]) {
1192:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n");
1193:       } else {
1194:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name);
1195:       }
1196:       if (petsc_objects[object].info[0] != 0) {
1197:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1198:       } else {
1199:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info);
1200:       }
1201:     }
1202:   }
1203:   /* Output events */
1204:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1205:   PetscLogGetStageLog(&stageLog);
1206:   PetscIntStackTop(stageLog->stack, &curStage);
1207:   eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1208:   for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1209:     if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time;
1210:     else flops = 0.0;
1211:     PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops);
1212:   }
1213:   PetscFClose(PETSC_COMM_WORLD, fd);
1214:   return 0;
1215: }

1217: /*
1218:   PetscLogView_Detailed - Each process prints the times for its own events

1220: */
1221: PetscErrorCode PetscLogView_Detailed(PetscViewer viewer)
1222: {
1223:   PetscStageLog       stageLog;
1224:   PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL;
1225:   PetscLogDouble      locTotalTime, numRed, maxMem;
1226:   int                 numStages, numEvents, stage, event;
1227:   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1228:   PetscMPIInt         rank, size;

1230:   MPI_Comm_size(comm, &size);
1231:   MPI_Comm_rank(comm, &rank);
1232:   /* Must preserve reduction count before we go on */
1233:   numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1234:   /* Get the total elapsed time */
1235:   PetscTime(&locTotalTime);
1236:   locTotalTime -= petsc_BaseTime;
1237:   PetscViewerASCIIPrintf(viewer, "size = %d\n", size);
1238:   PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n");
1239:   PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n");
1240:   PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n");
1241:   PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n");
1242:   PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n");
1243:   PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n");
1244:   PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n");
1245:   PetscLogGetStageLog(&stageLog);
1246:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1247:   PetscViewerASCIIPrintf(viewer, "Stages = {}\n");
1248:   for (stage = 0; stage < numStages; stage++) {
1249:     PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name);
1250:     PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name);
1251:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1252:     for (event = 0; event < numEvents; event++) PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name);
1253:   }
1254:   PetscMallocGetMaximumUsage(&maxMem);
1255:   PetscViewerASCIIPushSynchronized(viewer);
1256:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime);
1257:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct));
1258:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len));
1259:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed);
1260:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops);
1261:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects);
1262:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem);
1263:   PetscViewerFlush(viewer);
1264:   for (stage = 0; stage < numStages; stage++) {
1265:     stageInfo = &stageLog->stageInfo[stage].perfInfo;
1266:     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time,
1267:                                                  stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops));
1268:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1269:     for (event = 0; event < numEvents; event++) {
1270:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1271:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g",
1272:                                                    stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions,
1273:                                                    eventInfo->flops));
1274:       if (eventInfo->dof[0] >= 0.) {
1275:         PetscInt d, e;

1277:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [");
1278:         for (d = 0; d < 8; ++d) {
1279:           if (d > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1280:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]);
1281:         }
1282:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1283:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [");
1284:         for (e = 0; e < 8; ++e) {
1285:           if (e > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1286:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]);
1287:         }
1288:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1289:       }
1290:       PetscViewerASCIISynchronizedPrintf(viewer, "}\n");
1291:     }
1292:   }
1293:   PetscViewerFlush(viewer);
1294:   PetscViewerASCIIPopSynchronized(viewer);
1295:   return 0;
1296: }

1298: /*
1299:   PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1300: */
1301: PetscErrorCode PetscLogView_CSV(PetscViewer viewer)
1302: {
1303:   PetscStageLog       stageLog;
1304:   PetscEventPerfInfo *eventInfo = NULL;
1305:   PetscLogDouble      locTotalTime, maxMem;
1306:   int                 numStages, numEvents, stage, event;
1307:   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1308:   PetscMPIInt         rank, size;

1310:   MPI_Comm_size(comm, &size);
1311:   MPI_Comm_rank(comm, &rank);
1312:   /* Must preserve reduction count before we go on */
1313:   /* Get the total elapsed time */
1314:   PetscTime(&locTotalTime);
1315:   locTotalTime -= petsc_BaseTime;
1316:   PetscLogGetStageLog(&stageLog);
1317:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1318:   PetscMallocGetMaximumUsage(&maxMem);
1319:   PetscViewerASCIIPushSynchronized(viewer);
1320:   PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size);
1321:   PetscViewerFlush(viewer);
1322:   for (stage = 0; stage < numStages; stage++) {
1323:     PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo;

1325:     PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops);
1326:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1327:     for (event = 0; event < numEvents; event++) {
1328:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1329:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength,
1330:                                                    eventInfo->numReductions, eventInfo->flops));
1331:       if (eventInfo->dof[0] >= 0.) {
1332:         PetscInt d, e;

1334:         for (d = 0; d < 8; ++d) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]);
1335:         for (e = 0; e < 8; ++e) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]);
1336:       }
1337:       PetscViewerASCIISynchronizedPrintf(viewer, "\n");
1338:     }
1339:   }
1340:   PetscViewerFlush(viewer);
1341:   PetscViewerASCIIPopSynchronized(viewer);
1342:   return 0;
1343: }

1345: static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1346: {
1347:   if (!PetscLogSyncOn) return 0;
1348:   PetscFPrintf(comm, fd, "\n\n");
1349:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1350:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1351:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1352:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1353:   PetscFPrintf(comm, fd, "      #   This program was run with logging synchronization.   #\n");
1354:   PetscFPrintf(comm, fd, "      #   This option provides more meaningful imbalance       #\n");
1355:   PetscFPrintf(comm, fd, "      #   figures at the expense of slowing things down and    #\n");
1356:   PetscFPrintf(comm, fd, "      #   providing a distorted view of the overall runtime.   #\n");
1357:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1358:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1359:   return 0;
1360: }

1362: static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1363: {
1364:   if (PetscDefined(USE_DEBUG)) {
1365:     PetscFPrintf(comm, fd, "\n\n");
1366:     PetscFPrintf(comm, fd, "      ##########################################################\n");
1367:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1368:     PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1369:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1370:     PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option.      #\n");
1371:     PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n");
1372:     PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n");
1373:     PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n");
1374:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1375:     PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1376:   }
1377:   return 0;
1378: }

1380: static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1381: {
1382:   #if defined(PETSC_HAVE_DEVICE)
1383:   PetscMPIInt size;

1385:   MPI_Comm_size(comm, &size);
1386:   if (use_gpu_aware_mpi || size == 1) return 0;
1387:   PetscFPrintf(comm, fd, "\n\n");
1388:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1389:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1390:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1391:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1392:   PetscFPrintf(comm, fd, "      #   This code was compiled with GPU support and you've   #\n");
1393:   PetscFPrintf(comm, fd, "      #   created PETSc/GPU objects, but you intentionally     #\n");
1394:   PetscFPrintf(comm, fd, "      #   used -use_gpu_aware_mpi 0, requiring PETSc to copy   #\n");
1395:   PetscFPrintf(comm, fd, "      #   additional data between the GPU and CPU. To obtain   #\n");
1396:   PetscFPrintf(comm, fd, "      #   meaningful timing results on multi-rank runs, use    #\n");
1397:   PetscFPrintf(comm, fd, "      #   GPU-aware MPI instead.                               #\n");
1398:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1399:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1400:   return 0;
1401:   #else
1402:   return 0;
1403:   #endif
1404: }

1406: static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1407: {
1408:   #if defined(PETSC_HAVE_DEVICE)

1410:   if (!PetscLogGpuTimeFlag || petsc_gflops == 0) return 0;
1411:   PetscFPrintf(comm, fd, "\n\n");
1412:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1413:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1414:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1415:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1416:   PetscFPrintf(comm, fd, "      #   This code was run with -log_view_gpu_time            #\n");
1417:   PetscFPrintf(comm, fd, "      #   This provides accurate timing within the GPU kernels #\n");
1418:   PetscFPrintf(comm, fd, "      #   but can slow down the entire computation by a        #\n");
1419:   PetscFPrintf(comm, fd, "      #   measurable amount. For fastest runs we recommend     #\n");
1420:   PetscFPrintf(comm, fd, "      #   not using this option.                               #\n");
1421:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1422:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1423:   return 0;
1424:   #else
1425:   return 0;
1426:   #endif
1427: }

1429: PetscErrorCode PetscLogView_Default(PetscViewer viewer)
1430: {
1431:   FILE               *fd;
1432:   PetscLogDouble      zero = 0.0;
1433:   PetscStageLog       stageLog;
1434:   PetscStageInfo     *stageInfo = NULL;
1435:   PetscEventPerfInfo *eventInfo = NULL;
1436:   PetscClassPerfInfo *classInfo;
1437:   char                arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1438:   const char         *name;
1439:   PetscLogDouble      locTotalTime, TotalTime, TotalFlops;
1440:   PetscLogDouble      numMessages, messageLength, avgMessLen, numReductions;
1441:   PetscLogDouble      stageTime, flops, flopr, mem, mess, messLen, red;
1442:   PetscLogDouble      fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1443:   PetscLogDouble      fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1444:   PetscLogDouble      min, max, tot, ratio, avg, x, y;
1445:   PetscLogDouble      minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1446:   #if defined(PETSC_HAVE_DEVICE)
1447:   PetscLogEvent  KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1448:   PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1449:   #endif
1450:   PetscMPIInt    minC, maxC;
1451:   PetscMPIInt    size, rank;
1452:   PetscBool     *localStageUsed, *stageUsed;
1453:   PetscBool     *localStageVisible, *stageVisible;
1454:   int            numStages, localNumEvents, numEvents;
1455:   int            stage, oclass;
1456:   PetscLogEvent  event;
1457:   PetscErrorCode 0;
1458:   char           version[256];
1459:   MPI_Comm       comm;
1460:   #if defined(PETSC_HAVE_DEVICE)
1461:   PetscLogEvent eventid;
1462:   PetscInt64    nas = 0x7FF0000000000002;
1463:   #endif

1465:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1466:   PetscObjectGetComm((PetscObject)viewer, &comm);
1467:   PetscViewerASCIIGetPointer(viewer, &fd);
1468:   MPI_Comm_size(comm, &size);
1469:   MPI_Comm_rank(comm, &rank);
1470:   /* Get the total elapsed time */
1471:   PetscTime(&locTotalTime);
1472:   locTotalTime -= petsc_BaseTime;

1474:   PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1475:   PetscFPrintf(comm, fd, "***                                WIDEN YOUR WINDOW TO 160 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document                                 ***\n");
1476:   PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1477:   PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n");
1478:   PetscLogViewWarnSync(comm, fd);
1479:   PetscLogViewWarnDebugging(comm, fd);
1480:   PetscLogViewWarnNoGpuAwareMpi(comm, fd);
1481:   PetscLogViewWarnGpuTime(comm, fd);
1482:   PetscGetArchType(arch, sizeof(arch));
1483:   PetscGetHostName(hostname, sizeof(hostname));
1484:   PetscGetUserName(username, sizeof(username));
1485:   PetscGetProgramName(pname, sizeof(pname));
1486:   PetscGetDate(date, sizeof(date));
1487:   PetscGetVersion(version, sizeof(version));
1488:   if (size == 1) {
1489:     PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1490:   } else {
1491:     PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1492:   }
1493:   #if defined(PETSC_HAVE_OPENMP)
1494:   PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads);
1495:   #endif
1496:   PetscFPrintf(comm, fd, "Using %s\n", version);

1498:   /* Must preserve reduction count before we go on */
1499:   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

1501:   /* Calculate summary information */
1502:   PetscFPrintf(comm, fd, "\n                         Max       Max/Min     Avg       Total\n");
1503:   /*   Time */
1504:   MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1505:   MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1506:   MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1507:   avg = tot / ((PetscLogDouble)size);
1508:   if (min != 0.0) ratio = max / min;
1509:   else ratio = 0.0;
1510:   PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1511:   TotalTime = tot;
1512:   /*   Objects */
1513:   avg = (PetscLogDouble)petsc_numObjects;
1514:   MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1515:   MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1516:   MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1517:   avg = tot / ((PetscLogDouble)size);
1518:   if (min != 0.0) ratio = max / min;
1519:   else ratio = 0.0;
1520:   PetscFPrintf(comm, fd, "Objects:              %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1521:   /*   Flops */
1522:   MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1523:   MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1524:   MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1525:   avg = tot / ((PetscLogDouble)size);
1526:   if (min != 0.0) ratio = max / min;
1527:   else ratio = 0.0;
1528:   PetscFPrintf(comm, fd, "Flops:                %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1529:   TotalFlops = tot;
1530:   /*   Flops/sec -- Must talk to Barry here */
1531:   if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1532:   else flops = 0.0;
1533:   MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1534:   MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1535:   MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536:   avg = tot / ((PetscLogDouble)size);
1537:   if (min != 0.0) ratio = max / min;
1538:   else ratio = 0.0;
1539:   PetscFPrintf(comm, fd, "Flops/sec:            %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1540:   /*   Memory */
1541:   PetscMallocGetMaximumUsage(&mem);
1542:   if (mem > 0.0) {
1543:     MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1544:     MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1545:     MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1546:     avg = tot / ((PetscLogDouble)size);
1547:     if (min != 0.0) ratio = max / min;
1548:     else ratio = 0.0;
1549:     PetscFPrintf(comm, fd, "Memory (bytes):       %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1550:   }
1551:   /*   Messages */
1552:   mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1553:   MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1554:   MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1555:   MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1556:   avg = tot / ((PetscLogDouble)size);
1557:   if (min != 0.0) ratio = max / min;
1558:   else ratio = 0.0;
1559:   PetscFPrintf(comm, fd, "MPI Msg Count:        %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1560:   numMessages = tot;
1561:   /*   Message Lengths */
1562:   mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1563:   MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1564:   MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1565:   MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1566:   if (numMessages != 0) avg = tot / numMessages;
1567:   else avg = 0.0;
1568:   if (min != 0.0) ratio = max / min;
1569:   else ratio = 0.0;
1570:   PetscFPrintf(comm, fd, "MPI Msg Len (bytes):  %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1571:   messageLength = tot;
1572:   /*   Reductions */
1573:   MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1574:   MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1575:   MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1576:   if (min != 0.0) ratio = max / min;
1577:   else ratio = 0.0;
1578:   PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %7.3f\n", max, ratio);
1579:   numReductions = red; /* wrong because uses count from process zero */
1580:   PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1581:   PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1582:   PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flops\n");

1584:   /* Get total number of stages --
1585:        Currently, a single processor can register more stages than another, but stages must all be registered in order.
1586:        We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1587:        This seems best accomplished by assoicating a communicator with each stage.
1588:   */
1589:   PetscLogGetStageLog(&stageLog);
1590:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1591:   PetscMalloc1(numStages, &localStageUsed);
1592:   PetscMalloc1(numStages, &stageUsed);
1593:   PetscMalloc1(numStages, &localStageVisible);
1594:   PetscMalloc1(numStages, &stageVisible);
1595:   if (numStages > 0) {
1596:     stageInfo = stageLog->stageInfo;
1597:     for (stage = 0; stage < numStages; stage++) {
1598:       if (stage < stageLog->numStages) {
1599:         localStageUsed[stage]    = stageInfo[stage].used;
1600:         localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1601:       } else {
1602:         localStageUsed[stage]    = PETSC_FALSE;
1603:         localStageVisible[stage] = PETSC_TRUE;
1604:       }
1605:     }
1606:     MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm);
1607:     MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm);
1608:     for (stage = 0; stage < numStages; stage++) {
1609:       if (stageUsed[stage]) {
1610:         PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flop ------  --- Messages ---  -- Message Lengths --  -- Reductions --\n");
1611:         PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total    Count   %%Total     Avg         %%Total    Count   %%Total\n");
1612:         break;
1613:       }
1614:     }
1615:     for (stage = 0; stage < numStages; stage++) {
1616:       if (!stageUsed[stage]) continue;
1617:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1618:       if (localStageUsed[stage]) {
1619:         MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1620:         MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1621:         MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1622:         MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1623:         MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1624:         name = stageInfo[stage].name;
1625:       } else {
1626:         MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1627:         MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1628:         MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1629:         MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1630:         MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1631:         name = "";
1632:       }
1633:       mess *= 0.5;
1634:       messLen *= 0.5;
1635:       red /= size;
1636:       if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1637:       else fracTime = 0.0;
1638:       if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1639:       else fracFlops = 0.0;
1640:       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1641:       if (numMessages != 0.0) fracMessages = mess / numMessages;
1642:       else fracMessages = 0.0;
1643:       if (mess != 0.0) avgMessLen = messLen / mess;
1644:       else avgMessLen = 0.0;
1645:       if (messageLength != 0.0) fracLength = messLen / messageLength;
1646:       else fracLength = 0.0;
1647:       if (numReductions != 0.0) fracReductions = red / numReductions;
1648:       else fracReductions = 0.0;
1649:       PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions);
1650:     }
1651:   }

1653:   PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n");
1654:   PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1655:   PetscFPrintf(comm, fd, "Phase summary info:\n");
1656:   PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n");
1657:   PetscFPrintf(comm, fd, "   Time and Flop: Max - maximum over all processors\n");
1658:   PetscFPrintf(comm, fd, "                  Ratio - ratio of maximum to minimum over all processors\n");
1659:   PetscFPrintf(comm, fd, "   Mess: number of messages sent\n");
1660:   PetscFPrintf(comm, fd, "   AvgLen: average message length (bytes)\n");
1661:   PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n");
1662:   PetscFPrintf(comm, fd, "   Global: entire computation\n");
1663:   PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1664:   PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flop in this phase\n");
1665:   PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n");
1666:   PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n");
1667:   PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n");
1668:   if (PetscLogMemory) {
1669:     PetscFPrintf(comm, fd, "   Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n");
1670:     PetscFPrintf(comm, fd, "   EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n");
1671:     PetscFPrintf(comm, fd, "   MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n");
1672:     PetscFPrintf(comm, fd, "   RMI Mbytes: Increase in resident memory (sum over all calls to event)\n");
1673:   }
1674:   #if defined(PETSC_HAVE_DEVICE)
1675:   PetscFPrintf(comm, fd, "   GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n");
1676:   PetscFPrintf(comm, fd, "   CpuToGpu Count: total number of CPU to GPU copies per processor\n");
1677:   PetscFPrintf(comm, fd, "   CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n");
1678:   PetscFPrintf(comm, fd, "   GpuToCpu Count: total number of GPU to CPU copies per processor\n");
1679:   PetscFPrintf(comm, fd, "   GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n");
1680:   PetscFPrintf(comm, fd, "   GPU %%F: percent flops on GPU in this event\n");
1681:   #endif
1682:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n");

1684:   PetscLogViewWarnDebugging(comm, fd);

1686:   /* Report events */
1687:   PetscFPrintf(comm, fd, "Event                Count      Time (sec)     Flop                              --- Global ---  --- Stage ----  Total");
1688:   if (PetscLogMemory) PetscFPrintf(comm, fd, "  Malloc EMalloc MMalloc RMI");
1689:   #if defined(PETSC_HAVE_DEVICE)
1690:   PetscFPrintf(comm, fd, "   GPU    - CpuToGpu -   - GpuToCpu - GPU");
1691:   #endif
1692:   PetscFPrintf(comm, fd, "\n");
1693:   PetscFPrintf(comm, fd, "                   Max Ratio  Max     Ratio   Max  Ratio  Mess   AvgLen  Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s");
1694:   if (PetscLogMemory) PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes");
1695:   #if defined(PETSC_HAVE_DEVICE)
1696:   PetscFPrintf(comm, fd, " Mflop/s Count   Size   Count   Size  %%F");
1697:   #endif
1698:   PetscFPrintf(comm, fd, "\n");
1699:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1700:   if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1701:   #if defined(PETSC_HAVE_DEVICE)
1702:   PetscFPrintf(comm, fd, "---------------------------------------");
1703:   #endif
1704:   PetscFPrintf(comm, fd, "\n");

1706:   #if defined(PETSC_HAVE_DEVICE)
1707:   /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1708:   PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve);
1709:   PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step);
1710:   PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve);
1711:   PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve);
1712:   #endif

1714:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1715:   for (stage = 0; stage < numStages; stage++) {
1716:     if (!stageVisible[stage]) continue;
1717:     /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1718:     if (localStageUsed[stage]) {
1719:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1720:       MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1721:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1722:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1723:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1724:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1725:     } else {
1726:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1727:       MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1728:       MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1729:       MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1730:       MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1731:       MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1732:     }
1733:     mess *= 0.5;
1734:     messLen *= 0.5;
1735:     red /= size;

1737:     /* Get total number of events in this stage --
1738:        Currently, a single processor can register more events than another, but events must all be registered in order,
1739:        just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1740:        on the event ID. This seems best accomplished by associating a communicator with each stage.

1742:        Problem: If the event did not happen on proc 1, its name will not be available.
1743:        Problem: Event visibility is not implemented
1744:     */
1745:     if (localStageUsed[stage]) {
1746:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
1747:       localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1748:     } else localNumEvents = 0;
1749:     MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1750:     for (event = 0; event < numEvents; event++) {
1751:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1752:       if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1753:         if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops;
1754:         else flopr = 0.0;
1755:         MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1756:         MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1757:         MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1758:         MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1759:         MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1760:         MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1761:         MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1762:         MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1763:         MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1764:         MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm);
1765:         MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm);
1766:         if (PetscLogMemory) {
1767:           MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1768:           MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1769:           MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1770:           MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1771:         }
1772:   #if defined(PETSC_HAVE_DEVICE)
1773:         MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1774:         MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1775:         MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1776:         MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1777:         MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1778:         MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1779:   #endif
1780:         name = stageLog->eventLog->eventInfo[event].name;
1781:       } else {
1782:         flopr = 0.0;
1783:         MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1784:         MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1785:         MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1786:         MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1787:         MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1788:         MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1789:         MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1790:         MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1791:         MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1792:         MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm);
1793:         MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm);
1794:         if (PetscLogMemory) {
1795:           MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1796:           MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1797:           MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1798:           MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1799:         }
1800:   #if defined(PETSC_HAVE_DEVICE)
1801:         MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1802:         MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1803:         MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1804:         MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1805:         MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1806:         MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1807:   #endif
1808:         name = "";
1809:       }
1810:       if (mint < 0.0) {
1811:         PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name);
1812:         mint = 0;
1813:       }
1815:   /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1816:   #if defined(PETSC_HAVE_DEVICE)
1817:       if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1818:         memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1819:         PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid);
1820:         if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) {
1821:           memcpy(&mint, &nas, sizeof(PetscLogDouble));
1822:           memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1823:         }
1824:       }
1825:   #endif
1826:       totm *= 0.5;
1827:       totml *= 0.5;
1828:       totr /= size;

1830:       if (maxC != 0) {
1831:         if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1832:         else ratC = 0.0;
1833:         if (mint != 0.0) ratt = maxt / mint;
1834:         else ratt = 0.0;
1835:         if (minf != 0.0) ratf = maxf / minf;
1836:         else ratf = 0.0;
1837:         if (TotalTime != 0.0) fracTime = tott / TotalTime;
1838:         else fracTime = 0.0;
1839:         if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1840:         else fracFlops = 0.0;
1841:         if (stageTime != 0.0) fracStageTime = tott / stageTime;
1842:         else fracStageTime = 0.0;
1843:         if (flops != 0.0) fracStageFlops = totf / flops;
1844:         else fracStageFlops = 0.0;
1845:         if (numMessages != 0.0) fracMess = totm / numMessages;
1846:         else fracMess = 0.0;
1847:         if (messageLength != 0.0) fracMessLen = totml / messageLength;
1848:         else fracMessLen = 0.0;
1849:         if (numReductions != 0.0) fracRed = totr / numReductions;
1850:         else fracRed = 0.0;
1851:         if (mess != 0.0) fracStageMess = totm / mess;
1852:         else fracStageMess = 0.0;
1853:         if (messLen != 0.0) fracStageMessLen = totml / messLen;
1854:         else fracStageMessLen = 0.0;
1855:         if (red != 0.0) fracStageRed = totr / red;
1856:         else fracStageRed = 0.0;
1857:         if (totm != 0.0) totml /= totm;
1858:         else totml = 0.0;
1859:         if (maxt != 0.0) flopr = totf / maxt;
1860:         else flopr = 0.0;
1861:         if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
1862:           PetscFPrintf(comm, fd, "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6);
1863:         else
1864:           PetscFPrintf(comm, fd, "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6);
1865:         if (PetscLogMemory) PetscFPrintf(comm, fd, " %5.0f   %5.0f   %5.0f   %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6);
1866:   #if defined(PETSC_HAVE_DEVICE)
1867:         if (totf != 0.0) fracgflops = gflops / totf;
1868:         else fracgflops = 0.0;
1869:         if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1870:         else gflopr = 0.0;
1871:         PetscFPrintf(comm, fd, "   %5.0f   %4.0f %3.2e %4.0f %3.2e% 3.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops);
1872:   #endif
1873:         PetscFPrintf(comm, fd, "\n");
1874:       }
1875:     }
1876:   }

1878:   /* Memory usage and object creation */
1879:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1880:   if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1881:   #if defined(PETSC_HAVE_DEVICE)
1882:   PetscFPrintf(comm, fd, "---------------------------------------");
1883:   #endif
1884:   PetscFPrintf(comm, fd, "\n");
1885:   PetscFPrintf(comm, fd, "\n");

1887:   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1888:      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1889:      stats for stages local to processor sets.
1890:   */
1891:   /* We should figure out the longest object name here (now 20 characters) */
1892:   PetscFPrintf(comm, fd, "Object Type          Creations   Destructions. Reports information only for process 0.\n");
1893:   for (stage = 0; stage < numStages; stage++) {
1894:     if (localStageUsed[stage]) {
1895:       classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1896:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1897:       for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1898:         if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1899:           PetscFPrintf(comm, fd, "%20s %5d          %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions);
1900:         }
1901:       }
1902:     } else {
1903:       if (!localStageVisible[stage]) continue;
1904:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1905:     }
1906:   }

1908:   PetscFree(localStageUsed);
1909:   PetscFree(stageUsed);
1910:   PetscFree(localStageVisible);
1911:   PetscFree(stageVisible);

1913:   /* Information unrelated to this particular run */
1914:   PetscFPrintf(comm, fd, "========================================================================================================================\n");
1915:   PetscTime(&y);
1916:   PetscTime(&x);
1917:   PetscTime(&y);
1918:   PetscTime(&y);
1919:   PetscTime(&y);
1920:   PetscTime(&y);
1921:   PetscTime(&y);
1922:   PetscTime(&y);
1923:   PetscTime(&y);
1924:   PetscTime(&y);
1925:   PetscTime(&y);
1926:   PetscTime(&y);
1927:   PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0);
1928:   /* MPI information */
1929:   if (size > 1) {
1930:     MPI_Status  status;
1931:     PetscMPIInt tag;
1932:     MPI_Comm    newcomm;

1934:     MPI_Barrier(comm);
1935:     PetscTime(&x);
1936:     MPI_Barrier(comm);
1937:     MPI_Barrier(comm);
1938:     MPI_Barrier(comm);
1939:     MPI_Barrier(comm);
1940:     MPI_Barrier(comm);
1941:     PetscTime(&y);
1942:     PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0);
1943:     PetscCommDuplicate(comm, &newcomm, &tag);
1944:     MPI_Barrier(comm);
1945:     if (rank) {
1946:       MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status);
1947:       MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm);
1948:     } else {
1949:       PetscTime(&x);
1950:       MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm);
1951:       MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status);
1952:       PetscTime(&y);
1953:       PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size);
1954:     }
1955:     PetscCommDestroy(&newcomm);
1956:   }
1957:   PetscOptionsView(NULL, viewer);

1959:   /* Machine and compile information */
1960:   #if defined(PETSC_USE_FORTRAN_KERNELS)
1961:   PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1962:   #else
1963:   PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1964:   #endif
1965:   #if defined(PETSC_USE_64BIT_INDICES)
1966:   PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n");
1967:   #elif defined(PETSC_USE___FLOAT128)
1968:   PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n");
1969:   #endif
1970:   #if defined(PETSC_USE_REAL_SINGLE)
1971:   PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1972:   #elif defined(PETSC_USE___FLOAT128)
1973:   PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n");
1974:   #endif
1975:   #if defined(PETSC_USE_REAL_MAT_SINGLE)
1976:   PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1977:   #else
1978:   PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1979:   #endif
1980:   PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt));

1982:   PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions);
1983:   PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1984:   PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1985:   PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1986:   PetscFPrintf(comm, fd, "%s", petsclinkerinfo);

1988:   /* Cleanup */
1989:   PetscFPrintf(comm, fd, "\n");
1990:   PetscLogViewWarnNoGpuAwareMpi(comm, fd);
1991:   PetscLogViewWarnDebugging(comm, fd);
1992:   PetscFPTrapPop();
1993:   return 0;
1994: }

1996: /*@C
1997:   PetscLogView - Prints a summary of the logging.

1999:   Collective over MPI_Comm

2001:   Input Parameter:
2002: .  viewer - an ASCII viewer

2004:   Options Database Keys:
2005: +  -log_view [:filename] - Prints summary of log information
2006: .  -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file
2007: .  -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it)
2008: .  -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it)
2009: .  -log_view_memory - Also display memory usage in each event
2010: .  -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation)
2011: .  -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation
2012: -  -log_trace [filename] - Displays a trace of what each process is doing

2014:   Notes:
2015:   It is possible to control the logging programatically but we recommend using the options database approach whenever possible
2016:   By default the summary is printed to stdout.

2018:   Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin()

2020:   If PETSc is configured with --with-logging=0 then this functionality is not available

2022:   To view the nested XML format filename.xml first copy  ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current
2023:   directory then open filename.xml with your browser. Specific notes for certain browsers
2024: $    Firefox and Internet explorer - simply open the file
2025: $    Google Chrome - you must start up Chrome with the option --allow-file-access-from-files
2026: $    Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access
2027:   or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with
2028:   your browser.
2029:   Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser
2030:   window and render the XML log file contents.

2032:   The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij  MARITIME  RESEARCH  INSTITUTE  NETHERLANDS

2034:   The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph)
2035:   or using speedscope (https://www.speedscope.app).
2036:   Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py.

2038:   Level: beginner

2040: .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()`
2041: @*/
2042: PetscErrorCode PetscLogView(PetscViewer viewer)
2043: {
2044:   PetscBool         isascii;
2045:   PetscViewerFormat format;
2046:   int               stage, lastStage;
2047:   PetscStageLog     stageLog;

2050:   /* Pop off any stages the user forgot to remove */
2051:   lastStage = 0;
2052:   PetscLogGetStageLog(&stageLog);
2053:   PetscStageLogGetCurrent(stageLog, &stage);
2054:   while (stage >= 0) {
2055:     lastStage = stage;
2056:     PetscStageLogPop(stageLog);
2057:     PetscStageLogGetCurrent(stageLog, &stage);
2058:   }
2059:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
2061:   PetscViewerGetFormat(viewer, &format);
2062:   if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
2063:     PetscLogView_Default(viewer);
2064:   } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2065:     PetscLogView_Detailed(viewer);
2066:   } else if (format == PETSC_VIEWER_ASCII_CSV) {
2067:     PetscLogView_CSV(viewer);
2068:   } else if (format == PETSC_VIEWER_ASCII_XML) {
2069:     PetscLogView_Nested(viewer);
2070:   } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) {
2071:     PetscLogView_Flamegraph(viewer);
2072:   }
2073:   PetscStageLogPush(stageLog, lastStage);
2074:   return 0;
2075: }

2077: /*@C
2078:   PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed.

2080:   Collective on `PETSC_COMM_WORLD`

2082:   Level: developer

2084: .seealso: `PetscLogView()`
2085: @*/
2086: PetscErrorCode PetscLogViewFromOptions(void)
2087: {
2088:   PetscViewer       viewer;
2089:   PetscBool         flg;
2090:   PetscViewerFormat format;

2092:   PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg);
2093:   if (flg) {
2094:     PetscViewerPushFormat(viewer, format);
2095:     PetscLogView(viewer);
2096:     PetscViewerPopFormat(viewer);
2097:     PetscViewerDestroy(&viewer);
2098:   }
2099:   return 0;
2100: }

2102: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
2103: /*@C
2104:    PetscGetFlops - Returns the number of flops used on this processor
2105:    since the program began.

2107:    Not Collective

2109:    Output Parameter:
2110:    flops - number of floating point operations

2112:    Notes:
2113:    A global counter logs all PETSc flop counts.  The user can use
2114:    `PetscLogFlops()` to increment this counter to include flops for the
2115:    application code.

2117:    A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank

2119:    Level: intermediate

2121: .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()`
2122: @*/
2123: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
2124: {
2125:   *flops = petsc_TotalFlops;
2126:   return 0;
2127: }

2129: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2130: {
2131:   size_t  fullLength;
2132:   va_list Argp;

2134:   if (!petsc_logObjects) return 0;
2135:   va_start(Argp, format);
2136:   PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp);
2137:   va_end(Argp);
2138:   return 0;
2139: }

2141: /*MC
2142:    PetscLogFlops - Adds floating point operations to the global counter.

2144:    Synopsis:
2145: #include <petsclog.h>
2146:    PetscErrorCode PetscLogFlops(PetscLogDouble f)

2148:    Not Collective

2150:    Input Parameter:
2151: .  f - flop counter

2153:    Usage:
2154: .vb
2155:      PetscLogEvent USER_EVENT;
2156:      PetscLogEventRegister("User event",0,&USER_EVENT);
2157:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
2158:         [code segment to monitor]
2159:         PetscLogFlops(user_flops)
2160:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
2161: .ve

2163:    Note:
2164:    A global counter logs all PETSc flop counts.  The user can use
2165:    PetscLogFlops() to increment this counter to include flops for the
2166:    application code.

2168:    Level: intermediate

2170: .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()`
2171: M*/

2173: /*MC
2174:    PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2175:     to get accurate timings

2177:    Synopsis:
2178: #include <petsclog.h>
2179:    void PetscPreLoadBegin(PetscBool  flag,char *name);

2181:    Not Collective

2183:    Input Parameters:
2184: +   flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden
2185:            with command line option -preload true or -preload false
2186: -   name - name of first stage (lines of code timed separately with -log_view) to
2187:            be preloaded

2189:    Usage:
2190: .vb
2191:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2192:        lines of code
2193:        PetscPreLoadStage("second stage");
2194:        lines of code
2195:      PetscPreLoadEnd();
2196: .ve

2198:    Note:
2199:     Only works in C/C++, not Fortran

2201:      Flags available within the macro.
2202: +    PetscPreLoadingUsed - true if we are or have done preloading
2203: .    PetscPreLoadingOn - true if it is CURRENTLY doing preload
2204: .    PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2205: -    PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2206:      The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
2207:      and PetscPreLoadEnd()

2209:    Level: intermediate

2211: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
2212: M*/

2214: /*MC
2215:    PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2216:     to get accurate timings

2218:    Synopsis:
2219: #include <petsclog.h>
2220:    void PetscPreLoadEnd(void);

2222:    Not Collective

2224:    Usage:
2225: .vb
2226:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2227:        lines of code
2228:        PetscPreLoadStage("second stage");
2229:        lines of code
2230:      PetscPreLoadEnd();
2231: .ve

2233:    Note:
2234:     Only works in C/C++ not fortran

2236:    Level: intermediate

2238: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()`
2239: M*/

2241: /*MC
2242:    PetscPreLoadStage - Start a new segment of code to be timed separately.
2243:     to get accurate timings

2245:    Synopsis:
2246: #include <petsclog.h>
2247:    void PetscPreLoadStage(char *name);

2249:    Not Collective

2251:    Usage:
2252: .vb
2253:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2254:        lines of code
2255:        PetscPreLoadStage("second stage");
2256:        lines of code
2257:      PetscPreLoadEnd();
2258: .ve

2260:    Note:
2261:     Only works in C/C++ not fortran

2263:    Level: intermediate

2265: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`
2266: M*/

2268:   #if PetscDefined(HAVE_DEVICE)
2269: #include <petsc/private/deviceimpl.h>

2271: PetscBool PetscLogGpuTimeFlag = PETSC_FALSE;

2273: /*
2274:    This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code
2275:    because it will result in timing results that cannot be interpreted.
2276: */
2277: static PetscErrorCode PetscLogGpuTime_Off(void)
2278: {
2279:   PetscLogGpuTimeFlag = PETSC_FALSE;
2280:   return 0;
2281: }

2283: /*@C
2284:      PetscLogGpuTime - turn on the logging of GPU time for GPU kernels

2286:   Options Database Key:
2287: .   -log_view_gpu_time - provide the GPU times in the -log_view output

2289:   Notes:
2290:     Turning on the timing of the
2291:     GPU kernels can slow down the entire computation and should only be used when studying the performance
2292:     of operations on GPU such as vector operations and matrix-vector operations.

2294:     This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off.

2296:    Level: advanced

2298: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()`
2299: @*/
2300: PetscErrorCode PetscLogGpuTime(void)
2301: {
2302:   if (!PetscLogGpuTimeFlag) PetscRegisterFinalize(PetscLogGpuTime_Off);
2303:   PetscLogGpuTimeFlag = PETSC_TRUE;
2304:   return 0;
2305: }

2307: /*@C
2308:   PetscLogGpuTimeBegin - Start timer for device

2310:   Notes:
2311:     When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times).

2313:     When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times).

2315:     There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()`

2317:     This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space.

2319:     The regular logging captures the time for data transfers and any CPU activites during the event

2321:     It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel.

2323:   Developer Notes:
2324:     The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`.

2326:     `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the
2327:     event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution,
2328:     but not continued GPU excution, until the timer event is recorded.

2330:   Level: intermediate

2332: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()`
2333: @*/
2334: PetscErrorCode PetscLogGpuTimeBegin(void)
2335: {
2336:   if (!PetscLogPLB || !PetscLogGpuTimeFlag) return 0;
2337:   if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2338:     PetscDeviceContext dctx;

2340:     PetscDeviceContextGetCurrentContext(&dctx);
2341:     PetscDeviceContextBeginTimer_Internal(dctx);
2342:   } else {
2343:     PetscTimeSubtract(&petsc_gtime);
2344:   }
2345:   return 0;
2346: }

2348: /*@C
2349:   PetscLogGpuTimeEnd - Stop timer for device

2351:   Level: intermediate

2353: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()`
2354: @*/
2355: PetscErrorCode PetscLogGpuTimeEnd(void)
2356: {
2357:   if (!PetscLogPLE || !PetscLogGpuTimeFlag) return 0;
2358:   if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2359:     PetscDeviceContext dctx;
2360:     PetscLogDouble     elapsed;

2362:     PetscDeviceContextGetCurrentContext(&dctx);
2363:     PetscDeviceContextEndTimer_Internal(dctx, &elapsed);
2364:     petsc_gtime += (elapsed / 1000.0);
2365:   } else {
2366:     PetscTimeAdd(&petsc_gtime);
2367:   }
2368:   return 0;
2369: }
2370:   #endif /* end of PETSC_HAVE_DEVICE */

2372: #else /* end of -DPETSC_USE_LOG section */

2374: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2375: {
2376:   return 0;
2377: }

2379: #endif /* PETSC_USE_LOG*/

2381: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2382: PetscClassId PETSC_OBJECT_CLASSID  = 0;

2384: /*@C
2385:   PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.

2387:   Not Collective

2389:   Input Parameter:
2390: . name   - The class name

2392:   Output Parameter:
2393: . oclass - The class id or classid

2395:   Level: developer

2397: .seealso: `PetscLogEventRegister()`
2398: @*/
2399: PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass)
2400: {
2401: #if defined(PETSC_USE_LOG)
2402:   PetscStageLog stageLog;
2403:   PetscInt      stage;
2404: #endif

2406:   *oclass = ++PETSC_LARGEST_CLASSID;
2407: #if defined(PETSC_USE_LOG)
2408:   PetscLogGetStageLog(&stageLog);
2409:   PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2410:   for (stage = 0; stage < stageLog->numStages; stage++) PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2411: #endif
2412:   return 0;
2413: }

2415: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE)
2416:   #include <mpe.h>

2418: PetscBool PetscBeganMPE = PETSC_FALSE;

2420: PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2421: PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);

2423: /*@C
2424:    PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files
2425:    and slows the program down.

2427:    Collective over `PETSC_COMM_WORLD`

2429:    Options Database Key:
2430: . -log_mpe - Prints extensive log information

2432:    Note:
2433:    A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
2434:    intended for production runs since it logs only flop rates and object
2435:    creation (and should not significantly slow the programs).

2437:    Level: advanced

2439: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`,
2440:           `PetscLogEventDeactivate()`
2441: @*/
2442: PetscErrorCode PetscLogMPEBegin(void)
2443: {
2444:   /* Do MPE initialization */
2445:   if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */
2446:     PetscInfo(0, "Initializing MPE.\n");
2447:     MPE_Init_log();

2449:     PetscBeganMPE = PETSC_TRUE;
2450:   } else {
2451:     PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n");
2452:   }
2453:   PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE);
2454:   return 0;
2455: }

2457: /*@C
2458:    PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot.

2460:    Collective over `PETSC_COMM_WORLD`

2462:    Level: advanced

2464: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()`
2465: @*/
2466: PetscErrorCode PetscLogMPEDump(const char sname[])
2467: {
2468:   char name[PETSC_MAX_PATH_LEN];

2470:   if (PetscBeganMPE) {
2471:     PetscInfo(0, "Finalizing MPE.\n");
2472:     if (sname) {
2473:       PetscStrcpy(name, sname);
2474:     } else {
2475:       PetscGetProgramName(name, sizeof(name));
2476:     }
2477:     MPE_Finish_log(name);
2478:   } else {
2479:     PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n");
2480:   }
2481:   return 0;
2482: }

2484:   #define PETSC_RGB_COLORS_MAX 39
2485: static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab:      ", "BlueViolet:     ", "CadetBlue:      ", "CornflowerBlue: ", "DarkGoldenrod:  ", "DarkGreen:      ", "DarkKhaki:      ", "DarkOliveGreen: ",
2486:                                                                  "DarkOrange:     ", "DarkOrchid:     ", "DarkSeaGreen:   ", "DarkSlateGray:  ", "DarkTurquoise:  ", "DeepPink:       ", "DarkKhaki:      ", "DimGray:        ",
2487:                                                                  "DodgerBlue:     ", "GreenYellow:    ", "HotPink:        ", "IndianRed:      ", "LavenderBlush:  ", "LawnGreen:      ", "LemonChiffon:   ", "LightCoral:     ",
2488:                                                                  "LightCyan:      ", "LightPink:      ", "LightSalmon:    ", "LightSlateGray: ", "LightYellow:    ", "LimeGreen:      ", "MediumPurple:   ", "MediumSeaGreen: ",
2489:                                                                  "MediumSlateBlue:", "MidnightBlue:   ", "MintCream:      ", "MistyRose:      ", "NavajoWhite:    ", "NavyBlue:       ", "OliveDrab:      "};

2491: /*@C
2492:   PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()`

2494:   Not collective. Maybe it should be?

2496:   Output Parameter:
2497: . str - character string representing the color

2499:   Level: developer

2501: .seealso: `PetscLogEventRegister()`
2502: @*/
2503: PetscErrorCode PetscLogMPEGetRGBColor(const char *str[])
2504: {
2505:   static int idx = 0;

2507:   *str = PetscLogMPERGBColors[idx];
2508:   idx  = (idx + 1) % PETSC_RGB_COLORS_MAX;
2509:   return 0;
2510: }

2512: #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */