Actual source code: sfimpl.h
#if !defined(SFIMPL_H)
#define SFIMPL_H

#include <petscvec.h>
#include <petscsf.h>
#include <petsc/private/deviceimpl.h>
#include <petsc/private/mpiutils.h>
#include <petsc/private/petscimpl.h>

PETSC_EXTERN PetscLogEvent PETSCSF_SetGraph;
PETSC_EXTERN PetscLogEvent PETSCSF_SetUp;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_EmbedSF;
PETSC_EXTERN PetscLogEvent PETSCSF_DistSect;
PETSC_EXTERN PetscLogEvent PETSCSF_SectSF;
PETSC_EXTERN PetscLogEvent PETSCSF_RemoteOff;
PETSC_EXTERN PetscLogEvent PETSCSF_Pack;
PETSC_EXTERN PetscLogEvent PETSCSF_Unpack;

typedef enum {PETSCSF_ROOT2LEAF=0, PETSCSF_LEAF2ROOT} PetscSFDirection;
typedef enum {PETSCSF_BCAST=0, PETSCSF_REDUCE, PETSCSF_FETCH} PetscSFOperation;
/* When doing device-aware MPI, a backend refers to the SF/device interface */
typedef enum {PETSCSF_BACKEND_INVALID=0,PETSCSF_BACKEND_CUDA,PETSCSF_BACKEND_HIP,PETSCSF_BACKEND_KOKKOS} PetscSFBackend;

struct _PetscSFOps {
  PetscErrorCode (*Reset)(PetscSF);
  PetscErrorCode (*Destroy)(PetscSF);
  PetscErrorCode (*SetUp)(PetscSF);
  PetscErrorCode (*SetFromOptions)(PetscOptionItems*,PetscSF);
  PetscErrorCode (*View)(PetscSF,PetscViewer);
  PetscErrorCode (*Duplicate)(PetscSF,PetscSFDuplicateOption,PetscSF);
  PetscErrorCode (*BcastBegin)     (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*BcastEnd)       (PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
  PetscErrorCode (*ReduceBegin)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*ReduceEnd)      (PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
  PetscErrorCode (*FetchAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,void*,PetscMemType,const void*,void*,MPI_Op);
  PetscErrorCode (*FetchAndOpEnd)  (PetscSF,MPI_Datatype,void*,const void*,void*,MPI_Op);
  PetscErrorCode (*BcastToZero)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*); /* For internal use only */
  PetscErrorCode (*GetRootRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*GetLeafRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*CreateLocalSF)(PetscSF,PetscSF*);
  PetscErrorCode (*GetGraph)(PetscSF,PetscInt*,PetscInt*,const PetscInt**,const PetscSFNode**);
  PetscErrorCode (*CreateEmbeddedRootSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
  PetscErrorCode (*CreateEmbeddedLeafSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);

  PetscErrorCode (*Malloc)(PetscMemType,size_t,void**);
  PetscErrorCode (*Free)(PetscMemType,void*);
};
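
/* Illustrative sketch (not part of the original header): an SF implementation's create routine
   typically fills in the ops table above with its own callbacks and stashes implementation-private
   state in sf->data. The type "MyImpl" and the function names below are hypothetical placeholders.

     static PetscErrorCode PetscSFSetUp_MyImpl(PetscSF sf) { ... }
     static PetscErrorCode PetscSFBcastBegin_MyImpl(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) { ... }

     PETSC_INTERN PetscErrorCode PetscSFCreate_MyImpl(PetscSF sf)
     {
       PetscFunctionBegin;
       sf->ops->SetUp      = PetscSFSetUp_MyImpl;      // called lazily from PetscSFSetUp()
       sf->ops->BcastBegin = PetscSFBcastBegin_MyImpl; // signature must match struct _PetscSFOps
       sf->data            = NULL;                     // implementation-private state goes here
       PetscFunctionReturn(0);
     }
*/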

typedef struct _n_PetscSFPackOpt *PetscSFPackOpt;

struct _p_PetscSF {
  PETSCHEADER(struct _PetscSFOps);
  struct { /* Fields needed to implement VecScatter behavior */
    PetscInt from_n,to_n;            /* Recorded local sizes of the input from/to vectors in VecScatterCreate(). Used subsequently for error checking. */
    PetscBool beginandendtogether;   /* Indicates that the scatter begin and end functions are called together; VecScatterEnd() is then treated as a no-op */
    const PetscScalar *xdata;        /* Vector data to read from */
    PetscScalar *ydata;              /* Vector data to write to. The two pointers are recorded in VecScatterBegin(). Memory is not managed by SF. */
    PetscSF lsf;                     /* The local part of the scatter, used in SCATTER_LOCAL. Built on demand. */
    PetscInt bs;                     /* Block size, determined by the IS passed to VecScatterCreate() */
    MPI_Datatype unit;               /* One unit = bs PetscScalars */
    PetscBool logging;               /* Indicates whether vscat log events are active; if so, avoid duplicated SF logging to keep -log_view output clear */
  } vscat;

  /* Fields for generic PetscSF functionality */
  PetscInt nroots;                   /* Number of root vertices on current process (candidates for incoming edges) */
  PetscInt nleaves;                  /* Number of leaf vertices on current process (this process specifies a root for each leaf) */
  PetscInt *mine;                    /* Location of leaves in leafdata arrays provided to the communication routines */
  PetscInt *mine_alloc;
  PetscInt minleaf,maxleaf;
  PetscSFNode *remote;               /* Remote references to roots for each local leaf */
  PetscSFNode *remote_alloc;
  PetscInt nranks;                   /* Number of ranks owning roots connected to my leaves */
  PetscInt ndranks;                  /* Number of ranks in distinguished group holding roots connected to my leaves */
  PetscMPIInt *ranks;                /* List of ranks referenced by "remote" */
  PetscInt *roffset;                 /* Array of length nranks+1, offset in rmine/rremote for each rank */
  PetscInt *rmine;                   /* Concatenated array holding local indices referencing each remote rank */
  PetscInt *rmine_d[2];              /* A copy of rmine[local/remote] in device memory if needed */

  /* Some results useful in packing, obtained by analyzing rmine[] */
  PetscInt leafbuflen[2];            /* Length (in units) of leaf buffers, in layout of [PETSCSF_LOCAL/REMOTE] */
  PetscBool leafcontig[2];           /* True means indices in rmine[self part] or rmine[remote part] are contiguous, and they start from ... */
  PetscInt leafstart[2];             /* ... leafstart[0] and leafstart[1] respectively */
  PetscSFPackOpt leafpackopt[2];     /* Optimization plans to (un)pack leaves connected to remote roots, based on index patterns in rmine[]. NULL for no optimization */
  PetscSFPackOpt leafpackopt_d[2];   /* Copy of leafpackopt[] on device if needed */
  PetscBool leafdups[2];             /* Do indices in rmine[] for self(0)/remote(1) communication have duplicates? If TRUE, threads working on them in parallel may have data races. */

  PetscInt nleafreqs;                /* Number of MPI requests for leaves */
  PetscInt *rremote;                 /* Concatenated array holding remote indices referenced for each remote rank */
  PetscBool degreeknown;             /* The degree is currently known, do not have to recompute */
  PetscInt *degree;                  /* Degree of each of my root vertices */
  PetscInt *degreetmp;               /* Temporary local array for computing degree */
  PetscBool rankorder;               /* Sort ranks for gather and scatter operations */
  MPI_Group ingroup;                 /* Group of processes connected to my roots */
  MPI_Group outgroup;                /* Group of processes connected to my leaves */
  PetscSF multi;                     /* Internal graph used to implement gather and scatter operations */
  PetscBool graphset;                /* Flag indicating that the graph has been set, required before calling communication routines */
  PetscBool setupcalled;             /* Type and communication structures have been set up */
  PetscSFPattern pattern;            /* Pattern of the graph */
  PetscBool persistent;              /* Does this SF use MPI persistent requests for communication? */
  PetscLayout map;                   /* Layout of leaves over all processes when building a patterned graph */
  PetscBool unknown_input_stream;    /* If true, SF does not know which streams root/leafdata are on. Default is false, since we only use the PETSc default stream */
  PetscBool use_gpu_aware_mpi;       /* If true, SF assumes it can pass GPU pointers to MPI */
  PetscBool use_stream_aware_mpi;    /* If true, SF assumes the underlying MPI is CUDA stream-aware and we won't sync streams for send/recv buffers passed to MPI */
  PetscInt maxResidentThreadsPerGPU;
  PetscBool allow_multi_leaves;
  PetscSFBackend backend;            /* The device backend (if any) SF will use */
  void *data;                        /* Pointer to implementation */

#if defined(PETSC_HAVE_NVSHMEM)
  PetscBool use_nvshmem;             /* Try to use NVSHMEM on CUDA devices with this SF when possible */
  PetscBool use_nvshmem_get;         /* If true, use the nvshmem_get based protocol; otherwise use the nvshmem_put based protocol */
  PetscBool checked_nvshmem_eligibility; /* Have we checked eligibility of using NVSHMEM on this SF? */
  PetscBool setup_nvshmem;           /* Have we already set up the NVSHMEM related fields below? These fields are built on demand */
  PetscInt leafbuflen_rmax;          /* Max leafbuflen[REMOTE] over comm */
  PetscInt nRemoteRootRanks;         /* nranks - ndranks */
  PetscInt nRemoteRootRanksMax;      /* Max nranks-ndranks over comm */

  /* The following two fields look confusing but make sense for one-sided communication: they are offsets into buffers on the remote side */
  PetscInt *rootsigdisp;             /* [nRemoteRootRanks]. For my i-th remote root rank, I will access its rootsigdisp[i]-th root signal */
  PetscInt *rootbufdisp;             /* [nRemoteRootRanks]. For my i-th remote root rank, I will access its root buf at offset rootbufdisp[i], in <unit> to be set */

  PetscInt *rootbufdisp_d;
  PetscInt *rootsigdisp_d;           /* Copy of rootsigdisp[] on device */
  PetscMPIInt *ranks_d;              /* Copy of the remote part of (root) ranks[] on device */
  PetscInt *roffset_d;               /* Copy of the remote part of roffset[] on device */
#endif
};
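
/* Illustrative sketch (not part of the original header): the graph fields above (nroots, nleaves,
   mine, remote) are normally populated through the public PetscSFSetGraph() interface rather than
   written directly. A minimal example, assuming a multi-process communicator where each process
   exposes 2 roots and has 1 leaf referencing root 1 on the next process (error checking omitted):

     PetscSF     sf;
     PetscSFNode remote[1];
     PetscMPIInt rank,size;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     remote[0].rank  = (rank+1)%size;  // process owning the referenced root
     remote[0].index = 1;              // root index on that process
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraph(sf,2,1,NULL,PETSC_OWN_POINTER,remote,PETSC_COPY_VALUES); // NULL ilocal: leaves are contiguous from 0
     PetscSFSetUp(sf);                 // builds ranks[], roffset[], rmine[], rremote[] used for packing
     PetscSFDestroy(&sf);
*/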

PETSC_EXTERN PetscBool PetscSFRegisterAllCalled;
PETSC_EXTERN PetscErrorCode PetscSFRegisterAll(void);

PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF,PetscSF*);
PETSC_INTERN PetscErrorCode PetscSFBcastToZero_Private(PetscSF,MPI_Datatype,const void*,void*);

PETSC_EXTERN PetscErrorCode MPIPetsc_Type_unwrap(MPI_Datatype,MPI_Datatype*,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype,MPI_Datatype,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare_contig(MPI_Datatype,MPI_Datatype,PetscInt*);

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)    MPI_Iscatter(a,b,c,d,e,f,g,h,req)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req) MPI_Iscatterv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)     MPI_Igather(a,b,c,d,e,f,g,h,req)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)  MPI_Igatherv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)    MPI_Iallgather(a,b,c,d,e,f,g,req)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req) MPI_Iallgatherv(a,b,c,d,e,f,g,h,req)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)     MPI_Ialltoall(a,b,c,d,e,f,g,req)
#else
/* Ignore req, the MPI_Request argument, and use the blocking MPI collectives. One should initialize req
   to MPI_REQUEST_NULL so that MPI_Wait(req,status) can be called regardless of whether the call was blocking or not.
 */
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)    MPI_Scatter(a,b,c,d,e,f,g,h)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req) MPI_Scatterv(a,b,c,d,e,f,g,h,i)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)     MPI_Gather(a,b,c,d,e,f,g,h)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)  MPI_Gatherv(a,b,c,d,e,f,g,h,i)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)    MPI_Allgather(a,b,c,d,e,f,g)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req) MPI_Allgatherv(a,b,c,d,e,f,g,h)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)     MPI_Alltoall(a,b,c,d,e,f,g)
#endif
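
/* Illustrative sketch (not part of the original header): callers initialize the request to
   MPI_REQUEST_NULL so the same code works whether MPIU_Iallgather() expands to the nonblocking
   or the blocking collective (MPI_Wait() on MPI_REQUEST_NULL returns immediately). Here comm is
   an assumed communicator and allranks an assumed buffer with one entry per process.

     PetscMPIInt rank,*allranks;
     MPI_Request req = MPI_REQUEST_NULL;

     MPI_Comm_rank(comm,&rank);
     MPIU_Iallgather(&rank,1,MPI_INT,allranks,1,MPI_INT,comm,&req);
     // ... overlap other work here when nonblocking collectives are available ...
     MPI_Wait(&req,MPI_STATUS_IGNORE);
*/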

PETSC_EXTERN PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter,PetscBool,PetscInt*,PetscInt*);
PETSC_EXTERN PetscErrorCode VecScatterGetRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
PETSC_EXTERN PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
PETSC_EXTERN PetscErrorCode VecScatterRestoreRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
PETSC_EXTERN PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);

#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode PetscSFMalloc_CUDA(PetscMemType,size_t,void**);
PETSC_EXTERN PetscErrorCode PetscSFFree_CUDA(PetscMemType,void*);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_EXTERN PetscErrorCode PetscSFMalloc_HIP(PetscMemType,size_t,void**);
PETSC_EXTERN PetscErrorCode PetscSFFree_HIP(PetscMemType,void*);
#endif
#if defined(PETSC_HAVE_KOKKOS)
PETSC_EXTERN PetscErrorCode PetscSFMalloc_Kokkos(PetscMemType,size_t,void**);
PETSC_EXTERN PetscErrorCode PetscSFFree_Kokkos(PetscMemType,void*);
#endif

/* SF only supports CUDA, HIP, and Kokkos devices. Even though VIENNACL is a device type, its device
   pointers are invisible to SF. Through VecGetArray(), we copy data of VECVIENNACL from device to host
   and pass host pointers to SF.
 */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_KOKKOS) || defined(PETSC_HAVE_HIP)
#define PetscSFMalloc(sf,mtype,sz,ptr) ((*(sf)->ops->Malloc)(mtype,sz,ptr))
/* Free memory and set ptr to NULL on success */
#define PetscSFFree(sf,mtype,ptr)      ((ptr) && ((*(sf)->ops->Free)(mtype,ptr) || ((ptr)=NULL,0)))
#else
/* Pure host code: use less indirection */
#define PetscSFMalloc(sf,mtype,sz,ptr) PetscMalloc(sz,ptr)
#define PetscSFFree(sf,mtype,ptr)      PetscFree(ptr)
#endif
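
/* Illustrative sketch (not part of the original header): PetscSFMalloc()/PetscSFFree() take the memory
   type of the data so that buffers land in host or device memory as appropriate. The variables
   leafmtype and unitbytes below are hypothetical; error checking of the returned codes is omitted.

     void  *leafbuf = NULL;
     size_t bytes   = (size_t)sf->leafbuflen[1]*unitbytes;  // remote leaf buffer length, in bytes

     PetscSFMalloc(sf,leafmtype,bytes,&leafbuf); // dispatches to ops->Malloc on device builds, PetscMalloc() otherwise
     // ... pack leaf data into leafbuf and hand it to MPI ...
     PetscSFFree(sf,leafmtype,leafbuf);          // frees the buffer and resets leafbuf to NULL
*/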

#endif