Actual source code: baijfact3.c
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
8: /*
9: This is used to set the numeric factorization for both LU and ILU symbolic factorization
10: */
11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
12: {
13: if (natural) {
14: switch (fact->rmap->bs) {
15: case 1:
16: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17: break;
18: case 2:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20: break;
21: case 3:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23: break;
24: case 4:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26: break;
27: case 5:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29: break;
30: case 6:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32: break;
33: case 7:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35: break;
36: case 9:
37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39: #else
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41: #endif
42: break;
43: case 15:
44: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45: break;
46: default:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48: break;
49: }
50: } else {
51: switch (fact->rmap->bs) {
52: case 1:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54: break;
55: case 2:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57: break;
58: case 3:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60: break;
61: case 4:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63: break;
64: case 5:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66: break;
67: case 6:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69: break;
70: case 7:
71: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72: break;
73: default:
74: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75: break;
76: }
77: }
78: return 0;
79: }
81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
82: {
83: if (natural) {
84: switch (inA->rmap->bs) {
85: case 1:
86: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
87: break;
88: case 2:
89: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
90: break;
91: case 3:
92: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
93: break;
94: case 4:
95: #if defined(PETSC_USE_REAL_MAT_SINGLE)
96: {
97: PetscBool sse_enabled_local;
98: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
99: if (sse_enabled_local) {
100: # if defined(PETSC_HAVE_SSE)
101: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102: if (n==(unsigned short)n) {
103: unsigned short *aj=(unsigned short*)AJ;
104: for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
106: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110: } else {
111: /* Scale the column indices for easier indexing in MatSolve. */
112: /* for (i=0;i<nz;i++) { */
113: /* AJ[i] = AJ[i]*4; */
114: /* } */
115: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
118: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119: }
120: # else
121: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
122: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: # endif
124: } else {
125: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: }
127: }
128: #else
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131: break;
132: case 5:
133: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134: break;
135: case 6:
136: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137: break;
138: case 7:
139: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140: break;
141: default:
142: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143: break;
144: }
145: } else {
146: switch (inA->rmap->bs) {
147: case 1:
148: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149: break;
150: case 2:
151: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152: break;
153: case 3:
154: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155: break;
156: case 4:
157: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158: break;
159: case 5:
160: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161: break;
162: case 6:
163: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164: break;
165: case 7:
166: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167: break;
168: default:
169: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170: break;
171: }
172: }
173: return 0;
174: }
176: /*
177: The symbolic factorization code is identical to that for AIJ format,
178: except for very small changes since this is now a SeqBAIJ datastructure.
179: NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>
184: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
185: {
186: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
187: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
188: PetscBool row_identity,col_identity,both_identity;
189: IS isicol;
190: const PetscInt *r,*ic;
191: PetscInt i,*ai=a->i,*aj=a->j;
192: PetscInt *bi,*bj,*ajtmp;
193: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
194: PetscReal f;
195: PetscInt nlnk,*lnk,k,**bi_ptr;
196: PetscFreeSpaceList free_space=NULL,current_space=NULL;
197: PetscBT lnkbt;
198: PetscBool missing;
201: MatMissingDiagonal(A,&missing,&i);
204: if (bs>1) { /* check shifttype */
206: }
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISGetIndices(isrow,&r);
210: ISGetIndices(isicol,&ic);
212: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213: PetscMalloc1(n+1,&bi);
214: PetscMalloc1(n+1,&bdiag);
215: bi[0] = bdiag[0] = 0;
217: /* linked list for storing column indices of the active row */
218: nlnk = n + 1;
219: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
221: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
223: /* initial FreeSpace size is f*(ai[n]+1) */
224: f = info->fill;
225: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
227: current_space = free_space;
229: for (i=0; i<n; i++) {
230: /* copy previous fill into linked list */
231: nzi = 0;
232: nnz = ai[r[i]+1] - ai[r[i]];
233: ajtmp = aj + ai[r[i]];
234: PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
235: nzi += nlnk;
237: /* add pivot rows into linked list */
238: row = lnk[n];
239: while (row < i) {
240: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
241: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
242: PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
243: nzi += nlnk;
244: row = lnk[row];
245: }
246: bi[i+1] = bi[i] + nzi;
247: im[i] = nzi;
249: /* mark bdiag */
250: nzbd = 0;
251: nnz = nzi;
252: k = lnk[n];
253: while (nnz-- && k < i) {
254: nzbd++;
255: k = lnk[k];
256: }
257: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
259: /* if free space is not available, make more free space */
260: if (current_space->local_remaining<nzi) {
261: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
262: PetscFreeSpaceGet(nnz,¤t_space);
263: reallocs++;
264: }
266: /* copy data into free space, then initialize lnk */
267: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
269: bi_ptr[i] = current_space->array;
270: current_space->array += nzi;
271: current_space->local_used += nzi;
272: current_space->local_remaining -= nzi;
273: }
275: ISRestoreIndices(isrow,&r);
276: ISRestoreIndices(isicol,&ic);
278: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
279: PetscMalloc1(bi[n]+1,&bj);
280: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
281: PetscLLDestroy(lnk,lnkbt);
282: PetscFree2(bi_ptr,im);
284: /* put together the new matrix */
285: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
286: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
287: b = (Mat_SeqBAIJ*)(B)->data;
289: b->free_a = PETSC_TRUE;
290: b->free_ij = PETSC_TRUE;
291: b->singlemalloc = PETSC_FALSE;
293: PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
294: b->j = bj;
295: b->i = bi;
296: b->diag = bdiag;
297: b->free_diag = PETSC_TRUE;
298: b->ilen = NULL;
299: b->imax = NULL;
300: b->row = isrow;
301: b->col = iscol;
302: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
304: PetscObjectReference((PetscObject)isrow);
305: PetscObjectReference((PetscObject)iscol);
306: b->icol = isicol;
307: PetscMalloc1(bs*n+bs,&b->solve_work);
308: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
310: b->maxnz = b->nz = bdiag[0]+1;
312: B->factortype = MAT_FACTOR_LU;
313: B->info.factor_mallocs = reallocs;
314: B->info.fill_ratio_given = f;
316: if (ai[n] != 0) {
317: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
318: } else {
319: B->info.fill_ratio_needed = 0.0;
320: }
321: #if defined(PETSC_USE_INFO)
322: if (ai[n] != 0) {
323: PetscReal af = B->info.fill_ratio_needed;
324: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
325: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
326: PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
327: PetscInfo(A,"for best performance.\n");
328: } else {
329: PetscInfo(A,"Empty matrix\n");
330: }
331: #endif
333: ISIdentity(isrow,&row_identity);
334: ISIdentity(iscol,&col_identity);
336: both_identity = (PetscBool) (row_identity && col_identity);
338: MatSeqBAIJSetNumericFactorization(B,both_identity);
339: return 0;
340: }
342: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
343: {
344: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
345: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
346: PetscBool row_identity,col_identity,both_identity;
347: IS isicol;
348: const PetscInt *r,*ic;
349: PetscInt i,*ai=a->i,*aj=a->j;
350: PetscInt *bi,*bj,*ajtmp;
351: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
352: PetscReal f;
353: PetscInt nlnk,*lnk,k,**bi_ptr;
354: PetscFreeSpaceList free_space=NULL,current_space=NULL;
355: PetscBT lnkbt;
356: PetscBool missing;
359: MatMissingDiagonal(A,&missing,&i);
362: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
363: ISGetIndices(isrow,&r);
364: ISGetIndices(isicol,&ic);
366: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
367: PetscMalloc1(n+1,&bi);
368: PetscMalloc1(n+1,&bdiag);
370: bi[0] = bdiag[0] = 0;
372: /* linked list for storing column indices of the active row */
373: nlnk = n + 1;
374: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
376: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
378: /* initial FreeSpace size is f*(ai[n]+1) */
379: f = info->fill;
380: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
381: current_space = free_space;
383: for (i=0; i<n; i++) {
384: /* copy previous fill into linked list */
385: nzi = 0;
386: nnz = ai[r[i]+1] - ai[r[i]];
387: ajtmp = aj + ai[r[i]];
388: PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
389: nzi += nlnk;
391: /* add pivot rows into linked list */
392: row = lnk[n];
393: while (row < i) {
394: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
395: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
396: PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
397: nzi += nlnk;
398: row = lnk[row];
399: }
400: bi[i+1] = bi[i] + nzi;
401: im[i] = nzi;
403: /* mark bdiag */
404: nzbd = 0;
405: nnz = nzi;
406: k = lnk[n];
407: while (nnz-- && k < i) {
408: nzbd++;
409: k = lnk[k];
410: }
411: bdiag[i] = bi[i] + nzbd;
413: /* if free space is not available, make more free space */
414: if (current_space->local_remaining<nzi) {
415: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
416: PetscFreeSpaceGet(nnz,¤t_space);
417: reallocs++;
418: }
420: /* copy data into free space, then initialize lnk */
421: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
423: bi_ptr[i] = current_space->array;
424: current_space->array += nzi;
425: current_space->local_used += nzi;
426: current_space->local_remaining -= nzi;
427: }
428: #if defined(PETSC_USE_INFO)
429: if (ai[n] != 0) {
430: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
432: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
433: PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
434: PetscInfo(A,"for best performance.\n");
435: } else {
436: PetscInfo(A,"Empty matrix\n");
437: }
438: #endif
440: ISRestoreIndices(isrow,&r);
441: ISRestoreIndices(isicol,&ic);
443: /* destroy list of free space and other temporary array(s) */
444: PetscMalloc1(bi[n]+1,&bj);
445: PetscFreeSpaceContiguous(&free_space,bj);
446: PetscLLDestroy(lnk,lnkbt);
447: PetscFree2(bi_ptr,im);
449: /* put together the new matrix */
450: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
451: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
452: b = (Mat_SeqBAIJ*)(B)->data;
454: b->free_a = PETSC_TRUE;
455: b->free_ij = PETSC_TRUE;
456: b->singlemalloc = PETSC_FALSE;
458: PetscMalloc1((bi[n]+1)*bs2,&b->a);
459: b->j = bj;
460: b->i = bi;
461: b->diag = bdiag;
462: b->free_diag = PETSC_TRUE;
463: b->ilen = NULL;
464: b->imax = NULL;
465: b->row = isrow;
466: b->col = iscol;
467: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
469: PetscObjectReference((PetscObject)isrow);
470: PetscObjectReference((PetscObject)iscol);
471: b->icol = isicol;
473: PetscMalloc1(bs*n+bs,&b->solve_work);
474: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
476: b->maxnz = b->nz = bi[n];
478: (B)->factortype = MAT_FACTOR_LU;
479: (B)->info.factor_mallocs = reallocs;
480: (B)->info.fill_ratio_given = f;
482: if (ai[n] != 0) {
483: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
484: } else {
485: (B)->info.fill_ratio_needed = 0.0;
486: }
488: ISIdentity(isrow,&row_identity);
489: ISIdentity(iscol,&col_identity);
491: both_identity = (PetscBool) (row_identity && col_identity);
493: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
494: return 0;
495: }