Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_factor.hpp
1/* ========================================================================== */
2/* === KLU_factor =========================================================== */
3/* ========================================================================== */
4// @HEADER
5// *****************************************************************************
6// KLU2: A Direct Linear Solver package
7//
8// Copyright 2011 NTESS and the KLU2 contributors.
9// SPDX-License-Identifier: LGPL-2.1-or-later
10// *****************************************************************************
11// @HEADER
12
13/* Factor the matrix, after ordering and analyzing it with KLU_analyze
14 * or KLU_analyze_given.
15 */
16
17#ifndef KLU2_FACTOR_HPP
18#define KLU2_FACTOR_HPP
19
20#include "klu2_internal.h"
21#include "klu2.hpp"
22#include "klu2_memory.hpp"
23#include "klu2_scale.hpp"
24
25
26/* ========================================================================== */
27/* === KLU_factor2 ========================================================== */
28/* ========================================================================== */
29
30template <typename Entry, typename Int>
31static void factor2
32(
33 /* inputs, not modified */
34 Int Ap [ ], /* size n+1, column pointers */
35 Int Ai [ ], /* size nz, row indices */
36 Entry Ax [ ],
37 KLU_symbolic<Entry, Int> *Symbolic,
38
39 /* inputs, modified on output: */
40 KLU_numeric<Entry, Int> *Numeric,
41 KLU_common<Entry, Int> *Common
42)
43{
44 double lsize ;
45 double *Lnz, *Rs ;
46 Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
47 *Lip, *Uip, *Llen, *Ulen ;
48 Entry *Offx, *X, s, *Udiag ;
49 Unit **LUbx ;
50 Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
51 nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
52 max_unz_block ;
53
54 /* ---------------------------------------------------------------------- */
55 /* initializations */
56 /* ---------------------------------------------------------------------- */
57
58 /* get the contents of the Symbolic object */
59 n = Symbolic->n ;
60 P = Symbolic->P ;
61 Q = Symbolic->Q ;
62 R = Symbolic->R ;
63 Lnz = Symbolic->Lnz ;
64 nblocks = Symbolic->nblocks ;
65 nzoff = Symbolic->nzoff ;
66
67 Pnum = Numeric->Pnum ;
68 Offp = Numeric->Offp ;
69 Offi = Numeric->Offi ;
70 Offx = (Entry *) Numeric->Offx ;
71
72 Lip = Numeric->Lip ;
73 Uip = Numeric->Uip ;
74 Llen = Numeric->Llen ;
75 Ulen = Numeric->Ulen ;
76 LUbx = (Unit **) Numeric->LUbx ;
77 Udiag = (Entry *) Numeric->Udiag ;
78
79 Rs = Numeric->Rs ;
80 Pinv = Numeric->Pinv ;
81 X = (Entry *) Numeric->Xwork ; /* X is of size n */
82 Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
83 /* 1*maxblock for Pblock */
84 Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
85 Common->nrealloc = 0 ;
86 scale = Common->scale ;
87 max_lnz_block = 1 ;
88 max_unz_block = 1 ;
89
90 /* compute the inverse of P from symbolic analysis. Will be updated to
91 * become the inverse of the numerical factorization when the factorization
92 * is done, for use in KLU_refactor */
93#ifndef NDEBUGKLU2
94 for (k = 0 ; k < n ; k++)
95 {
96 Pinv [k] = AMESOS2_KLU2_EMPTY ;
97 }
98#endif
99 for (k = 0 ; k < n ; k++)
100 {
101 ASSERT (P [k] >= 0 && P [k] < n) ;
102 Pinv [P [k]] = k ;
103 }
104#ifndef NDEBUGKLU2
105 for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
106#endif
107
108 lnz = 0 ;
109 unz = 0 ;
110 Common->noffdiag = 0 ;
111 Offp [0] = 0 ;
112
113 /* ---------------------------------------------------------------------- */
114 /* optionally check input matrix and compute scale factors */
115 /* ---------------------------------------------------------------------- */
116
117 if (scale >= 0)
118 {
119 /* use Pnum as workspace. NOTE: scale factors are not yet permuted
120 * according to the final pivot row ordering, so Rs [oldrow] is the
121 * scale factor for A (oldrow,:), for the user's matrix A. Pnum is
122 * used as workspace in KLU_scale. When the factorization is done,
123 * the scale factors are permuted according to the final pivot row
124 * permutation, so that Rs [k] is the scale factor for the kth row of
125 * A(p,q) where p and q are the final row and column permutations. */
126 /*KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;*/
127 KLU_scale (scale, n, Ap, Ai, Ax, Rs, Pnum, Common) ;
128 if (Common->status < KLU_OK)
129 {
130 /* matrix is invalid */
131 return ;
132 }
133 }
134
135#ifndef NDEBUGKLU2
136 if (scale > 0)
137 {
138 for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
139 }
140#endif
141
142 /* ---------------------------------------------------------------------- */
143 /* factor each block using klu */
144 /* ---------------------------------------------------------------------- */
145
146 for (block = 0 ; block < nblocks ; block++)
147 {
148
149 /* ------------------------------------------------------------------ */
150 /* the block is from rows/columns k1 to k2-1 */
151 /* ------------------------------------------------------------------ */
152
153 k1 = R [block] ;
154 k2 = R [block+1] ;
155 nk = k2 - k1 ;
156 PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
157
158 if (nk == 1)
159 {
160
161 /* -------------------------------------------------------------- */
162 /* singleton case */
163 /* -------------------------------------------------------------- */
164
165 poff = Offp [k1] ;
166 oldcol = Q [k1] ;
167 pend = Ap [oldcol+1] ;
168 CLEAR (s) ;
169
170 if (scale <= 0)
171 {
172 /* no scaling */
173 for (p = Ap [oldcol] ; p < pend ; p++)
174 {
175 oldrow = Ai [p] ;
176 newrow = Pinv [oldrow] ;
177 if (newrow < k1)
178 {
179 Offi [poff] = oldrow ;
180 Offx [poff] = Ax [p] ;
181 poff++ ;
182 }
183 else
184 {
185 ASSERT (newrow == k1) ;
186 PRINTF (("singleton block %d", block)) ;
187 PRINT_ENTRY (Ax [p]) ;
188 s = Ax [p] ;
189 }
190 }
191 }
192 else
193 {
194 /* row scaling. NOTE: scale factors are not yet permuted
195 * according to the pivot row permutation, so Rs [oldrow] is
196 * used below. When the factorization is done, the scale
197 * factors are permuted, so that Rs [newrow] will be used in
198 * klu_solve, klu_tsolve, and klu_rgrowth */
199 for (p = Ap [oldcol] ; p < pend ; p++)
200 {
201 oldrow = Ai [p] ;
202 newrow = Pinv [oldrow] ;
203 if (newrow < k1)
204 {
205 Offi [poff] = oldrow ;
206 /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
207 SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
208 poff++ ;
209 }
210 else
211 {
212 ASSERT (newrow == k1) ;
213 PRINTF (("singleton block %d ", block)) ;
214 PRINT_ENTRY (Ax[p]) ;
215 SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
216 }
217 }
218 }
219
220 Udiag [k1] = s ;
221
222 if (IS_ZERO (s))
223 {
224 /* singular singleton */
225 Common->status = KLU_SINGULAR ;
226 Common->numerical_rank = k1 ;
227 Common->singular_col = oldcol ;
228 if (Common->halt_if_singular)
229 {
230 return ;
231 }
232 }
233
234 Offp [k1+1] = poff ;
235 Pnum [k1] = P [k1] ;
236 lnz++ ;
237 unz++ ;
238
239 }
240 else
241 {
242
243 /* -------------------------------------------------------------- */
244 /* construct and factorize the kth block */
245 /* -------------------------------------------------------------- */
246
247 if (Lnz [block] < 0)
248 {
249 /* COLAMD was used - no estimate of fill-in */
250 /* use 10 times the nnz in A, plus n */
251 lsize = -(Common->initmem) ;
252 }
253 else
254 {
255 lsize = Common->initmem_amd * Lnz [block] + nk ;
256 }
257
258 /* allocates 1 arrays: LUbx [block] */
259 Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
260 lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
261 Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
262 X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
263
264 if (Common->status < KLU_OK ||
265 (Common->status == KLU_SINGULAR && Common->halt_if_singular))
266 {
267 /* out of memory, invalid inputs, or singular */
268 return ;
269 }
270
271 PRINTF (("\n----------------------- L %d:\n", block)) ;
272 ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
273 PRINTF (("\n----------------------- U %d:\n", block)) ;
274 ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
275
276 /* -------------------------------------------------------------- */
277 /* get statistics */
278 /* -------------------------------------------------------------- */
279
280 lnz += lnz_block ;
281 unz += unz_block ;
282 max_lnz_block = MAX (max_lnz_block, lnz_block) ;
283 max_unz_block = MAX (max_unz_block, unz_block) ;
284
285 if (Lnz [block] == AMESOS2_KLU2_EMPTY)
286 {
287 /* revise estimate for subsequent factorization */
288 Lnz [block] = MAX (lnz_block, unz_block) ;
289 }
290
291 /* -------------------------------------------------------------- */
292 /* combine the klu row ordering with the symbolic pre-ordering */
293 /* -------------------------------------------------------------- */
294
295 PRINTF (("Pnum, 1-based:\n")) ;
296 for (k = 0 ; k < nk ; k++)
297 {
298 ASSERT (k + k1 < n) ;
299 ASSERT (Pblock [k] + k1 < n) ;
300 Pnum [k + k1] = P [Pblock [k] + k1] ;
301 PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
302 k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
303 }
304
305 /* the local pivot row permutation Pblock is no longer needed */
306 }
307 }
308 ASSERT (nzoff == Offp [n]) ;
309 PRINTF (("\n------------------- Off diagonal entries:\n")) ;
310 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
311
312 Numeric->lnz = lnz ;
313 Numeric->unz = unz ;
314 Numeric->max_lnz_block = max_lnz_block ;
315 Numeric->max_unz_block = max_unz_block ;
316
317 /* compute the inverse of Pnum */
318#ifndef NDEBUGKLU2
319 for (k = 0 ; k < n ; k++)
320 {
321 Pinv [k] = AMESOS2_KLU2_EMPTY ;
322 }
323#endif
324 for (k = 0 ; k < n ; k++)
325 {
326 ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
327 Pinv [Pnum [k]] = k ;
328 }
329#ifndef NDEBUGKLU2
330 for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
331#endif
332
333 /* permute scale factors Rs according to pivotal row order */
334 if (scale > 0)
335 {
336 for (k = 0 ; k < n ; k++)
337 {
338 /* TODO : Check. REAL(X[k]) Can be just X[k] */
339 /* REAL (X [k]) = Rs [Pnum [k]] ; */
340 X [k] = Rs [Pnum [k]] ;
341 }
342 for (k = 0 ; k < n ; k++)
343 {
344 Rs [k] = REAL (X [k]) ;
345 }
346 }
347
348 PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
349 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
350
351 /* apply the pivot row permutations to the off-diagonal entries */
352 for (p = 0 ; p < nzoff ; p++)
353 {
354 ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
355 Offi [p] = Pinv [Offi [p]] ;
356 }
357
358 PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
359 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
360
361#ifndef NDEBUGKLU2
362 {
363 PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
364 Entry ss, *Udiag = Numeric->Udiag ;
365 for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
366 {
367 k1 = R [block] ;
368 k2 = R [block+1] ;
369 nk = k2 - k1 ;
370 PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
371 if (nk == 1)
372 {
373 PRINTF (("singleton ")) ;
374 /* ENTRY_PRINT (singleton [block]) ; */
375 ss = Udiag [k1] ;
376 PRINT_ENTRY (ss) ;
377 }
378 else
379 {
380 Int *Lip, *Uip, *Llen, *Ulen ;
381 Unit *LU ;
382 Lip = Numeric->Lip + k1 ;
383 Llen = Numeric->Llen + k1 ;
384 LU = (Unit *) Numeric->LUbx [block] ;
385 PRINTF (("\n---- L block %d\n", block));
386 ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
387 Uip = Numeric->Uip + k1 ;
388 Ulen = Numeric->Ulen + k1 ;
389 PRINTF (("\n---- U block %d\n", block)) ;
390 ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
391 }
392 }
393 }
394#endif
395}
396
397
398
399/* ========================================================================== */
400/* === KLU_factor =========================================================== */
401/* ========================================================================== */
402
403template <typename Entry, typename Int>
404KLU_numeric<Entry, Int> *KLU_factor /* returns NULL if error, or a valid
405 KLU_numeric object if successful */
406(
407 /* --- inputs --- */
408 Int Ap [ ], /* size n+1, column pointers */
409 Int Ai [ ], /* size nz, row indices */
410 /* TODO : Checked, switch from double to Entry , still make sure works in all cases */
411 Entry Ax [ ],
412 KLU_symbolic<Entry, Int> *Symbolic,
413 /* -------------- */
414 KLU_common<Entry, Int> *Common
415)
416{
417 Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
418
419 KLU_numeric<Entry, Int> *Numeric ;
420 size_t n1, nzoff1, s, b6, n3 ;
421
422 if (Common == NULL)
423 {
424 return (NULL) ;
425 }
426 Common->status = KLU_OK ;
427 Common->numerical_rank = AMESOS2_KLU2_EMPTY ;
428 Common->singular_col = AMESOS2_KLU2_EMPTY ;
429
430 /* ---------------------------------------------------------------------- */
431 /* get the contents of the Symbolic object */
432 /* ---------------------------------------------------------------------- */
433
434 /* check for a valid Symbolic object */
435 if (Symbolic == NULL)
436 {
437 Common->status = KLU_INVALID ;
438 return (NULL) ;
439 }
440
441 n = Symbolic->n ;
442 nzoff = Symbolic->nzoff ;
443 nblocks = Symbolic->nblocks ;
444 maxblock = Symbolic->maxblock ;
445
446 PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
447 n, nzoff, nblocks, maxblock)) ;
448
449 /* ---------------------------------------------------------------------- */
450 /* get control parameters and make sure they are in the proper range */
451 /* ---------------------------------------------------------------------- */
452
453 Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
454 Common->initmem = MAX (1.0, Common->initmem) ;
455 Common->tol = MIN (Common->tol, 1.0) ;
456 Common->tol = MAX (0.0, Common->tol) ;
457 Common->memgrow = MAX (1.0, Common->memgrow) ;
458
459 /* ---------------------------------------------------------------------- */
460 /* allocate the Numeric object */
461 /* ---------------------------------------------------------------------- */
462
463 /* this will not cause size_t overflow (already checked by KLU_symbolic) */
464 n1 = ((size_t) n) + 1 ;
465 nzoff1 = ((size_t) nzoff) + 1 ;
466
467 Numeric = (KLU_numeric<Entry, Int> *) KLU_malloc (sizeof (KLU_numeric<Entry, Int>), 1, Common) ;
468 if (Common->status < KLU_OK)
469 {
470 /* out of memory */
471 Common->status = KLU_OUT_OF_MEMORY ;
472 return (NULL) ;
473 }
474 Numeric->n = n ;
475 Numeric->nblocks = nblocks ;
476 Numeric->nzoff = nzoff ;
477 Numeric->Pnum = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
478 Numeric->Offp = (Int *) KLU_malloc (n1, sizeof (Int), Common) ;
479 Numeric->Offi = (Int *) KLU_malloc (nzoff1, sizeof (Int), Common) ;
480 Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
481
482 Numeric->Lip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
483 Numeric->Uip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
484 Numeric->Llen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
485 Numeric->Ulen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
486
487 Numeric->LUsize = (size_t *) KLU_malloc (nblocks, sizeof (size_t), Common) ;
488
489 Numeric->LUbx = (void **) KLU_malloc (nblocks, sizeof (Unit *), Common) ;
490 if (Numeric->LUbx != NULL)
491 {
492 for (k = 0 ; k < nblocks ; k++)
493 {
494 Numeric->LUbx [k] = NULL ;
495 }
496 }
497
498 Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
499
500 if (Common->scale > 0)
501 {
502 Numeric->Rs = (double *) KLU_malloc (n, sizeof (double), Common) ;
503 }
504 else
505 {
506 /* no scaling */
507 Numeric->Rs = NULL ;
508 }
509
510 Numeric->Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
511
512 /* allocate permanent workspace for factorization and solve. Note that the
513 * solver will use an Xwork of size 4n, whereas the factorization codes use
514 * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
515 * uses an Xwork of size 2n. Total size is:
516 *
517 * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
518 */
519 s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
520 n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
521 b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
522 Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
523 Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
524 Numeric->Xwork = Numeric->Work ;
525 Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
526 if (!ok || Common->status < KLU_OK)
527 {
528 /* out of memory or problem too large */
529 Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
530 KLU_free_numeric (&Numeric, Common) ;
531 return (NULL) ;
532 }
533
534 /* ---------------------------------------------------------------------- */
535 /* factorize the blocks */
536 /* ---------------------------------------------------------------------- */
537
538 factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
539
540 /* ---------------------------------------------------------------------- */
541 /* return or free the Numeric object */
542 /* ---------------------------------------------------------------------- */
543
544 if (Common->status < KLU_OK)
545 {
546 /* out of memory or inputs invalid */
547 KLU_free_numeric (&Numeric, Common) ;
548 }
549 else if (Common->status == KLU_SINGULAR)
550 {
551 if (Common->halt_if_singular)
552 {
553 /* Matrix is singular, and the Numeric object is only partially
554 * defined because we halted early. This is the default case for
555 * a singular matrix. */
556 KLU_free_numeric (&Numeric, Common) ;
557 }
558 }
559 else if (Common->status == KLU_OK)
560 {
561 /* successful non-singular factorization */
562 Common->numerical_rank = n ;
563 Common->singular_col = n ;
564 }
565 return (Numeric) ;
566}
567
568#endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
int Ai[]
Row values.
Definition klu2_simple.cpp:54
int Ap[]
Column offsets.
Definition klu2_simple.cpp:52