Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2.hpp
1/* ========================================================================== */
2/* === klu ================================================================== */
3/* ========================================================================== */
4// @HEADER
5// *****************************************************************************
6// KLU2: A Direct Linear Solver package
7//
8// Copyright 2011 NTESS and the KLU2 contributors.
9// SPDX-License-Identifier: LGPL-2.1-or-later
10// *****************************************************************************
11// @HEADER
12
13/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
14 * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
15 * Davis. This algorithm is what appears as the default sparse LU routine in
16 * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
17 * Note that no column ordering is provided (see COLAMD or AMD for suitable
18 * orderings). SuperLU is based on this algorithm, except that it adds the
19 * use of dense matrix operations on "supernodes" (adjacent columns with
20 * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
21 * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
22 * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
23 * factors (such as for most finite-element problems). However, for matrices
24 * with very sparse LU factors, this algorithm is typically faster than both
25 * SuperLU and UMFPACK, since in this case there is little chance to exploit
26 * dense matrix kernels (the BLAS).
27 *
28 * Only one block of A is factorized, in the BTF form. The input n is the
29 * size of the block; k1 is the first row and column in the block.
30 *
31 * NOTE: no error checking is done on the inputs. This version is not meant to
32 * be called directly by the user. Use klu_factor instead.
33 *
34 * No fill-reducing ordering is provided. The ordering quality of
35 * klu_kernel_factor is the responsibility of the caller. The input A must
36 * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
37 * be provided.
38 *
39 * The input matrix A must be in compressed-column form, with either sorted
40 * or unsorted row indices. Row indices for column j of A is in
41 * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
42 * numerical values. No duplicate entries are allowed.
43 *
44 * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
45 * file for details on permitted use. Note that no code from The MathWorks,
46 * Inc, or from SuperLU, or from any other source appears here. The code is
47 * written from scratch, from the algorithmic description in Gilbert & Peierls'
48 * and Eisenstat & Liu's journal papers [1,2].
49 *
50 * If an input permutation Q is provided, the factorization L*U = A (P,Q)
51 * is computed, where P is determined by partial pivoting, and Q is the input
52 * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
53 * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
54 * input permutation is applied symmetrically to the input matrix. The output
55 * permutation P includes both the partial pivoting ordering and the input
56 * permutation. If Q is NULL, then it is assumed to be the identity
57 * permutation. Q is not modified.
58 *
59 * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
60 * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
61 * vol 9, pp. 862-874, 1988.
62 * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
63 * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
64 * Applic., vol 13, pp. 202-211, 1992.
65 */
66
67/* ========================================================================== */
68
69#ifndef KLU2_HPP
70#define KLU2_HPP
71
72#include "klu2_internal.h"
73#include "klu2_kernel.hpp"
74#include "klu2_memory.hpp"
75
76template <typename Entry, typename Int>
77size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
78(
79 /* inputs, not modified */
80 Int n, /* A is n-by-n. n must be > 0. */
81 Int Ap [ ], /* size n+1, column pointers for A */
82 Int Ai [ ], /* size nz = Ap [n], row indices for A */
83 Entry Ax [ ], /* size nz, values of A */
84 Int Q [ ], /* size n, optional column permutation */
85 double Lsize, /* estimate of number of nonzeros in L */
86
87 /* outputs, not defined on input */
88 Unit **p_LU, /* row indices and values of L and U */
89 Entry Udiag [ ], /* size n, diagonal of U */
90 Int Llen [ ], /* size n, column length of L */
91 Int Ulen [ ], /* size n, column length of U */
92 Int Lip [ ], /* size n, column pointers for L */
93 Int Uip [ ], /* size n, column pointers for U */
94 Int P [ ], /* row permutation, size n */
95 Int *lnz, /* size of L */
96 Int *unz, /* size of U */
97
98 /* workspace, undefined on input */
99 Entry *X, /* size n double's, zero on output */
100 Int *Work, /* size 5n Int's */
101
102 /* inputs, not modified on output */
103 Int k1, /* the block of A is from k1 to k2-1 */
104 Int PSinv [ ], /* inverse of P from symbolic factorization */
105 double Rs [ ], /* scale factors for A */
106
107 /* inputs, modified on output */
108 Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
109 Int Offi [ ],
110 Entry Offx [ ],
111 /* --------------- */
112 KLU_common<Entry, Int> *Common
113)
114{
115 double maxlnz, dunits ;
116 Unit *LU ;
117 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
118 Int lsize, usize, anz, ok ;
119 size_t lusize ;
120 ASSERT (Common != NULL) ;
121
122 /* ---------------------------------------------------------------------- */
123 /* get control parameters, or use defaults */
124 /* ---------------------------------------------------------------------- */
125
126 n = MAX (1, n) ;
127 anz = Ap [n+k1] - Ap [k1] ;
128
129 if (Lsize <= 0)
130 {
131 Lsize = -Lsize ;
132 Lsize = MAX (Lsize, 1.0) ;
133 lsize = (Int) Lsize * anz + n ;
134 }
135 else
136 {
137 lsize = (Int) Lsize ;
138 }
139
140 usize = lsize ;
141
142 lsize = MAX (n+1, lsize) ;
143 usize = MAX (n+1, usize) ;
144
145 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
146 maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
147 lsize = (Int) MIN (maxlnz, lsize) ;
148 usize = (Int) MIN (maxlnz, usize) ;
149
150 PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
151 n, anz, k1, lsize, usize, maxlnz)) ;
152
153 /* ---------------------------------------------------------------------- */
154 /* allocate workspace and outputs */
155 /* ---------------------------------------------------------------------- */
156
157 /* return arguments are not yet assigned */
158 *p_LU = (Unit *) NULL ;
159
160 /* these computations are safe from size_t overflow */
161 W = Work ;
162 Pinv = (Int *) W ; W += n ;
163 Stack = (Int *) W ; W += n ;
164 Flag = (Int *) W ; W += n ;
165 Lpend = (Int *) W ; W += n ;
166 Ap_pos = (Int *) W ; W += n ;
167
168 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
169 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
170 lusize = (size_t) dunits ;
171 ok = !INT_OVERFLOW (dunits) ;
172 LU = (Unit *) (ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL) ;
173 if (LU == NULL)
174 {
175 /* out of memory, or problem too large */
176 Common->status = KLU_OUT_OF_MEMORY ;
177 lusize = 0 ;
178 return (lusize) ;
179 }
180
181 /* ---------------------------------------------------------------------- */
182 /* factorize */
183 /* ---------------------------------------------------------------------- */
184
185 /* with pruning, and non-recursive depth-first-search */
186 lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
187 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
188 X, Stack, Flag, Ap_pos, Lpend,
189 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
190
191 /* ---------------------------------------------------------------------- */
192 /* return LU factors, or return nothing if an error occurred */
193 /* ---------------------------------------------------------------------- */
194
195 if (Common->status < KLU_OK)
196 {
197 LU = (Unit *) KLU_free (LU, lusize, sizeof (Unit), Common) ;
198 lusize = 0 ;
199 }
200 *p_LU = LU ;
201 PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
202 return (lusize) ;
203}
204
205/* ========================================================================== */
206/* === KLU_lsolve =========================================================== */
207/* ========================================================================== */
208
209/* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
210 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
211 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
212 * range 1 to 4. */
213
214template <typename Entry, typename Int>
215void KLU_lsolve
216(
217 /* inputs, not modified: */
218 const Int n,
219 const Int *Lip,
220 const Int *Llen,
221 Unit *LU,
222 const Int nrhs,
223 /* right-hand-side on input, solution to Lx=b on output */
224 Entry *X
225)
226{
227 Entry x [4], lik ;
228 Int *Li ;
229 Entry *Lx ;
230 Int k, p, len, i ;
231
232 switch (nrhs)
233 {
234
235 case 1:
236 for (k = 0 ; k < n ; k++)
237 {
238 x [0] = X [k] ;
239 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
240 /* unit diagonal of L is not stored*/
241 for (p = 0 ; p < len ; p++)
242 {
243 /* X [Li [p]] -= Lx [p] * x [0] ; */
244 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
245 }
246 }
247 break ;
248
249 case 2:
250
251 for (k = 0 ; k < n ; k++)
252 {
253 x [0] = X [2*k ] ;
254 x [1] = X [2*k + 1] ;
255 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
256 for (p = 0 ; p < len ; p++)
257 {
258 i = Li [p] ;
259 lik = Lx [p] ;
260 MULT_SUB (X [2*i], lik, x [0]) ;
261 MULT_SUB (X [2*i + 1], lik, x [1]) ;
262 }
263 }
264 break ;
265
266 case 3:
267
268 for (k = 0 ; k < n ; k++)
269 {
270 x [0] = X [3*k ] ;
271 x [1] = X [3*k + 1] ;
272 x [2] = X [3*k + 2] ;
273 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
274 for (p = 0 ; p < len ; p++)
275 {
276 i = Li [p] ;
277 lik = Lx [p] ;
278 MULT_SUB (X [3*i], lik, x [0]) ;
279 MULT_SUB (X [3*i + 1], lik, x [1]) ;
280 MULT_SUB (X [3*i + 2], lik, x [2]) ;
281 }
282 }
283 break ;
284
285 case 4:
286
287 for (k = 0 ; k < n ; k++)
288 {
289 x [0] = X [4*k ] ;
290 x [1] = X [4*k + 1] ;
291 x [2] = X [4*k + 2] ;
292 x [3] = X [4*k + 3] ;
293 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
294 for (p = 0 ; p < len ; p++)
295 {
296 i = Li [p] ;
297 lik = Lx [p] ;
298 MULT_SUB (X [4*i], lik, x [0]) ;
299 MULT_SUB (X [4*i + 1], lik, x [1]) ;
300 MULT_SUB (X [4*i + 2], lik, x [2]) ;
301 MULT_SUB (X [4*i + 3], lik, x [3]) ;
302 }
303 }
304 break ;
305
306 default:
307 throw ::std::domain_error("More than 4 right-hand sides is not supported.");
308
309 }
310}
311
312/* ========================================================================== */
313/* === KLU_usolve =========================================================== */
314/* ========================================================================== */
315
316/* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
317 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
318 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
319 * range 1 to 4. */
320
321template <typename Entry, typename Int>
322void KLU_usolve
323(
324 /* inputs, not modified: */
325 Int n,
326 const Int Uip [ ],
327 const Int Ulen [ ],
328 Unit LU [ ],
329 Entry Udiag [ ],
330 Int nrhs,
331 /* right-hand-side on input, solution to Ux=b on output */
332 Entry X [ ]
333)
334{
335 Entry x [4], uik, ukk ;
336 Int *Ui ;
337 Entry *Ux ;
338 Int k, p, len, i ;
339
340 switch (nrhs)
341 {
342
343 case 1:
344
345 for (k = n-1 ; k >= 0 ; k--)
346 {
347 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
348 /* x [0] = X [k] / Udiag [k] ; */
349 DIV (x [0], X [k], Udiag [k]) ;
350 X [k] = x [0] ;
351 for (p = 0 ; p < len ; p++)
352 {
353 /* X [Ui [p]] -= Ux [p] * x [0] ; */
354 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
355
356 }
357 }
358
359 break ;
360
361 case 2:
362
363 for (k = n-1 ; k >= 0 ; k--)
364 {
365 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
366 ukk = Udiag [k] ;
367 /* x [0] = X [2*k ] / ukk ;
368 x [1] = X [2*k + 1] / ukk ; */
369 DIV (x [0], X [2*k], ukk) ;
370 DIV (x [1], X [2*k + 1], ukk) ;
371
372 X [2*k ] = x [0] ;
373 X [2*k + 1] = x [1] ;
374 for (p = 0 ; p < len ; p++)
375 {
376 i = Ui [p] ;
377 uik = Ux [p] ;
378 /* X [2*i ] -= uik * x [0] ;
379 X [2*i + 1] -= uik * x [1] ; */
380 MULT_SUB (X [2*i], uik, x [0]) ;
381 MULT_SUB (X [2*i + 1], uik, x [1]) ;
382 }
383 }
384
385 break ;
386
387 case 3:
388
389 for (k = n-1 ; k >= 0 ; k--)
390 {
391 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
392 ukk = Udiag [k] ;
393
394 DIV (x [0], X [3*k], ukk) ;
395 DIV (x [1], X [3*k + 1], ukk) ;
396 DIV (x [2], X [3*k + 2], ukk) ;
397
398 X [3*k ] = x [0] ;
399 X [3*k + 1] = x [1] ;
400 X [3*k + 2] = x [2] ;
401 for (p = 0 ; p < len ; p++)
402 {
403 i = Ui [p] ;
404 uik = Ux [p] ;
405 MULT_SUB (X [3*i], uik, x [0]) ;
406 MULT_SUB (X [3*i + 1], uik, x [1]) ;
407 MULT_SUB (X [3*i + 2], uik, x [2]) ;
408 }
409 }
410
411 break ;
412
413 case 4:
414
415 for (k = n-1 ; k >= 0 ; k--)
416 {
417 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
418 ukk = Udiag [k] ;
419
420 DIV (x [0], X [4*k], ukk) ;
421 DIV (x [1], X [4*k + 1], ukk) ;
422 DIV (x [2], X [4*k + 2], ukk) ;
423 DIV (x [3], X [4*k + 3], ukk) ;
424
425 X [4*k ] = x [0] ;
426 X [4*k + 1] = x [1] ;
427 X [4*k + 2] = x [2] ;
428 X [4*k + 3] = x [3] ;
429 for (p = 0 ; p < len ; p++)
430 {
431 i = Ui [p] ;
432 uik = Ux [p] ;
433
434 MULT_SUB (X [4*i], uik, x [0]) ;
435 MULT_SUB (X [4*i + 1], uik, x [1]) ;
436 MULT_SUB (X [4*i + 2], uik, x [2]) ;
437 MULT_SUB (X [4*i + 3], uik, x [3]) ;
438 }
439 }
440
441 break ;
442
443 }
444}
445
446/* ========================================================================== */
447/* === KLU_ltsolve ========================================================== */
448/* ========================================================================== */
449
450/* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
451 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
452 * and is stored in ROW form with row dimension nrhs. nrhs must in the
453 * range 1 to 4. */
454
455template <typename Entry, typename Int>
456void KLU_ltsolve
457(
458 /* inputs, not modified: */
459 Int n,
460 const Int Lip [ ],
461 const Int Llen [ ],
462 Unit LU [ ],
463 Int nrhs,
464#ifdef COMPLEX
465 Int conj_solve,
466#endif
467 /* right-hand-side on input, solution to L'x=b on output */
468 Entry X [ ]
469)
470{
471 Entry x [4], lik ;
472 Int *Li ;
473 Entry *Lx ;
474 Int k, p, len, i ;
475
476 switch (nrhs)
477 {
478
479 case 1:
480
481 for (k = n-1 ; k >= 0 ; k--)
482 {
483 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
484 x [0] = X [k] ;
485 for (p = 0 ; p < len ; p++)
486 {
487#ifdef COMPLEX
488 if (conj_solve)
489 {
490 /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
491 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
492 }
493 else
494#endif
495 {
496 /*x [0] -= Lx [p] * X [Li [p]] ;*/
497 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
498 }
499 }
500 X [k] = x [0] ;
501 }
502 break ;
503
504 case 2:
505
506 for (k = n-1 ; k >= 0 ; k--)
507 {
508 x [0] = X [2*k ] ;
509 x [1] = X [2*k + 1] ;
510 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
511 for (p = 0 ; p < len ; p++)
512 {
513 i = Li [p] ;
514#ifdef COMPLEX
515 if (conj_solve)
516 {
517 CONJ (lik, Lx [p]) ;
518 }
519 else
520#endif
521 {
522 lik = Lx [p] ;
523 }
524 MULT_SUB (x [0], lik, X [2*i]) ;
525 MULT_SUB (x [1], lik, X [2*i + 1]) ;
526 }
527 X [2*k ] = x [0] ;
528 X [2*k + 1] = x [1] ;
529 }
530 break ;
531
532 case 3:
533
534 for (k = n-1 ; k >= 0 ; k--)
535 {
536 x [0] = X [3*k ] ;
537 x [1] = X [3*k + 1] ;
538 x [2] = X [3*k + 2] ;
539 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
540 for (p = 0 ; p < len ; p++)
541 {
542 i = Li [p] ;
543#ifdef COMPLEX
544 if (conj_solve)
545 {
546 CONJ (lik, Lx [p]) ;
547 }
548 else
549#endif
550 {
551 lik = Lx [p] ;
552 }
553 MULT_SUB (x [0], lik, X [3*i]) ;
554 MULT_SUB (x [1], lik, X [3*i + 1]) ;
555 MULT_SUB (x [2], lik, X [3*i + 2]) ;
556 }
557 X [3*k ] = x [0] ;
558 X [3*k + 1] = x [1] ;
559 X [3*k + 2] = x [2] ;
560 }
561 break ;
562
563 case 4:
564
565 for (k = n-1 ; k >= 0 ; k--)
566 {
567 x [0] = X [4*k ] ;
568 x [1] = X [4*k + 1] ;
569 x [2] = X [4*k + 2] ;
570 x [3] = X [4*k + 3] ;
571 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
572 for (p = 0 ; p < len ; p++)
573 {
574 i = Li [p] ;
575#ifdef COMPLEX
576 if (conj_solve)
577 {
578 CONJ (lik, Lx [p]) ;
579 }
580 else
581#endif
582 {
583 lik = Lx [p] ;
584 }
585 MULT_SUB (x [0], lik, X [4*i]) ;
586 MULT_SUB (x [1], lik, X [4*i + 1]) ;
587 MULT_SUB (x [2], lik, X [4*i + 2]) ;
588 MULT_SUB (x [3], lik, X [4*i + 3]) ;
589 }
590 X [4*k ] = x [0] ;
591 X [4*k + 1] = x [1] ;
592 X [4*k + 2] = x [2] ;
593 X [4*k + 3] = x [3] ;
594 }
595 break ;
596 }
597}
598
599
600/* ========================================================================== */
601/* === KLU_utsolve ========================================================== */
602/* ========================================================================== */
603
604/* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
605 * entry is stored (and appears last in each column of U). Overwrites B
606 * with the solution X. B is n-by-nrhs and is stored in ROW form with row
607 * dimension nrhs. nrhs must be in the range 1 to 4. */
608
609template <typename Entry, typename Int>
610void KLU_utsolve
611(
612 /* inputs, not modified: */
613 const Int n,
614 const Int Uip [ ],
615 const Int Ulen [ ],
616 Unit LU [ ],
617 const Entry Udiag [ ],
618 Int nrhs,
619#ifdef COMPLEX
620 Int conj_solve,
621#endif
622 /* right-hand-side on input, solution to Ux=b on output */
623 Entry X [ ]
624)
625{
626 Entry x [4], uik, ukk ;
627 Int k, p, len, i ;
628 Int *Ui ;
629 Entry *Ux ;
630
631 switch (nrhs)
632 {
633
634 case 1:
635
636 for (k = 0 ; k < n ; k++)
637 {
638 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
639 x [0] = X [k] ;
640 for (p = 0 ; p < len ; p++)
641 {
642#ifdef COMPLEX
643 if (conj_solve)
644 {
645 /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
646 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
647 }
648 else
649#endif
650 {
651 /* x [0] -= Ux [p] * X [Ui [p]] ; */
652 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
653 }
654 }
655#ifdef COMPLEX
656 if (conj_solve)
657 {
658 CONJ (ukk, Udiag [k]) ;
659 }
660 else
661#endif
662 {
663 ukk = Udiag [k] ;
664 }
665 DIV (X [k], x [0], ukk) ;
666 }
667 break ;
668
669 case 2:
670
671 for (k = 0 ; k < n ; k++)
672 {
673 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
674 x [0] = X [2*k ] ;
675 x [1] = X [2*k + 1] ;
676 for (p = 0 ; p < len ; p++)
677 {
678 i = Ui [p] ;
679#ifdef COMPLEX
680 if (conj_solve)
681 {
682 CONJ (uik, Ux [p]) ;
683 }
684 else
685#endif
686 {
687 uik = Ux [p] ;
688 }
689 MULT_SUB (x [0], uik, X [2*i]) ;
690 MULT_SUB (x [1], uik, X [2*i + 1]) ;
691 }
692#ifdef COMPLEX
693 if (conj_solve)
694 {
695 CONJ (ukk, Udiag [k]) ;
696 }
697 else
698#endif
699 {
700 ukk = Udiag [k] ;
701 }
702 DIV (X [2*k], x [0], ukk) ;
703 DIV (X [2*k + 1], x [1], ukk) ;
704 }
705 break ;
706
707 case 3:
708
709 for (k = 0 ; k < n ; k++)
710 {
711 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
712 x [0] = X [3*k ] ;
713 x [1] = X [3*k + 1] ;
714 x [2] = X [3*k + 2] ;
715 for (p = 0 ; p < len ; p++)
716 {
717 i = Ui [p] ;
718#ifdef COMPLEX
719 if (conj_solve)
720 {
721 CONJ (uik, Ux [p]) ;
722 }
723 else
724#endif
725 {
726 uik = Ux [p] ;
727 }
728 MULT_SUB (x [0], uik, X [3*i]) ;
729 MULT_SUB (x [1], uik, X [3*i + 1]) ;
730 MULT_SUB (x [2], uik, X [3*i + 2]) ;
731 }
732#ifdef COMPLEX
733 if (conj_solve)
734 {
735 CONJ (ukk, Udiag [k]) ;
736 }
737 else
738#endif
739 {
740 ukk = Udiag [k] ;
741 }
742 DIV (X [3*k], x [0], ukk) ;
743 DIV (X [3*k + 1], x [1], ukk) ;
744 DIV (X [3*k + 2], x [2], ukk) ;
745 }
746 break ;
747
748 case 4:
749
750 for (k = 0 ; k < n ; k++)
751 {
752 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
753 x [0] = X [4*k ] ;
754 x [1] = X [4*k + 1] ;
755 x [2] = X [4*k + 2] ;
756 x [3] = X [4*k + 3] ;
757 for (p = 0 ; p < len ; p++)
758 {
759 i = Ui [p] ;
760#ifdef COMPLEX
761 if (conj_solve)
762 {
763 CONJ (uik, Ux [p]) ;
764 }
765 else
766#endif
767 {
768 uik = Ux [p] ;
769 }
770 MULT_SUB (x [0], uik, X [4*i]) ;
771 MULT_SUB (x [1], uik, X [4*i + 1]) ;
772 MULT_SUB (x [2], uik, X [4*i + 2]) ;
773 MULT_SUB (x [3], uik, X [4*i + 3]) ;
774 }
775#ifdef COMPLEX
776 if (conj_solve)
777 {
778 CONJ (ukk, Udiag [k]) ;
779 }
780 else
781#endif
782 {
783 ukk = Udiag [k] ;
784 }
785 DIV (X [4*k], x [0], ukk) ;
786 DIV (X [4*k + 1], x [1], ukk) ;
787 DIV (X [4*k + 2], x [2], ukk) ;
788 DIV (X [4*k + 3], x [3], ukk) ;
789 }
790 break ;
791 }
792}
793
794#endif /* KLU2_HPP */
int Ai[]
Row values.
Definition klu2_simple.cpp:54
int Ap[]
Column offsets.
Definition klu2_simple.cpp:52