Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_tsolve.hpp
1/* ========================================================================== */
2/* === KLU_tsolve =========================================================== */
3/* ========================================================================== */
4// @HEADER
5// *****************************************************************************
6// KLU2: A Direct Linear Solver package
7//
8// Copyright 2011 NTESS and the KLU2 contributors.
9// SPDX-License-Identifier: LGPL-2.1-or-later
10// *****************************************************************************
11// @HEADER
12
13/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
14 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
15 * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
16 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
17 * Numeric->Iwork).
18 */
19
20#ifndef KLU2_TSOLVE_HPP
21#define KLU2_TSOLVE_HPP
22
23#include "klu2_internal.h"
24#include "klu2.hpp"
25
26template <typename Entry, typename Int>
27Int KLU_tsolve
28(
29 /* inputs, not modified */
30 KLU_symbolic<Entry, Int> *Symbolic,
31 KLU_numeric<Entry, Int> *Numeric,
32 Int d, /* leading dimension of B */
33 Int nrhs, /* number of right-hand-sides */
34
35 /* right-hand-side on input, overwritten with solution to Ax=b on output */
36 Entry B [ ], /* size n*nrhs, in column-oriented form, with
37 * leading dimension d. */
38#ifdef COMPLEX
39 Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
40 * array transpose solve. Used for the complex
41 * case only. */
42#endif
43 /* --------------- */
44 KLU_common<Entry, Int> *Common
45)
46{
47 Entry x [4], offik, s ;
48 double rs, *Rs ;
49 Entry *Offx, *X, *Bz, *Udiag ;
50 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
51 Unit **LUbx ;
52 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
53
54 /* ---------------------------------------------------------------------- */
55 /* check inputs */
56 /* ---------------------------------------------------------------------- */
57
58 if (Common == NULL)
59 {
60 return (FALSE) ;
61 }
62 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
63 B == NULL)
64 {
65 Common->status = KLU_INVALID ;
66 return (FALSE) ;
67 }
68 Common->status = KLU_OK ;
69
70 /* ---------------------------------------------------------------------- */
71 /* get the contents of the Symbolic object */
72 /* ---------------------------------------------------------------------- */
73
74 Bz = (Entry *) B ;
75 n = Symbolic->n ;
76 nblocks = Symbolic->nblocks ;
77 Q = Symbolic->Q ;
78 R = Symbolic->R ;
79
80 /* ---------------------------------------------------------------------- */
81 /* get the contents of the Numeric object */
82 /* ---------------------------------------------------------------------- */
83
84 ASSERT (nblocks == Numeric->nblocks) ;
85 Pnum = Numeric->Pnum ;
86 Offp = Numeric->Offp ;
87 Offi = Numeric->Offi ;
88 Offx = (Entry *) Numeric->Offx ;
89
90 Lip = Numeric->Lip ;
91 Llen = Numeric->Llen ;
92 Uip = Numeric->Uip ;
93 Ulen = Numeric->Ulen ;
94 LUbx = (Unit **) Numeric->LUbx ;
95 Udiag = (Entry *) Numeric->Udiag ;
96
97 Rs = Numeric->Rs ;
98 X = (Entry *) Numeric->Xwork ;
99 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
100
101 /* ---------------------------------------------------------------------- */
102 /* solve in chunks of 4 columns at a time */
103 /* ---------------------------------------------------------------------- */
104
105 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
106 {
107
108 /* ------------------------------------------------------------------ */
109 /* get the size of the current chunk */
110 /* ------------------------------------------------------------------ */
111
112 nr = MIN (nrhs - chunk, 4) ;
113
114 /* ------------------------------------------------------------------ */
115 /* permute the right hand side, X = Q'*B */
116 /* ------------------------------------------------------------------ */
117
118 switch (nr)
119 {
120
121 case 1:
122
123 for (k = 0 ; k < n ; k++)
124 {
125 X [k] = Bz [Q [k]] ;
126 }
127 break ;
128
129 case 2:
130
131 for (k = 0 ; k < n ; k++)
132 {
133 i = Q [k] ;
134 X [2*k ] = Bz [i ] ;
135 X [2*k + 1] = Bz [i + d ] ;
136 }
137 break ;
138
139 case 3:
140
141 for (k = 0 ; k < n ; k++)
142 {
143 i = Q [k] ;
144 X [3*k ] = Bz [i ] ;
145 X [3*k + 1] = Bz [i + d ] ;
146 X [3*k + 2] = Bz [i + d*2] ;
147 }
148 break ;
149
150 case 4:
151
152 for (k = 0 ; k < n ; k++)
153 {
154 i = Q [k] ;
155 X [4*k ] = Bz [i ] ;
156 X [4*k + 1] = Bz [i + d ] ;
157 X [4*k + 2] = Bz [i + d*2] ;
158 X [4*k + 3] = Bz [i + d*3] ;
159 }
160 break ;
161
162 }
163
164 /* ------------------------------------------------------------------ */
165 /* solve X = (L*U + Off)'\X */
166 /* ------------------------------------------------------------------ */
167
168 for (block = 0 ; block < nblocks ; block++)
169 {
170
171 /* -------------------------------------------------------------- */
172 /* the block of size nk is from rows/columns k1 to k2-1 */
173 /* -------------------------------------------------------------- */
174
175 k1 = R [block] ;
176 k2 = R [block+1] ;
177 nk = k2 - k1 ;
178 PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
179
180 /* -------------------------------------------------------------- */
181 /* block back-substitution for the off-diagonal-block entries */
182 /* -------------------------------------------------------------- */
183
184 if (block > 0)
185 {
186 switch (nr)
187 {
188
189 case 1:
190
191 for (k = k1 ; k < k2 ; k++)
192 {
193 pend = Offp [k+1] ;
194 for (p = Offp [k] ; p < pend ; p++)
195 {
196#ifdef COMPLEX
197 if (conj_solve)
198 {
199 MULT_SUB_CONJ (X [k], X [Offi [p]],
200 Offx [p]) ;
201 }
202 else
203#endif
204 {
205 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
206 }
207 }
208 }
209 break ;
210
211 case 2:
212
213 for (k = k1 ; k < k2 ; k++)
214 {
215 pend = Offp [k+1] ;
216 x [0] = X [2*k ] ;
217 x [1] = X [2*k + 1] ;
218 for (p = Offp [k] ; p < pend ; p++)
219 {
220 i = Offi [p] ;
221#ifdef COMPLEX
222 if (conj_solve)
223 {
224 CONJ (offik, Offx [p]) ;
225 }
226 else
227#endif
228 {
229 offik = Offx [p] ;
230 }
231 MULT_SUB (x [0], offik, X [2*i]) ;
232 MULT_SUB (x [1], offik, X [2*i + 1]) ;
233 }
234 X [2*k ] = x [0] ;
235 X [2*k + 1] = x [1] ;
236 }
237 break ;
238
239 case 3:
240
241 for (k = k1 ; k < k2 ; k++)
242 {
243 pend = Offp [k+1] ;
244 x [0] = X [3*k ] ;
245 x [1] = X [3*k + 1] ;
246 x [2] = X [3*k + 2] ;
247 for (p = Offp [k] ; p < pend ; p++)
248 {
249 i = Offi [p] ;
250#ifdef COMPLEX
251 if (conj_solve)
252 {
253 CONJ (offik, Offx [p]) ;
254 }
255 else
256#endif
257 {
258 offik = Offx [p] ;
259 }
260 MULT_SUB (x [0], offik, X [3*i]) ;
261 MULT_SUB (x [1], offik, X [3*i + 1]) ;
262 MULT_SUB (x [2], offik, X [3*i + 2]) ;
263 }
264 X [3*k ] = x [0] ;
265 X [3*k + 1] = x [1] ;
266 X [3*k + 2] = x [2] ;
267 }
268 break ;
269
270 case 4:
271
272 for (k = k1 ; k < k2 ; k++)
273 {
274 pend = Offp [k+1] ;
275 x [0] = X [4*k ] ;
276 x [1] = X [4*k + 1] ;
277 x [2] = X [4*k + 2] ;
278 x [3] = X [4*k + 3] ;
279 for (p = Offp [k] ; p < pend ; p++)
280 {
281 i = Offi [p] ;
282#ifdef COMPLEX
283 if (conj_solve)
284 {
285 CONJ(offik, Offx [p]) ;
286 }
287 else
288#endif
289 {
290 offik = Offx [p] ;
291 }
292 MULT_SUB (x [0], offik, X [4*i]) ;
293 MULT_SUB (x [1], offik, X [4*i + 1]) ;
294 MULT_SUB (x [2], offik, X [4*i + 2]) ;
295 MULT_SUB (x [3], offik, X [4*i + 3]) ;
296 }
297 X [4*k ] = x [0] ;
298 X [4*k + 1] = x [1] ;
299 X [4*k + 2] = x [2] ;
300 X [4*k + 3] = x [3] ;
301 }
302 break ;
303 }
304 }
305
306 /* -------------------------------------------------------------- */
307 /* solve the block system */
308 /* -------------------------------------------------------------- */
309
310 if (nk == 1)
311 {
312#ifdef COMPLEX
313 if (conj_solve)
314 {
315 CONJ (s, Udiag [k1]) ;
316 }
317 else
318#endif
319 {
320 s = Udiag [k1] ;
321 }
322 switch (nr)
323 {
324
325 case 1:
326 DIV (X [k1], X [k1], s) ;
327 break ;
328
329 case 2:
330 DIV (X [2*k1], X [2*k1], s) ;
331 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
332 break ;
333
334 case 3:
335 DIV (X [3*k1], X [3*k1], s) ;
336 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
337 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
338 break ;
339
340 case 4:
341 DIV (X [4*k1], X [4*k1], s) ;
342 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
343 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
344 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
345 break ;
346
347 }
348 }
349 else
350 {
351 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
352 Udiag + k1, nr,
353#ifdef COMPLEX
354 conj_solve,
355#endif
356 X + nr*k1) ;
357 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
358#ifdef COMPLEX
359 conj_solve,
360#endif
361 X + nr*k1) ;
362 }
363 }
364
365 /* ------------------------------------------------------------------ */
366 /* scale and permute the result, Bz = P'(R\X) */
367 /* ------------------------------------------------------------------ */
368
369 if (Rs == NULL)
370 {
371
372 /* no scaling */
373 switch (nr)
374 {
375
376 case 1:
377
378 for (k = 0 ; k < n ; k++)
379 {
380 Bz [Pnum [k]] = X [k] ;
381 }
382 break ;
383
384 case 2:
385
386 for (k = 0 ; k < n ; k++)
387 {
388 i = Pnum [k] ;
389 Bz [i ] = X [2*k ] ;
390 Bz [i + d ] = X [2*k + 1] ;
391 }
392 break ;
393
394 case 3:
395
396 for (k = 0 ; k < n ; k++)
397 {
398 i = Pnum [k] ;
399 Bz [i ] = X [3*k ] ;
400 Bz [i + d ] = X [3*k + 1] ;
401 Bz [i + d*2] = X [3*k + 2] ;
402 }
403 break ;
404
405 case 4:
406
407 for (k = 0 ; k < n ; k++)
408 {
409 i = Pnum [k] ;
410 Bz [i ] = X [4*k ] ;
411 Bz [i + d ] = X [4*k + 1] ;
412 Bz [i + d*2] = X [4*k + 2] ;
413 Bz [i + d*3] = X [4*k + 3] ;
414 }
415 break ;
416 }
417
418 }
419 else
420 {
421
422 switch (nr)
423 {
424
425 case 1:
426
427 for (k = 0 ; k < n ; k++)
428 {
429 SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
430 }
431 break ;
432
433 case 2:
434
435 for (k = 0 ; k < n ; k++)
436 {
437 i = Pnum [k] ;
438 rs = Rs [k] ;
439 SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
440 SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
441 }
442 break ;
443
444 case 3:
445
446 for (k = 0 ; k < n ; k++)
447 {
448 i = Pnum [k] ;
449 rs = Rs [k] ;
450 SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
451 SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
452 SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
453 }
454 break ;
455
456 case 4:
457
458 for (k = 0 ; k < n ; k++)
459 {
460 i = Pnum [k] ;
461 rs = Rs [k] ;
462 SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
463 SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
464 SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
465 SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
466 }
467 break ;
468 }
469 }
470
471 /* ------------------------------------------------------------------ */
472 /* go to the next chunk of B */
473 /* ------------------------------------------------------------------ */
474
475 Bz += d*4 ;
476 }
477 return (TRUE) ;
478}
479
480
481// Require Xout and B to have equal number of entries, pre-allocated
482template <typename Entry, typename Int>
483Int KLU_tsolve2
484(
485 /* inputs, not modified */
486 KLU_symbolic<Entry, Int> *Symbolic,
487 KLU_numeric<Entry, Int> *Numeric,
488 Int d, /* leading dimension of B */
489 Int nrhs, /* number of right-hand-sides */
490
491 /* right-hand-side on input, overwritten with solution to Ax=b on output */
492 Entry B [ ], /* size n*1, in column-oriented form, with
493 * leading dimension d. */
494 Entry Xout [ ], /* size n*1, in column-oriented form, with
495 * leading dimension d. */
496#ifdef COMPLEX
497 Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
498 * array transpose solve. Used for the complex
499 * case only. */
500#endif
501 /* --------------- */
502 KLU_common<Entry, Int> *Common
503)
504{
505 Entry x [4], offik, s ;
506 double rs, *Rs ;
507 Entry *Offx, *X, *Bz, *Udiag ;
508 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
509 Unit **LUbx ;
510 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
511
512 /* ---------------------------------------------------------------------- */
513 /* check inputs */
514 /* ---------------------------------------------------------------------- */
515
516 if (Common == NULL)
517 {
518 return (FALSE) ;
519 }
520 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
521 B == NULL || Xout == NULL)
522 {
523 Common->status = KLU_INVALID ;
524 return (FALSE) ;
525 }
526 Common->status = KLU_OK ;
527
528 /* ---------------------------------------------------------------------- */
529 /* get the contents of the Symbolic object */
530 /* ---------------------------------------------------------------------- */
531
532 Bz = (Entry *) B ;
533 n = Symbolic->n ;
534 nblocks = Symbolic->nblocks ;
535 Q = Symbolic->Q ;
536 R = Symbolic->R ;
537
538 /* ---------------------------------------------------------------------- */
539 /* get the contents of the Numeric object */
540 /* ---------------------------------------------------------------------- */
541
542 ASSERT (nblocks == Numeric->nblocks) ;
543 Pnum = Numeric->Pnum ;
544 Offp = Numeric->Offp ;
545 Offi = Numeric->Offi ;
546 Offx = (Entry *) Numeric->Offx ;
547
548 Lip = Numeric->Lip ;
549 Llen = Numeric->Llen ;
550 Uip = Numeric->Uip ;
551 Ulen = Numeric->Ulen ;
552 LUbx = (Unit **) Numeric->LUbx ;
553 Udiag = (Entry *) Numeric->Udiag ;
554
555 Rs = Numeric->Rs ;
556 X = (Entry *) Numeric->Xwork ;
557 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
558
559 /* ---------------------------------------------------------------------- */
560 /* solve in chunks of 4 columns at a time */
561 /* ---------------------------------------------------------------------- */
562
563 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
564 {
565
566 /* ------------------------------------------------------------------ */
567 /* get the size of the current chunk */
568 /* ------------------------------------------------------------------ */
569
570 nr = MIN (nrhs - chunk, 4) ;
571
572 /* ------------------------------------------------------------------ */
573 /* permute the right hand side, X = Q'*B */
574 /* ------------------------------------------------------------------ */
575
576 switch (nr)
577 {
578
579 case 1:
580
581 for (k = 0 ; k < n ; k++)
582 {
583 X [k] = Bz [Q [k]] ;
584 }
585 break ;
586
587 case 2:
588
589 for (k = 0 ; k < n ; k++)
590 {
591 i = Q [k] ;
592 X [2*k ] = Bz [i ] ;
593 X [2*k + 1] = Bz [i + d ] ;
594 }
595 break ;
596
597 case 3:
598
599 for (k = 0 ; k < n ; k++)
600 {
601 i = Q [k] ;
602 X [3*k ] = Bz [i ] ;
603 X [3*k + 1] = Bz [i + d ] ;
604 X [3*k + 2] = Bz [i + d*2] ;
605 }
606 break ;
607
608 case 4:
609
610 for (k = 0 ; k < n ; k++)
611 {
612 i = Q [k] ;
613 X [4*k ] = Bz [i ] ;
614 X [4*k + 1] = Bz [i + d ] ;
615 X [4*k + 2] = Bz [i + d*2] ;
616 X [4*k + 3] = Bz [i + d*3] ;
617 }
618 break ;
619
620 }
621
622 /* ------------------------------------------------------------------ */
623 /* solve X = (L*U + Off)'\X */
624 /* ------------------------------------------------------------------ */
625
626 for (block = 0 ; block < nblocks ; block++)
627 {
628
629 /* -------------------------------------------------------------- */
630 /* the block of size nk is from rows/columns k1 to k2-1 */
631 /* -------------------------------------------------------------- */
632
633 k1 = R [block] ;
634 k2 = R [block+1] ;
635 nk = k2 - k1 ;
636 PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
637
638 /* -------------------------------------------------------------- */
639 /* block back-substitution for the off-diagonal-block entries */
640 /* -------------------------------------------------------------- */
641
642 if (block > 0)
643 {
644 switch (nr)
645 {
646
647 case 1:
648
649 for (k = k1 ; k < k2 ; k++)
650 {
651 pend = Offp [k+1] ;
652 for (p = Offp [k] ; p < pend ; p++)
653 {
654#ifdef COMPLEX
655 if (conj_solve)
656 {
657 MULT_SUB_CONJ (X [k], X [Offi [p]],
658 Offx [p]) ;
659 }
660 else
661#endif
662 {
663 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
664 }
665 }
666 }
667 break ;
668
669 case 2:
670
671 for (k = k1 ; k < k2 ; k++)
672 {
673 pend = Offp [k+1] ;
674 x [0] = X [2*k ] ;
675 x [1] = X [2*k + 1] ;
676 for (p = Offp [k] ; p < pend ; p++)
677 {
678 i = Offi [p] ;
679#ifdef COMPLEX
680 if (conj_solve)
681 {
682 CONJ (offik, Offx [p]) ;
683 }
684 else
685#endif
686 {
687 offik = Offx [p] ;
688 }
689 MULT_SUB (x [0], offik, X [2*i]) ;
690 MULT_SUB (x [1], offik, X [2*i + 1]) ;
691 }
692 X [2*k ] = x [0] ;
693 X [2*k + 1] = x [1] ;
694 }
695 break ;
696
697 case 3:
698
699 for (k = k1 ; k < k2 ; k++)
700 {
701 pend = Offp [k+1] ;
702 x [0] = X [3*k ] ;
703 x [1] = X [3*k + 1] ;
704 x [2] = X [3*k + 2] ;
705 for (p = Offp [k] ; p < pend ; p++)
706 {
707 i = Offi [p] ;
708#ifdef COMPLEX
709 if (conj_solve)
710 {
711 CONJ (offik, Offx [p]) ;
712 }
713 else
714#endif
715 {
716 offik = Offx [p] ;
717 }
718 MULT_SUB (x [0], offik, X [3*i]) ;
719 MULT_SUB (x [1], offik, X [3*i + 1]) ;
720 MULT_SUB (x [2], offik, X [3*i + 2]) ;
721 }
722 X [3*k ] = x [0] ;
723 X [3*k + 1] = x [1] ;
724 X [3*k + 2] = x [2] ;
725 }
726 break ;
727
728 case 4:
729
730 for (k = k1 ; k < k2 ; k++)
731 {
732 pend = Offp [k+1] ;
733 x [0] = X [4*k ] ;
734 x [1] = X [4*k + 1] ;
735 x [2] = X [4*k + 2] ;
736 x [3] = X [4*k + 3] ;
737 for (p = Offp [k] ; p < pend ; p++)
738 {
739 i = Offi [p] ;
740#ifdef COMPLEX
741 if (conj_solve)
742 {
743 CONJ(offik, Offx [p]) ;
744 }
745 else
746#endif
747 {
748 offik = Offx [p] ;
749 }
750 MULT_SUB (x [0], offik, X [4*i]) ;
751 MULT_SUB (x [1], offik, X [4*i + 1]) ;
752 MULT_SUB (x [2], offik, X [4*i + 2]) ;
753 MULT_SUB (x [3], offik, X [4*i + 3]) ;
754 }
755 X [4*k ] = x [0] ;
756 X [4*k + 1] = x [1] ;
757 X [4*k + 2] = x [2] ;
758 X [4*k + 3] = x [3] ;
759 }
760 break ;
761 }
762 }
763
764 /* -------------------------------------------------------------- */
765 /* solve the block system */
766 /* -------------------------------------------------------------- */
767
768 if (nk == 1)
769 {
770#ifdef COMPLEX
771 if (conj_solve)
772 {
773 CONJ (s, Udiag [k1]) ;
774 }
775 else
776#endif
777 {
778 s = Udiag [k1] ;
779 }
780 switch (nr)
781 {
782
783 case 1:
784 DIV (X [k1], X [k1], s) ;
785 break ;
786
787 case 2:
788 DIV (X [2*k1], X [2*k1], s) ;
789 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
790 break ;
791
792 case 3:
793 DIV (X [3*k1], X [3*k1], s) ;
794 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
795 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
796 break ;
797
798 case 4:
799 DIV (X [4*k1], X [4*k1], s) ;
800 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
801 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
802 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
803 break ;
804
805 }
806 }
807 else
808 {
809 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
810 Udiag + k1, nr,
811#ifdef COMPLEX
812 conj_solve,
813#endif
814 X + nr*k1) ;
815 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
816#ifdef COMPLEX
817 conj_solve,
818#endif
819 X + nr*k1) ;
820 }
821 }
822
823 /* ------------------------------------------------------------------ */
824 /* scale and permute the result, Bz = P'(R\X) */
825 /* store result in Xout */
826 /* ------------------------------------------------------------------ */
827
828 if (Rs == NULL)
829 {
830
831 /* no scaling */
832 switch (nr)
833 {
834
835 case 1:
836
837 for (k = 0 ; k < n ; k++)
838 {
839 Xout [Pnum [k]] = X [k] ;
840 }
841 break ;
842
843 case 2:
844
845 for (k = 0 ; k < n ; k++)
846 {
847 i = Pnum [k] ;
848 Xout [i ] = X [2*k ] ;
849 Xout [i + d ] = X [2*k + 1] ;
850 }
851 break ;
852
853 case 3:
854
855 for (k = 0 ; k < n ; k++)
856 {
857 i = Pnum [k] ;
858 Xout [i ] = X [3*k ] ;
859 Xout [i + d ] = X [3*k + 1] ;
860 Xout [i + d*2] = X [3*k + 2] ;
861 }
862 break ;
863
864 case 4:
865
866 for (k = 0 ; k < n ; k++)
867 {
868 i = Pnum [k] ;
869 Xout [i ] = X [4*k ] ;
870 Xout [i + d ] = X [4*k + 1] ;
871 Xout [i + d*2] = X [4*k + 2] ;
872 Xout [i + d*3] = X [4*k + 3] ;
873 }
874 break ;
875 }
876
877 }
878 else
879 {
880
881 switch (nr)
882 {
883
884 case 1:
885
886 for (k = 0 ; k < n ; k++)
887 {
888 SCALE_DIV_ASSIGN (Xout [Pnum [k]], X [k], Rs [k]) ;
889 }
890 break ;
891
892 case 2:
893
894 for (k = 0 ; k < n ; k++)
895 {
896 i = Pnum [k] ;
897 rs = Rs [k] ;
898 SCALE_DIV_ASSIGN (Xout [i], X [2*k], rs) ;
899 SCALE_DIV_ASSIGN (Xout [i + d], X [2*k + 1], rs) ;
900 }
901 break ;
902
903 case 3:
904
905 for (k = 0 ; k < n ; k++)
906 {
907 i = Pnum [k] ;
908 rs = Rs [k] ;
909 SCALE_DIV_ASSIGN (Xout [i], X [3*k], rs) ;
910 SCALE_DIV_ASSIGN (Xout [i + d], X [3*k + 1], rs) ;
911 SCALE_DIV_ASSIGN (Xout [i + d*2], X [3*k + 2], rs) ;
912 }
913 break ;
914
915 case 4:
916
917 for (k = 0 ; k < n ; k++)
918 {
919 i = Pnum [k] ;
920 rs = Rs [k] ;
921 SCALE_DIV_ASSIGN (Xout [i], X [4*k], rs) ;
922 SCALE_DIV_ASSIGN (Xout [i + d], X [4*k + 1], rs) ;
923 SCALE_DIV_ASSIGN (Xout [i + d*2], X [4*k + 2], rs) ;
924 SCALE_DIV_ASSIGN (Xout [i + d*3], X [4*k + 3], rs) ;
925 }
926 break ;
927 }
928 }
929
930 /* ------------------------------------------------------------------ */
931 /* go to the next chunk of B */
932 /* ------------------------------------------------------------------ */
933
934 Xout += d*4 ;
935 }
936 return (TRUE) ;
937}
938
939#endif