Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_solve.hpp
1/* ========================================================================== */
2/* === KLU_solve ============================================================ */
3/* ========================================================================== */
4// @HEADER
5// *****************************************************************************
6// KLU2: A Direct Linear Solver package
7//
8// Copyright 2011 NTESS and the KLU2 contributors.
9// SPDX-License-Identifier: LGPL-2.1-or-later
10// *****************************************************************************
11// @HEADER
12
13/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
14 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
15 * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
16 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
17 * Numeric->Iwork).
18 */
19
20#ifndef KLU2_SOLVE_HPP
21#define KLU2_SOLVE_HPP
22
23#include "klu2_internal.h"
24#include "klu2.hpp"
25
26template <typename Entry, typename Int>
27Int KLU_solve
28(
29 /* inputs, not modified */
30 KLU_symbolic<Entry, Int> *Symbolic,
31 KLU_numeric<Entry, Int> *Numeric,
32 Int d, /* leading dimension of B */
33 Int nrhs, /* number of right-hand-sides */
34
35 /* right-hand-side on input, overwritten with solution to Ax=b on output */
36 /* TODO: Switched from double to Entry, check for all cases */
37 Entry B [ ], /* size n*nrhs, in column-oriented form, with
38 * leading dimension d. */
39 /* --------------- */
40 KLU_common<Entry, Int> *Common
41)
42{
43 Entry x [4], offik, s ;
44 double rs, *Rs ;
45 Entry *Offx, *X, *Bz, *Udiag ;
46 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
47 Unit **LUbx ;
48 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
49
50 /* ---------------------------------------------------------------------- */
51 /* check inputs */
52 /* ---------------------------------------------------------------------- */
53
54 if (Common == NULL)
55 {
56 return (FALSE) ;
57 }
58 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
59 B == NULL)
60 {
61 Common->status = KLU_INVALID ;
62 return (FALSE) ;
63 }
64 Common->status = KLU_OK ;
65
66 /* ---------------------------------------------------------------------- */
67 /* get the contents of the Symbolic object */
68 /* ---------------------------------------------------------------------- */
69
70 Bz = (Entry *) B ;
71 n = Symbolic->n ;
72 nblocks = Symbolic->nblocks ;
73 Q = Symbolic->Q ;
74 R = Symbolic->R ;
75
76 /* ---------------------------------------------------------------------- */
77 /* get the contents of the Numeric object */
78 /* ---------------------------------------------------------------------- */
79
80 ASSERT (nblocks == Numeric->nblocks) ;
81 Pnum = Numeric->Pnum ;
82 Offp = Numeric->Offp ;
83 Offi = Numeric->Offi ;
84 Offx = (Entry *) Numeric->Offx ;
85
86 Lip = Numeric->Lip ;
87 Llen = Numeric->Llen ;
88 Uip = Numeric->Uip ;
89 Ulen = Numeric->Ulen ;
90 LUbx = (Unit **) Numeric->LUbx ;
91 Udiag = (Entry *) Numeric->Udiag ;
92
93 Rs = Numeric->Rs ;
94 X = (Entry *) Numeric->Xwork ;
95
96 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
97
98 /* ---------------------------------------------------------------------- */
99 /* solve in chunks of 4 columns at a time */
100 /* ---------------------------------------------------------------------- */
101
102 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
103 {
104
105 /* ------------------------------------------------------------------ */
106 /* get the size of the current chunk */
107 /* ------------------------------------------------------------------ */
108
109 nr = MIN (nrhs - chunk, 4) ;
110
111 /* ------------------------------------------------------------------ */
112 /* scale and permute the right hand side, X = P*(R\B) */
113 /* ------------------------------------------------------------------ */
114
115 if (Rs == NULL)
116 {
117
118 /* no scaling */
119 switch (nr)
120 {
121
122 case 1:
123
124 for (k = 0 ; k < n ; k++)
125 {
126 X [k] = Bz [Pnum [k]] ;
127 }
128 break ;
129
130 case 2:
131
132 for (k = 0 ; k < n ; k++)
133 {
134 i = Pnum [k] ;
135 X [2*k ] = Bz [i ] ;
136 X [2*k + 1] = Bz [i + d ] ;
137 }
138 break ;
139
140 case 3:
141
142 for (k = 0 ; k < n ; k++)
143 {
144 i = Pnum [k] ;
145 X [3*k ] = Bz [i ] ;
146 X [3*k + 1] = Bz [i + d ] ;
147 X [3*k + 2] = Bz [i + d*2] ;
148 }
149 break ;
150
151 case 4:
152
153 for (k = 0 ; k < n ; k++)
154 {
155 i = Pnum [k] ;
156 X [4*k ] = Bz [i ] ;
157 X [4*k + 1] = Bz [i + d ] ;
158 X [4*k + 2] = Bz [i + d*2] ;
159 X [4*k + 3] = Bz [i + d*3] ;
160 }
161 break ;
162 }
163
164 }
165 else
166 {
167
168 switch (nr)
169 {
170
171 case 1:
172
173 for (k = 0 ; k < n ; k++)
174 {
175 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
176 }
177 break ;
178
179 case 2:
180
181 for (k = 0 ; k < n ; k++)
182 {
183 i = Pnum [k] ;
184 rs = Rs [k] ;
185 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
186 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
187 }
188 break ;
189
190 case 3:
191
192 for (k = 0 ; k < n ; k++)
193 {
194 i = Pnum [k] ;
195 rs = Rs [k] ;
196 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
197 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
198 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
199 }
200 break ;
201
202 case 4:
203
204 for (k = 0 ; k < n ; k++)
205 {
206 i = Pnum [k] ;
207 rs = Rs [k] ;
208 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
209 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
210 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
211 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
212 }
213 break ;
214 }
215 }
216
217 /* ------------------------------------------------------------------ */
218 /* solve X = (L*U + Off)\X */
219 /* ------------------------------------------------------------------ */
220
221 for (block = nblocks-1 ; block >= 0 ; block--)
222 {
223
224 /* -------------------------------------------------------------- */
225 /* the block of size nk is from rows/columns k1 to k2-1 */
226 /* -------------------------------------------------------------- */
227
228 k1 = R [block] ;
229 k2 = R [block+1] ;
230 nk = k2 - k1 ;
231 PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
232
233 /* solve the block system */
234 if (nk == 1)
235 {
236 s = Udiag [k1] ;
237 switch (nr)
238 {
239
240 case 1:
241 DIV (X [k1], X [k1], s) ;
242 break ;
243
244 case 2:
245 DIV (X [2*k1], X [2*k1], s) ;
246 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
247 break ;
248
249 case 3:
250 DIV (X [3*k1], X [3*k1], s) ;
251 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
252 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
253 break ;
254
255 case 4:
256 DIV (X [4*k1], X [4*k1], s) ;
257 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
258 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
259 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
260 break ;
261
262 }
263 }
264 else
265 {
266 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
267 X + nr*k1) ;
268 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
269 Udiag + k1, nr, X + nr*k1) ;
270 }
271
272 /* -------------------------------------------------------------- */
273 /* block back-substitution for the off-diagonal-block entries */
274 /* -------------------------------------------------------------- */
275
276 if (block > 0)
277 {
278 switch (nr)
279 {
280
281 case 1:
282
283 for (k = k1 ; k < k2 ; k++)
284 {
285 pend = Offp [k+1] ;
286 x [0] = X [k] ;
287 for (p = Offp [k] ; p < pend ; p++)
288 {
289 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
290 }
291 }
292 break ;
293
294 case 2:
295
296 for (k = k1 ; k < k2 ; k++)
297 {
298 pend = Offp [k+1] ;
299 x [0] = X [2*k ] ;
300 x [1] = X [2*k + 1] ;
301 for (p = Offp [k] ; p < pend ; p++)
302 {
303 i = Offi [p] ;
304 offik = Offx [p] ;
305 MULT_SUB (X [2*i], offik, x [0]) ;
306 MULT_SUB (X [2*i + 1], offik, x [1]) ;
307 }
308 }
309 break ;
310
311 case 3:
312
313 for (k = k1 ; k < k2 ; k++)
314 {
315 pend = Offp [k+1] ;
316 x [0] = X [3*k ] ;
317 x [1] = X [3*k + 1] ;
318 x [2] = X [3*k + 2] ;
319 for (p = Offp [k] ; p < pend ; p++)
320 {
321 i = Offi [p] ;
322 offik = Offx [p] ;
323 MULT_SUB (X [3*i], offik, x [0]) ;
324 MULT_SUB (X [3*i + 1], offik, x [1]) ;
325 MULT_SUB (X [3*i + 2], offik, x [2]) ;
326 }
327 }
328 break ;
329
330 case 4:
331
332 for (k = k1 ; k < k2 ; k++)
333 {
334 pend = Offp [k+1] ;
335 x [0] = X [4*k ] ;
336 x [1] = X [4*k + 1] ;
337 x [2] = X [4*k + 2] ;
338 x [3] = X [4*k + 3] ;
339 for (p = Offp [k] ; p < pend ; p++)
340 {
341 i = Offi [p] ;
342 offik = Offx [p] ;
343 MULT_SUB (X [4*i], offik, x [0]) ;
344 MULT_SUB (X [4*i + 1], offik, x [1]) ;
345 MULT_SUB (X [4*i + 2], offik, x [2]) ;
346 MULT_SUB (X [4*i + 3], offik, x [3]) ;
347 }
348 }
349 break ;
350 }
351 }
352 }
353
354 /* ------------------------------------------------------------------ */
355 /* permute the result, Bz = Q*X */
356 /* ------------------------------------------------------------------ */
357
358 switch (nr)
359 {
360
361 case 1:
362
363 for (k = 0 ; k < n ; k++)
364 {
365 Bz [Q [k]] = X [k] ;
366 }
367 break ;
368
369 case 2:
370
371 for (k = 0 ; k < n ; k++)
372 {
373 i = Q [k] ;
374 Bz [i ] = X [2*k ] ;
375 Bz [i + d ] = X [2*k + 1] ;
376 }
377 break ;
378
379 case 3:
380
381 for (k = 0 ; k < n ; k++)
382 {
383 i = Q [k] ;
384 Bz [i ] = X [3*k ] ;
385 Bz [i + d ] = X [3*k + 1] ;
386 Bz [i + d*2] = X [3*k + 2] ;
387 }
388 break ;
389
390 case 4:
391
392 for (k = 0 ; k < n ; k++)
393 {
394 i = Q [k] ;
395 Bz [i ] = X [4*k ] ;
396 Bz [i + d ] = X [4*k + 1] ;
397 Bz [i + d*2] = X [4*k + 2] ;
398 Bz [i + d*3] = X [4*k + 3] ;
399 }
400 break ;
401 }
402
403 /* ------------------------------------------------------------------ */
404 /* go to the next chunk of B */
405 /* ------------------------------------------------------------------ */
406
407 Bz += d*4 ;
408 }
409 return (TRUE) ;
410}
411
412
413// Require Xout and B to have equal number of entries, pre-allocated
414template <typename Entry, typename Int>
415Int KLU_solve2
416(
417 /* inputs, not modified */
418 KLU_symbolic<Entry, Int> *Symbolic,
419 KLU_numeric<Entry, Int> *Numeric,
420 Int d, /* leading dimension of B */
421 Int nrhs, /* number of right-hand-sides */
422
423 /* right-hand-side on input, overwritten with solution to Ax=b on output */
424 /* TODO: Switched from double to Entry, check for all cases */
425 Entry B [ ], /* size n*1, in column-oriented form, with
426 * leading dimension d. */
427 Entry Xout [ ], /* size n*1, in column-oriented form, with
428 * leading dimension d. */
429 /* --------------- */
430 KLU_common<Entry, Int> *Common
431)
432{
433 Entry x [4], offik, s ;
434 double rs, *Rs ;
435 Entry *Offx, *X, *Bz, *Udiag ;
436 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
437 Unit **LUbx ;
438 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
439
440 /* ---------------------------------------------------------------------- */
441 /* check inputs */
442 /* ---------------------------------------------------------------------- */
443
444 if (Common == NULL)
445 {
446 return (FALSE) ;
447 }
448 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
449 B == NULL || Xout == NULL)
450 {
451 Common->status = KLU_INVALID ;
452 return (FALSE) ;
453 }
454 Common->status = KLU_OK ;
455
456 /* ---------------------------------------------------------------------- */
457 /* get the contents of the Symbolic object */
458 /* ---------------------------------------------------------------------- */
459
460 Bz = (Entry *) B ;
461 n = Symbolic->n ;
462 nblocks = Symbolic->nblocks ;
463 Q = Symbolic->Q ;
464 R = Symbolic->R ;
465
466 /* ---------------------------------------------------------------------- */
467 /* get the contents of the Numeric object */
468 /* ---------------------------------------------------------------------- */
469
470 ASSERT (nblocks == Numeric->nblocks) ;
471 Pnum = Numeric->Pnum ;
472 Offp = Numeric->Offp ;
473 Offi = Numeric->Offi ;
474 Offx = (Entry *) Numeric->Offx ;
475
476 Lip = Numeric->Lip ;
477 Llen = Numeric->Llen ;
478 Uip = Numeric->Uip ;
479 Ulen = Numeric->Ulen ;
480 LUbx = (Unit **) Numeric->LUbx ;
481 Udiag = (Entry *) Numeric->Udiag ;
482
483 Rs = Numeric->Rs ;
484 X = (Entry *) Numeric->Xwork ;
485
486 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
487
488 /* ---------------------------------------------------------------------- */
489 /* solve in chunks of 4 columns at a time */
490 /* ---------------------------------------------------------------------- */
491
492 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
493 {
494
495 /* ------------------------------------------------------------------ */
496 /* get the size of the current chunk */
497 /* ------------------------------------------------------------------ */
498
499 nr = MIN (nrhs - chunk, 4) ;
500
501 /* ------------------------------------------------------------------ */
502 /* scale and permute the right hand side, X = P*(R\B) */
503 /* ------------------------------------------------------------------ */
504
505 if (Rs == NULL)
506 {
507
508 /* no scaling */
509 switch (nr)
510 {
511
512 case 1:
513
514 for (k = 0 ; k < n ; k++)
515 {
516 X [k] = Bz [Pnum [k]] ;
517 }
518 break ;
519
520 case 2:
521
522 for (k = 0 ; k < n ; k++)
523 {
524 i = Pnum [k] ;
525 X [2*k ] = Bz [i ] ;
526 X [2*k + 1] = Bz [i + d ] ;
527 }
528 break ;
529
530 case 3:
531
532 for (k = 0 ; k < n ; k++)
533 {
534 i = Pnum [k] ;
535 X [3*k ] = Bz [i ] ;
536 X [3*k + 1] = Bz [i + d ] ;
537 X [3*k + 2] = Bz [i + d*2] ;
538 }
539 break ;
540
541 case 4:
542
543 for (k = 0 ; k < n ; k++)
544 {
545 i = Pnum [k] ;
546 X [4*k ] = Bz [i ] ;
547 X [4*k + 1] = Bz [i + d ] ;
548 X [4*k + 2] = Bz [i + d*2] ;
549 X [4*k + 3] = Bz [i + d*3] ;
550 }
551 break ;
552 }
553
554 }
555 else
556 {
557
558 switch (nr)
559 {
560
561 case 1:
562
563 for (k = 0 ; k < n ; k++)
564 {
565 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
566 }
567 break ;
568
569 case 2:
570
571 for (k = 0 ; k < n ; k++)
572 {
573 i = Pnum [k] ;
574 rs = Rs [k] ;
575 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
576 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
577 }
578 break ;
579
580 case 3:
581
582 for (k = 0 ; k < n ; k++)
583 {
584 i = Pnum [k] ;
585 rs = Rs [k] ;
586 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
587 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
588 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
589 }
590 break ;
591
592 case 4:
593
594 for (k = 0 ; k < n ; k++)
595 {
596 i = Pnum [k] ;
597 rs = Rs [k] ;
598 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
599 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
600 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
601 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
602 }
603 break ;
604 }
605 }
606
607 /* ------------------------------------------------------------------ */
608 /* solve X = (L*U + Off)\X */
609 /* ------------------------------------------------------------------ */
610
611 for (block = nblocks-1 ; block >= 0 ; block--)
612 {
613
614 /* -------------------------------------------------------------- */
615 /* the block of size nk is from rows/columns k1 to k2-1 */
616 /* -------------------------------------------------------------- */
617
618 k1 = R [block] ;
619 k2 = R [block+1] ;
620 nk = k2 - k1 ;
621 PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
622
623 /* solve the block system */
624 if (nk == 1)
625 {
626 s = Udiag [k1] ;
627 switch (nr)
628 {
629
630 case 1:
631 DIV (X [k1], X [k1], s) ;
632 break ;
633
634 case 2:
635 DIV (X [2*k1], X [2*k1], s) ;
636 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
637 break ;
638
639 case 3:
640 DIV (X [3*k1], X [3*k1], s) ;
641 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
642 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
643 break ;
644
645 case 4:
646 DIV (X [4*k1], X [4*k1], s) ;
647 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
648 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
649 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
650 break ;
651
652 }
653 }
654 else
655 {
656 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
657 X + nr*k1) ;
658 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
659 Udiag + k1, nr, X + nr*k1) ;
660 }
661
662 /* -------------------------------------------------------------- */
663 /* block back-substitution for the off-diagonal-block entries */
664 /* -------------------------------------------------------------- */
665
666 if (block > 0)
667 {
668 switch (nr)
669 {
670
671 case 1:
672
673 for (k = k1 ; k < k2 ; k++)
674 {
675 pend = Offp [k+1] ;
676 x [0] = X [k] ;
677 for (p = Offp [k] ; p < pend ; p++)
678 {
679 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
680 }
681 }
682 break ;
683
684 case 2:
685
686 for (k = k1 ; k < k2 ; k++)
687 {
688 pend = Offp [k+1] ;
689 x [0] = X [2*k ] ;
690 x [1] = X [2*k + 1] ;
691 for (p = Offp [k] ; p < pend ; p++)
692 {
693 i = Offi [p] ;
694 offik = Offx [p] ;
695 MULT_SUB (X [2*i], offik, x [0]) ;
696 MULT_SUB (X [2*i + 1], offik, x [1]) ;
697 }
698 }
699 break ;
700
701 case 3:
702
703 for (k = k1 ; k < k2 ; k++)
704 {
705 pend = Offp [k+1] ;
706 x [0] = X [3*k ] ;
707 x [1] = X [3*k + 1] ;
708 x [2] = X [3*k + 2] ;
709 for (p = Offp [k] ; p < pend ; p++)
710 {
711 i = Offi [p] ;
712 offik = Offx [p] ;
713 MULT_SUB (X [3*i], offik, x [0]) ;
714 MULT_SUB (X [3*i + 1], offik, x [1]) ;
715 MULT_SUB (X [3*i + 2], offik, x [2]) ;
716 }
717 }
718 break ;
719
720 case 4:
721
722 for (k = k1 ; k < k2 ; k++)
723 {
724 pend = Offp [k+1] ;
725 x [0] = X [4*k ] ;
726 x [1] = X [4*k + 1] ;
727 x [2] = X [4*k + 2] ;
728 x [3] = X [4*k + 3] ;
729 for (p = Offp [k] ; p < pend ; p++)
730 {
731 i = Offi [p] ;
732 offik = Offx [p] ;
733 MULT_SUB (X [4*i], offik, x [0]) ;
734 MULT_SUB (X [4*i + 1], offik, x [1]) ;
735 MULT_SUB (X [4*i + 2], offik, x [2]) ;
736 MULT_SUB (X [4*i + 3], offik, x [3]) ;
737 }
738 }
739 break ;
740 }
741 }
742 }
743
744 /* ------------------------------------------------------------------ */
745 /* permute the result, Bz = Q*X */
746 /* store result in Xout */
747 /* ------------------------------------------------------------------ */
748
749 switch (nr)
750 {
751
752 case 1:
753
754 for (k = 0 ; k < n ; k++)
755 {
756 Xout [Q [k]] = X [k] ;
757 }
758 break ;
759
760 case 2:
761
762 for (k = 0 ; k < n ; k++)
763 {
764 i = Q [k] ;
765 Xout [i ] = X [2*k ] ;
766 Xout [i + d ] = X [2*k + 1] ;
767 }
768 break ;
769
770 case 3:
771
772 for (k = 0 ; k < n ; k++)
773 {
774 i = Q [k] ;
775 Xout [i ] = X [3*k ] ;
776 Xout [i + d ] = X [3*k + 1] ;
777 Xout [i + d*2] = X [3*k + 2] ;
778 }
779 break ;
780
781 case 4:
782
783 for (k = 0 ; k < n ; k++)
784 {
785 i = Q [k] ;
786 Xout [i ] = X [4*k ] ;
787 Xout [i + d ] = X [4*k + 1] ;
788 Xout [i + d*2] = X [4*k + 2] ;
789 Xout [i + d*3] = X [4*k + 3] ;
790 }
791 break ;
792 }
793
794 /* ------------------------------------------------------------------ */
795 /* go to the next chunk of B */
796 /* ------------------------------------------------------------------ */
797
798 Xout += d*4 ;
799 }
800 return (TRUE) ;
801}
802
803
804#endif