Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2.hpp
1/* ========================================================================== */
2/* === klu ================================================================== */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
38 * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
39 * Davis. This algorithm is what appears as the default sparse LU routine in
40 * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
41 * Note that no column ordering is provided (see COLAMD or AMD for suitable
42 * orderings). SuperLU is based on this algorithm, except that it adds the
43 * use of dense matrix operations on "supernodes" (adjacent columns with
44 * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
45 * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
46 * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
47 * factors (such as for most finite-element problems). However, for matrices
48 * with very sparse LU factors, this algorithm is typically faster than both
49 * SuperLU and UMFPACK, since in this case there is little chance to exploit
50 * dense matrix kernels (the BLAS).
51 *
52 * Only one block of A is factorized, in the BTF form. The input n is the
53 * size of the block; k1 is the first row and column in the block.
54 *
55 * NOTE: no error checking is done on the inputs. This version is not meant to
56 * be called directly by the user. Use klu_factor instead.
57 *
58 * No fill-reducing ordering is provided. The ordering quality of
59 * klu_kernel_factor is the responsibility of the caller. The input A must
60 * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
61 * be provided.
62 *
63 * The input matrix A must be in compressed-column form, with either sorted
64 * or unsorted row indices. Row indices for column j of A is in
65 * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
66 * numerical values. No duplicate entries are allowed.
67 *
68 * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
69 * file for details on permitted use. Note that no code from The MathWorks,
70 * Inc, or from SuperLU, or from any other source appears here. The code is
71 * written from scratch, from the algorithmic description in Gilbert & Peierls'
72 * and Eisenstat & Liu's journal papers [1,2].
73 *
74 * If an input permutation Q is provided, the factorization L*U = A (P,Q)
75 * is computed, where P is determined by partial pivoting, and Q is the input
76 * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
77 * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
78 * input permutation is applied symmetrically to the input matrix. The output
79 * permutation P includes both the partial pivoting ordering and the input
80 * permutation. If Q is NULL, then it is assumed to be the identity
81 * permutation. Q is not modified.
82 *
83 * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
84 * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
85 * vol 9, pp. 862-874, 1988.
86 * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
87 * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
88 * Applic., vol 13, pp. 202-211, 1992.
89 */
90
91/* ========================================================================== */
92
93#ifndef KLU2_HPP
94#define KLU2_HPP
95
96#include "klu2_internal.h"
97#include "klu2_kernel.hpp"
98#include "klu2_memory.hpp"
99
100template <typename Entry, typename Int>
101size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
102(
103 /* inputs, not modified */
104 Int n, /* A is n-by-n. n must be > 0. */
105 Int Ap [ ], /* size n+1, column pointers for A */
106 Int Ai [ ], /* size nz = Ap [n], row indices for A */
107 Entry Ax [ ], /* size nz, values of A */
108 Int Q [ ], /* size n, optional column permutation */
109 double Lsize, /* estimate of number of nonzeros in L */
110
111 /* outputs, not defined on input */
112 Unit **p_LU, /* row indices and values of L and U */
113 Entry Udiag [ ], /* size n, diagonal of U */
114 Int Llen [ ], /* size n, column length of L */
115 Int Ulen [ ], /* size n, column length of U */
116 Int Lip [ ], /* size n, column pointers for L */
117 Int Uip [ ], /* size n, column pointers for U */
118 Int P [ ], /* row permutation, size n */
119 Int *lnz, /* size of L */
120 Int *unz, /* size of U */
121
122 /* workspace, undefined on input */
123 Entry *X, /* size n double's, zero on output */
124 Int *Work, /* size 5n Int's */
125
126 /* inputs, not modified on output */
127 Int k1, /* the block of A is from k1 to k2-1 */
128 Int PSinv [ ], /* inverse of P from symbolic factorization */
129 double Rs [ ], /* scale factors for A */
130
131 /* inputs, modified on output */
132 Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
133 Int Offi [ ],
134 Entry Offx [ ],
135 /* --------------- */
136 KLU_common<Entry, Int> *Common
137)
138{
139 double maxlnz, dunits ;
140 Unit *LU ;
141 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
142 Int lsize, usize, anz, ok ;
143 size_t lusize ;
144 ASSERT (Common != NULL) ;
145
146 /* ---------------------------------------------------------------------- */
147 /* get control parameters, or use defaults */
148 /* ---------------------------------------------------------------------- */
149
150 n = MAX (1, n) ;
151 anz = Ap [n+k1] - Ap [k1] ;
152
153 if (Lsize <= 0)
154 {
155 Lsize = -Lsize ;
156 Lsize = MAX (Lsize, 1.0) ;
157 lsize = (Int) Lsize * anz + n ;
158 }
159 else
160 {
161 lsize = (Int) Lsize ;
162 }
163
164 usize = lsize ;
165
166 lsize = MAX (n+1, lsize) ;
167 usize = MAX (n+1, usize) ;
168
169 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
170 maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
171 lsize = (Int) MIN (maxlnz, lsize) ;
172 usize = (Int) MIN (maxlnz, usize) ;
173
174 PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
175 n, anz, k1, lsize, usize, maxlnz)) ;
176
177 /* ---------------------------------------------------------------------- */
178 /* allocate workspace and outputs */
179 /* ---------------------------------------------------------------------- */
180
181 /* return arguments are not yet assigned */
182 *p_LU = (Unit *) NULL ;
183
184 /* these computations are safe from size_t overflow */
185 W = Work ;
186 Pinv = (Int *) W ; W += n ;
187 Stack = (Int *) W ; W += n ;
188 Flag = (Int *) W ; W += n ;
189 Lpend = (Int *) W ; W += n ;
190 Ap_pos = (Int *) W ; W += n ;
191
192 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
193 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
194 lusize = (size_t) dunits ;
195 ok = !INT_OVERFLOW (dunits) ;
196 LU = (Unit *) (ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL) ;
197 if (LU == NULL)
198 {
199 /* out of memory, or problem too large */
200 Common->status = KLU_OUT_OF_MEMORY ;
201 lusize = 0 ;
202 return (lusize) ;
203 }
204
205 /* ---------------------------------------------------------------------- */
206 /* factorize */
207 /* ---------------------------------------------------------------------- */
208
209 /* with pruning, and non-recursive depth-first-search */
210 lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
211 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
212 X, Stack, Flag, Ap_pos, Lpend,
213 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
214
215 /* ---------------------------------------------------------------------- */
216 /* return LU factors, or return nothing if an error occurred */
217 /* ---------------------------------------------------------------------- */
218
219 if (Common->status < KLU_OK)
220 {
221 LU = (Unit *) KLU_free (LU, lusize, sizeof (Unit), Common) ;
222 lusize = 0 ;
223 }
224 *p_LU = LU ;
225 PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
226 return (lusize) ;
227}
228
229/* ========================================================================== */
230/* === KLU_lsolve =========================================================== */
231/* ========================================================================== */
232
233/* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
234 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
235 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
236 * range 1 to 4. */
237
238template <typename Entry, typename Int>
239void KLU_lsolve
240(
241 /* inputs, not modified: */
242 Int n,
243 Int Lip [ ],
244 Int Llen [ ],
245 Unit LU [ ],
246 Int nrhs,
247 /* right-hand-side on input, solution to Lx=b on output */
248 Entry X [ ]
249)
250{
251 Entry x [4], lik ;
252 Int *Li ;
253 Entry *Lx ;
254 Int k, p, len, i ;
255
256 switch (nrhs)
257 {
258
259 case 1:
260 for (k = 0 ; k < n ; k++)
261 {
262 x [0] = X [k] ;
263 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
264 /* unit diagonal of L is not stored*/
265 for (p = 0 ; p < len ; p++)
266 {
267 /* X [Li [p]] -= Lx [p] * x [0] ; */
268 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
269 }
270 }
271 break ;
272
273 case 2:
274
275 for (k = 0 ; k < n ; k++)
276 {
277 x [0] = X [2*k ] ;
278 x [1] = X [2*k + 1] ;
279 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280 for (p = 0 ; p < len ; p++)
281 {
282 i = Li [p] ;
283 lik = Lx [p] ;
284 MULT_SUB (X [2*i], lik, x [0]) ;
285 MULT_SUB (X [2*i + 1], lik, x [1]) ;
286 }
287 }
288 break ;
289
290 case 3:
291
292 for (k = 0 ; k < n ; k++)
293 {
294 x [0] = X [3*k ] ;
295 x [1] = X [3*k + 1] ;
296 x [2] = X [3*k + 2] ;
297 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
298 for (p = 0 ; p < len ; p++)
299 {
300 i = Li [p] ;
301 lik = Lx [p] ;
302 MULT_SUB (X [3*i], lik, x [0]) ;
303 MULT_SUB (X [3*i + 1], lik, x [1]) ;
304 MULT_SUB (X [3*i + 2], lik, x [2]) ;
305 }
306 }
307 break ;
308
309 case 4:
310
311 for (k = 0 ; k < n ; k++)
312 {
313 x [0] = X [4*k ] ;
314 x [1] = X [4*k + 1] ;
315 x [2] = X [4*k + 2] ;
316 x [3] = X [4*k + 3] ;
317 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
318 for (p = 0 ; p < len ; p++)
319 {
320 i = Li [p] ;
321 lik = Lx [p] ;
322 MULT_SUB (X [4*i], lik, x [0]) ;
323 MULT_SUB (X [4*i + 1], lik, x [1]) ;
324 MULT_SUB (X [4*i + 2], lik, x [2]) ;
325 MULT_SUB (X [4*i + 3], lik, x [3]) ;
326 }
327 }
328 break ;
329
330 }
331}
332
333/* ========================================================================== */
334/* === KLU_usolve =========================================================== */
335/* ========================================================================== */
336
337/* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
338 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
339 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
340 * range 1 to 4. */
341
342template <typename Entry, typename Int>
343void KLU_usolve
344(
345 /* inputs, not modified: */
346 Int n,
347 Int Uip [ ],
348 Int Ulen [ ],
349 Unit LU [ ],
350 Entry Udiag [ ],
351 Int nrhs,
352 /* right-hand-side on input, solution to Ux=b on output */
353 Entry X [ ]
354)
355{
356 Entry x [4], uik, ukk ;
357 Int *Ui ;
358 Entry *Ux ;
359 Int k, p, len, i ;
360
361 switch (nrhs)
362 {
363
364 case 1:
365
366 for (k = n-1 ; k >= 0 ; k--)
367 {
368 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
369 /* x [0] = X [k] / Udiag [k] ; */
370 DIV (x [0], X [k], Udiag [k]) ;
371 X [k] = x [0] ;
372 for (p = 0 ; p < len ; p++)
373 {
374 /* X [Ui [p]] -= Ux [p] * x [0] ; */
375 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
376
377 }
378 }
379
380 break ;
381
382 case 2:
383
384 for (k = n-1 ; k >= 0 ; k--)
385 {
386 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
387 ukk = Udiag [k] ;
388 /* x [0] = X [2*k ] / ukk ;
389 x [1] = X [2*k + 1] / ukk ; */
390 DIV (x [0], X [2*k], ukk) ;
391 DIV (x [1], X [2*k + 1], ukk) ;
392
393 X [2*k ] = x [0] ;
394 X [2*k + 1] = x [1] ;
395 for (p = 0 ; p < len ; p++)
396 {
397 i = Ui [p] ;
398 uik = Ux [p] ;
399 /* X [2*i ] -= uik * x [0] ;
400 X [2*i + 1] -= uik * x [1] ; */
401 MULT_SUB (X [2*i], uik, x [0]) ;
402 MULT_SUB (X [2*i + 1], uik, x [1]) ;
403 }
404 }
405
406 break ;
407
408 case 3:
409
410 for (k = n-1 ; k >= 0 ; k--)
411 {
412 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
413 ukk = Udiag [k] ;
414
415 DIV (x [0], X [3*k], ukk) ;
416 DIV (x [1], X [3*k + 1], ukk) ;
417 DIV (x [2], X [3*k + 2], ukk) ;
418
419 X [3*k ] = x [0] ;
420 X [3*k + 1] = x [1] ;
421 X [3*k + 2] = x [2] ;
422 for (p = 0 ; p < len ; p++)
423 {
424 i = Ui [p] ;
425 uik = Ux [p] ;
426 MULT_SUB (X [3*i], uik, x [0]) ;
427 MULT_SUB (X [3*i + 1], uik, x [1]) ;
428 MULT_SUB (X [3*i + 2], uik, x [2]) ;
429 }
430 }
431
432 break ;
433
434 case 4:
435
436 for (k = n-1 ; k >= 0 ; k--)
437 {
438 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
439 ukk = Udiag [k] ;
440
441 DIV (x [0], X [4*k], ukk) ;
442 DIV (x [1], X [4*k + 1], ukk) ;
443 DIV (x [2], X [4*k + 2], ukk) ;
444 DIV (x [3], X [4*k + 3], ukk) ;
445
446 X [4*k ] = x [0] ;
447 X [4*k + 1] = x [1] ;
448 X [4*k + 2] = x [2] ;
449 X [4*k + 3] = x [3] ;
450 for (p = 0 ; p < len ; p++)
451 {
452 i = Ui [p] ;
453 uik = Ux [p] ;
454
455 MULT_SUB (X [4*i], uik, x [0]) ;
456 MULT_SUB (X [4*i + 1], uik, x [1]) ;
457 MULT_SUB (X [4*i + 2], uik, x [2]) ;
458 MULT_SUB (X [4*i + 3], uik, x [3]) ;
459 }
460 }
461
462 break ;
463
464 }
465}
466
467/* ========================================================================== */
468/* === KLU_ltsolve ========================================================== */
469/* ========================================================================== */
470
471/* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
472 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
473 * and is stored in ROW form with row dimension nrhs. nrhs must in the
474 * range 1 to 4. */
475
476template <typename Entry, typename Int>
477void KLU_ltsolve
478(
479 /* inputs, not modified: */
480 Int n,
481 Int Lip [ ],
482 Int Llen [ ],
483 Unit LU [ ],
484 Int nrhs,
485#ifdef COMPLEX
486 Int conj_solve,
487#endif
488 /* right-hand-side on input, solution to L'x=b on output */
489 Entry X [ ]
490)
491{
492 Entry x [4], lik ;
493 Int *Li ;
494 Entry *Lx ;
495 Int k, p, len, i ;
496
497 switch (nrhs)
498 {
499
500 case 1:
501
502 for (k = n-1 ; k >= 0 ; k--)
503 {
504 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
505 x [0] = X [k] ;
506 for (p = 0 ; p < len ; p++)
507 {
508#ifdef COMPLEX
509 if (conj_solve)
510 {
511 /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
512 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
513 }
514 else
515#endif
516 {
517 /*x [0] -= Lx [p] * X [Li [p]] ;*/
518 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
519 }
520 }
521 X [k] = x [0] ;
522 }
523 break ;
524
525 case 2:
526
527 for (k = n-1 ; k >= 0 ; k--)
528 {
529 x [0] = X [2*k ] ;
530 x [1] = X [2*k + 1] ;
531 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
532 for (p = 0 ; p < len ; p++)
533 {
534 i = Li [p] ;
535#ifdef COMPLEX
536 if (conj_solve)
537 {
538 CONJ (lik, Lx [p]) ;
539 }
540 else
541#endif
542 {
543 lik = Lx [p] ;
544 }
545 MULT_SUB (x [0], lik, X [2*i]) ;
546 MULT_SUB (x [1], lik, X [2*i + 1]) ;
547 }
548 X [2*k ] = x [0] ;
549 X [2*k + 1] = x [1] ;
550 }
551 break ;
552
553 case 3:
554
555 for (k = n-1 ; k >= 0 ; k--)
556 {
557 x [0] = X [3*k ] ;
558 x [1] = X [3*k + 1] ;
559 x [2] = X [3*k + 2] ;
560 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
561 for (p = 0 ; p < len ; p++)
562 {
563 i = Li [p] ;
564#ifdef COMPLEX
565 if (conj_solve)
566 {
567 CONJ (lik, Lx [p]) ;
568 }
569 else
570#endif
571 {
572 lik = Lx [p] ;
573 }
574 MULT_SUB (x [0], lik, X [3*i]) ;
575 MULT_SUB (x [1], lik, X [3*i + 1]) ;
576 MULT_SUB (x [2], lik, X [3*i + 2]) ;
577 }
578 X [3*k ] = x [0] ;
579 X [3*k + 1] = x [1] ;
580 X [3*k + 2] = x [2] ;
581 }
582 break ;
583
584 case 4:
585
586 for (k = n-1 ; k >= 0 ; k--)
587 {
588 x [0] = X [4*k ] ;
589 x [1] = X [4*k + 1] ;
590 x [2] = X [4*k + 2] ;
591 x [3] = X [4*k + 3] ;
592 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
593 for (p = 0 ; p < len ; p++)
594 {
595 i = Li [p] ;
596#ifdef COMPLEX
597 if (conj_solve)
598 {
599 CONJ (lik, Lx [p]) ;
600 }
601 else
602#endif
603 {
604 lik = Lx [p] ;
605 }
606 MULT_SUB (x [0], lik, X [4*i]) ;
607 MULT_SUB (x [1], lik, X [4*i + 1]) ;
608 MULT_SUB (x [2], lik, X [4*i + 2]) ;
609 MULT_SUB (x [3], lik, X [4*i + 3]) ;
610 }
611 X [4*k ] = x [0] ;
612 X [4*k + 1] = x [1] ;
613 X [4*k + 2] = x [2] ;
614 X [4*k + 3] = x [3] ;
615 }
616 break ;
617 }
618}
619
620
621/* ========================================================================== */
622/* === KLU_utsolve ========================================================== */
623/* ========================================================================== */
624
625/* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
626 * entry is stored (and appears last in each column of U). Overwrites B
627 * with the solution X. B is n-by-nrhs and is stored in ROW form with row
628 * dimension nrhs. nrhs must be in the range 1 to 4. */
629
630template <typename Entry, typename Int>
631void KLU_utsolve
632(
633 /* inputs, not modified: */
634 Int n,
635 Int Uip [ ],
636 Int Ulen [ ],
637 Unit LU [ ],
638 Entry Udiag [ ],
639 Int nrhs,
640#ifdef COMPLEX
641 Int conj_solve,
642#endif
643 /* right-hand-side on input, solution to Ux=b on output */
644 Entry X [ ]
645)
646{
647 Entry x [4], uik, ukk ;
648 Int k, p, len, i ;
649 Int *Ui ;
650 Entry *Ux ;
651
652 switch (nrhs)
653 {
654
655 case 1:
656
657 for (k = 0 ; k < n ; k++)
658 {
659 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
660 x [0] = X [k] ;
661 for (p = 0 ; p < len ; p++)
662 {
663#ifdef COMPLEX
664 if (conj_solve)
665 {
666 /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
667 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
668 }
669 else
670#endif
671 {
672 /* x [0] -= Ux [p] * X [Ui [p]] ; */
673 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
674 }
675 }
676#ifdef COMPLEX
677 if (conj_solve)
678 {
679 CONJ (ukk, Udiag [k]) ;
680 }
681 else
682#endif
683 {
684 ukk = Udiag [k] ;
685 }
686 DIV (X [k], x [0], ukk) ;
687 }
688 break ;
689
690 case 2:
691
692 for (k = 0 ; k < n ; k++)
693 {
694 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
695 x [0] = X [2*k ] ;
696 x [1] = X [2*k + 1] ;
697 for (p = 0 ; p < len ; p++)
698 {
699 i = Ui [p] ;
700#ifdef COMPLEX
701 if (conj_solve)
702 {
703 CONJ (uik, Ux [p]) ;
704 }
705 else
706#endif
707 {
708 uik = Ux [p] ;
709 }
710 MULT_SUB (x [0], uik, X [2*i]) ;
711 MULT_SUB (x [1], uik, X [2*i + 1]) ;
712 }
713#ifdef COMPLEX
714 if (conj_solve)
715 {
716 CONJ (ukk, Udiag [k]) ;
717 }
718 else
719#endif
720 {
721 ukk = Udiag [k] ;
722 }
723 DIV (X [2*k], x [0], ukk) ;
724 DIV (X [2*k + 1], x [1], ukk) ;
725 }
726 break ;
727
728 case 3:
729
730 for (k = 0 ; k < n ; k++)
731 {
732 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
733 x [0] = X [3*k ] ;
734 x [1] = X [3*k + 1] ;
735 x [2] = X [3*k + 2] ;
736 for (p = 0 ; p < len ; p++)
737 {
738 i = Ui [p] ;
739#ifdef COMPLEX
740 if (conj_solve)
741 {
742 CONJ (uik, Ux [p]) ;
743 }
744 else
745#endif
746 {
747 uik = Ux [p] ;
748 }
749 MULT_SUB (x [0], uik, X [3*i]) ;
750 MULT_SUB (x [1], uik, X [3*i + 1]) ;
751 MULT_SUB (x [2], uik, X [3*i + 2]) ;
752 }
753#ifdef COMPLEX
754 if (conj_solve)
755 {
756 CONJ (ukk, Udiag [k]) ;
757 }
758 else
759#endif
760 {
761 ukk = Udiag [k] ;
762 }
763 DIV (X [3*k], x [0], ukk) ;
764 DIV (X [3*k + 1], x [1], ukk) ;
765 DIV (X [3*k + 2], x [2], ukk) ;
766 }
767 break ;
768
769 case 4:
770
771 for (k = 0 ; k < n ; k++)
772 {
773 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
774 x [0] = X [4*k ] ;
775 x [1] = X [4*k + 1] ;
776 x [2] = X [4*k + 2] ;
777 x [3] = X [4*k + 3] ;
778 for (p = 0 ; p < len ; p++)
779 {
780 i = Ui [p] ;
781#ifdef COMPLEX
782 if (conj_solve)
783 {
784 CONJ (uik, Ux [p]) ;
785 }
786 else
787#endif
788 {
789 uik = Ux [p] ;
790 }
791 MULT_SUB (x [0], uik, X [4*i]) ;
792 MULT_SUB (x [1], uik, X [4*i + 1]) ;
793 MULT_SUB (x [2], uik, X [4*i + 2]) ;
794 MULT_SUB (x [3], uik, X [4*i + 3]) ;
795 }
796#ifdef COMPLEX
797 if (conj_solve)
798 {
799 CONJ (ukk, Udiag [k]) ;
800 }
801 else
802#endif
803 {
804 ukk = Udiag [k] ;
805 }
806 DIV (X [4*k], x [0], ukk) ;
807 DIV (X [4*k + 1], x [1], ukk) ;
808 DIV (X [4*k + 2], x [2], ukk) ;
809 DIV (X [4*k + 3], x [3], ukk) ;
810 }
811 break ;
812 }
813}
814
815#endif /* KLU2_HPP */