ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LARRE_HEADER 00036 #define TEMPLATE_LAPACK_LARRE_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_larre(const char *range, const integer *n, Treal *vl, 00041 Treal *vu, integer *il, integer *iu, Treal *d__, Treal 00042 *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal * 00043 spltol, integer *nsplit, integer *isplit, integer *m, Treal *w, 00044 Treal *werr, Treal *wgap, integer *iblock, integer *indexw, 00045 Treal *gers, Treal *pivmin, Treal *work, integer * 00046 iwork, integer *info) 00047 { 00048 /* System generated locals */ 00049 integer i__1, i__2; 00050 Treal d__1, d__2, d__3; 00051 00052 00053 /* Local variables */ 00054 integer i__, j; 00055 Treal s1, s2; 00056 integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00057 Treal gl; 00058 integer in, mm; 00059 Treal gu; 00060 integer cnt; 00061 Treal eps, tau, tmp, rtl; 00062 integer cnt1, cnt2; 00063 Treal tmp1, eabs; 00064 integer iend, jblk; 00065 Treal eold; 00066 integer indl; 00067 Treal dmax__, emax; 00068 integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00069 integer idum, indu; 00070 Treal rtol; 00071 integer iseed[4]; 00072 Treal avgap, sigma; 00073 integer iinfo; 00074 logical norep; 00075 integer ibegin; 00076 logical forceb; 00077 integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00078 Treal sgndef; 00079 integer wbegin; 00080 Treal safmin, spdiam; 00081 logical usedqd; 00082 Treal clwdth, isleft; 00083 Treal isrght, bsrtol, dpivot; 00084 00085 00086 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00087 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00088 /* November 2006 */ 00089 00090 /* .. Scalar Arguments .. */ 00091 /* .. */ 00092 /* .. Array Arguments .. */ 00093 /* .. */ 00094 00095 /* Purpose */ 00096 /* ======= */ 00097 00098 /* To find the desired eigenvalues of a given real symmetric */ 00099 /* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */ 00100 /* elements to zero, and for each unreduced block T_i, it finds */ 00101 /* (a) a suitable shift at one end of the block's spectrum, */ 00102 /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ 00103 /* (c) eigenvalues of each L_i D_i L_i^T. */ 00104 /* The representations and eigenvalues found are then used by */ 00105 /* DSTEMR to compute the eigenvectors of T. */ 00106 /* The accuracy varies depending on whether bisection is used to */ 00107 /* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */ 00108 /* conpute all and then discard any unwanted one. */ 00109 /* As an added benefit, DLARRE also outputs the n */ 00110 /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ 00111 00112 /* Arguments */ 00113 /* ========= */ 00114 00115 /* RANGE (input) CHARACTER */ 00116 /* = 'A': ("All") all eigenvalues will be found. */ 00117 /* = 'V': ("Value") all eigenvalues in the half-open interval */ 00118 /* (VL, VU] will be found. */ 00119 /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ 00120 /* entire matrix) will be found. */ 00121 00122 /* N (input) INTEGER */ 00123 /* The order of the matrix. N > 0. */ 00124 00125 /* VL (input/output) DOUBLE PRECISION */ 00126 /* VU (input/output) DOUBLE PRECISION */ 00127 /* If RANGE='V', the lower and upper bounds for the eigenvalues. */ 00128 /* Eigenvalues less than or equal to VL, or greater than VU, */ 00129 /* will not be returned. VL < VU. */ 00130 /* If RANGE='I' or ='A', DLARRE computes bounds on the desired */ 00131 /* part of the spectrum. */ 00132 00133 /* IL (input) INTEGER */ 00134 /* IU (input) INTEGER */ 00135 /* If RANGE='I', the indices (in ascending order) of the */ 00136 /* smallest and largest eigenvalues to be returned. */ 00137 /* 1 <= IL <= IU <= N. */ 00138 00139 /* D (input/output) DOUBLE PRECISION array, dimension (N) */ 00140 /* On entry, the N diagonal elements of the tridiagonal */ 00141 /* matrix T. */ 00142 /* On exit, the N diagonal elements of the diagonal */ 00143 /* matrices D_i. */ 00144 00145 /* E (input/output) DOUBLE PRECISION array, dimension (N) */ 00146 /* On entry, the first (N-1) entries contain the subdiagonal */ 00147 /* elements of the tridiagonal matrix T; E(N) need not be set. */ 00148 /* On exit, E contains the subdiagonal elements of the unit */ 00149 /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ 00150 /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ 00151 00152 /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ 00153 /* On entry, the first (N-1) entries contain the SQUARES of the */ 00154 /* subdiagonal elements of the tridiagonal matrix T; */ 00155 /* E2(N) need not be set. */ 00156 /* On exit, the entries E2( ISPLIT( I ) ), */ 00157 /* 1 <= I <= NSPLIT, have been set to zero */ 00158 00159 /* RTOL1 (input) DOUBLE PRECISION */ 00160 /* RTOL2 (input) DOUBLE PRECISION */ 00161 /* Parameters for bisection. */ 00162 /* An interval [LEFT,RIGHT] has converged if */ 00163 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ 00164 00165 /* SPLTOL (input) DOUBLE PRECISION */ 00166 /* The threshold for splitting. */ 00167 00168 /* NSPLIT (output) INTEGER */ 00169 /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ 00170 00171 /* ISPLIT (output) INTEGER array, dimension (N) */ 00172 /* The splitting points, at which T breaks up into blocks. */ 00173 /* The first block consists of rows/columns 1 to ISPLIT(1), */ 00174 /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ 00175 /* etc., and the NSPLIT-th consists of rows/columns */ 00176 /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ 00177 00178 /* M (output) INTEGER */ 00179 /* The total number of eigenvalues (of all L_i D_i L_i^T) */ 00180 /* found. */ 00181 00182 /* W (output) DOUBLE PRECISION array, dimension (N) */ 00183 /* The first M elements contain the eigenvalues. The */ 00184 /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ 00185 /* sorted in ascending order ( DLARRE may use the */ 00186 /* remaining N-M elements as workspace). */ 00187 00188 /* WERR (output) DOUBLE PRECISION array, dimension (N) */ 00189 /* The error bound on the corresponding eigenvalue in W. */ 00190 00191 /* WGAP (output) DOUBLE PRECISION array, dimension (N) */ 00192 /* The separation from the right neighbor eigenvalue in W. */ 00193 /* The gap is only with respect to the eigenvalues of the same block */ 00194 /* as each block has its own representation tree. */ 00195 /* Exception: at the right end of a block we store the left gap */ 00196 00197 /* IBLOCK (output) INTEGER array, dimension (N) */ 00198 /* The indices of the blocks (submatrices) associated with the */ 00199 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ 00200 /* W(i) belongs to the first block from the top, =2 if W(i) */ 00201 /* belongs to the second block, etc. */ 00202 00203 /* INDEXW (output) INTEGER array, dimension (N) */ 00204 /* The indices of the eigenvalues within each block (submatrix); */ 00205 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ 00206 /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ 00207 00208 /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ 00209 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ 00210 /* is (GERS(2*i-1), GERS(2*i)). */ 00211 00212 /* PIVMIN (output) DOUBLE PRECISION */ 00213 /* The minimum pivot in the Sturm sequence for T. */ 00214 00215 /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ 00216 /* Workspace. */ 00217 00218 /* IWORK (workspace) INTEGER array, dimension (5*N) */ 00219 /* Workspace. */ 00220 00221 /* INFO (output) INTEGER */ 00222 /* = 0: successful exit */ 00223 /* > 0: A problem occured in DLARRE. */ 00224 /* < 0: One of the called subroutines signaled an internal problem. */ 00225 /* Needs inspection of the corresponding parameter IINFO */ 00226 /* for further information. */ 00227 00228 /* =-1: Problem in DLARRD. */ 00229 /* = 2: No base representation could be found in MAXTRY iterations. */ 00230 /* Increasing MAXTRY and recompilation might be a remedy. */ 00231 /* =-3: Problem in DLARRB when computing the refined root */ 00232 /* representation for DLASQ2. */ 00233 /* =-4: Problem in DLARRB when preforming bisection on the */ 00234 /* desired part of the spectrum. */ 00235 /* =-5: Problem in DLASQ2. */ 00236 /* =-6: Problem in DLASQ2. */ 00237 00238 /* Further Details */ 00239 /* The base representations are required to suffer very little */ 00240 /* element growth and consequently define all their eigenvalues to */ 00241 /* high relative accuracy. */ 00242 /* =============== */ 00243 00244 /* Based on contributions by */ 00245 /* Beresford Parlett, University of California, Berkeley, USA */ 00246 /* Jim Demmel, University of California, Berkeley, USA */ 00247 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00248 /* Osni Marques, LBNL/NERSC, USA */ 00249 /* Christof Voemel, University of California, Berkeley, USA */ 00250 00251 /* ===================================================================== */ 00252 00253 /* .. Parameters .. */ 00254 /* .. */ 00255 /* .. Local Scalars .. */ 00256 /* .. */ 00257 /* .. Local Arrays .. */ 00258 /* .. */ 00259 /* .. External Functions .. */ 00260 /* .. */ 00261 /* .. External Subroutines .. */ 00262 /* .. */ 00263 /* .. Intrinsic Functions .. */ 00264 /* .. */ 00265 /* .. Executable Statements .. */ 00266 00267 /* Parameter adjustments */ 00268 00269 00270 /* Table of constant values */ 00271 00272 integer c__1 = 1; 00273 integer c__2 = 2; 00274 00275 00276 --iwork; 00277 --work; 00278 --gers; 00279 --indexw; 00280 --iblock; 00281 --wgap; 00282 --werr; 00283 --w; 00284 --isplit; 00285 --e2; 00286 --e; 00287 --d__; 00288 00289 /* Initialization added by Elias to get rid of compiler warnings. */ 00290 mm = 0; 00291 /* Function Body */ 00292 *info = 0; 00293 00294 /* Decode RANGE */ 00295 00296 if (template_blas_lsame(range, "A")) { 00297 irange = 1; 00298 } else if (template_blas_lsame(range, "V")) { 00299 irange = 3; 00300 } else if (template_blas_lsame(range, "I")) { 00301 irange = 2; 00302 } 00303 *m = 0; 00304 /* Get machine constants */ 00305 safmin = template_lapack_lamch("S",(Treal)0); 00306 eps = template_lapack_lamch("P",(Treal)0); 00307 /* Set parameters */ 00308 rtl = template_blas_sqrt(eps); 00309 bsrtol = template_blas_sqrt(eps); 00310 /* Treat case of 1x1 matrix for quick return */ 00311 if (*n == 1) { 00312 if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) || 00313 ( irange == 2 && *il == 1 && *iu == 1 ) ) { 00314 *m = 1; 00315 w[1] = d__[1]; 00316 /* The computation error of the eigenvalue is zero */ 00317 werr[1] = 0.; 00318 wgap[1] = 0.; 00319 iblock[1] = 1; 00320 indexw[1] = 1; 00321 gers[1] = d__[1]; 00322 gers[2] = d__[1]; 00323 } 00324 /* store the shift for the initial RRR, which is zero in this case */ 00325 e[1] = 0.; 00326 return 0; 00327 } 00328 /* General case: tridiagonal matrix of order > 1 */ 00329 00330 /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ 00331 /* Compute maximum off-diagonal entry and pivmin. */ 00332 gl = d__[1]; 00333 gu = d__[1]; 00334 eold = 0.; 00335 emax = 0.; 00336 e[*n] = 0.; 00337 i__1 = *n; 00338 for (i__ = 1; i__ <= i__1; ++i__) { 00339 werr[i__] = 0.; 00340 wgap[i__] = 0.; 00341 eabs = (d__1 = e[i__], absMACRO(d__1)); 00342 if (eabs >= emax) { 00343 emax = eabs; 00344 } 00345 tmp1 = eabs + eold; 00346 gers[(i__ << 1) - 1] = d__[i__] - tmp1; 00347 /* Computing MIN */ 00348 d__1 = gl, d__2 = gers[(i__ << 1) - 1]; 00349 gl = minMACRO(d__1,d__2); 00350 gers[i__ * 2] = d__[i__] + tmp1; 00351 /* Computing MAX */ 00352 d__1 = gu, d__2 = gers[i__ * 2]; 00353 gu = maxMACRO(d__1,d__2); 00354 eold = eabs; 00355 /* L5: */ 00356 } 00357 /* The minimum pivot allowed in the Sturm sequence for T */ 00358 /* Computing MAX */ 00359 /* Computing 2nd power */ 00360 d__3 = emax; 00361 d__1 = 1., d__2 = d__3 * d__3; 00362 *pivmin = safmin * maxMACRO(d__1,d__2); 00363 /* Compute spectral diameter. The Gerschgorin bounds give an */ 00364 /* estimate that is wrong by at most a factor of SQRT(2) */ 00365 spdiam = gu - gl; 00366 /* Compute splitting points */ 00367 template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & 00368 iinfo); 00369 /* Can force use of bisection instead of faster DQDS. */ 00370 /* Option left in the code for future multisection work. */ 00371 forceb = FALSE_; 00372 /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ 00373 /* explicitly wants bisection. */ 00374 usedqd = irange == 1 && ! forceb; 00375 if (irange == 1 && ! forceb) { 00376 /* Set interval [VL,VU] that contains all eigenvalues */ 00377 *vl = gl; 00378 *vu = gu; 00379 } else { 00380 /* We call DLARRD to find crude approximations to the eigenvalues */ 00381 /* in the desired range. In case IRANGE = INDRNG, we also obtain the */ 00382 /* interval (VL,VU] that contains all the wanted eigenvalues. */ 00383 /* An interval [LEFT,RIGHT] has converged if */ 00384 /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ 00385 /* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */ 00386 template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ 00387 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], 00388 vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); 00389 if (iinfo != 0) { 00390 *info = -1; 00391 return 0; 00392 } 00393 /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ 00394 i__1 = *n; 00395 for (i__ = mm + 1; i__ <= i__1; ++i__) { 00396 w[i__] = 0.; 00397 werr[i__] = 0.; 00398 iblock[i__] = 0; 00399 indexw[i__] = 0; 00400 /* L14: */ 00401 } 00402 } 00403 /* ** */ 00404 /* Loop over unreduced blocks */ 00405 ibegin = 1; 00406 wbegin = 1; 00407 i__1 = *nsplit; 00408 for (jblk = 1; jblk <= i__1; ++jblk) { 00409 iend = isplit[jblk]; 00410 in = iend - ibegin + 1; 00411 /* 1 X 1 block */ 00412 if (in == 1) { 00413 if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin] 00414 <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) { 00415 ++(*m); 00416 w[*m] = d__[ibegin]; 00417 werr[*m] = 0.; 00418 /* The gap for a single block doesn't matter for the later */ 00419 /* algorithm and is assigned an arbitrary large value */ 00420 wgap[*m] = 0.; 00421 iblock[*m] = jblk; 00422 indexw[*m] = 1; 00423 ++wbegin; 00424 } 00425 /* E( IEND ) holds the shift for the initial RRR */ 00426 e[iend] = 0.; 00427 ibegin = iend + 1; 00428 goto L170; 00429 } 00430 00431 /* Blocks of size larger than 1x1 */ 00432 00433 /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ 00434 e[iend] = 0.; 00435 00436 /* Find local outer bounds GL,GU for the block */ 00437 gl = d__[ibegin]; 00438 gu = d__[ibegin]; 00439 i__2 = iend; 00440 for (i__ = ibegin; i__ <= i__2; ++i__) { 00441 /* Computing MIN */ 00442 d__1 = gers[(i__ << 1) - 1]; 00443 gl = minMACRO(d__1,gl); 00444 /* Computing MAX */ 00445 d__1 = gers[i__ * 2]; 00446 gu = maxMACRO(d__1,gu); 00447 /* L15: */ 00448 } 00449 spdiam = gu - gl; 00450 if (! (irange == 1 && ! forceb)) { 00451 /* Count the number of eigenvalues in the current block. */ 00452 mb = 0; 00453 i__2 = mm; 00454 for (i__ = wbegin; i__ <= i__2; ++i__) { 00455 if (iblock[i__] == jblk) { 00456 ++mb; 00457 } else { 00458 goto L21; 00459 } 00460 /* L20: */ 00461 } 00462 L21: 00463 if (mb == 0) { 00464 /* No eigenvalue in the current block lies in the desired range */ 00465 /* E( IEND ) holds the shift for the initial RRR */ 00466 e[iend] = 0.; 00467 ibegin = iend + 1; 00468 goto L170; 00469 } else { 00470 /* Decide whether dqds or bisection is more efficient */ 00471 usedqd = (Treal) mb > in * .5 && ! forceb; 00472 wend = wbegin + mb - 1; 00473 /* Calculate gaps for the current block */ 00474 /* In later stages, when representations for individual */ 00475 /* eigenvalues are different, we use SIGMA = E( IEND ). */ 00476 sigma = 0.; 00477 i__2 = wend - 1; 00478 for (i__ = wbegin; i__ <= i__2; ++i__) { 00479 /* Computing MAX */ 00480 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + 00481 werr[i__]); 00482 wgap[i__] = maxMACRO(d__1,d__2); 00483 /* L30: */ 00484 } 00485 /* Computing MAX */ 00486 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); 00487 wgap[wend] = maxMACRO(d__1,d__2); 00488 /* Find local index of the first and last desired evalue. */ 00489 indl = indexw[wbegin]; 00490 indu = indexw[wend]; 00491 } 00492 } 00493 if ( ( irange == 1 && ! forceb ) || usedqd) { 00494 /* Case of DQDS */ 00495 /* Find approximations to the extremal eigenvalues of the block */ 00496 template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & 00497 rtl, &tmp, &tmp1, &iinfo); 00498 if (iinfo != 0) { 00499 *info = -1; 00500 return 0; 00501 } 00502 /* Computing MAX */ 00503 d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, 00504 absMACRO(d__1)); 00505 isleft = maxMACRO(d__2,d__3); 00506 template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & 00507 rtl, &tmp, &tmp1, &iinfo); 00508 if (iinfo != 0) { 00509 *info = -1; 00510 return 0; 00511 } 00512 /* Computing MIN */ 00513 d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, 00514 absMACRO(d__1)); 00515 isrght = minMACRO(d__2,d__3); 00516 /* Improve the estimate of the spectral diameter */ 00517 spdiam = isrght - isleft; 00518 } else { 00519 /* Case of bisection */ 00520 /* Find approximations to the wanted extremal eigenvalues */ 00521 /* Computing MAX */ 00522 d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = 00523 w[wbegin] - werr[wbegin], absMACRO(d__1)); 00524 isleft = maxMACRO(d__2,d__3); 00525 /* Computing MIN */ 00526 d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ 00527 wend] + werr[wend], absMACRO(d__1)); 00528 isrght = minMACRO(d__2,d__3); 00529 } 00530 /* Decide whether the base representation for the current block */ 00531 /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ 00532 /* should be on the left or the right end of the current block. */ 00533 /* The strategy is to shift to the end which is "more populated" */ 00534 /* Furthermore, decide whether to use DQDS for the computation of */ 00535 /* the eigenvalue approximations at the end of DLARRE or bisection. */ 00536 /* dqds is chosen if all eigenvalues are desired or the number of */ 00537 /* eigenvalues to be computed is large compared to the blocksize. */ 00538 if (irange == 1 && ! forceb) { 00539 /* If all the eigenvalues have to be computed, we use dqd */ 00540 usedqd = TRUE_; 00541 /* INDL is the local index of the first eigenvalue to compute */ 00542 indl = 1; 00543 indu = in; 00544 /* MB = number of eigenvalues to compute */ 00545 mb = in; 00546 wend = wbegin + mb - 1; 00547 /* Define 1/4 and 3/4 points of the spectrum */ 00548 s1 = isleft + spdiam * .25; 00549 s2 = isrght - spdiam * .25; 00550 } else { 00551 /* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */ 00552 /* approximation. */ 00553 /* choose sigma */ 00554 if (usedqd) { 00555 s1 = isleft + spdiam * .25; 00556 s2 = isrght - spdiam * .25; 00557 } else { 00558 tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl); 00559 s1 = maxMACRO(isleft,*vl) + tmp * .25; 00560 s2 = minMACRO(isrght,*vu) - tmp * .25; 00561 } 00562 } 00563 /* Compute the negcount at the 1/4 and 3/4 points */ 00564 if (mb > 1) { 00565 template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & 00566 cnt, &cnt1, &cnt2, &iinfo); 00567 } 00568 if (mb == 1) { 00569 sigma = gl; 00570 sgndef = 1.; 00571 } else if (cnt1 - indl >= indu - cnt2) { 00572 if (irange == 1 && ! forceb) { 00573 sigma = maxMACRO(isleft,gl); 00574 } else if (usedqd) { 00575 /* use Gerschgorin bound as shift to get pos def matrix */ 00576 /* for dqds */ 00577 sigma = isleft; 00578 } else { 00579 /* use approximation of the first desired eigenvalue of the */ 00580 /* block as shift */ 00581 sigma = maxMACRO(isleft,*vl); 00582 } 00583 sgndef = 1.; 00584 } else { 00585 if (irange == 1 && ! forceb) { 00586 sigma = minMACRO(isrght,gu); 00587 } else if (usedqd) { 00588 /* use Gerschgorin bound as shift to get neg def matrix */ 00589 /* for dqds */ 00590 sigma = isrght; 00591 } else { 00592 /* use approximation of the first desired eigenvalue of the */ 00593 /* block as shift */ 00594 sigma = minMACRO(isrght,*vu); 00595 } 00596 sgndef = -1.; 00597 } 00598 /* An initial SIGMA has been chosen that will be used for computing */ 00599 /* T - SIGMA I = L D L^T */ 00600 /* Define the increment TAU of the shift in case the initial shift */ 00601 /* needs to be refined to obtain a factorization with not too much */ 00602 /* element growth. */ 00603 if (usedqd) { 00604 /* The initial SIGMA was to the outer end of the spectrum */ 00605 /* the matrix is definite and we need not retreat. */ 00606 tau = spdiam * eps * *n + *pivmin * 2.; 00607 } else { 00608 if (mb > 1) { 00609 clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; 00610 avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO( 00611 d__1)); 00612 if (sgndef == 1.) { 00613 /* Computing MAX */ 00614 d__1 = wgap[wbegin]; 00615 tau = maxMACRO(d__1,avgap) * .5; 00616 /* Computing MAX */ 00617 d__1 = tau, d__2 = werr[wbegin]; 00618 tau = maxMACRO(d__1,d__2); 00619 } else { 00620 /* Computing MAX */ 00621 d__1 = wgap[wend - 1]; 00622 tau = maxMACRO(d__1,avgap) * .5; 00623 /* Computing MAX */ 00624 d__1 = tau, d__2 = werr[wend]; 00625 tau = maxMACRO(d__1,d__2); 00626 } 00627 } else { 00628 tau = werr[wbegin]; 00629 } 00630 } 00631 00632 for (idum = 1; idum <= 6; ++idum) { 00633 /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ 00634 /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ 00635 /* pivots in WORK(2*IN+1:3*IN) */ 00636 dpivot = d__[ibegin] - sigma; 00637 work[1] = dpivot; 00638 dmax__ = absMACRO(work[1]); 00639 j = ibegin; 00640 i__2 = in - 1; 00641 for (i__ = 1; i__ <= i__2; ++i__) { 00642 work[(in << 1) + i__] = 1. / work[i__]; 00643 tmp = e[j] * work[(in << 1) + i__]; 00644 work[in + i__] = tmp; 00645 dpivot = d__[j + 1] - sigma - tmp * e[j]; 00646 work[i__ + 1] = dpivot; 00647 /* Computing MAX */ 00648 d__1 = dmax__, d__2 = absMACRO(dpivot); 00649 dmax__ = maxMACRO(d__1,d__2); 00650 ++j; 00651 /* L70: */ 00652 } 00653 /* check for element growth */ 00654 if (dmax__ > spdiam * 64.) { 00655 norep = TRUE_; 00656 } else { 00657 norep = FALSE_; 00658 } 00659 if (usedqd && ! norep) { 00660 /* Ensure the definiteness of the representation */ 00661 /* All entries of D (of L D L^T) must have the same sign */ 00662 i__2 = in; 00663 for (i__ = 1; i__ <= i__2; ++i__) { 00664 tmp = sgndef * work[i__]; 00665 if (tmp < 0.) { 00666 norep = TRUE_; 00667 } 00668 /* L71: */ 00669 } 00670 } 00671 if (norep) { 00672 /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ 00673 /* shift which makes the matrix definite. So we should end up */ 00674 /* here really only in the case of IRANGE = VALRNG or INDRNG. */ 00675 if (idum == 5) { 00676 if (sgndef == 1.) { 00677 /* The fudged Gerschgorin shift should succeed */ 00678 sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; 00679 } else { 00680 sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; 00681 } 00682 } else { 00683 sigma -= sgndef * tau; 00684 tau *= 2.; 00685 } 00686 } else { 00687 /* an initial RRR is found */ 00688 goto L83; 00689 } 00690 /* L80: */ 00691 } 00692 /* if the program reaches this point, no base representation could be */ 00693 /* found in MAXTRY iterations. */ 00694 *info = 2; 00695 return 0; 00696 L83: 00697 /* At this point, we have found an initial base representation */ 00698 /* T - SIGMA I = L D L^T with not too much element growth. */ 00699 /* Store the shift. */ 00700 e[iend] = sigma; 00701 /* Store D and L. */ 00702 template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1); 00703 i__2 = in - 1; 00704 template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); 00705 if (mb > 1) { 00706 00707 /* Perturb each entry of the base representation by a small */ 00708 /* (but random) relative amount to overcome difficulties with */ 00709 /* glued matrices. */ 00710 00711 for (i__ = 1; i__ <= 4; ++i__) { 00712 iseed[i__ - 1] = 1; 00713 /* L122: */ 00714 } 00715 i__2 = (in << 1) - 1; 00716 template_lapack_larnv(&c__2, iseed, &i__2, &work[1]); 00717 i__2 = in - 1; 00718 for (i__ = 1; i__ <= i__2; ++i__) { 00719 d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; 00720 e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; 00721 /* L125: */ 00722 } 00723 d__[iend] *= eps * 4. * work[in] + 1.; 00724 00725 } 00726 00727 /* Don't update the Gerschgorin intervals because keeping track */ 00728 /* of the updates would be too much work in DLARRV. */ 00729 /* We update W instead and use it to locate the proper Gerschgorin */ 00730 /* intervals. */ 00731 /* Compute the required eigenvalues of L D L' by bisection or dqds */ 00732 if (! usedqd) { 00733 /* If DLARRD has been used, shift the eigenvalue approximations */ 00734 /* according to their representation. This is necessary for */ 00735 /* a uniform DLARRV since dqds computes eigenvalues of the */ 00736 /* shifted representation. In DLARRV, W will always hold the */ 00737 /* UNshifted eigenvalue approximation. */ 00738 i__2 = wend; 00739 for (j = wbegin; j <= i__2; ++j) { 00740 w[j] -= sigma; 00741 werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps; 00742 /* L134: */ 00743 } 00744 /* call DLARRB to reduce eigenvalue error of the approximations */ 00745 /* from DLARRD */ 00746 i__2 = iend - 1; 00747 for (i__ = ibegin; i__ <= i__2; ++i__) { 00748 /* Computing 2nd power */ 00749 d__1 = e[i__]; 00750 work[i__] = d__[i__] * (d__1 * d__1); 00751 /* L135: */ 00752 } 00753 /* use bisection to find EV from INDL to INDU */ 00754 i__2 = indl - 1; 00755 template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, 00756 rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & 00757 work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & 00758 iinfo); 00759 if (iinfo != 0) { 00760 *info = -4; 00761 return 0; 00762 } 00763 /* DLARRB computes all gaps correctly except for the last one */ 00764 /* Record distance to VU/GU */ 00765 /* Computing MAX */ 00766 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); 00767 wgap[wend] = maxMACRO(d__1,d__2); 00768 i__2 = indu; 00769 for (i__ = indl; i__ <= i__2; ++i__) { 00770 ++(*m); 00771 iblock[*m] = jblk; 00772 indexw[*m] = i__; 00773 /* L138: */ 00774 } 00775 } else { 00776 /* Call dqds to get all eigs (and then possibly delete unwanted */ 00777 /* eigenvalues). */ 00778 /* Note that dqds finds the eigenvalues of the L D L^T representation */ 00779 /* of T to high relative accuracy. High relative accuracy */ 00780 /* might be lost when the shift of the RRR is subtracted to obtain */ 00781 /* the eigenvalues of T. However, T is not guaranteed to define its */ 00782 /* eigenvalues to high relative accuracy anyway. */ 00783 /* Set RTOL to the order of the tolerance used in DLASQ2 */ 00784 /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ 00785 /* which is usually too large and requires unnecessary work to be */ 00786 /* done by bisection when computing the eigenvectors */ 00787 rtol = template_blas_log((Treal) in) * 4. * eps; 00788 j = ibegin; 00789 i__2 = in - 1; 00790 for (i__ = 1; i__ <= i__2; ++i__) { 00791 work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1)); 00792 work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; 00793 ++j; 00794 /* L140: */ 00795 } 00796 work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1)); 00797 work[in * 2] = 0.; 00798 template_lapack_lasq2(&in, &work[1], &iinfo); 00799 if (iinfo != 0) { 00800 /* If IINFO = -5 then an index is part of a tight cluster */ 00801 /* and should be changed. The index is in IWORK(1) and the */ 00802 /* gap is in WORK(N+1) */ 00803 *info = -5; 00804 return 0; 00805 } else { 00806 /* Test that all eigenvalues are positive as expected */ 00807 i__2 = in; 00808 for (i__ = 1; i__ <= i__2; ++i__) { 00809 if (work[i__] < 0.) { 00810 *info = -6; 00811 return 0; 00812 } 00813 /* L149: */ 00814 } 00815 } 00816 if (sgndef > 0.) { 00817 i__2 = indu; 00818 for (i__ = indl; i__ <= i__2; ++i__) { 00819 ++(*m); 00820 w[*m] = work[in - i__ + 1]; 00821 iblock[*m] = jblk; 00822 indexw[*m] = i__; 00823 /* L150: */ 00824 } 00825 } else { 00826 i__2 = indu; 00827 for (i__ = indl; i__ <= i__2; ++i__) { 00828 ++(*m); 00829 w[*m] = -work[i__]; 00830 iblock[*m] = jblk; 00831 indexw[*m] = i__; 00832 /* L160: */ 00833 } 00834 } 00835 i__2 = *m; 00836 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { 00837 /* the value of RTOL below should be the tolerance in DLASQ2 */ 00838 werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1)); 00839 /* L165: */ 00840 } 00841 i__2 = *m - 1; 00842 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { 00843 /* compute the right gap between the intervals */ 00844 /* Computing MAX */ 00845 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ 00846 i__]); 00847 wgap[i__] = maxMACRO(d__1,d__2); 00848 /* L166: */ 00849 } 00850 /* Computing MAX */ 00851 d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); 00852 wgap[*m] = maxMACRO(d__1,d__2); 00853 } 00854 /* proceed with next block */ 00855 ibegin = iend + 1; 00856 wbegin = wend + 1; 00857 L170: 00858 ; 00859 } 00860 00861 return 0; 00862 00863 /* end of DLARRE */ 00864 00865 } /* dlarre_ */ 00866 00867 #endif