ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_STEQR_HEADER 00036 #define TEMPLATE_LAPACK_STEQR_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_steqr(const char *compz, const integer *n, Treal *d__, 00041 Treal *e, Treal *z__, const integer *ldz, Treal *work, 00042 integer *info) 00043 { 00044 /* -- LAPACK routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 September 30, 1994 00048 00049 00050 Purpose 00051 ======= 00052 00053 DSTEQR computes all eigenvalues and, optionally, eigenvectors of a 00054 symmetric tridiagonal matrix using the implicit QL or QR method. 00055 The eigenvectors of a full or band symmetric matrix can also be found 00056 if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to 00057 tridiagonal form. 00058 00059 Arguments 00060 ========= 00061 00062 COMPZ (input) CHARACTER*1 00063 = 'N': Compute eigenvalues only. 00064 = 'V': Compute eigenvalues and eigenvectors of the original 00065 symmetric matrix. On entry, Z must contain the 00066 orthogonal matrix used to reduce the original matrix 00067 to tridiagonal form. 00068 = 'I': Compute eigenvalues and eigenvectors of the 00069 tridiagonal matrix. Z is initialized to the identity 00070 matrix. 00071 00072 N (input) INTEGER 00073 The order of the matrix. N >= 0. 00074 00075 D (input/output) DOUBLE PRECISION array, dimension (N) 00076 On entry, the diagonal elements of the tridiagonal matrix. 00077 On exit, if INFO = 0, the eigenvalues in ascending order. 00078 00079 E (input/output) DOUBLE PRECISION array, dimension (N-1) 00080 On entry, the (n-1) subdiagonal elements of the tridiagonal 00081 matrix. 00082 On exit, E has been destroyed. 00083 00084 Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) 00085 On entry, if COMPZ = 'V', then Z contains the orthogonal 00086 matrix used in the reduction to tridiagonal form. 00087 On exit, if INFO = 0, then if COMPZ = 'V', Z contains the 00088 orthonormal eigenvectors of the original symmetric matrix, 00089 and if COMPZ = 'I', Z contains the orthonormal eigenvectors 00090 of the symmetric tridiagonal matrix. 00091 If COMPZ = 'N', then Z is not referenced. 00092 00093 LDZ (input) INTEGER 00094 The leading dimension of the array Z. LDZ >= 1, and if 00095 eigenvectors are desired, then LDZ >= max(1,N). 00096 00097 WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) 00098 If COMPZ = 'N', then WORK is not referenced. 00099 00100 INFO (output) INTEGER 00101 = 0: successful exit 00102 < 0: if INFO = -i, the i-th argument had an illegal value 00103 > 0: the algorithm has failed to find all the eigenvalues in 00104 a total of 30*N iterations; if INFO = i, then i 00105 elements of E have not converged to zero; on exit, D 00106 and E contain the elements of a symmetric tridiagonal 00107 matrix which is orthogonally similar to the original 00108 matrix. 00109 00110 ===================================================================== 00111 00112 00113 Test the input parameters. 00114 00115 Parameter adjustments */ 00116 /* Table of constant values */ 00117 Treal c_b9 = 0.; 00118 Treal c_b10 = 1.; 00119 integer c__0 = 0; 00120 integer c__1 = 1; 00121 integer c__2 = 2; 00122 00123 /* System generated locals */ 00124 integer z_dim1, z_offset, i__1, i__2; 00125 Treal d__1, d__2; 00126 /* Local variables */ 00127 integer lend, jtot; 00128 Treal b, c__, f, g; 00129 integer i__, j, k, l, m; 00130 Treal p, r__, s; 00131 Treal anorm; 00132 integer l1; 00133 integer lendm1, lendp1; 00134 integer ii; 00135 integer mm, iscale; 00136 Treal safmin; 00137 Treal safmax; 00138 integer lendsv; 00139 Treal ssfmin; 00140 integer nmaxit, icompz; 00141 Treal ssfmax; 00142 integer lm1, mm1, nm1; 00143 Treal rt1, rt2, eps; 00144 integer lsv; 00145 Treal tst, eps2; 00146 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] 00147 00148 00149 --d__; 00150 --e; 00151 z_dim1 = *ldz; 00152 z_offset = 1 + z_dim1 * 1; 00153 z__ -= z_offset; 00154 --work; 00155 00156 /* Function Body */ 00157 *info = 0; 00158 00159 if (template_blas_lsame(compz, "N")) { 00160 icompz = 0; 00161 } else if (template_blas_lsame(compz, "V")) { 00162 icompz = 1; 00163 } else if (template_blas_lsame(compz, "I")) { 00164 icompz = 2; 00165 } else { 00166 icompz = -1; 00167 } 00168 if (icompz < 0) { 00169 *info = -1; 00170 } else if (*n < 0) { 00171 *info = -2; 00172 } else if (*ldz < 1 || (icompz > 0 && *ldz < maxMACRO(1,*n) ) ) { 00173 *info = -6; 00174 } 00175 if (*info != 0) { 00176 i__1 = -(*info); 00177 template_blas_erbla("STEQR ", &i__1); 00178 return 0; 00179 } 00180 00181 /* Quick return if possible */ 00182 00183 if (*n == 0) { 00184 return 0; 00185 } 00186 00187 if (*n == 1) { 00188 if (icompz == 2) { 00189 z___ref(1, 1) = 1.; 00190 } 00191 return 0; 00192 } 00193 00194 /* Determine the unit roundoff and over/underflow thresholds. */ 00195 00196 eps = template_lapack_lamch("E", (Treal)0); 00197 /* Computing 2nd power */ 00198 d__1 = eps; 00199 eps2 = d__1 * d__1; 00200 safmin = template_lapack_lamch("S", (Treal)0); 00201 safmax = 1. / safmin; 00202 ssfmax = template_blas_sqrt(safmax) / 3.; 00203 ssfmin = template_blas_sqrt(safmin) / eps2; 00204 00205 /* Compute the eigenvalues and eigenvectors of the tridiagonal 00206 matrix. */ 00207 00208 if (icompz == 2) { 00209 template_lapack_laset("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz); 00210 } 00211 00212 nmaxit = *n * 30; 00213 jtot = 0; 00214 00215 /* Determine where the matrix splits and choose QL or QR iteration 00216 for each block, according to whether top or bottom diagonal 00217 element is smaller. */ 00218 00219 l1 = 1; 00220 nm1 = *n - 1; 00221 00222 L10: 00223 if (l1 > *n) { 00224 goto L160; 00225 } 00226 if (l1 > 1) { 00227 e[l1 - 1] = 0.; 00228 } 00229 if (l1 <= nm1) { 00230 i__1 = nm1; 00231 for (m = l1; m <= i__1; ++m) { 00232 tst = (d__1 = e[m], absMACRO(d__1)); 00233 if (tst == 0.) { 00234 goto L30; 00235 } 00236 if (tst <= template_blas_sqrt((d__1 = d__[m], absMACRO(d__1))) * template_blas_sqrt((d__2 = d__[m 00237 + 1], absMACRO(d__2))) * eps) { 00238 e[m] = 0.; 00239 goto L30; 00240 } 00241 /* L20: */ 00242 } 00243 } 00244 m = *n; 00245 00246 L30: 00247 l = l1; 00248 lsv = l; 00249 lend = m; 00250 lendsv = lend; 00251 l1 = m + 1; 00252 if (lend == l) { 00253 goto L10; 00254 } 00255 00256 /* Scale submatrix in rows and columns L to LEND */ 00257 00258 i__1 = lend - l + 1; 00259 anorm = template_lapack_lanst("I", &i__1, &d__[l], &e[l]); 00260 iscale = 0; 00261 if (anorm == 0.) { 00262 goto L10; 00263 } 00264 if (anorm > ssfmax) { 00265 iscale = 1; 00266 i__1 = lend - l + 1; 00267 template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, 00268 info); 00269 i__1 = lend - l; 00270 template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, 00271 info); 00272 } else if (anorm < ssfmin) { 00273 iscale = 2; 00274 i__1 = lend - l + 1; 00275 template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, 00276 info); 00277 i__1 = lend - l; 00278 template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, 00279 info); 00280 } 00281 00282 /* Choose between QL and QR iteration */ 00283 00284 if ((d__1 = d__[lend], absMACRO(d__1)) < (d__2 = d__[l], absMACRO(d__2))) { 00285 lend = lsv; 00286 l = lendsv; 00287 } 00288 00289 if (lend > l) { 00290 00291 /* QL Iteration 00292 00293 Look for small subdiagonal element. */ 00294 00295 L40: 00296 if (l != lend) { 00297 lendm1 = lend - 1; 00298 i__1 = lendm1; 00299 for (m = l; m <= i__1; ++m) { 00300 /* Computing 2nd power */ 00301 d__2 = (d__1 = e[m], absMACRO(d__1)); 00302 tst = d__2 * d__2; 00303 if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m 00304 + 1], absMACRO(d__2)) + safmin) { 00305 goto L60; 00306 } 00307 /* L50: */ 00308 } 00309 } 00310 00311 m = lend; 00312 00313 L60: 00314 if (m < lend) { 00315 e[m] = 0.; 00316 } 00317 p = d__[l]; 00318 if (m == l) { 00319 goto L80; 00320 } 00321 00322 /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 00323 to compute its eigensystem. */ 00324 00325 if (m == l + 1) { 00326 if (icompz > 0) { 00327 template_lapack_laev2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); 00328 work[l] = c__; 00329 work[*n - 1 + l] = s; 00330 template_lapack_lasr("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & 00331 z___ref(1, l), ldz); 00332 } else { 00333 template_lapack_lae2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); 00334 } 00335 d__[l] = rt1; 00336 d__[l + 1] = rt2; 00337 e[l] = 0.; 00338 l += 2; 00339 if (l <= lend) { 00340 goto L40; 00341 } 00342 goto L140; 00343 } 00344 00345 if (jtot == nmaxit) { 00346 goto L140; 00347 } 00348 ++jtot; 00349 00350 /* Form shift. */ 00351 00352 g = (d__[l + 1] - p) / (e[l] * 2.); 00353 r__ = template_lapack_lapy2(&g, &c_b10); 00354 g = d__[m] - p + e[l] / (g + template_lapack_d_sign(&r__, &g)); 00355 00356 s = 1.; 00357 c__ = 1.; 00358 p = 0.; 00359 00360 /* Inner loop */ 00361 00362 mm1 = m - 1; 00363 i__1 = l; 00364 for (i__ = mm1; i__ >= i__1; --i__) { 00365 f = s * e[i__]; 00366 b = c__ * e[i__]; 00367 template_lapack_lartg(&g, &f, &c__, &s, &r__); 00368 if (i__ != m - 1) { 00369 e[i__ + 1] = r__; 00370 } 00371 g = d__[i__ + 1] - p; 00372 r__ = (d__[i__] - g) * s + c__ * 2. * b; 00373 p = s * r__; 00374 d__[i__ + 1] = g + p; 00375 g = c__ * r__ - b; 00376 00377 /* If eigenvectors are desired, then save rotations. */ 00378 00379 if (icompz > 0) { 00380 work[i__] = c__; 00381 work[*n - 1 + i__] = -s; 00382 } 00383 00384 /* L70: */ 00385 } 00386 00387 /* If eigenvectors are desired, then apply saved rotations. */ 00388 00389 if (icompz > 0) { 00390 mm = m - l + 1; 00391 template_lapack_lasr("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], & 00392 z___ref(1, l), ldz); 00393 } 00394 00395 d__[l] -= p; 00396 e[l] = g; 00397 goto L40; 00398 00399 /* Eigenvalue found. */ 00400 00401 L80: 00402 d__[l] = p; 00403 00404 ++l; 00405 if (l <= lend) { 00406 goto L40; 00407 } 00408 goto L140; 00409 00410 } else { 00411 00412 /* QR Iteration 00413 00414 Look for small superdiagonal element. */ 00415 00416 L90: 00417 if (l != lend) { 00418 lendp1 = lend + 1; 00419 i__1 = lendp1; 00420 for (m = l; m >= i__1; --m) { 00421 /* Computing 2nd power */ 00422 d__2 = (d__1 = e[m - 1], absMACRO(d__1)); 00423 tst = d__2 * d__2; 00424 if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m 00425 - 1], absMACRO(d__2)) + safmin) { 00426 goto L110; 00427 } 00428 /* L100: */ 00429 } 00430 } 00431 00432 m = lend; 00433 00434 L110: 00435 if (m > lend) { 00436 e[m - 1] = 0.; 00437 } 00438 p = d__[l]; 00439 if (m == l) { 00440 goto L130; 00441 } 00442 00443 /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 00444 to compute its eigensystem. */ 00445 00446 if (m == l - 1) { 00447 if (icompz > 0) { 00448 template_lapack_laev2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) 00449 ; 00450 work[m] = c__; 00451 work[*n - 1 + m] = s; 00452 template_lapack_lasr("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & 00453 z___ref(1, l - 1), ldz); 00454 } else { 00455 template_lapack_lae2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); 00456 } 00457 d__[l - 1] = rt1; 00458 d__[l] = rt2; 00459 e[l - 1] = 0.; 00460 l += -2; 00461 if (l >= lend) { 00462 goto L90; 00463 } 00464 goto L140; 00465 } 00466 00467 if (jtot == nmaxit) { 00468 goto L140; 00469 } 00470 ++jtot; 00471 00472 /* Form shift. */ 00473 00474 g = (d__[l - 1] - p) / (e[l - 1] * 2.); 00475 r__ = template_lapack_lapy2(&g, &c_b10); 00476 g = d__[m] - p + e[l - 1] / (g + template_lapack_d_sign(&r__, &g)); 00477 00478 s = 1.; 00479 c__ = 1.; 00480 p = 0.; 00481 00482 /* Inner loop */ 00483 00484 lm1 = l - 1; 00485 i__1 = lm1; 00486 for (i__ = m; i__ <= i__1; ++i__) { 00487 f = s * e[i__]; 00488 b = c__ * e[i__]; 00489 template_lapack_lartg(&g, &f, &c__, &s, &r__); 00490 if (i__ != m) { 00491 e[i__ - 1] = r__; 00492 } 00493 g = d__[i__] - p; 00494 r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; 00495 p = s * r__; 00496 d__[i__] = g + p; 00497 g = c__ * r__ - b; 00498 00499 /* If eigenvectors are desired, then save rotations. */ 00500 00501 if (icompz > 0) { 00502 work[i__] = c__; 00503 work[*n - 1 + i__] = s; 00504 } 00505 00506 /* L120: */ 00507 } 00508 00509 /* If eigenvectors are desired, then apply saved rotations. */ 00510 00511 if (icompz > 0) { 00512 mm = l - m + 1; 00513 template_lapack_lasr("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], & 00514 z___ref(1, m), ldz); 00515 } 00516 00517 d__[l] -= p; 00518 e[lm1] = g; 00519 goto L90; 00520 00521 /* Eigenvalue found. */ 00522 00523 L130: 00524 d__[l] = p; 00525 00526 --l; 00527 if (l >= lend) { 00528 goto L90; 00529 } 00530 goto L140; 00531 00532 } 00533 00534 /* Undo scaling if necessary */ 00535 00536 L140: 00537 if (iscale == 1) { 00538 i__1 = lendsv - lsv + 1; 00539 template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], 00540 n, info); 00541 i__1 = lendsv - lsv; 00542 template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, 00543 info); 00544 } else if (iscale == 2) { 00545 i__1 = lendsv - lsv + 1; 00546 template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], 00547 n, info); 00548 i__1 = lendsv - lsv; 00549 template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, 00550 info); 00551 } 00552 00553 /* Check for no convergence to an eigenvalue after a total 00554 of N*MAXIT iterations. */ 00555 00556 if (jtot < nmaxit) { 00557 goto L10; 00558 } 00559 i__1 = *n - 1; 00560 for (i__ = 1; i__ <= i__1; ++i__) { 00561 if (e[i__] != 0.) { 00562 ++(*info); 00563 } 00564 /* L150: */ 00565 } 00566 goto L190; 00567 00568 /* Order eigenvalues and eigenvectors. */ 00569 00570 L160: 00571 if (icompz == 0) { 00572 00573 /* Use Quick Sort */ 00574 00575 template_lapack_lasrt("I", n, &d__[1], info); 00576 00577 } else { 00578 00579 /* Use Selection Sort to minimize swaps of eigenvectors */ 00580 00581 i__1 = *n; 00582 for (ii = 2; ii <= i__1; ++ii) { 00583 i__ = ii - 1; 00584 k = i__; 00585 p = d__[i__]; 00586 i__2 = *n; 00587 for (j = ii; j <= i__2; ++j) { 00588 if (d__[j] < p) { 00589 k = j; 00590 p = d__[j]; 00591 } 00592 /* L170: */ 00593 } 00594 if (k != i__) { 00595 d__[k] = d__[i__]; 00596 d__[i__] = p; 00597 template_blas_swap(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1); 00598 } 00599 /* L180: */ 00600 } 00601 } 00602 00603 L190: 00604 return 0; 00605 00606 /* End of DSTEQR */ 00607 00608 } /* dsteqr_ */ 00609 00610 #undef z___ref 00611 00612 00613 #endif