ergo
template_lapack_steqr.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_STEQR_HEADER
00036 #define TEMPLATE_LAPACK_STEQR_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_steqr(const char *compz, const integer *n, Treal *d__, 
00041         Treal *e, Treal *z__, const integer *ldz, Treal *work, 
00042         integer *info)
00043 {
00044 /*  -- LAPACK routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        September 30, 1994   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DSTEQR computes all eigenvalues and, optionally, eigenvectors of a   
00054     symmetric tridiagonal matrix using the implicit QL or QR method.   
00055     The eigenvectors of a full or band symmetric matrix can also be found   
00056     if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to   
00057     tridiagonal form.   
00058 
00059     Arguments   
00060     =========   
00061 
00062     COMPZ   (input) CHARACTER*1   
00063             = 'N':  Compute eigenvalues only.   
00064             = 'V':  Compute eigenvalues and eigenvectors of the original   
00065                     symmetric matrix.  On entry, Z must contain the   
00066                     orthogonal matrix used to reduce the original matrix   
00067                     to tridiagonal form.   
00068             = 'I':  Compute eigenvalues and eigenvectors of the   
00069                     tridiagonal matrix.  Z is initialized to the identity   
00070                     matrix.   
00071 
00072     N       (input) INTEGER   
00073             The order of the matrix.  N >= 0.   
00074 
00075     D       (input/output) DOUBLE PRECISION array, dimension (N)   
00076             On entry, the diagonal elements of the tridiagonal matrix.   
00077             On exit, if INFO = 0, the eigenvalues in ascending order.   
00078 
00079     E       (input/output) DOUBLE PRECISION array, dimension (N-1)   
00080             On entry, the (n-1) subdiagonal elements of the tridiagonal   
00081             matrix.   
00082             On exit, E has been destroyed.   
00083 
00084     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)   
00085             On entry, if  COMPZ = 'V', then Z contains the orthogonal   
00086             matrix used in the reduction to tridiagonal form.   
00087             On exit, if INFO = 0, then if  COMPZ = 'V', Z contains the   
00088             orthonormal eigenvectors of the original symmetric matrix,   
00089             and if COMPZ = 'I', Z contains the orthonormal eigenvectors   
00090             of the symmetric tridiagonal matrix.   
00091             If COMPZ = 'N', then Z is not referenced.   
00092 
00093     LDZ     (input) INTEGER   
00094             The leading dimension of the array Z.  LDZ >= 1, and if   
00095             eigenvectors are desired, then  LDZ >= max(1,N).   
00096 
00097     WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))   
00098             If COMPZ = 'N', then WORK is not referenced.   
00099 
00100     INFO    (output) INTEGER   
00101             = 0:  successful exit   
00102             < 0:  if INFO = -i, the i-th argument had an illegal value   
00103             > 0:  the algorithm has failed to find all the eigenvalues in   
00104                   a total of 30*N iterations; if INFO = i, then i   
00105                   elements of E have not converged to zero; on exit, D   
00106                   and E contain the elements of a symmetric tridiagonal   
00107                   matrix which is orthogonally similar to the original   
00108                   matrix.   
00109 
00110     =====================================================================   
00111 
00112 
00113        Test the input parameters.   
00114 
00115        Parameter adjustments */
00116     /* Table of constant values */
00117      Treal c_b9 = 0.;
00118      Treal c_b10 = 1.;
00119      integer c__0 = 0;
00120      integer c__1 = 1;
00121      integer c__2 = 2;
00122     
00123     /* System generated locals */
00124     integer z_dim1, z_offset, i__1, i__2;
00125     Treal d__1, d__2;
00126     /* Local variables */
00127      integer lend, jtot;
00128      Treal b, c__, f, g;
00129      integer i__, j, k, l, m;
00130      Treal p, r__, s;
00131      Treal anorm;
00132      integer l1;
00133      integer lendm1, lendp1;
00134      integer ii;
00135      integer mm, iscale;
00136      Treal safmin;
00137      Treal safmax;
00138      integer lendsv;
00139      Treal ssfmin;
00140      integer nmaxit, icompz;
00141      Treal ssfmax;
00142      integer lm1, mm1, nm1;
00143      Treal rt1, rt2, eps;
00144      integer lsv;
00145      Treal tst, eps2;
00146 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
00147 
00148 
00149     --d__;
00150     --e;
00151     z_dim1 = *ldz;
00152     z_offset = 1 + z_dim1 * 1;
00153     z__ -= z_offset;
00154     --work;
00155 
00156     /* Function Body */
00157     *info = 0;
00158 
00159     if (template_blas_lsame(compz, "N")) {
00160         icompz = 0;
00161     } else if (template_blas_lsame(compz, "V")) {
00162         icompz = 1;
00163     } else if (template_blas_lsame(compz, "I")) {
00164         icompz = 2;
00165     } else {
00166         icompz = -1;
00167     }
00168     if (icompz < 0) {
00169         *info = -1;
00170     } else if (*n < 0) {
00171         *info = -2;
00172     } else if (*ldz < 1 || (icompz > 0 && *ldz < maxMACRO(1,*n) ) ) {
00173         *info = -6;
00174     }
00175     if (*info != 0) {
00176         i__1 = -(*info);
00177         template_blas_erbla("STEQR ", &i__1);
00178         return 0;
00179     }
00180 
00181 /*     Quick return if possible */
00182 
00183     if (*n == 0) {
00184         return 0;
00185     }
00186 
00187     if (*n == 1) {
00188         if (icompz == 2) {
00189             z___ref(1, 1) = 1.;
00190         }
00191         return 0;
00192     }
00193 
00194 /*     Determine the unit roundoff and over/underflow thresholds. */
00195 
00196     eps = template_lapack_lamch("E", (Treal)0);
00197 /* Computing 2nd power */
00198     d__1 = eps;
00199     eps2 = d__1 * d__1;
00200     safmin = template_lapack_lamch("S", (Treal)0);
00201     safmax = 1. / safmin;
00202     ssfmax = template_blas_sqrt(safmax) / 3.;
00203     ssfmin = template_blas_sqrt(safmin) / eps2;
00204 
00205 /*     Compute the eigenvalues and eigenvectors of the tridiagonal   
00206        matrix. */
00207 
00208     if (icompz == 2) {
00209         template_lapack_laset("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz);
00210     }
00211 
00212     nmaxit = *n * 30;
00213     jtot = 0;
00214 
00215 /*     Determine where the matrix splits and choose QL or QR iteration   
00216        for each block, according to whether top or bottom diagonal   
00217        element is smaller. */
00218 
00219     l1 = 1;
00220     nm1 = *n - 1;
00221 
00222 L10:
00223     if (l1 > *n) {
00224         goto L160;
00225     }
00226     if (l1 > 1) {
00227         e[l1 - 1] = 0.;
00228     }
00229     if (l1 <= nm1) {
00230         i__1 = nm1;
00231         for (m = l1; m <= i__1; ++m) {
00232             tst = (d__1 = e[m], absMACRO(d__1));
00233             if (tst == 0.) {
00234                 goto L30;
00235             }
00236             if (tst <= template_blas_sqrt((d__1 = d__[m], absMACRO(d__1))) * template_blas_sqrt((d__2 = d__[m 
00237                     + 1], absMACRO(d__2))) * eps) {
00238                 e[m] = 0.;
00239                 goto L30;
00240             }
00241 /* L20: */
00242         }
00243     }
00244     m = *n;
00245 
00246 L30:
00247     l = l1;
00248     lsv = l;
00249     lend = m;
00250     lendsv = lend;
00251     l1 = m + 1;
00252     if (lend == l) {
00253         goto L10;
00254     }
00255 
00256 /*     Scale submatrix in rows and columns L to LEND */
00257 
00258     i__1 = lend - l + 1;
00259     anorm = template_lapack_lanst("I", &i__1, &d__[l], &e[l]);
00260     iscale = 0;
00261     if (anorm == 0.) {
00262         goto L10;
00263     }
00264     if (anorm > ssfmax) {
00265         iscale = 1;
00266         i__1 = lend - l + 1;
00267         template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, 
00268                 info);
00269         i__1 = lend - l;
00270         template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, 
00271                 info);
00272     } else if (anorm < ssfmin) {
00273         iscale = 2;
00274         i__1 = lend - l + 1;
00275         template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, 
00276                 info);
00277         i__1 = lend - l;
00278         template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, 
00279                 info);
00280     }
00281 
00282 /*     Choose between QL and QR iteration */
00283 
00284     if ((d__1 = d__[lend], absMACRO(d__1)) < (d__2 = d__[l], absMACRO(d__2))) {
00285         lend = lsv;
00286         l = lendsv;
00287     }
00288 
00289     if (lend > l) {
00290 
00291 /*        QL Iteration   
00292 
00293           Look for small subdiagonal element. */
00294 
00295 L40:
00296         if (l != lend) {
00297             lendm1 = lend - 1;
00298             i__1 = lendm1;
00299             for (m = l; m <= i__1; ++m) {
00300 /* Computing 2nd power */
00301                 d__2 = (d__1 = e[m], absMACRO(d__1));
00302                 tst = d__2 * d__2;
00303                 if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m 
00304                         + 1], absMACRO(d__2)) + safmin) {
00305                     goto L60;
00306                 }
00307 /* L50: */
00308             }
00309         }
00310 
00311         m = lend;
00312 
00313 L60:
00314         if (m < lend) {
00315             e[m] = 0.;
00316         }
00317         p = d__[l];
00318         if (m == l) {
00319             goto L80;
00320         }
00321 
00322 /*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2   
00323           to compute its eigensystem. */
00324 
00325         if (m == l + 1) {
00326             if (icompz > 0) {
00327                 template_lapack_laev2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
00328                 work[l] = c__;
00329                 work[*n - 1 + l] = s;
00330                 template_lapack_lasr("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
00331                         z___ref(1, l), ldz);
00332             } else {
00333                 template_lapack_lae2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
00334             }
00335             d__[l] = rt1;
00336             d__[l + 1] = rt2;
00337             e[l] = 0.;
00338             l += 2;
00339             if (l <= lend) {
00340                 goto L40;
00341             }
00342             goto L140;
00343         }
00344 
00345         if (jtot == nmaxit) {
00346             goto L140;
00347         }
00348         ++jtot;
00349 
00350 /*        Form shift. */
00351 
00352         g = (d__[l + 1] - p) / (e[l] * 2.);
00353         r__ = template_lapack_lapy2(&g, &c_b10);
00354         g = d__[m] - p + e[l] / (g + template_lapack_d_sign(&r__, &g));
00355 
00356         s = 1.;
00357         c__ = 1.;
00358         p = 0.;
00359 
00360 /*        Inner loop */
00361 
00362         mm1 = m - 1;
00363         i__1 = l;
00364         for (i__ = mm1; i__ >= i__1; --i__) {
00365             f = s * e[i__];
00366             b = c__ * e[i__];
00367             template_lapack_lartg(&g, &f, &c__, &s, &r__);
00368             if (i__ != m - 1) {
00369                 e[i__ + 1] = r__;
00370             }
00371             g = d__[i__ + 1] - p;
00372             r__ = (d__[i__] - g) * s + c__ * 2. * b;
00373             p = s * r__;
00374             d__[i__ + 1] = g + p;
00375             g = c__ * r__ - b;
00376 
00377 /*           If eigenvectors are desired, then save rotations. */
00378 
00379             if (icompz > 0) {
00380                 work[i__] = c__;
00381                 work[*n - 1 + i__] = -s;
00382             }
00383 
00384 /* L70: */
00385         }
00386 
00387 /*        If eigenvectors are desired, then apply saved rotations. */
00388 
00389         if (icompz > 0) {
00390             mm = m - l + 1;
00391             template_lapack_lasr("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &
00392                     z___ref(1, l), ldz);
00393         }
00394 
00395         d__[l] -= p;
00396         e[l] = g;
00397         goto L40;
00398 
00399 /*        Eigenvalue found. */
00400 
00401 L80:
00402         d__[l] = p;
00403 
00404         ++l;
00405         if (l <= lend) {
00406             goto L40;
00407         }
00408         goto L140;
00409 
00410     } else {
00411 
00412 /*        QR Iteration   
00413 
00414           Look for small superdiagonal element. */
00415 
00416 L90:
00417         if (l != lend) {
00418             lendp1 = lend + 1;
00419             i__1 = lendp1;
00420             for (m = l; m >= i__1; --m) {
00421 /* Computing 2nd power */
00422                 d__2 = (d__1 = e[m - 1], absMACRO(d__1));
00423                 tst = d__2 * d__2;
00424                 if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m 
00425                         - 1], absMACRO(d__2)) + safmin) {
00426                     goto L110;
00427                 }
00428 /* L100: */
00429             }
00430         }
00431 
00432         m = lend;
00433 
00434 L110:
00435         if (m > lend) {
00436             e[m - 1] = 0.;
00437         }
00438         p = d__[l];
00439         if (m == l) {
00440             goto L130;
00441         }
00442 
00443 /*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2   
00444           to compute its eigensystem. */
00445 
00446         if (m == l - 1) {
00447             if (icompz > 0) {
00448                 template_lapack_laev2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
00449                         ;
00450                 work[m] = c__;
00451                 work[*n - 1 + m] = s;
00452                 template_lapack_lasr("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
00453                         z___ref(1, l - 1), ldz);
00454             } else {
00455                 template_lapack_lae2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
00456             }
00457             d__[l - 1] = rt1;
00458             d__[l] = rt2;
00459             e[l - 1] = 0.;
00460             l += -2;
00461             if (l >= lend) {
00462                 goto L90;
00463             }
00464             goto L140;
00465         }
00466 
00467         if (jtot == nmaxit) {
00468             goto L140;
00469         }
00470         ++jtot;
00471 
00472 /*        Form shift. */
00473 
00474         g = (d__[l - 1] - p) / (e[l - 1] * 2.);
00475         r__ = template_lapack_lapy2(&g, &c_b10);
00476         g = d__[m] - p + e[l - 1] / (g + template_lapack_d_sign(&r__, &g));
00477 
00478         s = 1.;
00479         c__ = 1.;
00480         p = 0.;
00481 
00482 /*        Inner loop */
00483 
00484         lm1 = l - 1;
00485         i__1 = lm1;
00486         for (i__ = m; i__ <= i__1; ++i__) {
00487             f = s * e[i__];
00488             b = c__ * e[i__];
00489             template_lapack_lartg(&g, &f, &c__, &s, &r__);
00490             if (i__ != m) {
00491                 e[i__ - 1] = r__;
00492             }
00493             g = d__[i__] - p;
00494             r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b;
00495             p = s * r__;
00496             d__[i__] = g + p;
00497             g = c__ * r__ - b;
00498 
00499 /*           If eigenvectors are desired, then save rotations. */
00500 
00501             if (icompz > 0) {
00502                 work[i__] = c__;
00503                 work[*n - 1 + i__] = s;
00504             }
00505 
00506 /* L120: */
00507         }
00508 
00509 /*        If eigenvectors are desired, then apply saved rotations. */
00510 
00511         if (icompz > 0) {
00512             mm = l - m + 1;
00513             template_lapack_lasr("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &
00514                     z___ref(1, m), ldz);
00515         }
00516 
00517         d__[l] -= p;
00518         e[lm1] = g;
00519         goto L90;
00520 
00521 /*        Eigenvalue found. */
00522 
00523 L130:
00524         d__[l] = p;
00525 
00526         --l;
00527         if (l >= lend) {
00528             goto L90;
00529         }
00530         goto L140;
00531 
00532     }
00533 
00534 /*     Undo scaling if necessary */
00535 
00536 L140:
00537     if (iscale == 1) {
00538         i__1 = lendsv - lsv + 1;
00539         template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], 
00540                 n, info);
00541         i__1 = lendsv - lsv;
00542         template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, 
00543                 info);
00544     } else if (iscale == 2) {
00545         i__1 = lendsv - lsv + 1;
00546         template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], 
00547                 n, info);
00548         i__1 = lendsv - lsv;
00549         template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, 
00550                 info);
00551     }
00552 
00553 /*     Check for no convergence to an eigenvalue after a total   
00554        of N*MAXIT iterations. */
00555 
00556     if (jtot < nmaxit) {
00557         goto L10;
00558     }
00559     i__1 = *n - 1;
00560     for (i__ = 1; i__ <= i__1; ++i__) {
00561         if (e[i__] != 0.) {
00562             ++(*info);
00563         }
00564 /* L150: */
00565     }
00566     goto L190;
00567 
00568 /*     Order eigenvalues and eigenvectors. */
00569 
00570 L160:
00571     if (icompz == 0) {
00572 
00573 /*        Use Quick Sort */
00574 
00575         template_lapack_lasrt("I", n, &d__[1], info);
00576 
00577     } else {
00578 
00579 /*        Use Selection Sort to minimize swaps of eigenvectors */
00580 
00581         i__1 = *n;
00582         for (ii = 2; ii <= i__1; ++ii) {
00583             i__ = ii - 1;
00584             k = i__;
00585             p = d__[i__];
00586             i__2 = *n;
00587             for (j = ii; j <= i__2; ++j) {
00588                 if (d__[j] < p) {
00589                     k = j;
00590                     p = d__[j];
00591                 }
00592 /* L170: */
00593             }
00594             if (k != i__) {
00595                 d__[k] = d__[i__];
00596                 d__[i__] = p;
00597                 template_blas_swap(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1);
00598             }
00599 /* L180: */
00600         }
00601     }
00602 
00603 L190:
00604     return 0;
00605 
00606 /*     End of DSTEQR */
00607 
00608 } /* dsteqr_ */
00609 
00610 #undef z___ref
00611 
00612 
00613 #endif