ergo
template_lapack_stevr.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_STEVR_HEADER
00036 #define TEMPLATE_LAPACK_STEVR_HEADER
00037 
00038 template<class Treal>
00039 int template_lapack_stevr(const char *jobz, const char *range, const integer *n,
00040                           Treal * d__, Treal *e, const Treal *vl, 
00041                           const Treal *vu, const integer *il, 
00042                           const integer *iu, const Treal *abstol, 
00043                           integer *m, Treal *w, 
00044                           Treal *z__, const integer *ldz, integer *isuppz, 
00045                           Treal *work, 
00046                           integer *lwork, integer *iwork, integer *liwork, 
00047                           integer *info)
00048 {
00049     /* System generated locals */
00050     integer z_dim1, z_offset, i__1, i__2;
00051     Treal d__1, d__2;
00052 
00053     /* Builtin functions */
00054 
00055     /* Local variables */
00056     integer i__, j, jj;
00057     Treal eps, vll, vuu, tmp1;
00058     integer imax;
00059     Treal rmin, rmax;
00060     logical test;
00061     Treal tnrm;
00062     integer itmp1;
00063     Treal sigma;
00064     char order[1];
00065     integer lwmin;
00066     logical wantz;
00067     logical alleig, indeig;
00068     integer iscale, ieeeok, indibl, indifl;
00069     logical valeig;
00070     Treal safmin;
00071     Treal bignum;
00072     integer indisp;
00073     integer indiwo;
00074     integer liwmin;
00075     logical tryrac;
00076     integer nsplit;
00077     Treal smlnum;
00078     logical lquery;
00079 
00080 
00081 /*  -- LAPACK driver routine (version 3.2) -- */
00082 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00083 /*     November 2006 */
00084 
00085 /*     .. Scalar Arguments .. */
00086 /*     .. */
00087 /*     .. Array Arguments .. */
00088 /*     .. */
00089 
00090 /*  Purpose */
00091 /*  ======= */
00092 
00093 /*  DSTEVR computes selected eigenvalues and, optionally, eigenvectors */
00094 /*  of a real symmetric tridiagonal matrix T.  Eigenvalues and */
00095 /*  eigenvectors can be selected by specifying either a range of values */
00096 /*  or a range of indices for the desired eigenvalues. */
00097 
00098 /*  Whenever possible, DSTEVR calls DSTEMR to compute the */
00099 /*  eigenspectrum using Relatively Robust Representations.  DSTEMR */
00100 /*  computes eigenvalues by the dqds algorithm, while orthogonal */
00101 /*  eigenvectors are computed from various "good" L D L^T representations */
00102 /*  (also known as Relatively Robust Representations). Gram-Schmidt */
00103 /*  orthogonalization is avoided as far as possible. More specifically, */
00104 /*  the various steps of the algorithm are as follows. For the i-th */
00105 /*  unreduced block of T, */
00106 /*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T */
00107 /*          is a relatively robust representation, */
00108 /*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high */
00109 /*         relative accuracy by the dqds algorithm, */
00110 /*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i */
00111 /*         close to the cluster, and go to step (a), */
00112 /*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, */
00113 /*         compute the corresponding eigenvector by forming a */
00114 /*         rank-revealing twisted factorization. */
00115 /*  The desired accuracy of the output can be specified by the input */
00116 /*  parameter ABSTOL. */
00117 
00118 /*  For more details, see "A new O(n^2) algorithm for the symmetric */
00119 /*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, */
00120 /*  Computer Science Division Technical Report No. UCB//CSD-97-971, */
00121 /*  UC Berkeley, May 1997. */
00122 
00123 
00124 /*  Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested */
00125 /*  on machines which conform to the ieee-754 floating point standard. */
00126 /*  DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and */
00127 /*  when partial spectrum requests are made. */
00128 
00129 /*  Normal execution of DSTEMR may create NaNs and infinities and */
00130 /*  hence may abort due to a floating point exception in environments */
00131 /*  which do not handle NaNs and infinities in the ieee standard default */
00132 /*  manner. */
00133 
00134 /*  Arguments */
00135 /*  ========= */
00136 
00137 /*  JOBZ    (input) CHARACTER*1 */
00138 /*          = 'N':  Compute eigenvalues only; */
00139 /*          = 'V':  Compute eigenvalues and eigenvectors. */
00140 
00141 /*  RANGE   (input) CHARACTER*1 */
00142 /*          = 'A': all eigenvalues will be found. */
00143 /*          = 'V': all eigenvalues in the half-open interval (VL,VU] */
00144 /*                 will be found. */
00145 /*          = 'I': the IL-th through IU-th eigenvalues will be found. */
00146 /* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */
00147 /* ********* DSTEIN are called */
00148 
00149 /*  N       (input) INTEGER */
00150 /*          The order of the matrix.  N >= 0. */
00151 
00152 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
00153 /*          On entry, the n diagonal elements of the tridiagonal matrix */
00154 /*          A. */
00155 /*          On exit, D may be multiplied by a constant factor chosen */
00156 /*          to avoid over/underflow in computing the eigenvalues. */
00157 
00158 /*  E       (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */
00159 /*          On entry, the (n-1) subdiagonal elements of the tridiagonal */
00160 /*          matrix A in elements 1 to N-1 of E. */
00161 /*          On exit, E may be multiplied by a constant factor chosen */
00162 /*          to avoid over/underflow in computing the eigenvalues. */
00163 
00164 /*  VL      (input) DOUBLE PRECISION */
00165 /*  VU      (input) DOUBLE PRECISION */
00166 /*          If RANGE='V', the lower and upper bounds of the interval to */
00167 /*          be searched for eigenvalues. VL < VU. */
00168 /*          Not referenced if RANGE = 'A' or 'I'. */
00169 
00170 /*  IL      (input) INTEGER */
00171 /*  IU      (input) INTEGER */
00172 /*          If RANGE='I', the indices (in ascending order) of the */
00173 /*          smallest and largest eigenvalues to be returned. */
00174 /*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
00175 /*          Not referenced if RANGE = 'A' or 'V'. */
00176 
00177 /*  ABSTOL  (input) DOUBLE PRECISION */
00178 /*          The absolute error tolerance for the eigenvalues. */
00179 /*          An approximate eigenvalue is accepted as converged */
00180 /*          when it is determined to lie in an interval [a,b] */
00181 /*          of width less than or equal to */
00182 
00183 /*                  ABSTOL + EPS *   max( |a|,|b| ) , */
00184 
00185 /*          where EPS is the machine precision.  If ABSTOL is less than */
00186 /*          or equal to zero, then  EPS*|T|  will be used in its place, */
00187 /*          where |T| is the 1-norm of the tridiagonal matrix obtained */
00188 /*          by reducing A to tridiagonal form. */
00189 
00190 /*          See "Computing Small Singular Values of Bidiagonal Matrices */
00191 /*          with Guaranteed High Relative Accuracy," by Demmel and */
00192 /*          Kahan, LAPACK Working Note #3. */
00193 
00194 /*          If high relative accuracy is important, set ABSTOL to */
00195 /*          DLAMCH( 'Safe minimum' ).  Doing so will guarantee that */
00196 /*          eigenvalues are computed to high relative accuracy when */
00197 /*          possible in future releases.  The current code does not */
00198 /*          make any guarantees about high relative accuracy, but */
00199 /*          future releases will. See J. Barlow and J. Demmel, */
00200 /*          "Computing Accurate Eigensystems of Scaled Diagonally */
00201 /*          Dominant Matrices", LAPACK Working Note #7, for a discussion */
00202 /*          of which matrices define their eigenvalues to high relative */
00203 /*          accuracy. */
00204 
00205 /*  M       (output) INTEGER */
00206 /*          The total number of eigenvalues found.  0 <= M <= N. */
00207 /*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
00208 
00209 /*  W       (output) DOUBLE PRECISION array, dimension (N) */
00210 /*          The first M elements contain the selected eigenvalues in */
00211 /*          ascending order. */
00212 
00213 /*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
00214 /*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z */
00215 /*          contain the orthonormal eigenvectors of the matrix A */
00216 /*          corresponding to the selected eigenvalues, with the i-th */
00217 /*          column of Z holding the eigenvector associated with W(i). */
00218 /*          Note: the user must ensure that at least max(1,M) columns are */
00219 /*          supplied in the array Z; if RANGE = 'V', the exact value of M */
00220 /*          is not known in advance and an upper bound must be used. */
00221 
00222 /*  LDZ     (input) INTEGER */
00223 /*          The leading dimension of the array Z.  LDZ >= 1, and if */
00224 /*          JOBZ = 'V', LDZ >= max(1,N). */
00225 
00226 /*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) ) */
00227 /*          The support of the eigenvectors in Z, i.e., the indices */
00228 /*          indicating the nonzero elements in Z. The i-th eigenvector */
00229 /*          is nonzero only in elements ISUPPZ( 2*i-1 ) through */
00230 /*          ISUPPZ( 2*i ). */
00231 /* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */
00232 
00233 /*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
00234 /*          On exit, if INFO = 0, WORK(1) returns the optimal (and */
00235 /*          minimal) LWORK. */
00236 
00237 /*  LWORK   (input) INTEGER */
00238 /*          The dimension of the array WORK.  LWORK >= max(1,20*N). */
00239 
00240 /*          If LWORK = -1, then a workspace query is assumed; the routine */
00241 /*          only calculates the optimal sizes of the WORK and IWORK */
00242 /*          arrays, returns these values as the first entries of the WORK */
00243 /*          and IWORK arrays, and no error message related to LWORK or */
00244 /*          LIWORK is issued by XERBLA. */
00245 
00246 /*  IWORK   (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
00247 /*          On exit, if INFO = 0, IWORK(1) returns the optimal (and */
00248 /*          minimal) LIWORK. */
00249 
00250 /*  LIWORK  (input) INTEGER */
00251 /*          The dimension of the array IWORK.  LIWORK >= max(1,10*N). */
00252 
00253 /*          If LIWORK = -1, then a workspace query is assumed; the */
00254 /*          routine only calculates the optimal sizes of the WORK and */
00255 /*          IWORK arrays, returns these values as the first entries of */
00256 /*          the WORK and IWORK arrays, and no error message related to */
00257 /*          LWORK or LIWORK is issued by XERBLA. */
00258 
00259 /*  INFO    (output) INTEGER */
00260 /*          = 0:  successful exit */
00261 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
00262 /*          > 0:  Internal error */
00263 
00264 /*  Further Details */
00265 /*  =============== */
00266 
00267 /*  Based on contributions by */
00268 /*     Inderjit Dhillon, IBM Almaden, USA */
00269 /*     Osni Marques, LBNL/NERSC, USA */
00270 /*     Ken Stanley, Computer Science Division, University of */
00271 /*       California at Berkeley, USA */
00272 
00273 /*  ===================================================================== */
00274 
00275 /*     .. Parameters .. */
00276 /*     .. */
00277 /*     .. Local Scalars .. */
00278 /*     .. */
00279 /*     .. External Functions .. */
00280 /*     .. */
00281 /*     .. External Subroutines .. */
00282 /*     .. */
00283 /*     .. Intrinsic Functions .. */
00284 /*     .. */
00285 /*     .. Executable Statements .. */
00286 
00287 
00288 /*     Test the input parameters. */
00289 
00290     /* Parameter adjustments */
00291     /* Table of constant values */
00292 
00293     integer c__10 = 10;
00294     integer c__1 = 1;
00295     integer c__2 = 2;
00296     integer c__3 = 3;
00297     integer c__4 = 4;    
00298 
00299     --d__;
00300     --e;
00301     --w;
00302     z_dim1 = *ldz;
00303     z_offset = 1 + z_dim1;
00304     z__ -= z_offset;
00305     --isuppz;
00306     --work;
00307     --iwork;
00308 
00309     /* Function Body */
00310     ieeeok = template_lapack_ilaenv(&c__10, "DSTEVR", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1);
00311 
00312     wantz = template_blas_lsame(jobz, "V");
00313     alleig = template_blas_lsame(range, "A");
00314     valeig = template_blas_lsame(range, "V");
00315     indeig = template_blas_lsame(range, "I");
00316 
00317     lquery = *lwork == -1 || *liwork == -1;
00318 /* Computing MAX */
00319     i__1 = 1, i__2 = *n * 20;
00320     lwmin = maxMACRO(i__1,i__2);
00321 /* Computing MAX */
00322     i__1 = 1, i__2 = *n * 10;
00323     liwmin = maxMACRO(i__1,i__2);
00324 
00325 
00326     *info = 0;
00327     if (! (wantz || template_blas_lsame(jobz, "N"))) {
00328         *info = -1;
00329     } else if (! (alleig || valeig || indeig)) {
00330         *info = -2;
00331     } else if (*n < 0) {
00332         *info = -3;
00333     } else {
00334         if (valeig) {
00335             if (*n > 0 && *vu <= *vl) {
00336                 *info = -7;
00337             }
00338         } else if (indeig) {
00339             if (*il < 1 || *il > maxMACRO(1,*n)) {
00340                 *info = -8;
00341             } else if (*iu < minMACRO(*n,*il) || *iu > *n) {
00342                 *info = -9;
00343             }
00344         }
00345     }
00346     if (*info == 0) {
00347       if (*ldz < 1 || ( wantz && *ldz < *n ) ) {
00348             *info = -14;
00349         }
00350     }
00351 
00352     if (*info == 0) {
00353         work[1] = (Treal) lwmin;
00354         iwork[1] = liwmin;
00355 
00356         if (*lwork < lwmin && ! lquery) {
00357             *info = -17;
00358         } else if (*liwork < liwmin && ! lquery) {
00359             *info = -19;
00360         }
00361     }
00362 
00363     if (*info != 0) {
00364         i__1 = -(*info);
00365         template_blas_erbla("STEVR", &i__1);
00366         return 0;
00367     } else if (lquery) {
00368         return 0;
00369     }
00370 
00371 /*     Quick return if possible */
00372 
00373     *m = 0;
00374     if (*n == 0) {
00375         return 0;
00376     }
00377 
00378     if (*n == 1) {
00379         if (alleig || indeig) {
00380             *m = 1;
00381             w[1] = d__[1];
00382         } else {
00383             if (*vl < d__[1] && *vu >= d__[1]) {
00384                 *m = 1;
00385                 w[1] = d__[1];
00386             }
00387         }
00388         if (wantz) {
00389             z__[z_dim1 + 1] = 1.;
00390         }
00391         return 0;
00392     }
00393 
00394 /*     Get machine constants. */
00395 
00396     safmin = template_lapack_lamch("Safe minimum", (Treal)0);
00397     eps = template_lapack_lamch("Precision", (Treal)0);
00398     smlnum = safmin / eps;
00399     bignum = 1. / smlnum;
00400     rmin = template_blas_sqrt(smlnum);
00401 /* Computing MIN */
00402     d__1 = template_blas_sqrt(bignum), d__2 = 1. / template_blas_sqrt(template_blas_sqrt(safmin));
00403     rmax = minMACRO(d__1,d__2);
00404 
00405 
00406 /*     Scale matrix to allowable range, if necessary. */
00407 
00408     iscale = 0;
00409     vll = *vl;
00410     vuu = *vu;
00411 
00412     tnrm = template_lapack_lanst("M", n, &d__[1], &e[1]);
00413     if (tnrm > 0. && tnrm < rmin) {
00414         iscale = 1;
00415         sigma = rmin / tnrm;
00416     } else if (tnrm > rmax) {
00417         iscale = 1;
00418         sigma = rmax / tnrm;
00419     }
00420     if (iscale == 1) {
00421         template_blas_scal(n, &sigma, &d__[1], &c__1);
00422         i__1 = *n - 1;
00423         template_blas_scal(&i__1, &sigma, &e[1], &c__1);
00424         if (valeig) {
00425             vll = *vl * sigma;
00426             vuu = *vu * sigma;
00427         }
00428     }
00429 /*     Initialize indices into workspaces.  Note: These indices are used only */
00430 /*     if DSTERF or DSTEMR fail. */
00431 /*     IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */
00432 /*     stores the block indices of each of the M<=N eigenvalues. */
00433     indibl = 1;
00434 /*     IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */
00435 /*     stores the starting and finishing indices of each block. */
00436     indisp = indibl + *n;
00437 /*     IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */
00438 /*     that corresponding to eigenvectors that fail to converge in */
00439 /*     DSTEIN.  This information is discarded; if any fail, the driver */
00440 /*     returns INFO > 0. */
00441     indifl = indisp + *n;
00442 /*     INDIWO is the offset of the remaining integer workspace. */
00443     indiwo = indisp + *n;
00444 
00445 /*     If all eigenvalues are desired, then */
00446 /*     call DSTERF or DSTEMR.  If this fails for some eigenvalue, then */
00447 /*     try DSTEBZ. */
00448 
00449 
00450     test = FALSE_;
00451     if (indeig) {
00452         if (*il == 1 && *iu == *n) {
00453             test = TRUE_;
00454         }
00455     }
00456     if ((alleig || test) && ieeeok == 1) {
00457         i__1 = *n - 1;
00458         template_blas_copy(&i__1, &e[1], &c__1, &work[1], &c__1);
00459         if (! wantz) {
00460             template_blas_copy(n, &d__[1], &c__1, &w[1], &c__1);
00461             template_lapack_sterf(n, &w[1], &work[1], info);
00462         } else {
00463             template_blas_copy(n, &d__[1], &c__1, &work[*n + 1], &c__1);
00464             if (*abstol <= *n * 2. * eps) {
00465                 tryrac = TRUE_;
00466             } else {
00467                 tryrac = FALSE_;
00468             }
00469             i__1 = *lwork - (*n << 1);
00470             template_lapack_stemr(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, m, 
00471                     &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, &work[
00472                     (*n << 1) + 1], &i__1, &iwork[1], liwork, info);
00473 
00474         }
00475         if (*info == 0) {
00476             *m = *n;
00477             goto L10;
00478         }
00479         *info = 0;
00480     }
00481 
00482 /*     Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */
00483 
00484     if (wantz) {
00485         *(unsigned char *)order = 'B';
00486     } else {
00487         *(unsigned char *)order = 'E';
00488     }
00489     template_lapack_stebz(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, &
00490             nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[
00491             indiwo], info);
00492 
00493     if (wantz) {
00494         template_lapack_stein(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], &
00495                 z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl], 
00496                 info);
00497     }
00498 
00499 /*     If matrix was scaled, then rescale eigenvalues appropriately. */
00500 
00501 L10:
00502     if (iscale == 1) {
00503         if (*info == 0) {
00504             imax = *m;
00505         } else {
00506             imax = *info - 1;
00507         }
00508         d__1 = 1. / sigma;
00509         template_blas_scal(&imax, &d__1, &w[1], &c__1);
00510     }
00511 
00512 /*     If eigenvalues are not in order, then sort them, along with */
00513 /*     eigenvectors. */
00514 
00515     if (wantz) {
00516         i__1 = *m - 1;
00517         for (j = 1; j <= i__1; ++j) {
00518             i__ = 0;
00519             tmp1 = w[j];
00520             i__2 = *m;
00521             for (jj = j + 1; jj <= i__2; ++jj) {
00522                 if (w[jj] < tmp1) {
00523                     i__ = jj;
00524                     tmp1 = w[jj];
00525                 }
00526 /* L20: */
00527             }
00528 
00529             if (i__ != 0) {
00530                 itmp1 = iwork[i__];
00531                 w[i__] = w[j];
00532                 iwork[i__] = iwork[j];
00533                 w[j] = tmp1;
00534                 iwork[j] = itmp1;
00535                 template_blas_swap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], 
00536                          &c__1);
00537             }
00538 /* L30: */
00539         }
00540     }
00541 
00542 /*      Causes problems with tests 19 & 20: */
00543 /*      IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */
00544 
00545 
00546     work[1] = (Treal) lwmin;
00547     iwork[1] = liwmin;
00548     return 0;
00549 
00550 /*     End of DSTEVR */
00551 
00552 } /* dstevr_ */
00553 
00554 #endif
00555