ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_STEIN_HEADER 00036 #define TEMPLATE_LAPACK_STEIN_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e, 00041 const integer *m, const Treal *w, const integer *iblock, const integer *isplit, 00042 Treal *z__, const integer *ldz, Treal *work, integer *iwork, 00043 integer *ifail, integer *info) 00044 { 00045 /* -- LAPACK routine (version 3.0) -- 00046 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00047 Courant Institute, Argonne National Lab, and Rice University 00048 September 30, 1994 00049 00050 00051 Purpose 00052 ======= 00053 00054 DSTEIN computes the eigenvectors of a real symmetric tridiagonal 00055 matrix T corresponding to specified eigenvalues, using inverse 00056 iteration. 00057 00058 The maximum number of iterations allowed for each eigenvector is 00059 specified by an internal parameter MAXITS (currently set to 5). 00060 00061 Arguments 00062 ========= 00063 00064 N (input) INTEGER 00065 The order of the matrix. N >= 0. 00066 00067 D (input) DOUBLE PRECISION array, dimension (N) 00068 The n diagonal elements of the tridiagonal matrix T. 00069 00070 E (input) DOUBLE PRECISION array, dimension (N) 00071 The (n-1) subdiagonal elements of the tridiagonal matrix 00072 T, in elements 1 to N-1. E(N) need not be set. 00073 00074 M (input) INTEGER 00075 The number of eigenvectors to be found. 0 <= M <= N. 00076 00077 W (input) DOUBLE PRECISION array, dimension (N) 00078 The first M elements of W contain the eigenvalues for 00079 which eigenvectors are to be computed. The eigenvalues 00080 should be grouped by split-off block and ordered from 00081 smallest to largest within the block. ( The output array 00082 W from DSTEBZ with ORDER = 'B' is expected here. ) 00083 00084 IBLOCK (input) INTEGER array, dimension (N) 00085 The submatrix indices associated with the corresponding 00086 eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to 00087 the first submatrix from the top, =2 if W(i) belongs to 00088 the second submatrix, etc. ( The output array IBLOCK 00089 from DSTEBZ is expected here. ) 00090 00091 ISPLIT (input) INTEGER array, dimension (N) 00092 The splitting points, at which T breaks up into submatrices. 00093 The first submatrix consists of rows/columns 1 to 00094 ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 00095 through ISPLIT( 2 ), etc. 00096 ( The output array ISPLIT from DSTEBZ is expected here. ) 00097 00098 Z (output) DOUBLE PRECISION array, dimension (LDZ, M) 00099 The computed eigenvectors. The eigenvector associated 00100 with the eigenvalue W(i) is stored in the i-th column of 00101 Z. Any vector which fails to converge is set to its current 00102 iterate after MAXITS iterations. 00103 00104 LDZ (input) INTEGER 00105 The leading dimension of the array Z. LDZ >= max(1,N). 00106 00107 WORK (workspace) DOUBLE PRECISION array, dimension (5*N) 00108 00109 IWORK (workspace) INTEGER array, dimension (N) 00110 00111 IFAIL (output) INTEGER array, dimension (M) 00112 On normal exit, all elements of IFAIL are zero. 00113 If one or more eigenvectors fail to converge after 00114 MAXITS iterations, then their indices are stored in 00115 array IFAIL. 00116 00117 INFO (output) INTEGER 00118 = 0: successful exit. 00119 < 0: if INFO = -i, the i-th argument had an illegal value 00120 > 0: if INFO = i, then i eigenvectors failed to converge 00121 in MAXITS iterations. Their indices are stored in 00122 array IFAIL. 00123 00124 Internal Parameters 00125 =================== 00126 00127 MAXITS INTEGER, default = 5 00128 The maximum number of iterations performed. 00129 00130 EXTRA INTEGER, default = 2 00131 The number of iterations performed after norm growth 00132 criterion is satisfied, should be at least 1. 00133 00134 ===================================================================== 00135 00136 00137 Test the input parameters. 00138 00139 Parameter adjustments */ 00140 /* Table of constant values */ 00141 integer c__2 = 2; 00142 integer c__1 = 1; 00143 integer c_n1 = -1; 00144 00145 /* System generated locals */ 00146 integer z_dim1, z_offset, i__1, i__2, i__3; 00147 Treal d__1, d__2, d__3, d__4, d__5; 00148 /* Local variables */ 00149 integer jblk, nblk; 00150 integer jmax; 00151 integer i__, j; 00152 integer iseed[4], gpind, iinfo; 00153 integer b1; 00154 integer j1; 00155 Treal ortol; 00156 integer indrv1, indrv2, indrv3, indrv4, indrv5, bn; 00157 Treal xj; 00158 integer nrmchk; 00159 integer blksiz; 00160 Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol; 00161 integer its; 00162 Treal xjm, ztr, eps1; 00163 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] 00164 00165 00166 --d__; 00167 --e; 00168 --w; 00169 --iblock; 00170 --isplit; 00171 z_dim1 = *ldz; 00172 z_offset = 1 + z_dim1 * 1; 00173 z__ -= z_offset; 00174 --work; 00175 --iwork; 00176 --ifail; 00177 00178 /* Initialization added by Elias to get rid of compiler warnings. */ 00179 ortol = dtpcrt = xjm = onenrm = gpind = 0; 00180 /* Function Body */ 00181 *info = 0; 00182 i__1 = *m; 00183 for (i__ = 1; i__ <= i__1; ++i__) { 00184 ifail[i__] = 0; 00185 /* L10: */ 00186 } 00187 00188 if (*n < 0) { 00189 *info = -1; 00190 } else if (*m < 0 || *m > *n) { 00191 *info = -4; 00192 } else if (*ldz < maxMACRO(1,*n)) { 00193 *info = -9; 00194 } else { 00195 i__1 = *m; 00196 for (j = 2; j <= i__1; ++j) { 00197 if (iblock[j] < iblock[j - 1]) { 00198 *info = -6; 00199 goto L30; 00200 } 00201 if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) { 00202 *info = -5; 00203 goto L30; 00204 } 00205 /* L20: */ 00206 } 00207 L30: 00208 ; 00209 } 00210 00211 if (*info != 0) { 00212 i__1 = -(*info); 00213 template_blas_erbla("STEIN ", &i__1); 00214 return 0; 00215 } 00216 00217 /* Quick return if possible */ 00218 00219 if (*n == 0 || *m == 0) { 00220 return 0; 00221 } else if (*n == 1) { 00222 z___ref(1, 1) = 1.; 00223 return 0; 00224 } 00225 00226 /* Get machine constants. */ 00227 00228 eps = template_lapack_lamch("Precision", (Treal)0); 00229 00230 /* Initialize seed for random number generator DLARNV. */ 00231 00232 for (i__ = 1; i__ <= 4; ++i__) { 00233 iseed[i__ - 1] = 1; 00234 /* L40: */ 00235 } 00236 00237 /* Initialize pointers. */ 00238 00239 indrv1 = 0; 00240 indrv2 = indrv1 + *n; 00241 indrv3 = indrv2 + *n; 00242 indrv4 = indrv3 + *n; 00243 indrv5 = indrv4 + *n; 00244 00245 /* Compute eigenvectors of matrix blocks. */ 00246 00247 j1 = 1; 00248 i__1 = iblock[*m]; 00249 for (nblk = 1; nblk <= i__1; ++nblk) { 00250 00251 /* Find starting and ending indices of block nblk. */ 00252 00253 if (nblk == 1) { 00254 b1 = 1; 00255 } else { 00256 b1 = isplit[nblk - 1] + 1; 00257 } 00258 bn = isplit[nblk]; 00259 blksiz = bn - b1 + 1; 00260 if (blksiz == 1) { 00261 goto L60; 00262 } 00263 gpind = b1; 00264 00265 /* Compute reorthogonalization criterion and stopping criterion. */ 00266 00267 onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2)); 00268 /* Computing MAX */ 00269 d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1], 00270 absMACRO(d__2)); 00271 onenrm = maxMACRO(d__3,d__4); 00272 i__2 = bn - 1; 00273 for (i__ = b1 + 1; i__ <= i__2; ++i__) { 00274 /* Computing MAX */ 00275 d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[ 00276 i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3)); 00277 onenrm = maxMACRO(d__4,d__5); 00278 /* L50: */ 00279 } 00280 ortol = onenrm * .001; 00281 00282 dtpcrt = template_blas_sqrt(.1 / blksiz); 00283 00284 /* Loop through eigenvalues of block nblk. */ 00285 00286 L60: 00287 jblk = 0; 00288 i__2 = *m; 00289 for (j = j1; j <= i__2; ++j) { 00290 if (iblock[j] != nblk) { 00291 j1 = j; 00292 goto L160; 00293 } 00294 ++jblk; 00295 xj = w[j]; 00296 00297 /* Skip all the work if the block size is one. */ 00298 00299 if (blksiz == 1) { 00300 work[indrv1 + 1] = 1.; 00301 goto L120; 00302 } 00303 00304 /* If eigenvalues j and j-1 are too close, add a relatively 00305 small perturbation. */ 00306 00307 if (jblk > 1) { 00308 eps1 = (d__1 = eps * xj, absMACRO(d__1)); 00309 pertol = eps1 * 10.; 00310 sep = xj - xjm; 00311 if (sep < pertol) { 00312 xj = xjm + pertol; 00313 } 00314 } 00315 00316 its = 0; 00317 nrmchk = 0; 00318 00319 /* Get random starting vector. */ 00320 00321 template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]); 00322 00323 /* Copy the matrix T so it won't be destroyed in factorization. */ 00324 00325 template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1); 00326 i__3 = blksiz - 1; 00327 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1); 00328 i__3 = blksiz - 1; 00329 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1); 00330 00331 /* Compute LU factors with partial pivoting ( PT = LU ) */ 00332 00333 tol = 0.; 00334 template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[ 00335 indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo); 00336 00337 /* Update iteration count. */ 00338 00339 L70: 00340 ++its; 00341 if (its > 5) { 00342 goto L100; 00343 } 00344 00345 /* Normalize and scale the righthand side vector Pb. 00346 00347 Computing MAX */ 00348 d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1)); 00349 scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[ 00350 indrv1 + 1], &c__1); 00351 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1); 00352 00353 /* Solve the system LU = Pb. */ 00354 00355 template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], & 00356 work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[ 00357 indrv1 + 1], &tol, &iinfo); 00358 00359 /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are 00360 close enough. */ 00361 00362 if (jblk == 1) { 00363 goto L90; 00364 } 00365 if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) { 00366 gpind = j; 00367 } 00368 if (gpind != j) { 00369 i__3 = j - 1; 00370 for (i__ = gpind; i__ <= i__3; ++i__) { 00371 ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref( 00372 b1, i__), &c__1); 00373 template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[ 00374 indrv1 + 1], &c__1); 00375 /* L80: */ 00376 } 00377 } 00378 00379 /* Check the infinity norm of the iterate. */ 00380 00381 L90: 00382 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1); 00383 nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1)); 00384 00385 /* Continue for additional iterations after norm reaches 00386 stopping criterion. */ 00387 00388 if (nrm < dtpcrt) { 00389 goto L70; 00390 } 00391 ++nrmchk; 00392 if (nrmchk < 3) { 00393 goto L70; 00394 } 00395 00396 goto L110; 00397 00398 /* If stopping criterion was not satisfied, update info and 00399 store eigenvector number in array ifail. */ 00400 00401 L100: 00402 ++(*info); 00403 ifail[*info] = j; 00404 00405 /* Accept iterate as jth eigenvector. */ 00406 00407 L110: 00408 scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1); 00409 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1); 00410 if (work[indrv1 + jmax] < 0.) { 00411 scl = -scl; 00412 } 00413 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1); 00414 L120: 00415 i__3 = *n; 00416 for (i__ = 1; i__ <= i__3; ++i__) { 00417 z___ref(i__, j) = 0.; 00418 /* L130: */ 00419 } 00420 i__3 = blksiz; 00421 for (i__ = 1; i__ <= i__3; ++i__) { 00422 z___ref(b1 + i__ - 1, j) = work[indrv1 + i__]; 00423 /* L140: */ 00424 } 00425 00426 /* Save the shift to check eigenvalue spacing at next 00427 iteration. */ 00428 00429 xjm = xj; 00430 00431 /* L150: */ 00432 } 00433 L160: 00434 ; 00435 } 00436 00437 return 0; 00438 00439 /* End of DSTEIN */ 00440 00441 } /* dstein_ */ 00442 00443 #undef z___ref 00444 00445 00446 #endif