ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LASQ3_HEADER 00036 #define TEMPLATE_LAPACK_LASQ3_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, 00040 integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, 00041 Treal *qmax, integer *nfail, integer *iter, integer *ndiv, 00042 logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, 00043 Treal *dn, Treal *dn1, Treal *dn2, Treal *g, 00044 Treal *tau) 00045 { 00046 /* System generated locals */ 00047 integer i__1; 00048 Treal d__1, d__2; 00049 00050 00051 /* Local variables */ 00052 Treal s, t; 00053 integer j4, nn; 00054 Treal eps, tol; 00055 integer n0in, ipn4; 00056 Treal tol2, temp; 00057 00058 00059 /* -- LAPACK routine (version 3.2) -- */ 00060 00061 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ 00062 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ 00063 /* -- Berkeley -- */ 00064 /* -- November 2008 -- */ 00065 00066 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ 00067 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ 00068 00069 /* .. Scalar Arguments .. */ 00070 /* .. */ 00071 /* .. Array Arguments .. */ 00072 /* .. */ 00073 00074 /* Purpose */ 00075 /* ======= */ 00076 00077 /* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ 00078 /* In case of failure it changes shifts, and tries again until output */ 00079 /* is positive. */ 00080 00081 /* Arguments */ 00082 /* ========= */ 00083 00084 /* I0 (input) INTEGER */ 00085 /* First index. */ 00086 00087 /* N0 (input) INTEGER */ 00088 /* Last index. */ 00089 00090 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ 00091 /* Z holds the qd array. */ 00092 00093 /* PP (input/output) INTEGER */ 00094 /* PP=0 for ping, PP=1 for pong. */ 00095 /* PP=2 indicates that flipping was applied to the Z array */ 00096 /* and that the initial tests for deflation should not be */ 00097 /* performed. */ 00098 00099 /* DMIN (output) DOUBLE PRECISION */ 00100 /* Minimum value of d. */ 00101 00102 /* SIGMA (output) DOUBLE PRECISION */ 00103 /* Sum of shifts used in current segment. */ 00104 00105 /* DESIG (input/output) DOUBLE PRECISION */ 00106 /* Lower order part of SIGMA */ 00107 00108 /* QMAX (input) DOUBLE PRECISION */ 00109 /* Maximum value of q. */ 00110 00111 /* NFAIL (output) INTEGER */ 00112 /* Number of times shift was too big. */ 00113 00114 /* ITER (output) INTEGER */ 00115 /* Number of iterations. */ 00116 00117 /* NDIV (output) INTEGER */ 00118 /* Number of divisions. */ 00119 00120 /* IEEE (input) LOGICAL */ 00121 /* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */ 00122 00123 /* TTYPE (input/output) INTEGER */ 00124 /* Shift type. */ 00125 00126 /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */ 00127 /* These are passed as arguments in order to save their values */ 00128 /* between calls to DLASQ3. */ 00129 00130 /* ===================================================================== */ 00131 00132 /* .. Parameters .. */ 00133 /* .. */ 00134 /* .. Local Scalars .. */ 00135 /* .. */ 00136 /* .. External Subroutines .. */ 00137 /* .. */ 00138 /* .. External Function .. */ 00139 /* .. */ 00140 /* .. Intrinsic Functions .. */ 00141 /* .. */ 00142 /* .. Executable Statements .. */ 00143 00144 /* Parameter adjustments */ 00145 --z__; 00146 00147 /* Function Body */ 00148 n0in = *n0; 00149 eps = template_lapack_lamch("Precision", (Treal)0); 00150 tol = eps * 100.; 00151 /* Computing 2nd power */ 00152 d__1 = tol; 00153 tol2 = d__1 * d__1; 00154 00155 /* Check for deflation. */ 00156 00157 L10: 00158 00159 if (*n0 < *i0) { 00160 return 0; 00161 } 00162 if (*n0 == *i0) { 00163 goto L20; 00164 } 00165 nn = (*n0 << 2) + *pp; 00166 if (*n0 == *i0 + 1) { 00167 goto L40; 00168 } 00169 00170 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */ 00171 00172 if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 00173 4] > tol2 * z__[nn - 7]) { 00174 goto L30; 00175 } 00176 00177 L20: 00178 00179 z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; 00180 --(*n0); 00181 goto L10; 00182 00183 /* Check whether E(N0-2) is negligible, 2 eigenvalues. */ 00184 00185 L30: 00186 00187 if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ 00188 nn - 11]) { 00189 goto L50; 00190 } 00191 00192 L40: 00193 00194 if (z__[nn - 3] > z__[nn - 7]) { 00195 s = z__[nn - 3]; 00196 z__[nn - 3] = z__[nn - 7]; 00197 z__[nn - 7] = s; 00198 } 00199 if (z__[nn - 5] > z__[nn - 3] * tol2) { 00200 t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; 00201 s = z__[nn - 3] * (z__[nn - 5] / t); 00202 if (s <= t) { 00203 s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.))); 00204 } else { 00205 s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s))); 00206 } 00207 t = z__[nn - 7] + (s + z__[nn - 5]); 00208 z__[nn - 3] *= z__[nn - 7] / t; 00209 z__[nn - 7] = t; 00210 } 00211 z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; 00212 z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; 00213 *n0 += -2; 00214 goto L10; 00215 00216 L50: 00217 if (*pp == 2) { 00218 *pp = 0; 00219 } 00220 00221 /* Reverse the qd-array, if warranted. */ 00222 00223 if (*dmin__ <= 0. || *n0 < n0in) { 00224 if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { 00225 ipn4 = ( *i0 + *n0 ) << 2; 00226 i__1 = ( *i0 + *n0 - 1 ) << 1; 00227 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 00228 temp = z__[j4 - 3]; 00229 z__[j4 - 3] = z__[ipn4 - j4 - 3]; 00230 z__[ipn4 - j4 - 3] = temp; 00231 temp = z__[j4 - 2]; 00232 z__[j4 - 2] = z__[ipn4 - j4 - 2]; 00233 z__[ipn4 - j4 - 2] = temp; 00234 temp = z__[j4 - 1]; 00235 z__[j4 - 1] = z__[ipn4 - j4 - 5]; 00236 z__[ipn4 - j4 - 5] = temp; 00237 temp = z__[j4]; 00238 z__[j4] = z__[ipn4 - j4 - 4]; 00239 z__[ipn4 - j4 - 4] = temp; 00240 /* L60: */ 00241 } 00242 if (*n0 - *i0 <= 4) { 00243 z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; 00244 z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; 00245 } 00246 /* Computing MIN */ 00247 d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; 00248 *dmin2 = minMACRO(d__1,d__2); 00249 /* Computing MIN */ 00250 d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] 00251 , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; 00252 z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2); 00253 /* Computing MIN */ 00254 d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = 00255 minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; 00256 z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2); 00257 /* Computing MAX */ 00258 d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1, 00259 d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; 00260 *qmax = maxMACRO(d__1,d__2); 00261 *dmin__ = -0.; 00262 } 00263 } 00264 00265 /* Choose a shift. */ 00266 00267 template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, 00268 tau, ttype, g); 00269 00270 /* Call dqds until DMIN > 0. */ 00271 00272 L70: 00273 00274 template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, 00275 ieee); 00276 00277 *ndiv += *n0 - *i0 + 2; 00278 ++(*iter); 00279 00280 /* Check status. */ 00281 00282 if (*dmin__ >= 0. && *dmin1 > 0.) { 00283 00284 /* Success. */ 00285 00286 goto L90; 00287 00288 } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol 00289 * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) { 00290 00291 /* Convergence hidden by negative DN. */ 00292 00293 z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.; 00294 *dmin__ = 0.; 00295 goto L90; 00296 } else if (*dmin__ < 0.) { 00297 00298 /* TAU too big. Select new TAU and try again. */ 00299 00300 ++(*nfail); 00301 if (*ttype < -22) { 00302 00303 /* Failed twice. Play it safe. */ 00304 00305 *tau = 0.; 00306 } else if (*dmin1 > 0.) { 00307 00308 /* Late failure. Gives excellent shift. */ 00309 00310 *tau = (*tau + *dmin__) * (1. - eps * 2.); 00311 *ttype += -11; 00312 } else { 00313 00314 /* Early failure. Divide by 4. */ 00315 00316 *tau *= .25; 00317 *ttype += -12; 00318 } 00319 goto L70; 00320 } else if (template_lapack_isnan(dmin__)) { 00321 00322 /* NaN. */ 00323 00324 if (*tau == 0.) { 00325 goto L80; 00326 } else { 00327 *tau = 0.; 00328 goto L70; 00329 } 00330 } else { 00331 00332 /* Possible underflow. Play it safe. */ 00333 00334 goto L80; 00335 } 00336 00337 /* Risk of underflow. */ 00338 00339 L80: 00340 template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); 00341 *ndiv += *n0 - *i0 + 2; 00342 ++(*iter); 00343 *tau = 0.; 00344 00345 L90: 00346 if (*tau < *sigma) { 00347 *desig += *tau; 00348 t = *sigma + *desig; 00349 *desig -= t - *sigma; 00350 } else { 00351 t = *sigma + *tau; 00352 *desig = *sigma - (t - *tau) + *desig; 00353 } 00354 *sigma = t; 00355 00356 return 0; 00357 00358 /* End of DLASQ3 */ 00359 00360 } /* dlasq3_ */ 00361 00362 #endif