ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LASQ4_HEADER 00036 #define TEMPLATE_LAPACK_LASQ4_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, 00040 integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, 00041 Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, 00042 Treal *tau, integer *ttype, Treal *g) 00043 { 00044 /* System generated locals */ 00045 integer i__1; 00046 Treal d__1, d__2; 00047 00048 00049 /* Local variables */ 00050 Treal s = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00051 Treal a2, b1, b2; 00052 integer i4, nn, np; 00053 Treal gam, gap1, gap2; 00054 00055 00056 /* -- LAPACK routine (version 3.2) -- */ 00057 00058 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ 00059 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ 00060 /* -- Berkeley -- */ 00061 /* -- November 2008 -- */ 00062 00063 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ 00064 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ 00065 00066 /* .. Scalar Arguments .. */ 00067 /* .. */ 00068 /* .. Array Arguments .. */ 00069 /* .. */ 00070 00071 /* Purpose */ 00072 /* ======= */ 00073 00074 /* DLASQ4 computes an approximation TAU to the smallest eigenvalue */ 00075 /* using values of d from the previous transform. */ 00076 00077 /* I0 (input) INTEGER */ 00078 /* First index. */ 00079 00080 /* N0 (input) INTEGER */ 00081 /* Last index. */ 00082 00083 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ 00084 /* Z holds the qd array. */ 00085 00086 /* PP (input) INTEGER */ 00087 /* PP=0 for ping, PP=1 for pong. */ 00088 00089 /* NOIN (input) INTEGER */ 00090 /* The value of N0 at start of EIGTEST. */ 00091 00092 /* DMIN (input) DOUBLE PRECISION */ 00093 /* Minimum value of d. */ 00094 00095 /* DMIN1 (input) DOUBLE PRECISION */ 00096 /* Minimum value of d, excluding D( N0 ). */ 00097 00098 /* DMIN2 (input) DOUBLE PRECISION */ 00099 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ 00100 00101 /* DN (input) DOUBLE PRECISION */ 00102 /* d(N) */ 00103 00104 /* DN1 (input) DOUBLE PRECISION */ 00105 /* d(N-1) */ 00106 00107 /* DN2 (input) DOUBLE PRECISION */ 00108 /* d(N-2) */ 00109 00110 /* TAU (output) DOUBLE PRECISION */ 00111 /* This is the shift. */ 00112 00113 /* TTYPE (output) INTEGER */ 00114 /* Shift type. */ 00115 00116 /* G (input/output) REAL */ 00117 /* G is passed as an argument in order to save its value between */ 00118 /* calls to DLASQ4. */ 00119 00120 /* Further Details */ 00121 /* =============== */ 00122 /* CNST1 = 9/16 */ 00123 00124 /* ===================================================================== */ 00125 00126 /* .. Parameters .. */ 00127 /* .. */ 00128 /* .. Local Scalars .. */ 00129 /* .. */ 00130 /* .. Intrinsic Functions .. */ 00131 /* .. */ 00132 /* .. Executable Statements .. */ 00133 00134 /* A negative DMIN forces the shift to take that absolute value */ 00135 /* TTYPE records the type of shift. */ 00136 00137 /* Parameter adjustments */ 00138 --z__; 00139 00140 /* Function Body */ 00141 if (*dmin__ <= 0.) { 00142 *tau = -(*dmin__); 00143 *ttype = -1; 00144 return 0; 00145 } 00146 00147 nn = (*n0 << 2) + *pp; 00148 if (*n0in == *n0) { 00149 00150 /* No eigenvalues deflated. */ 00151 00152 if (*dmin__ == *dn || *dmin__ == *dn1) { 00153 00154 b1 = template_blas_sqrt(z__[nn - 3]) * template_blas_sqrt(z__[nn - 5]); 00155 b2 = template_blas_sqrt(z__[nn - 7]) * template_blas_sqrt(z__[nn - 9]); 00156 a2 = z__[nn - 7] + z__[nn - 5]; 00157 00158 /* Cases 2 and 3. */ 00159 00160 if (*dmin__ == *dn && *dmin1 == *dn1) { 00161 gap2 = *dmin2 - a2 - *dmin2 * .25; 00162 if (gap2 > 0. && gap2 > b2) { 00163 gap1 = a2 - *dn - b2 / gap2 * b2; 00164 } else { 00165 gap1 = a2 - *dn - (b1 + b2); 00166 } 00167 if (gap1 > 0. && gap1 > b1) { 00168 /* Computing MAX */ 00169 d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; 00170 s = maxMACRO(d__1,d__2); 00171 *ttype = -2; 00172 } else { 00173 s = 0.; 00174 if (*dn > b1) { 00175 s = *dn - b1; 00176 } 00177 if (a2 > b1 + b2) { 00178 /* Computing MIN */ 00179 d__1 = s, d__2 = a2 - (b1 + b2); 00180 s = minMACRO(d__1,d__2); 00181 } 00182 /* Computing MAX */ 00183 d__1 = s, d__2 = *dmin__ * .333; 00184 s = maxMACRO(d__1,d__2); 00185 *ttype = -3; 00186 } 00187 } else { 00188 00189 /* Case 4. */ 00190 00191 *ttype = -4; 00192 s = *dmin__ * .25; 00193 if (*dmin__ == *dn) { 00194 gam = *dn; 00195 a2 = 0.; 00196 if (z__[nn - 5] > z__[nn - 7]) { 00197 return 0; 00198 } 00199 b2 = z__[nn - 5] / z__[nn - 7]; 00200 np = nn - 9; 00201 } else { 00202 np = nn - (*pp << 1); 00203 b2 = z__[np - 2]; 00204 gam = *dn1; 00205 if (z__[np - 4] > z__[np - 2]) { 00206 return 0; 00207 } 00208 a2 = z__[np - 4] / z__[np - 2]; 00209 if (z__[nn - 9] > z__[nn - 11]) { 00210 return 0; 00211 } 00212 b2 = z__[nn - 9] / z__[nn - 11]; 00213 np = nn - 13; 00214 } 00215 00216 /* Approximate contribution to norm squared from I < NN-1. */ 00217 00218 a2 += b2; 00219 i__1 = (*i0 << 2) - 1 + *pp; 00220 for (i4 = np; i4 >= i__1; i4 += -4) { 00221 if (b2 == 0.) { 00222 goto L20; 00223 } 00224 b1 = b2; 00225 if (z__[i4] > z__[i4 - 2]) { 00226 return 0; 00227 } 00228 b2 *= z__[i4] / z__[i4 - 2]; 00229 a2 += b2; 00230 if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) { 00231 goto L20; 00232 } 00233 /* L10: */ 00234 } 00235 L20: 00236 a2 *= 1.05; 00237 00238 /* Rayleigh quotient residual bound. */ 00239 00240 if (a2 < .563) { 00241 s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.); 00242 } 00243 } 00244 } else if (*dmin__ == *dn2) { 00245 00246 /* Case 5. */ 00247 00248 *ttype = -5; 00249 s = *dmin__ * .25; 00250 00251 /* Compute contribution to norm squared from I > NN-2. */ 00252 00253 np = nn - (*pp << 1); 00254 b1 = z__[np - 2]; 00255 b2 = z__[np - 6]; 00256 gam = *dn2; 00257 if (z__[np - 8] > b2 || z__[np - 4] > b1) { 00258 return 0; 00259 } 00260 a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); 00261 00262 /* Approximate contribution to norm squared from I < NN-2. */ 00263 00264 if (*n0 - *i0 > 2) { 00265 b2 = z__[nn - 13] / z__[nn - 15]; 00266 a2 += b2; 00267 i__1 = (*i0 << 2) - 1 + *pp; 00268 for (i4 = nn - 17; i4 >= i__1; i4 += -4) { 00269 if (b2 == 0.) { 00270 goto L40; 00271 } 00272 b1 = b2; 00273 if (z__[i4] > z__[i4 - 2]) { 00274 return 0; 00275 } 00276 b2 *= z__[i4] / z__[i4 - 2]; 00277 a2 += b2; 00278 if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) { 00279 goto L40; 00280 } 00281 /* L30: */ 00282 } 00283 L40: 00284 a2 *= 1.05; 00285 } 00286 00287 if (a2 < .563) { 00288 s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.); 00289 } 00290 } else { 00291 00292 /* Case 6, no information to guide us. */ 00293 00294 if (*ttype == -6) { 00295 *g += (1. - *g) * .333; 00296 } else if (*ttype == -18) { 00297 *g = .083250000000000005; 00298 } else { 00299 *g = .25; 00300 } 00301 s = *g * *dmin__; 00302 *ttype = -6; 00303 } 00304 00305 } else if (*n0in == *n0 + 1) { 00306 00307 /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ 00308 00309 if (*dmin1 == *dn1 && *dmin2 == *dn2) { 00310 00311 /* Cases 7 and 8. */ 00312 00313 *ttype = -7; 00314 s = *dmin1 * .333; 00315 if (z__[nn - 5] > z__[nn - 7]) { 00316 return 0; 00317 } 00318 b1 = z__[nn - 5] / z__[nn - 7]; 00319 b2 = b1; 00320 if (b2 == 0.) { 00321 goto L60; 00322 } 00323 i__1 = (*i0 << 2) - 1 + *pp; 00324 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { 00325 a2 = b1; 00326 if (z__[i4] > z__[i4 - 2]) { 00327 return 0; 00328 } 00329 b1 *= z__[i4] / z__[i4 - 2]; 00330 b2 += b1; 00331 if (maxMACRO(b1,a2) * 100. < b2) { 00332 goto L60; 00333 } 00334 /* L50: */ 00335 } 00336 L60: 00337 b2 = template_blas_sqrt(b2 * 1.05); 00338 /* Computing 2nd power */ 00339 d__1 = b2; 00340 a2 = *dmin1 / (d__1 * d__1 + 1.); 00341 gap2 = *dmin2 * .5 - a2; 00342 if (gap2 > 0. && gap2 > b2 * a2) { 00343 /* Computing MAX */ 00344 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); 00345 s = maxMACRO(d__1,d__2); 00346 } else { 00347 /* Computing MAX */ 00348 d__1 = s, d__2 = a2 * (1. - b2 * 1.01); 00349 s = maxMACRO(d__1,d__2); 00350 *ttype = -8; 00351 } 00352 } else { 00353 00354 /* Case 9. */ 00355 00356 s = *dmin1 * .25; 00357 if (*dmin1 == *dn1) { 00358 s = *dmin1 * .5; 00359 } 00360 *ttype = -9; 00361 } 00362 00363 } else if (*n0in == *n0 + 2) { 00364 00365 /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ 00366 00367 /* Cases 10 and 11. */ 00368 00369 if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { 00370 *ttype = -10; 00371 s = *dmin2 * .333; 00372 if (z__[nn - 5] > z__[nn - 7]) { 00373 return 0; 00374 } 00375 b1 = z__[nn - 5] / z__[nn - 7]; 00376 b2 = b1; 00377 if (b2 == 0.) { 00378 goto L80; 00379 } 00380 i__1 = (*i0 << 2) - 1 + *pp; 00381 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { 00382 if (z__[i4] > z__[i4 - 2]) { 00383 return 0; 00384 } 00385 b1 *= z__[i4] / z__[i4 - 2]; 00386 b2 += b1; 00387 if (b1 * 100. < b2) { 00388 goto L80; 00389 } 00390 /* L70: */ 00391 } 00392 L80: 00393 b2 = template_blas_sqrt(b2 * 1.05); 00394 /* Computing 2nd power */ 00395 d__1 = b2; 00396 a2 = *dmin2 / (d__1 * d__1 + 1.); 00397 gap2 = z__[nn - 7] + z__[nn - 9] - template_blas_sqrt(z__[nn - 11]) * template_blas_sqrt(z__[ 00398 nn - 9]) - a2; 00399 if (gap2 > 0. && gap2 > b2 * a2) { 00400 /* Computing MAX */ 00401 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); 00402 s = maxMACRO(d__1,d__2); 00403 } else { 00404 /* Computing MAX */ 00405 d__1 = s, d__2 = a2 * (1. - b2 * 1.01); 00406 s = maxMACRO(d__1,d__2); 00407 } 00408 } else { 00409 s = *dmin2 * .25; 00410 *ttype = -11; 00411 } 00412 } else if (*n0in > *n0 + 2) { 00413 00414 /* Case 12, more than two eigenvalues deflated. No information. */ 00415 00416 s = 0.; 00417 *ttype = -12; 00418 } 00419 00420 *tau = s; 00421 return 0; 00422 00423 /* End of DLASQ4 */ 00424 00425 } /* dlasq4_ */ 00426 00427 #endif