ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LAG2_HEADER 00036 #define TEMPLATE_LAPACK_LAG2_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b, 00041 const integer *ldb, const Treal *safmin, Treal *scale1, Treal * 00042 scale2, Treal *wr1, Treal *wr2, Treal *wi) 00043 { 00044 /* -- LAPACK auxiliary routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 March 31, 1993 00048 00049 00050 Purpose 00051 ======= 00052 00053 DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue 00054 problem A - w B, with scaling as necessary to avoid over-/underflow. 00055 00056 The scaling factor "s" results in a modified eigenvalue equation 00057 00058 s A - w B 00059 00060 where s is a non-negative scaling factor chosen so that w, w B, 00061 and s A do not overflow and, if possible, do not underflow, either. 00062 00063 Arguments 00064 ========= 00065 00066 A (input) DOUBLE PRECISION array, dimension (LDA, 2) 00067 On entry, the 2 x 2 matrix A. It is assumed that its 1-norm 00068 is less than 1/SAFMIN. Entries less than 00069 sqrt(SAFMIN)*norm(A) are subject to being treated as zero. 00070 00071 LDA (input) INTEGER 00072 The leading dimension of the array A. LDA >= 2. 00073 00074 B (input) DOUBLE PRECISION array, dimension (LDB, 2) 00075 On entry, the 2 x 2 upper triangular matrix B. It is 00076 assumed that the one-norm of B is less than 1/SAFMIN. The 00077 diagonals should be at least sqrt(SAFMIN) times the largest 00078 element of B (in absolute value); if a diagonal is smaller 00079 than that, then +/- sqrt(SAFMIN) will be used instead of 00080 that diagonal. 00081 00082 LDB (input) INTEGER 00083 The leading dimension of the array B. LDB >= 2. 00084 00085 SAFMIN (input) DOUBLE PRECISION 00086 The smallest positive number s.t. 1/SAFMIN does not 00087 overflow. (This should always be DLAMCH('S') -- it is an 00088 argument in order to avoid having to call DLAMCH frequently.) 00089 00090 SCALE1 (output) DOUBLE PRECISION 00091 A scaling factor used to avoid over-/underflow in the 00092 eigenvalue equation which defines the first eigenvalue. If 00093 the eigenvalues are complex, then the eigenvalues are 00094 ( WR1 +/- WI i ) / SCALE1 (which may lie outside the 00095 exponent range of the machine), SCALE1=SCALE2, and SCALE1 00096 will always be positive. If the eigenvalues are real, then 00097 the first (real) eigenvalue is WR1 / SCALE1 , but this may 00098 overflow or underflow, and in fact, SCALE1 may be zero or 00099 less than the underflow threshhold if the exact eigenvalue 00100 is sufficiently large. 00101 00102 SCALE2 (output) DOUBLE PRECISION 00103 A scaling factor used to avoid over-/underflow in the 00104 eigenvalue equation which defines the second eigenvalue. If 00105 the eigenvalues are complex, then SCALE2=SCALE1. If the 00106 eigenvalues are real, then the second (real) eigenvalue is 00107 WR2 / SCALE2 , but this may overflow or underflow, and in 00108 fact, SCALE2 may be zero or less than the underflow 00109 threshhold if the exact eigenvalue is sufficiently large. 00110 00111 WR1 (output) DOUBLE PRECISION 00112 If the eigenvalue is real, then WR1 is SCALE1 times the 00113 eigenvalue closest to the (2,2) element of A B**(-1). If the 00114 eigenvalue is complex, then WR1=WR2 is SCALE1 times the real 00115 part of the eigenvalues. 00116 00117 WR2 (output) DOUBLE PRECISION 00118 If the eigenvalue is real, then WR2 is SCALE2 times the 00119 other eigenvalue. If the eigenvalue is complex, then 00120 WR1=WR2 is SCALE1 times the real part of the eigenvalues. 00121 00122 WI (output) DOUBLE PRECISION 00123 If the eigenvalue is real, then WI is zero. If the 00124 eigenvalue is complex, then WI is SCALE1 times the imaginary 00125 part of the eigenvalues. WI will always be non-negative. 00126 00127 ===================================================================== 00128 00129 00130 Parameter adjustments */ 00131 /* System generated locals */ 00132 integer a_dim1, a_offset, b_dim1, b_offset; 00133 Treal d__1, d__2, d__3, d__4, d__5, d__6; 00134 /* Local variables */ 00135 Treal diff, bmin, wbig, wabs, wdet, r__, binv11, binv22, 00136 discr, anorm, bnorm, bsize, shift, c1, c2, c3, c4, c5, rtmin, 00137 rtmax, wsize, s1, s2, a11, a12, a21, a22, b11, b12, b22, ascale, 00138 bscale, pp, qq, ss, wscale, safmax, wsmall, as11, as12, as22, sum, 00139 abi22; 00140 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00141 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00142 00143 a_dim1 = *lda; 00144 a_offset = 1 + a_dim1 * 1; 00145 a -= a_offset; 00146 b_dim1 = *ldb; 00147 b_offset = 1 + b_dim1 * 1; 00148 b -= b_offset; 00149 00150 /* Function Body */ 00151 rtmin = template_blas_sqrt(*safmin); 00152 rtmax = 1. / rtmin; 00153 safmax = 1. / *safmin; 00154 00155 /* Scale A 00156 00157 Computing MAX */ 00158 d__5 = (d__1 = a_ref(1, 1), absMACRO(d__1)) + (d__2 = a_ref(2, 1), absMACRO(d__2)), 00159 d__6 = (d__3 = a_ref(1, 2), absMACRO(d__3)) + (d__4 = a_ref(2, 2), absMACRO( 00160 d__4)), d__5 = maxMACRO(d__5,d__6); 00161 anorm = maxMACRO(d__5,*safmin); 00162 ascale = 1. / anorm; 00163 a11 = ascale * a_ref(1, 1); 00164 a21 = ascale * a_ref(2, 1); 00165 a12 = ascale * a_ref(1, 2); 00166 a22 = ascale * a_ref(2, 2); 00167 00168 /* Perturb B if necessary to insure non-singularity */ 00169 00170 b11 = b_ref(1, 1); 00171 b12 = b_ref(1, 2); 00172 b22 = b_ref(2, 2); 00173 /* Computing MAX */ 00174 d__1 = absMACRO(b11), d__2 = absMACRO(b12), d__1 = maxMACRO(d__1,d__2), d__2 = absMACRO(b22), 00175 d__1 = maxMACRO(d__1,d__2); 00176 bmin = rtmin * maxMACRO(d__1,rtmin); 00177 if (absMACRO(b11) < bmin) { 00178 b11 = template_lapack_d_sign(&bmin, &b11); 00179 } 00180 if (absMACRO(b22) < bmin) { 00181 b22 = template_lapack_d_sign(&bmin, &b22); 00182 } 00183 00184 /* Scale B 00185 00186 Computing MAX */ 00187 d__1 = absMACRO(b11), d__2 = absMACRO(b12) + absMACRO(b22), d__1 = maxMACRO(d__1,d__2); 00188 bnorm = maxMACRO(d__1,*safmin); 00189 /* Computing MAX */ 00190 d__1 = absMACRO(b11), d__2 = absMACRO(b22); 00191 bsize = maxMACRO(d__1,d__2); 00192 bscale = 1. / bsize; 00193 b11 *= bscale; 00194 b12 *= bscale; 00195 b22 *= bscale; 00196 00197 /* Compute larger eigenvalue by method described by C. van Loan 00198 00199 ( AS is A shifted by -SHIFT*B ) */ 00200 00201 binv11 = 1. / b11; 00202 binv22 = 1. / b22; 00203 s1 = a11 * binv11; 00204 s2 = a22 * binv22; 00205 if (absMACRO(s1) <= absMACRO(s2)) { 00206 as12 = a12 - s1 * b12; 00207 as22 = a22 - s1 * b22; 00208 ss = a21 * (binv11 * binv22); 00209 abi22 = as22 * binv22 - ss * b12; 00210 pp = abi22 * .5; 00211 shift = s1; 00212 } else { 00213 as12 = a12 - s2 * b12; 00214 as11 = a11 - s2 * b11; 00215 ss = a21 * (binv11 * binv22); 00216 abi22 = -ss * b12; 00217 pp = (as11 * binv11 + abi22) * .5; 00218 shift = s2; 00219 } 00220 qq = ss * as12; 00221 if ((d__1 = pp * rtmin, absMACRO(d__1)) >= 1.) { 00222 /* Computing 2nd power */ 00223 d__1 = rtmin * pp; 00224 discr = d__1 * d__1 + qq * *safmin; 00225 r__ = template_blas_sqrt((absMACRO(discr))) * rtmax; 00226 } else { 00227 /* Computing 2nd power */ 00228 d__1 = pp; 00229 if (d__1 * d__1 + absMACRO(qq) <= *safmin) { 00230 /* Computing 2nd power */ 00231 d__1 = rtmax * pp; 00232 discr = d__1 * d__1 + qq * safmax; 00233 r__ = template_blas_sqrt((absMACRO(discr))) * rtmin; 00234 } else { 00235 /* Computing 2nd power */ 00236 d__1 = pp; 00237 discr = d__1 * d__1 + qq; 00238 r__ = template_blas_sqrt((absMACRO(discr))); 00239 } 00240 } 00241 00242 /* Note: the test of R in the following IF is to cover the case when 00243 DISCR is small and negative and is flushed to zero during 00244 the calculation of R. On machines which have a consistent 00245 flush-to-zero threshhold and handle numbers above that 00246 threshhold correctly, it would not be necessary. */ 00247 00248 if (discr >= 0. || r__ == 0.) { 00249 sum = pp + template_lapack_d_sign(&r__, &pp); 00250 diff = pp - template_lapack_d_sign(&r__, &pp); 00251 wbig = shift + sum; 00252 00253 /* Compute smaller eigenvalue */ 00254 00255 wsmall = shift + diff; 00256 /* Computing MAX */ 00257 d__1 = absMACRO(wsmall); 00258 if (absMACRO(wbig) * .5 > maxMACRO(d__1,*safmin)) { 00259 wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22); 00260 wsmall = wdet / wbig; 00261 } 00262 00263 /* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) 00264 for WR1. */ 00265 00266 if (pp > abi22) { 00267 *wr1 = minMACRO(wbig,wsmall); 00268 *wr2 = maxMACRO(wbig,wsmall); 00269 } else { 00270 *wr1 = maxMACRO(wbig,wsmall); 00271 *wr2 = minMACRO(wbig,wsmall); 00272 } 00273 *wi = 0.; 00274 } else { 00275 00276 /* Complex eigenvalues */ 00277 00278 *wr1 = shift + pp; 00279 *wr2 = *wr1; 00280 *wi = r__; 00281 } 00282 00283 /* Further scaling to avoid underflow and overflow in computing 00284 SCALE1 and overflow in computing w*B. 00285 00286 This scale factor (WSCALE) is bounded from above using C1 and C2, 00287 and from below using C3 and C4. 00288 C1 implements the condition s A must never overflow. 00289 C2 implements the condition w B must never overflow. 00290 C3, with C2, 00291 implement the condition that s A - w B must never overflow. 00292 C4 implements the condition s should not underflow. 00293 C5 implements the condition max(s,|w|) should be at least 2. */ 00294 00295 c1 = bsize * (*safmin * maxMACRO(1.,ascale)); 00296 c2 = *safmin * maxMACRO(1.,bnorm); 00297 c3 = bsize * *safmin; 00298 if (ascale <= 1. && bsize <= 1.) { 00299 /* Computing MIN */ 00300 d__1 = 1., d__2 = ascale / *safmin * bsize; 00301 c4 = minMACRO(d__1,d__2); 00302 } else { 00303 c4 = 1.; 00304 } 00305 if (ascale <= 1. || bsize <= 1.) { 00306 /* Computing MIN */ 00307 d__1 = 1., d__2 = ascale * bsize; 00308 c5 = minMACRO(d__1,d__2); 00309 } else { 00310 c5 = 1.; 00311 } 00312 00313 /* Scale first eigenvalue */ 00314 00315 wabs = absMACRO(*wr1) + absMACRO(*wi); 00316 /* Computing MAX 00317 Computing MIN */ 00318 d__3 = c4, d__4 = maxMACRO(wabs,c5) * .5; 00319 d__1 = maxMACRO(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001, 00320 d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,d__4); 00321 wsize = maxMACRO(d__1,d__2); 00322 if (wsize != 1.) { 00323 wscale = 1. / wsize; 00324 if (wsize > 1.) { 00325 *scale1 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize); 00326 } else { 00327 *scale1 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize); 00328 } 00329 *wr1 *= wscale; 00330 if (*wi != 0.) { 00331 *wi *= wscale; 00332 *wr2 = *wr1; 00333 *scale2 = *scale1; 00334 } 00335 } else { 00336 *scale1 = ascale * bsize; 00337 *scale2 = *scale1; 00338 } 00339 00340 /* Scale second eigenvalue (if real) */ 00341 00342 if (*wi == 0.) { 00343 /* Computing MAX 00344 Computing MIN 00345 Computing MAX */ 00346 d__5 = absMACRO(*wr2); 00347 d__3 = c4, d__4 = maxMACRO(d__5,c5) * .5; 00348 d__1 = maxMACRO(*safmin,c1), d__2 = (absMACRO(*wr2) * c2 + c3) * 00349 1.0000100000000001, d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3, 00350 d__4); 00351 wsize = maxMACRO(d__1,d__2); 00352 if (wsize != 1.) { 00353 wscale = 1. / wsize; 00354 if (wsize > 1.) { 00355 *scale2 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize); 00356 } else { 00357 *scale2 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize); 00358 } 00359 *wr2 *= wscale; 00360 } else { 00361 *scale2 = ascale * bsize; 00362 } 00363 } 00364 00365 /* End of DLAG2 */ 00366 00367 return 0; 00368 } /* dlag2_ */ 00369 00370 #undef b_ref 00371 #undef a_ref 00372 00373 00374 #endif