ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LALN2_HEADER 00036 #define TEMPLATE_LAPACK_LALN2_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw, 00041 const Treal *smin, const Treal *ca, const Treal *a, const integer *lda, 00042 const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb, 00043 const Treal *wr, const Treal *wi, Treal *x, const integer *ldx, 00044 Treal *scale, Treal *xnorm, integer *info) 00045 { 00046 /* -- LAPACK auxiliary routine (version 3.0) -- 00047 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00048 Courant Institute, Argonne National Lab, and Rice University 00049 October 31, 1992 00050 00051 00052 Purpose 00053 ======= 00054 00055 DLALN2 solves a system of the form (ca A - w D ) X = s B 00056 or (ca A' - w D) X = s B with possible scaling ("s") and 00057 perturbation of A. (A' means A-transpose.) 00058 00059 A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA 00060 real diagonal matrix, w is a real or complex value, and X and B are 00061 NA x 1 matrices -- real if w is real, complex if w is complex. NA 00062 may be 1 or 2. 00063 00064 If w is complex, X and B are represented as NA x 2 matrices, 00065 the first column of each being the real part and the second 00066 being the imaginary part. 00067 00068 "s" is a scaling factor (.LE. 1), computed by DLALN2, which is 00069 so chosen that X can be computed without overflow. X is further 00070 scaled if necessary to assure that norm(ca A - w D)*norm(X) is less 00071 than overflow. 00072 00073 If both singular values of (ca A - w D) are less than SMIN, 00074 SMIN*identity will be used instead of (ca A - w D). If only one 00075 singular value is less than SMIN, one element of (ca A - w D) will be 00076 perturbed enough to make the smallest singular value roughly SMIN. 00077 If both singular values are at least SMIN, (ca A - w D) will not be 00078 perturbed. In any case, the perturbation will be at most some small 00079 multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values 00080 are computed by infinity-norm approximations, and thus will only be 00081 correct to a factor of 2 or so. 00082 00083 Note: all input quantities are assumed to be smaller than overflow 00084 by a reasonable factor. (See BIGNUM.) 00085 00086 Arguments 00087 ========== 00088 00089 LTRANS (input) LOGICAL 00090 =.TRUE.: A-transpose will be used. 00091 =.FALSE.: A will be used (not transposed.) 00092 00093 NA (input) INTEGER 00094 The size of the matrix A. It may (only) be 1 or 2. 00095 00096 NW (input) INTEGER 00097 1 if "w" is real, 2 if "w" is complex. It may only be 1 00098 or 2. 00099 00100 SMIN (input) DOUBLE PRECISION 00101 The desired lower bound on the singular values of A. This 00102 should be a safe distance away from underflow or overflow, 00103 say, between (underflow/machine precision) and (machine 00104 precision * overflow ). (See BIGNUM and ULP.) 00105 00106 CA (input) DOUBLE PRECISION 00107 The coefficient c, which A is multiplied by. 00108 00109 A (input) DOUBLE PRECISION array, dimension (LDA,NA) 00110 The NA x NA matrix A. 00111 00112 LDA (input) INTEGER 00113 The leading dimension of A. It must be at least NA. 00114 00115 D1 (input) DOUBLE PRECISION 00116 The 1,1 element in the diagonal matrix D. 00117 00118 D2 (input) DOUBLE PRECISION 00119 The 2,2 element in the diagonal matrix D. Not used if NW=1. 00120 00121 B (input) DOUBLE PRECISION array, dimension (LDB,NW) 00122 The NA x NW matrix B (right-hand side). If NW=2 ("w" is 00123 complex), column 1 contains the real part of B and column 2 00124 contains the imaginary part. 00125 00126 LDB (input) INTEGER 00127 The leading dimension of B. It must be at least NA. 00128 00129 WR (input) DOUBLE PRECISION 00130 The real part of the scalar "w". 00131 00132 WI (input) DOUBLE PRECISION 00133 The imaginary part of the scalar "w". Not used if NW=1. 00134 00135 X (output) DOUBLE PRECISION array, dimension (LDX,NW) 00136 The NA x NW matrix X (unknowns), as computed by DLALN2. 00137 If NW=2 ("w" is complex), on exit, column 1 will contain 00138 the real part of X and column 2 will contain the imaginary 00139 part. 00140 00141 LDX (input) INTEGER 00142 The leading dimension of X. It must be at least NA. 00143 00144 SCALE (output) DOUBLE PRECISION 00145 The scale factor that B must be multiplied by to insure 00146 that overflow does not occur when computing X. Thus, 00147 (ca A - w D) X will be SCALE*B, not B (ignoring 00148 perturbations of A.) It will be at most 1. 00149 00150 XNORM (output) DOUBLE PRECISION 00151 The infinity-norm of X, when X is regarded as an NA x NW 00152 real matrix. 00153 00154 INFO (output) INTEGER 00155 An error flag. It will be set to zero if no error occurs, 00156 a negative number if an argument is in error, or a positive 00157 number if ca A - w D had to be perturbed. 00158 The possible values are: 00159 = 0: No error occurred, and (ca A - w D) did not have to be 00160 perturbed. 00161 = 1: (ca A - w D) had to be perturbed to make its smallest 00162 (or only) singular value greater than SMIN. 00163 NOTE: In the interests of speed, this routine does not 00164 check the inputs for errors. 00165 00166 ===================================================================== 00167 00168 Parameter adjustments */ 00169 /* Initialized data */ 00170 logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; 00171 logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; 00172 integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, 00173 4,3,2,1 }; 00174 /* System generated locals */ 00175 integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; 00176 Treal d__1, d__2, d__3, d__4, d__5, d__6; 00177 Treal equiv_0[4], equiv_1[4]; 00178 /* Local variables */ 00179 Treal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s; 00180 integer j; 00181 Treal u22abs; 00182 integer icmax; 00183 Treal bnorm, cnorm, smini; 00184 #define ci (equiv_0) 00185 #define cr (equiv_1) 00186 Treal bignum, bi1, bi2, br1, br2, smlnum, xi1, xi2, xr1, xr2, 00187 ci21, ci22, cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; 00188 #define civ (equiv_0) 00189 Treal csr, ur11, ur12, ur22; 00190 #define crv (equiv_1) 00191 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00192 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00193 #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] 00194 #define ci_ref(a_1,a_2) ci[(a_2)*2 + a_1 - 3] 00195 #define cr_ref(a_1,a_2) cr[(a_2)*2 + a_1 - 3] 00196 #define ipivot_ref(a_1,a_2) ipivot[(a_2)*4 + a_1 - 5] 00197 00198 a_dim1 = *lda; 00199 a_offset = 1 + a_dim1 * 1; 00200 a -= a_offset; 00201 b_dim1 = *ldb; 00202 b_offset = 1 + b_dim1 * 1; 00203 b -= b_offset; 00204 x_dim1 = *ldx; 00205 x_offset = 1 + x_dim1 * 1; 00206 x -= x_offset; 00207 00208 /* Function Body 00209 00210 Compute BIGNUM */ 00211 00212 smlnum = 2. * template_lapack_lamch("Safe minimum", (Treal)0); 00213 bignum = 1. / smlnum; 00214 smini = maxMACRO(*smin,smlnum); 00215 00216 /* Don't check for input errors */ 00217 00218 *info = 0; 00219 00220 /* Standard Initializations */ 00221 00222 *scale = 1.; 00223 00224 if (*na == 1) { 00225 00226 /* 1 x 1 (i.e., scalar) system C X = B */ 00227 00228 if (*nw == 1) { 00229 00230 /* Real 1x1 system. 00231 00232 C = ca A - w D */ 00233 00234 csr = *ca * a_ref(1, 1) - *wr * *d1; 00235 cnorm = absMACRO(csr); 00236 00237 /* If | C | < SMINI, use C = SMINI */ 00238 00239 if (cnorm < smini) { 00240 csr = smini; 00241 cnorm = smini; 00242 *info = 1; 00243 } 00244 00245 /* Check scaling for X = B / C */ 00246 00247 bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1)); 00248 if (cnorm < 1. && bnorm > 1.) { 00249 if (bnorm > bignum * cnorm) { 00250 *scale = 1. / bnorm; 00251 } 00252 } 00253 00254 /* Compute X */ 00255 00256 x_ref(1, 1) = b_ref(1, 1) * *scale / csr; 00257 *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1)); 00258 } else { 00259 00260 /* Complex 1x1 system (w is complex) 00261 00262 C = ca A - w D */ 00263 00264 csr = *ca * a_ref(1, 1) - *wr * *d1; 00265 csi = -(*wi) * *d1; 00266 cnorm = absMACRO(csr) + absMACRO(csi); 00267 00268 /* If | C | < SMINI, use C = SMINI */ 00269 00270 if (cnorm < smini) { 00271 csr = smini; 00272 csi = 0.; 00273 cnorm = smini; 00274 *info = 1; 00275 } 00276 00277 /* Check scaling for X = B / C */ 00278 00279 bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2), 00280 absMACRO(d__2)); 00281 if (cnorm < 1. && bnorm > 1.) { 00282 if (bnorm > bignum * cnorm) { 00283 *scale = 1. / bnorm; 00284 } 00285 } 00286 00287 /* Compute X */ 00288 00289 d__1 = *scale * b_ref(1, 1); 00290 d__2 = *scale * b_ref(1, 2); 00291 template_lapack_ladiv(&d__1, &d__2, &csr, &csi, &x_ref(1, 1), &x_ref(1, 2)); 00292 *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1)) + (d__2 = x_ref(1, 2), 00293 absMACRO(d__2)); 00294 } 00295 00296 } else { 00297 00298 /* 2x2 System 00299 00300 Compute the real part of C = ca A - w D (or ca A' - w D ) */ 00301 00302 cr_ref(1, 1) = *ca * a_ref(1, 1) - *wr * *d1; 00303 cr_ref(2, 2) = *ca * a_ref(2, 2) - *wr * *d2; 00304 if (*ltrans) { 00305 cr_ref(1, 2) = *ca * a_ref(2, 1); 00306 cr_ref(2, 1) = *ca * a_ref(1, 2); 00307 } else { 00308 cr_ref(2, 1) = *ca * a_ref(2, 1); 00309 cr_ref(1, 2) = *ca * a_ref(1, 2); 00310 } 00311 00312 if (*nw == 1) { 00313 00314 /* Real 2x2 system (w is real) 00315 00316 Find the largest element in C */ 00317 00318 cmax = 0.; 00319 icmax = 0; 00320 00321 for (j = 1; j <= 4; ++j) { 00322 if ((d__1 = crv[j - 1], absMACRO(d__1)) > cmax) { 00323 cmax = (d__1 = crv[j - 1], absMACRO(d__1)); 00324 icmax = j; 00325 } 00326 /* L10: */ 00327 } 00328 00329 /* If norm(C) < SMINI, use SMINI*identity. */ 00330 00331 if (cmax < smini) { 00332 /* Computing MAX */ 00333 d__3 = (d__1 = b_ref(1, 1), absMACRO(d__1)), d__4 = (d__2 = b_ref( 00334 2, 1), absMACRO(d__2)); 00335 bnorm = maxMACRO(d__3,d__4); 00336 if (smini < 1. && bnorm > 1.) { 00337 if (bnorm > bignum * smini) { 00338 *scale = 1. / bnorm; 00339 } 00340 } 00341 temp = *scale / smini; 00342 x_ref(1, 1) = temp * b_ref(1, 1); 00343 x_ref(2, 1) = temp * b_ref(2, 1); 00344 *xnorm = temp * bnorm; 00345 *info = 1; 00346 return 0; 00347 } 00348 00349 /* Gaussian elimination with complete pivoting. */ 00350 00351 ur11 = crv[icmax - 1]; 00352 cr21 = crv[ipivot_ref(2, icmax) - 1]; 00353 ur12 = crv[ipivot_ref(3, icmax) - 1]; 00354 cr22 = crv[ipivot_ref(4, icmax) - 1]; 00355 ur11r = 1. / ur11; 00356 lr21 = ur11r * cr21; 00357 ur22 = cr22 - ur12 * lr21; 00358 00359 /* If smaller pivot < SMINI, use SMINI */ 00360 00361 if (absMACRO(ur22) < smini) { 00362 ur22 = smini; 00363 *info = 1; 00364 } 00365 if (rswap[icmax - 1]) { 00366 br1 = b_ref(2, 1); 00367 br2 = b_ref(1, 1); 00368 } else { 00369 br1 = b_ref(1, 1); 00370 br2 = b_ref(2, 1); 00371 } 00372 br2 -= lr21 * br1; 00373 /* Computing MAX */ 00374 d__2 = (d__1 = br1 * (ur22 * ur11r), absMACRO(d__1)), d__3 = absMACRO(br2); 00375 bbnd = maxMACRO(d__2,d__3); 00376 if (bbnd > 1. && absMACRO(ur22) < 1.) { 00377 if (bbnd >= bignum * absMACRO(ur22)) { 00378 *scale = 1. / bbnd; 00379 } 00380 } 00381 00382 xr2 = br2 * *scale / ur22; 00383 xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); 00384 if (zswap[icmax - 1]) { 00385 x_ref(1, 1) = xr2; 00386 x_ref(2, 1) = xr1; 00387 } else { 00388 x_ref(1, 1) = xr1; 00389 x_ref(2, 1) = xr2; 00390 } 00391 /* Computing MAX */ 00392 d__1 = absMACRO(xr1), d__2 = absMACRO(xr2); 00393 *xnorm = maxMACRO(d__1,d__2); 00394 00395 /* Further scaling if norm(A) norm(X) > overflow */ 00396 00397 if (*xnorm > 1. && cmax > 1.) { 00398 if (*xnorm > bignum / cmax) { 00399 temp = cmax / bignum; 00400 x_ref(1, 1) = temp * x_ref(1, 1); 00401 x_ref(2, 1) = temp * x_ref(2, 1); 00402 *xnorm = temp * *xnorm; 00403 *scale = temp * *scale; 00404 } 00405 } 00406 } else { 00407 00408 /* Complex 2x2 system (w is complex) 00409 00410 Find the largest element in C */ 00411 00412 ci_ref(1, 1) = -(*wi) * *d1; 00413 ci_ref(2, 1) = 0.; 00414 ci_ref(1, 2) = 0.; 00415 ci_ref(2, 2) = -(*wi) * *d2; 00416 cmax = 0.; 00417 icmax = 0; 00418 00419 for (j = 1; j <= 4; ++j) { 00420 if ((d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1], absMACRO( 00421 d__2)) > cmax) { 00422 cmax = (d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1] 00423 , absMACRO(d__2)); 00424 icmax = j; 00425 } 00426 /* L20: */ 00427 } 00428 00429 /* If norm(C) < SMINI, use SMINI*identity. */ 00430 00431 if (cmax < smini) { 00432 /* Computing MAX */ 00433 d__5 = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2), 00434 absMACRO(d__2)), d__6 = (d__3 = b_ref(2, 1), absMACRO(d__3)) + ( 00435 d__4 = b_ref(2, 2), absMACRO(d__4)); 00436 bnorm = maxMACRO(d__5,d__6); 00437 if (smini < 1. && bnorm > 1.) { 00438 if (bnorm > bignum * smini) { 00439 *scale = 1. / bnorm; 00440 } 00441 } 00442 temp = *scale / smini; 00443 x_ref(1, 1) = temp * b_ref(1, 1); 00444 x_ref(2, 1) = temp * b_ref(2, 1); 00445 x_ref(1, 2) = temp * b_ref(1, 2); 00446 x_ref(2, 2) = temp * b_ref(2, 2); 00447 *xnorm = temp * bnorm; 00448 *info = 1; 00449 return 0; 00450 } 00451 00452 /* Gaussian elimination with complete pivoting. */ 00453 00454 ur11 = crv[icmax - 1]; 00455 ui11 = civ[icmax - 1]; 00456 cr21 = crv[ipivot_ref(2, icmax) - 1]; 00457 ci21 = civ[ipivot_ref(2, icmax) - 1]; 00458 ur12 = crv[ipivot_ref(3, icmax) - 1]; 00459 ui12 = civ[ipivot_ref(3, icmax) - 1]; 00460 cr22 = crv[ipivot_ref(4, icmax) - 1]; 00461 ci22 = civ[ipivot_ref(4, icmax) - 1]; 00462 if (icmax == 1 || icmax == 4) { 00463 00464 /* Code when off-diagonals of pivoted C are real */ 00465 00466 if (absMACRO(ur11) > absMACRO(ui11)) { 00467 temp = ui11 / ur11; 00468 /* Computing 2nd power */ 00469 d__1 = temp; 00470 ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); 00471 ui11r = -temp * ur11r; 00472 } else { 00473 temp = ur11 / ui11; 00474 /* Computing 2nd power */ 00475 d__1 = temp; 00476 ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); 00477 ur11r = -temp * ui11r; 00478 } 00479 lr21 = cr21 * ur11r; 00480 li21 = cr21 * ui11r; 00481 ur12s = ur12 * ur11r; 00482 ui12s = ur12 * ui11r; 00483 ur22 = cr22 - ur12 * lr21; 00484 ui22 = ci22 - ur12 * li21; 00485 } else { 00486 00487 /* Code when diagonals of pivoted C are real */ 00488 00489 ur11r = 1. / ur11; 00490 ui11r = 0.; 00491 lr21 = cr21 * ur11r; 00492 li21 = ci21 * ur11r; 00493 ur12s = ur12 * ur11r; 00494 ui12s = ui12 * ur11r; 00495 ur22 = cr22 - ur12 * lr21 + ui12 * li21; 00496 ui22 = -ur12 * li21 - ui12 * lr21; 00497 } 00498 u22abs = absMACRO(ur22) + absMACRO(ui22); 00499 00500 /* If smaller pivot < SMINI, use SMINI */ 00501 00502 if (u22abs < smini) { 00503 ur22 = smini; 00504 ui22 = 0.; 00505 *info = 1; 00506 } 00507 if (rswap[icmax - 1]) { 00508 br2 = b_ref(1, 1); 00509 br1 = b_ref(2, 1); 00510 bi2 = b_ref(1, 2); 00511 bi1 = b_ref(2, 2); 00512 } else { 00513 br1 = b_ref(1, 1); 00514 br2 = b_ref(2, 1); 00515 bi1 = b_ref(1, 2); 00516 bi2 = b_ref(2, 2); 00517 } 00518 br2 = br2 - lr21 * br1 + li21 * bi1; 00519 bi2 = bi2 - li21 * br1 - lr21 * bi1; 00520 /* Computing MAX */ 00521 d__1 = (absMACRO(br1) + absMACRO(bi1)) * (u22abs * (absMACRO(ur11r) + absMACRO(ui11r)) 00522 ), d__2 = absMACRO(br2) + absMACRO(bi2); 00523 bbnd = maxMACRO(d__1,d__2); 00524 if (bbnd > 1. && u22abs < 1.) { 00525 if (bbnd >= bignum * u22abs) { 00526 *scale = 1. / bbnd; 00527 br1 = *scale * br1; 00528 bi1 = *scale * bi1; 00529 br2 = *scale * br2; 00530 bi2 = *scale * bi2; 00531 } 00532 } 00533 00534 template_lapack_ladiv(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); 00535 xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; 00536 xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; 00537 if (zswap[icmax - 1]) { 00538 x_ref(1, 1) = xr2; 00539 x_ref(2, 1) = xr1; 00540 x_ref(1, 2) = xi2; 00541 x_ref(2, 2) = xi1; 00542 } else { 00543 x_ref(1, 1) = xr1; 00544 x_ref(2, 1) = xr2; 00545 x_ref(1, 2) = xi1; 00546 x_ref(2, 2) = xi2; 00547 } 00548 /* Computing MAX */ 00549 d__1 = absMACRO(xr1) + absMACRO(xi1), d__2 = absMACRO(xr2) + absMACRO(xi2); 00550 *xnorm = maxMACRO(d__1,d__2); 00551 00552 /* Further scaling if norm(A) norm(X) > overflow */ 00553 00554 if (*xnorm > 1. && cmax > 1.) { 00555 if (*xnorm > bignum / cmax) { 00556 temp = cmax / bignum; 00557 x_ref(1, 1) = temp * x_ref(1, 1); 00558 x_ref(2, 1) = temp * x_ref(2, 1); 00559 x_ref(1, 2) = temp * x_ref(1, 2); 00560 x_ref(2, 2) = temp * x_ref(2, 2); 00561 *xnorm = temp * *xnorm; 00562 *scale = temp * *scale; 00563 } 00564 } 00565 } 00566 } 00567 00568 return 0; 00569 00570 /* End of DLALN2 */ 00571 00572 } /* dlaln2_ */ 00573 00574 #undef ipivot_ref 00575 #undef cr_ref 00576 #undef ci_ref 00577 #undef x_ref 00578 #undef b_ref 00579 #undef a_ref 00580 #undef crv 00581 #undef civ 00582 #undef cr 00583 #undef ci 00584 00585 00586 #endif