ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LATRD_HEADER 00036 #define TEMPLATE_LAPACK_LATRD_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal * 00041 a, const integer *lda, Treal *e, Treal *tau, Treal *w, 00042 const integer *ldw) 00043 { 00044 /* -- LAPACK auxiliary routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 October 31, 1992 00048 00049 00050 Purpose 00051 ======= 00052 00053 DLATRD reduces NB rows and columns of a real symmetric matrix A to 00054 symmetric tridiagonal form by an orthogonal similarity 00055 transformation Q' * A * Q, and returns the matrices V and W which are 00056 needed to apply the transformation to the unreduced part of A. 00057 00058 If UPLO = 'U', DLATRD reduces the last NB rows and columns of a 00059 matrix, of which the upper triangle is supplied; 00060 if UPLO = 'L', DLATRD reduces the first NB rows and columns of a 00061 matrix, of which the lower triangle is supplied. 00062 00063 This is an auxiliary routine called by DSYTRD. 00064 00065 Arguments 00066 ========= 00067 00068 UPLO (input) CHARACTER 00069 Specifies whether the upper or lower triangular part of the 00070 symmetric matrix A is stored: 00071 = 'U': Upper triangular 00072 = 'L': Lower triangular 00073 00074 N (input) INTEGER 00075 The order of the matrix A. 00076 00077 NB (input) INTEGER 00078 The number of rows and columns to be reduced. 00079 00080 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00081 On entry, the symmetric matrix A. If UPLO = 'U', the leading 00082 n-by-n upper triangular part of A contains the upper 00083 triangular part of the matrix A, and the strictly lower 00084 triangular part of A is not referenced. If UPLO = 'L', the 00085 leading n-by-n lower triangular part of A contains the lower 00086 triangular part of the matrix A, and the strictly upper 00087 triangular part of A is not referenced. 00088 On exit: 00089 if UPLO = 'U', the last NB columns have been reduced to 00090 tridiagonal form, with the diagonal elements overwriting 00091 the diagonal elements of A; the elements above the diagonal 00092 with the array TAU, represent the orthogonal matrix Q as a 00093 product of elementary reflectors; 00094 if UPLO = 'L', the first NB columns have been reduced to 00095 tridiagonal form, with the diagonal elements overwriting 00096 the diagonal elements of A; the elements below the diagonal 00097 with the array TAU, represent the orthogonal matrix Q as a 00098 product of elementary reflectors. 00099 See Further Details. 00100 00101 LDA (input) INTEGER 00102 The leading dimension of the array A. LDA >= (1,N). 00103 00104 E (output) DOUBLE PRECISION array, dimension (N-1) 00105 If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal 00106 elements of the last NB columns of the reduced matrix; 00107 if UPLO = 'L', E(1:nb) contains the subdiagonal elements of 00108 the first NB columns of the reduced matrix. 00109 00110 TAU (output) DOUBLE PRECISION array, dimension (N-1) 00111 The scalar factors of the elementary reflectors, stored in 00112 TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. 00113 See Further Details. 00114 00115 W (output) DOUBLE PRECISION array, dimension (LDW,NB) 00116 The n-by-nb matrix W required to update the unreduced part 00117 of A. 00118 00119 LDW (input) INTEGER 00120 The leading dimension of the array W. LDW >= max(1,N). 00121 00122 Further Details 00123 =============== 00124 00125 If UPLO = 'U', the matrix Q is represented as a product of elementary 00126 reflectors 00127 00128 Q = H(n) H(n-1) . . . H(n-nb+1). 00129 00130 Each H(i) has the form 00131 00132 H(i) = I - tau * v * v' 00133 00134 where tau is a real scalar, and v is a real vector with 00135 v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), 00136 and tau in TAU(i-1). 00137 00138 If UPLO = 'L', the matrix Q is represented as a product of elementary 00139 reflectors 00140 00141 Q = H(1) H(2) . . . H(nb). 00142 00143 Each H(i) has the form 00144 00145 H(i) = I - tau * v * v' 00146 00147 where tau is a real scalar, and v is a real vector with 00148 v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), 00149 and tau in TAU(i). 00150 00151 The elements of the vectors v together form the n-by-nb matrix V 00152 which is needed, with W, to apply the transformation to the unreduced 00153 part of the matrix, using a symmetric rank-2k update of the form: 00154 A := A - V*W' - W*V'. 00155 00156 The contents of A on exit are illustrated by the following examples 00157 with n = 5 and nb = 2: 00158 00159 if UPLO = 'U': if UPLO = 'L': 00160 00161 ( a a a v4 v5 ) ( d ) 00162 ( a a v4 v5 ) ( 1 d ) 00163 ( a 1 v5 ) ( v1 1 a ) 00164 ( d 1 ) ( v1 v2 a a ) 00165 ( d ) ( v1 v2 a a a ) 00166 00167 where d denotes a diagonal element of the reduced matrix, a denotes 00168 an element of the original matrix that is unchanged, and vi denotes 00169 an element of the vector defining H(i). 00170 00171 ===================================================================== 00172 00173 00174 Quick return if possible 00175 00176 Parameter adjustments */ 00177 /* Table of constant values */ 00178 Treal c_b5 = -1.; 00179 Treal c_b6 = 1.; 00180 integer c__1 = 1; 00181 Treal c_b16 = 0.; 00182 00183 /* System generated locals */ 00184 integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; 00185 /* Local variables */ 00186 integer i__; 00187 Treal alpha; 00188 integer iw; 00189 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00190 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1] 00191 00192 00193 a_dim1 = *lda; 00194 a_offset = 1 + a_dim1 * 1; 00195 a -= a_offset; 00196 --e; 00197 --tau; 00198 w_dim1 = *ldw; 00199 w_offset = 1 + w_dim1 * 1; 00200 w -= w_offset; 00201 00202 /* Function Body */ 00203 if (*n <= 0) { 00204 return 0; 00205 } 00206 00207 if (template_blas_lsame(uplo, "U")) { 00208 00209 /* Reduce last NB columns of upper triangle */ 00210 00211 i__1 = *n - *nb + 1; 00212 for (i__ = *n; i__ >= i__1; --i__) { 00213 iw = i__ - *n + *nb; 00214 if (i__ < *n) { 00215 00216 /* Update A(1:i,i) */ 00217 00218 i__2 = *n - i__; 00219 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1), 00220 lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__), 00221 &c__1); 00222 i__2 = *n - i__; 00223 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1), 00224 ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__), 00225 &c__1); 00226 } 00227 if (i__ > 1) { 00228 00229 /* Generate elementary reflector H(i) to annihilate 00230 A(1:i-2,i) */ 00231 00232 i__2 = i__ - 1; 00233 template_lapack_larfg(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, & 00234 tau[i__ - 1]); 00235 e[i__ - 1] = a_ref(i__ - 1, i__); 00236 a_ref(i__ - 1, i__) = 1.; 00237 00238 /* Compute W(1:i-1,i) */ 00239 00240 i__2 = i__ - 1; 00241 template_blas_symv("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1, 00242 i__), &c__1, &c_b16, &w_ref(1, iw), &c__1); 00243 if (i__ < *n) { 00244 i__2 = i__ - 1; 00245 i__3 = *n - i__; 00246 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1) 00247 , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__ 00248 + 1, iw), &c__1); 00249 i__2 = i__ - 1; 00250 i__3 = *n - i__; 00251 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ 00252 + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, & 00253 w_ref(1, iw), &c__1); 00254 i__2 = i__ - 1; 00255 i__3 = *n - i__; 00256 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ + 00257 1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref( 00258 i__ + 1, iw), &c__1); 00259 i__2 = i__ - 1; 00260 i__3 = *n - i__; 00261 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw 00262 + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, & 00263 w_ref(1, iw), &c__1); 00264 } 00265 i__2 = i__ - 1; 00266 template_blas_scal(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1); 00267 i__2 = i__ - 1; 00268 alpha = tau[i__ - 1] * -.5 * template_blas_dot(&i__2, &w_ref(1, iw), & 00269 c__1, &a_ref(1, i__), &c__1); 00270 i__2 = i__ - 1; 00271 template_blas_axpy(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), & 00272 c__1); 00273 } 00274 00275 /* L10: */ 00276 } 00277 } else { 00278 00279 /* Reduce first NB columns of lower triangle */ 00280 00281 i__1 = *nb; 00282 for (i__ = 1; i__ <= i__1; ++i__) { 00283 00284 /* Update A(i:n,i) */ 00285 00286 i__2 = *n - i__ + 1; 00287 i__3 = i__ - 1; 00288 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, & 00289 w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1); 00290 i__2 = *n - i__ + 1; 00291 i__3 = i__ - 1; 00292 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, & 00293 a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1); 00294 if (i__ < *n) { 00295 00296 /* Generate elementary reflector H(i) to annihilate 00297 A(i+2:n,i) 00298 00299 Computing MIN */ 00300 i__2 = i__ + 2; 00301 i__3 = *n - i__; 00302 template_lapack_larfg(&i__3, &a_ref(i__ + 1, i__), &a_ref(minMACRO(i__2,*n), i__) 00303 , &c__1, &tau[i__]); 00304 e[i__] = a_ref(i__ + 1, i__); 00305 a_ref(i__ + 1, i__) = 1.; 00306 00307 /* Compute W(i+1:n,i) */ 00308 00309 i__2 = *n - i__; 00310 template_blas_symv("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, & 00311 a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1, 00312 i__), &c__1); 00313 i__2 = *n - i__; 00314 i__3 = i__ - 1; 00315 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1), 00316 ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 00317 i__), &c__1); 00318 i__2 = *n - i__; 00319 i__3 = i__ - 1; 00320 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1) 00321 , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 00322 i__), &c__1); 00323 i__2 = *n - i__; 00324 i__3 = i__ - 1; 00325 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1), 00326 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 00327 i__), &c__1); 00328 i__2 = *n - i__; 00329 i__3 = i__ - 1; 00330 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1) 00331 , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 00332 i__), &c__1); 00333 i__2 = *n - i__; 00334 template_blas_scal(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1); 00335 i__2 = *n - i__; 00336 alpha = tau[i__] * -.5 * template_blas_dot(&i__2, &w_ref(i__ + 1, i__), & 00337 c__1, &a_ref(i__ + 1, i__), &c__1); 00338 i__2 = *n - i__; 00339 template_blas_axpy(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__ 00340 + 1, i__), &c__1); 00341 } 00342 00343 /* L20: */ 00344 } 00345 } 00346 00347 return 0; 00348 00349 /* End of DLATRD */ 00350 00351 } /* dlatrd_ */ 00352 00353 #undef w_ref 00354 #undef a_ref 00355 00356 00357 #endif