ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_SYGST_HEADER 00036 #define TEMPLATE_LAPACK_SYGST_HEADER 00037 00038 #include "template_lapack_sygs2.h" 00039 00040 template<class Treal> 00041 int template_lapack_sygst(const integer *itype, const char *uplo, const integer *n, 00042 Treal *a, const integer *lda, Treal *b, const integer *ldb, integer * 00043 info) 00044 { 00045 /* -- LAPACK routine (version 3.0) -- 00046 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00047 Courant Institute, Argonne National Lab, and Rice University 00048 September 30, 1994 00049 00050 00051 Purpose 00052 ======= 00053 00054 DSYGST reduces a real symmetric-definite generalized eigenproblem 00055 to standard form. 00056 00057 If ITYPE = 1, the problem is A*x = lambda*B*x, 00058 and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) 00059 00060 If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or 00061 B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. 00062 00063 B must have been previously factorized as U**T*U or L*L**T by DPOTRF. 00064 00065 Arguments 00066 ========= 00067 00068 ITYPE (input) INTEGER 00069 = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); 00070 = 2 or 3: compute U*A*U**T or L**T*A*L. 00071 00072 UPLO (input) CHARACTER 00073 = 'U': Upper triangle of A is stored and B is factored as 00074 U**T*U; 00075 = 'L': Lower triangle of A is stored and B is factored as 00076 L*L**T. 00077 00078 N (input) INTEGER 00079 The order of the matrices A and B. N >= 0. 00080 00081 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00082 On entry, the symmetric matrix A. If UPLO = 'U', the leading 00083 N-by-N upper triangular part of A contains the upper 00084 triangular part of the matrix A, and the strictly lower 00085 triangular part of A is not referenced. If UPLO = 'L', the 00086 leading N-by-N lower triangular part of A contains the lower 00087 triangular part of the matrix A, and the strictly upper 00088 triangular part of A is not referenced. 00089 00090 On exit, if INFO = 0, the transformed matrix, stored in the 00091 same format as A. 00092 00093 LDA (input) INTEGER 00094 The leading dimension of the array A. LDA >= max(1,N). 00095 00096 B (input) DOUBLE PRECISION array, dimension (LDB,N) 00097 The triangular factor from the Cholesky factorization of B, 00098 as returned by DPOTRF. 00099 00100 LDB (input) INTEGER 00101 The leading dimension of the array B. LDB >= max(1,N). 00102 00103 INFO (output) INTEGER 00104 = 0: successful exit 00105 < 0: if INFO = -i, the i-th argument had an illegal value 00106 00107 ===================================================================== 00108 00109 00110 Test the input parameters. 00111 00112 Parameter adjustments */ 00113 /* Table of constant values */ 00114 integer c__1 = 1; 00115 integer c_n1 = -1; 00116 Treal c_b14 = 1.; 00117 Treal c_b16 = -.5; 00118 Treal c_b19 = -1.; 00119 Treal c_b52 = .5; 00120 00121 /* System generated locals */ 00122 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; 00123 /* Local variables */ 00124 integer k; 00125 logical upper; 00126 integer kb; 00127 integer nb; 00128 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00129 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00130 00131 00132 a_dim1 = *lda; 00133 a_offset = 1 + a_dim1 * 1; 00134 a -= a_offset; 00135 b_dim1 = *ldb; 00136 b_offset = 1 + b_dim1 * 1; 00137 b -= b_offset; 00138 00139 /* Function Body */ 00140 *info = 0; 00141 upper = template_blas_lsame(uplo, "U"); 00142 if (*itype < 1 || *itype > 3) { 00143 *info = -1; 00144 } else if (! upper && ! template_blas_lsame(uplo, "L")) { 00145 *info = -2; 00146 } else if (*n < 0) { 00147 *info = -3; 00148 } else if (*lda < maxMACRO(1,*n)) { 00149 *info = -5; 00150 } else if (*ldb < maxMACRO(1,*n)) { 00151 *info = -7; 00152 } 00153 if (*info != 0) { 00154 i__1 = -(*info); 00155 template_blas_erbla("SYGST ", &i__1); 00156 return 0; 00157 } 00158 00159 /* Quick return if possible */ 00160 00161 if (*n == 0) { 00162 return 0; 00163 } 00164 00165 /* Determine the block size for this environment. */ 00166 00167 nb = template_lapack_ilaenv(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( 00168 ftnlen)1); 00169 00170 if (nb <= 1 || nb >= *n) { 00171 00172 /* Use unblocked code */ 00173 00174 template_lapack_sygs2(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); 00175 } else { 00176 00177 /* Use blocked code */ 00178 00179 if (*itype == 1) { 00180 if (upper) { 00181 00182 /* Compute inv(U')*A*inv(U) */ 00183 00184 i__1 = *n; 00185 i__2 = nb; 00186 for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { 00187 /* Computing MIN */ 00188 i__3 = *n - k + 1; 00189 kb = minMACRO(i__3,nb); 00190 00191 /* Update the upper triangle of A(k:n,k:n) */ 00192 00193 template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), 00194 ldb, info); 00195 if (k + kb <= *n) { 00196 i__3 = *n - k - kb + 1; 00197 template_blas_trsm("Left", uplo, "Transpose", "Non-unit", &kb, & 00198 i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k 00199 + kb), lda); 00200 i__3 = *n - k - kb + 1; 00201 template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), 00202 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( 00203 k, k + kb), lda); 00204 i__3 = *n - k - kb + 1; 00205 template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref( 00206 k, k + kb), lda, &b_ref(k, k + kb), ldb, & 00207 c_b14, &a_ref(k + kb, k + kb), lda); 00208 i__3 = *n - k - kb + 1; 00209 template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), 00210 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( 00211 k, k + kb), lda); 00212 i__3 = *n - k - kb + 1; 00213 template_blas_trsm("Right", uplo, "No transpose", "Non-unit", &kb, 00214 &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, & 00215 a_ref(k, k + kb), lda); 00216 } 00217 /* L10: */ 00218 } 00219 } else { 00220 00221 /* Compute inv(L)*A*inv(L') */ 00222 00223 i__2 = *n; 00224 i__1 = nb; 00225 for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { 00226 /* Computing MIN */ 00227 i__3 = *n - k + 1; 00228 kb = minMACRO(i__3,nb); 00229 00230 /* Update the lower triangle of A(k:n,k:n) */ 00231 00232 template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), 00233 ldb, info); 00234 if (k + kb <= *n) { 00235 i__3 = *n - k - kb + 1; 00236 template_blas_trsm("Right", uplo, "Transpose", "Non-unit", &i__3, 00237 &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb, 00238 k), lda); 00239 i__3 = *n - k - kb + 1; 00240 template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) 00241 , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( 00242 k + kb, k), lda); 00243 i__3 = *n - k - kb + 1; 00244 template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b19, & 00245 a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb, 00246 &c_b14, &a_ref(k + kb, k + kb), lda); 00247 i__3 = *n - k - kb + 1; 00248 template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) 00249 , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( 00250 k + kb, k), lda); 00251 i__3 = *n - k - kb + 1; 00252 template_blas_trsm("Left", uplo, "No transpose", "Non-unit", & 00253 i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), 00254 ldb, &a_ref(k + kb, k), lda); 00255 } 00256 /* L20: */ 00257 } 00258 } 00259 } else { 00260 if (upper) { 00261 00262 /* Compute U*A*U' */ 00263 00264 i__1 = *n; 00265 i__2 = nb; 00266 for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { 00267 /* Computing MIN */ 00268 i__3 = *n - k + 1; 00269 kb = minMACRO(i__3,nb); 00270 00271 /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ 00272 00273 i__3 = k - 1; 00274 template_blas_trmm("Left", uplo, "No transpose", "Non-unit", &i__3, & 00275 kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda); 00276 i__3 = k - 1; 00277 template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 00278 lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); 00279 i__3 = k - 1; 00280 template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref( 00281 1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[ 00282 a_offset], lda); 00283 i__3 = k - 1; 00284 template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 00285 lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); 00286 i__3 = k - 1; 00287 template_blas_trmm("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, 00288 &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda); 00289 template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), 00290 ldb, info); 00291 /* L30: */ 00292 } 00293 } else { 00294 00295 /* Compute L'*A*L */ 00296 00297 i__2 = *n; 00298 i__1 = nb; 00299 for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { 00300 /* Computing MIN */ 00301 i__3 = *n - k + 1; 00302 kb = minMACRO(i__3,nb); 00303 00304 /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ 00305 00306 i__3 = k - 1; 00307 template_blas_trmm("Right", uplo, "No transpose", "Non-unit", &kb, & 00308 i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), 00309 lda); 00310 i__3 = k - 1; 00311 template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 00312 lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); 00313 i__3 = k - 1; 00314 template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 00315 1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], 00316 lda); 00317 i__3 = k - 1; 00318 template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 00319 lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); 00320 i__3 = k - 1; 00321 template_blas_trmm("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, 00322 &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda); 00323 template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), 00324 ldb, info); 00325 /* L40: */ 00326 } 00327 } 00328 } 00329 } 00330 return 0; 00331 00332 /* End of DSYGST */ 00333 00334 } /* dsygst_ */ 00335 00336 #undef b_ref 00337 #undef a_ref 00338 00339 00340 #endif