ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_SPMV_HEADER 00036 #define TEMPLATE_BLAS_SPMV_HEADER 00037 00038 00039 template<class Treal> 00040 int template_blas_spmv(const char *uplo, const integer *n, const Treal *alpha, 00041 Treal *ap, const Treal *x, const integer *incx, const Treal *beta, 00042 Treal *y, const integer *incy) 00043 { 00044 /* System generated locals */ 00045 integer i__1, i__2; 00046 /* Local variables */ 00047 integer info; 00048 Treal temp1, temp2; 00049 integer i__, j, k; 00050 integer kk, ix, iy, jx, jy, kx, ky; 00051 /* Purpose 00052 ======= 00053 DSPMV performs the matrix-vector operation 00054 y := alpha*A*x + beta*y, 00055 where alpha and beta are scalars, x and y are n element vectors and 00056 A is an n by n symmetric matrix, supplied in packed form. 00057 Parameters 00058 ========== 00059 UPLO - CHARACTER*1. 00060 On entry, UPLO specifies whether the upper or lower 00061 triangular part of the matrix A is supplied in the packed 00062 array AP as follows: 00063 UPLO = 'U' or 'u' The upper triangular part of A is 00064 supplied in AP. 00065 UPLO = 'L' or 'l' The lower triangular part of A is 00066 supplied in AP. 00067 Unchanged on exit. 00068 N - INTEGER. 00069 On entry, N specifies the order of the matrix A. 00070 N must be at least zero. 00071 Unchanged on exit. 00072 ALPHA - DOUBLE PRECISION. 00073 On entry, ALPHA specifies the scalar alpha. 00074 Unchanged on exit. 00075 AP - DOUBLE PRECISION array of DIMENSION at least 00076 ( ( n*( n + 1 ) )/2 ). 00077 Before entry with UPLO = 'U' or 'u', the array AP must 00078 contain the upper triangular part of the symmetric matrix 00079 packed sequentially, column by column, so that AP( 1 ) 00080 contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) 00081 and a( 2, 2 ) respectively, and so on. 00082 Before entry with UPLO = 'L' or 'l', the array AP must 00083 contain the lower triangular part of the symmetric matrix 00084 packed sequentially, column by column, so that AP( 1 ) 00085 contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) 00086 and a( 3, 1 ) respectively, and so on. 00087 Unchanged on exit. 00088 X - DOUBLE PRECISION array of dimension at least 00089 ( 1 + ( n - 1 )*abs( INCX ) ). 00090 Before entry, the incremented array X must contain the n 00091 element vector x. 00092 Unchanged on exit. 00093 INCX - INTEGER. 00094 On entry, INCX specifies the increment for the elements of 00095 X. INCX must not be zero. 00096 Unchanged on exit. 00097 BETA - DOUBLE PRECISION. 00098 On entry, BETA specifies the scalar beta. When BETA is 00099 supplied as zero then Y need not be set on input. 00100 Unchanged on exit. 00101 Y - DOUBLE PRECISION array of dimension at least 00102 ( 1 + ( n - 1 )*abs( INCY ) ). 00103 Before entry, the incremented array Y must contain the n 00104 element vector y. On exit, Y is overwritten by the updated 00105 vector y. 00106 INCY - INTEGER. 00107 On entry, INCY specifies the increment for the elements of 00108 Y. INCY must not be zero. 00109 Unchanged on exit. 00110 Level 2 Blas routine. 00111 -- Written on 22-October-1986. 00112 Jack Dongarra, Argonne National Lab. 00113 Jeremy Du Croz, Nag Central Office. 00114 Sven Hammarling, Nag Central Office. 00115 Richard Hanson, Sandia National Labs. 00116 Test the input parameters. 00117 Parameter adjustments */ 00118 --y; 00119 --x; 00120 --ap; 00121 /* Function Body */ 00122 info = 0; 00123 if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) { 00124 info = 1; 00125 } else if (*n < 0) { 00126 info = 2; 00127 } else if (*incx == 0) { 00128 info = 6; 00129 } else if (*incy == 0) { 00130 info = 9; 00131 } 00132 if (info != 0) { 00133 template_blas_erbla("SPMV ", &info); 00134 return 0; 00135 } 00136 /* Quick return if possible. */ 00137 if (*n == 0 || ( *alpha == 0. && *beta == 1. ) ) { 00138 return 0; 00139 } 00140 /* Set up the start points in X and Y. */ 00141 if (*incx > 0) { 00142 kx = 1; 00143 } else { 00144 kx = 1 - (*n - 1) * *incx; 00145 } 00146 if (*incy > 0) { 00147 ky = 1; 00148 } else { 00149 ky = 1 - (*n - 1) * *incy; 00150 } 00151 /* Start the operations. In this version the elements of the array AP 00152 are accessed sequentially with one pass through AP. 00153 First form y := beta*y. */ 00154 if (*beta != 1.) { 00155 if (*incy == 1) { 00156 if (*beta == 0.) { 00157 i__1 = *n; 00158 for (i__ = 1; i__ <= i__1; ++i__) { 00159 y[i__] = 0.; 00160 /* L10: */ 00161 } 00162 } else { 00163 i__1 = *n; 00164 for (i__ = 1; i__ <= i__1; ++i__) { 00165 y[i__] = *beta * y[i__]; 00166 /* L20: */ 00167 } 00168 } 00169 } else { 00170 iy = ky; 00171 if (*beta == 0.) { 00172 i__1 = *n; 00173 for (i__ = 1; i__ <= i__1; ++i__) { 00174 y[iy] = 0.; 00175 iy += *incy; 00176 /* L30: */ 00177 } 00178 } else { 00179 i__1 = *n; 00180 for (i__ = 1; i__ <= i__1; ++i__) { 00181 y[iy] = *beta * y[iy]; 00182 iy += *incy; 00183 /* L40: */ 00184 } 00185 } 00186 } 00187 } 00188 if (*alpha == 0.) { 00189 return 0; 00190 } 00191 kk = 1; 00192 if (template_blas_lsame(uplo, "U")) { 00193 /* Form y when AP contains the upper triangle. */ 00194 if (*incx == 1 && *incy == 1) { 00195 i__1 = *n; 00196 for (j = 1; j <= i__1; ++j) { 00197 temp1 = *alpha * x[j]; 00198 temp2 = 0.; 00199 k = kk; 00200 i__2 = j - 1; 00201 for (i__ = 1; i__ <= i__2; ++i__) { 00202 y[i__] += temp1 * ap[k]; 00203 temp2 += ap[k] * x[i__]; 00204 ++k; 00205 /* L50: */ 00206 } 00207 y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; 00208 kk += j; 00209 /* L60: */ 00210 } 00211 } else { 00212 jx = kx; 00213 jy = ky; 00214 i__1 = *n; 00215 for (j = 1; j <= i__1; ++j) { 00216 temp1 = *alpha * x[jx]; 00217 temp2 = 0.; 00218 ix = kx; 00219 iy = ky; 00220 i__2 = kk + j - 2; 00221 for (k = kk; k <= i__2; ++k) { 00222 y[iy] += temp1 * ap[k]; 00223 temp2 += ap[k] * x[ix]; 00224 ix += *incx; 00225 iy += *incy; 00226 /* L70: */ 00227 } 00228 y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; 00229 jx += *incx; 00230 jy += *incy; 00231 kk += j; 00232 /* L80: */ 00233 } 00234 } 00235 } else { 00236 /* Form y when AP contains the lower triangle. */ 00237 if (*incx == 1 && *incy == 1) { 00238 i__1 = *n; 00239 for (j = 1; j <= i__1; ++j) { 00240 temp1 = *alpha * x[j]; 00241 temp2 = 0.; 00242 y[j] += temp1 * ap[kk]; 00243 k = kk + 1; 00244 i__2 = *n; 00245 for (i__ = j + 1; i__ <= i__2; ++i__) { 00246 y[i__] += temp1 * ap[k]; 00247 temp2 += ap[k] * x[i__]; 00248 ++k; 00249 /* L90: */ 00250 } 00251 y[j] += *alpha * temp2; 00252 kk += *n - j + 1; 00253 /* L100: */ 00254 } 00255 } else { 00256 jx = kx; 00257 jy = ky; 00258 i__1 = *n; 00259 for (j = 1; j <= i__1; ++j) { 00260 temp1 = *alpha * x[jx]; 00261 temp2 = 0.; 00262 y[jy] += temp1 * ap[kk]; 00263 ix = jx; 00264 iy = jy; 00265 i__2 = kk + *n - j; 00266 for (k = kk + 1; k <= i__2; ++k) { 00267 ix += *incx; 00268 iy += *incy; 00269 y[iy] += temp1 * ap[k]; 00270 temp2 += ap[k] * x[ix]; 00271 /* L110: */ 00272 } 00273 y[jy] += *alpha * temp2; 00274 jx += *incx; 00275 jy += *incy; 00276 kk += *n - j + 1; 00277 /* L120: */ 00278 } 00279 } 00280 } 00281 return 0; 00282 /* End of DSPMV . */ 00283 } /* dspmv_ */ 00284 00285 #endif