ergo
template_blas_trmv.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_TRMV_HEADER
00036 #define TEMPLATE_BLAS_TRMV_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_trmv(const char *uplo, const char *trans, const char *diag, const integer *n, 
00041         const Treal *a, const integer *lda, Treal *x, const integer *incx)
00042 {
00043     /* System generated locals */
00044     integer a_dim1, a_offset, i__1, i__2;
00045     /* Local variables */
00046      integer info;
00047      Treal temp;
00048      integer i__, j;
00049      integer ix, jx, kx;
00050      logical nounit;
00051 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00052 /*  Purpose   
00053     =======   
00054     DTRMV  performs one of the matrix-vector operations   
00055        x := A*x,   or   x := A'*x,   
00056     where x is an n element vector and  A is an n by n unit, or non-unit,   
00057     upper or lower triangular matrix.   
00058     Parameters   
00059     ==========   
00060     UPLO   - CHARACTER*1.   
00061              On entry, UPLO specifies whether the matrix is an upper or   
00062              lower triangular matrix as follows:   
00063                 UPLO = 'U' or 'u'   A is an upper triangular matrix.   
00064                 UPLO = 'L' or 'l'   A is a lower triangular matrix.   
00065              Unchanged on exit.   
00066     TRANS  - CHARACTER*1.   
00067              On entry, TRANS specifies the operation to be performed as   
00068              follows:   
00069                 TRANS = 'N' or 'n'   x := A*x.   
00070                 TRANS = 'T' or 't'   x := A'*x.   
00071                 TRANS = 'C' or 'c'   x := A'*x.   
00072              Unchanged on exit.   
00073     DIAG   - CHARACTER*1.   
00074              On entry, DIAG specifies whether or not A is unit   
00075              triangular as follows:   
00076                 DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
00077                 DIAG = 'N' or 'n'   A is not assumed to be unit   
00078                                     triangular.   
00079              Unchanged on exit.   
00080     N      - INTEGER.   
00081              On entry, N specifies the order of the matrix A.   
00082              N must be at least zero.   
00083              Unchanged on exit.   
00084     A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
00085              Before entry with  UPLO = 'U' or 'u', the leading n by n   
00086              upper triangular part of the array A must contain the upper   
00087              triangular matrix and the strictly lower triangular part of   
00088              A is not referenced.   
00089              Before entry with UPLO = 'L' or 'l', the leading n by n   
00090              lower triangular part of the array A must contain the lower   
00091              triangular matrix and the strictly upper triangular part of   
00092              A is not referenced.   
00093              Note that when  DIAG = 'U' or 'u', the diagonal elements of   
00094              A are not referenced either, but are assumed to be unity.   
00095              Unchanged on exit.   
00096     LDA    - INTEGER.   
00097              On entry, LDA specifies the first dimension of A as declared   
00098              in the calling (sub) program. LDA must be at least   
00099              max( 1, n ).   
00100              Unchanged on exit.   
00101     X      - DOUBLE PRECISION array of dimension at least   
00102              ( 1 + ( n - 1 )*abs( INCX ) ).   
00103              Before entry, the incremented array X must contain the n   
00104              element vector x. On exit, X is overwritten with the   
00105              tranformed vector x.   
00106     INCX   - INTEGER.   
00107              On entry, INCX specifies the increment for the elements of   
00108              X. INCX must not be zero.   
00109              Unchanged on exit.   
00110     Level 2 Blas routine.   
00111     -- Written on 22-October-1986.   
00112        Jack Dongarra, Argonne National Lab.   
00113        Jeremy Du Croz, Nag Central Office.   
00114        Sven Hammarling, Nag Central Office.   
00115        Richard Hanson, Sandia National Labs.   
00116        Test the input parameters.   
00117        Parameter adjustments */
00118     a_dim1 = *lda;
00119     a_offset = 1 + a_dim1 * 1;
00120     a -= a_offset;
00121     --x;
00122     /* Initialization added by Elias to get rid of compiler warnings. */
00123     kx = 0;
00124     /* Function Body */
00125     info = 0;
00126     if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) {
00127         info = 1;
00128     } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 
00129             "T") && ! template_blas_lsame(trans, "C")) {
00130         info = 2;
00131     } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 
00132             "N")) {
00133         info = 3;
00134     } else if (*n < 0) {
00135         info = 4;
00136     } else if (*lda < maxMACRO(1,*n)) {
00137         info = 6;
00138     } else if (*incx == 0) {
00139         info = 8;
00140     }
00141     if (info != 0) {
00142         template_blas_erbla("TRMV  ", &info);
00143         return 0;
00144     }
00145 /*     Quick return if possible. */
00146     if (*n == 0) {
00147         return 0;
00148     }
00149     nounit = template_blas_lsame(diag, "N");
00150 /*     Set up the start point in X if the increment is not unity. This   
00151        will be  ( N - 1 )*INCX  too small for descending loops. */
00152     if (*incx <= 0) {
00153         kx = 1 - (*n - 1) * *incx;
00154     } else if (*incx != 1) {
00155         kx = 1;
00156     }
00157 /*     Start the operations. In this version the elements of A are   
00158        accessed sequentially with one pass through A. */
00159     if (template_blas_lsame(trans, "N")) {
00160 /*        Form  x := A*x. */
00161         if (template_blas_lsame(uplo, "U")) {
00162             if (*incx == 1) {
00163                 i__1 = *n;
00164                 for (j = 1; j <= i__1; ++j) {
00165                     if (x[j] != 0.) {
00166                         temp = x[j];
00167                         i__2 = j - 1;
00168                         for (i__ = 1; i__ <= i__2; ++i__) {
00169                             x[i__] += temp * a_ref(i__, j);
00170 /* L10: */
00171                         }
00172                         if (nounit) {
00173                             x[j] *= a_ref(j, j);
00174                         }
00175                     }
00176 /* L20: */
00177                 }
00178             } else {
00179                 jx = kx;
00180                 i__1 = *n;
00181                 for (j = 1; j <= i__1; ++j) {
00182                     if (x[jx] != 0.) {
00183                         temp = x[jx];
00184                         ix = kx;
00185                         i__2 = j - 1;
00186                         for (i__ = 1; i__ <= i__2; ++i__) {
00187                             x[ix] += temp * a_ref(i__, j);
00188                             ix += *incx;
00189 /* L30: */
00190                         }
00191                         if (nounit) {
00192                             x[jx] *= a_ref(j, j);
00193                         }
00194                     }
00195                     jx += *incx;
00196 /* L40: */
00197                 }
00198             }
00199         } else {
00200             if (*incx == 1) {
00201                 for (j = *n; j >= 1; --j) {
00202                     if (x[j] != 0.) {
00203                         temp = x[j];
00204                         i__1 = j + 1;
00205                         for (i__ = *n; i__ >= i__1; --i__) {
00206                             x[i__] += temp * a_ref(i__, j);
00207 /* L50: */
00208                         }
00209                         if (nounit) {
00210                             x[j] *= a_ref(j, j);
00211                         }
00212                     }
00213 /* L60: */
00214                 }
00215             } else {
00216                 kx += (*n - 1) * *incx;
00217                 jx = kx;
00218                 for (j = *n; j >= 1; --j) {
00219                     if (x[jx] != 0.) {
00220                         temp = x[jx];
00221                         ix = kx;
00222                         i__1 = j + 1;
00223                         for (i__ = *n; i__ >= i__1; --i__) {
00224                             x[ix] += temp * a_ref(i__, j);
00225                             ix -= *incx;
00226 /* L70: */
00227                         }
00228                         if (nounit) {
00229                             x[jx] *= a_ref(j, j);
00230                         }
00231                     }
00232                     jx -= *incx;
00233 /* L80: */
00234                 }
00235             }
00236         }
00237     } else {
00238 /*        Form  x := A'*x. */
00239         if (template_blas_lsame(uplo, "U")) {
00240             if (*incx == 1) {
00241                 for (j = *n; j >= 1; --j) {
00242                     temp = x[j];
00243                     if (nounit) {
00244                         temp *= a_ref(j, j);
00245                     }
00246                     for (i__ = j - 1; i__ >= 1; --i__) {
00247                         temp += a_ref(i__, j) * x[i__];
00248 /* L90: */
00249                     }
00250                     x[j] = temp;
00251 /* L100: */
00252                 }
00253             } else {
00254                 jx = kx + (*n - 1) * *incx;
00255                 for (j = *n; j >= 1; --j) {
00256                     temp = x[jx];
00257                     ix = jx;
00258                     if (nounit) {
00259                         temp *= a_ref(j, j);
00260                     }
00261                     for (i__ = j - 1; i__ >= 1; --i__) {
00262                         ix -= *incx;
00263                         temp += a_ref(i__, j) * x[ix];
00264 /* L110: */
00265                     }
00266                     x[jx] = temp;
00267                     jx -= *incx;
00268 /* L120: */
00269                 }
00270             }
00271         } else {
00272             if (*incx == 1) {
00273                 i__1 = *n;
00274                 for (j = 1; j <= i__1; ++j) {
00275                     temp = x[j];
00276                     if (nounit) {
00277                         temp *= a_ref(j, j);
00278                     }
00279                     i__2 = *n;
00280                     for (i__ = j + 1; i__ <= i__2; ++i__) {
00281                         temp += a_ref(i__, j) * x[i__];
00282 /* L130: */
00283                     }
00284                     x[j] = temp;
00285 /* L140: */
00286                 }
00287             } else {
00288                 jx = kx;
00289                 i__1 = *n;
00290                 for (j = 1; j <= i__1; ++j) {
00291                     temp = x[jx];
00292                     ix = jx;
00293                     if (nounit) {
00294                         temp *= a_ref(j, j);
00295                     }
00296                     i__2 = *n;
00297                     for (i__ = j + 1; i__ <= i__2; ++i__) {
00298                         ix += *incx;
00299                         temp += a_ref(i__, j) * x[ix];
00300 /* L150: */
00301                     }
00302                     x[jx] = temp;
00303                     jx += *incx;
00304 /* L160: */
00305                 }
00306             }
00307         }
00308     }
00309     return 0;
00310 /*     End of DTRMV . */
00311 } /* dtrmv_ */
00312 #undef a_ref
00313 
00314 #endif