ergo
template_lapack_ggbal.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER
00036 #define TEMPLATE_LAPACK_GGBAL_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
00041         lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi, 
00042         Treal *lscale, Treal *rscale, Treal *work, integer *
00043         info)
00044 {
00045 /*  -- LAPACK routine (version 3.0) --   
00046        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00047        Courant Institute, Argonne National Lab, and Rice University   
00048        September 30, 1994   
00049 
00050 
00051     Purpose   
00052     =======   
00053 
00054     DGGBAL balances a pair of general real matrices (A,B).  This   
00055     involves, first, permuting A and B by similarity transformations to   
00056     isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N   
00057     elements on the diagonal; and second, applying a diagonal similarity   
00058     transformation to rows and columns ILO to IHI to make the rows   
00059     and columns as close in norm as possible. Both steps are optional.   
00060 
00061     Balancing may reduce the 1-norm of the matrices, and improve the   
00062     accuracy of the computed eigenvalues and/or eigenvectors in the   
00063     generalized eigenvalue problem A*x = lambda*B*x.   
00064 
00065     Arguments   
00066     =========   
00067 
00068     JOB     (input) CHARACTER*1   
00069             Specifies the operations to be performed on A and B:   
00070             = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0   
00071                     and RSCALE(I) = 1.0 for i = 1,...,N.   
00072             = 'P':  permute only;   
00073             = 'S':  scale only;   
00074             = 'B':  both permute and scale.   
00075 
00076     N       (input) INTEGER   
00077             The order of the matrices A and B.  N >= 0.   
00078 
00079     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00080             On entry, the input matrix A.   
00081             On exit,  A is overwritten by the balanced matrix.   
00082             If JOB = 'N', A is not referenced.   
00083 
00084     LDA     (input) INTEGER   
00085             The leading dimension of the array A. LDA >= max(1,N).   
00086 
00087     B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)   
00088             On entry, the input matrix B.   
00089             On exit,  B is overwritten by the balanced matrix.   
00090             If JOB = 'N', B is not referenced.   
00091 
00092     LDB     (input) INTEGER   
00093             The leading dimension of the array B. LDB >= max(1,N).   
00094 
00095     ILO     (output) INTEGER   
00096     IHI     (output) INTEGER   
00097             ILO and IHI are set to integers such that on exit   
00098             A(i,j) = 0 and B(i,j) = 0 if i > j and   
00099             j = 1,...,ILO-1 or i = IHI+1,...,N.   
00100             If JOB = 'N' or 'S', ILO = 1 and IHI = N.   
00101 
00102     LSCALE  (output) DOUBLE PRECISION array, dimension (N)   
00103             Details of the permutations and scaling factors applied   
00104             to the left side of A and B.  If P(j) is the index of the   
00105             row interchanged with row j, and D(j)   
00106             is the scaling factor applied to row j, then   
00107               LSCALE(j) = P(j)    for J = 1,...,ILO-1   
00108                         = D(j)    for J = ILO,...,IHI   
00109                         = P(j)    for J = IHI+1,...,N.   
00110             The order in which the interchanges are made is N to IHI+1,   
00111             then 1 to ILO-1.   
00112 
00113     RSCALE  (output) DOUBLE PRECISION array, dimension (N)   
00114             Details of the permutations and scaling factors applied   
00115             to the right side of A and B.  If P(j) is the index of the   
00116             column interchanged with column j, and D(j)   
00117             is the scaling factor applied to column j, then   
00118               LSCALE(j) = P(j)    for J = 1,...,ILO-1   
00119                         = D(j)    for J = ILO,...,IHI   
00120                         = P(j)    for J = IHI+1,...,N.   
00121             The order in which the interchanges are made is N to IHI+1,   
00122             then 1 to ILO-1.   
00123 
00124     WORK    (workspace) DOUBLE PRECISION array, dimension (6*N)   
00125 
00126     INFO    (output) INTEGER   
00127             = 0:  successful exit   
00128             < 0:  if INFO = -i, the i-th argument had an illegal value.   
00129 
00130     Further Details   
00131     ===============   
00132 
00133     See R.C. WARD, Balancing the generalized eigenvalue problem,   
00134                    SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.   
00135 
00136     =====================================================================   
00137 
00138 
00139        Test the input parameters   
00140 
00141        Parameter adjustments */
00142     /* Table of constant values */
00143      integer c__1 = 1;
00144      Treal c_b34 = 10.;
00145      Treal c_b70 = .5;
00146     
00147     /* System generated locals */
00148     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00149     Treal d__1, d__2, d__3;
00150     /* Local variables */
00151      integer lcab;
00152      Treal beta, coef;
00153      integer irab, lrab;
00154      Treal basl, cmax;
00155      Treal coef2, coef5;
00156      integer i__, j, k, l, m;
00157      Treal gamma, t, alpha;
00158      Treal sfmin, sfmax;
00159      integer iflow;
00160      integer kount, jc;
00161      Treal ta, tb, tc;
00162      integer ir, it;
00163      Treal ew;
00164      integer nr;
00165      Treal pgamma;
00166      integer lsfmin, lsfmax, ip1, jp1, lm1;
00167      Treal cab, rab, ewc, cor, sum;
00168      integer nrp2, icab;
00169 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00170 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00171 
00172 
00173     a_dim1 = *lda;
00174     a_offset = 1 + a_dim1 * 1;
00175     a -= a_offset;
00176     b_dim1 = *ldb;
00177     b_offset = 1 + b_dim1 * 1;
00178     b -= b_offset;
00179     --lscale;
00180     --rscale;
00181     --work;
00182 
00183     /* Initialization added by Elias to get rid of compiler warnings. */
00184     pgamma = 0;
00185     /* Function Body */
00186     *info = 0;
00187     if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S") 
00188             && ! template_blas_lsame(job, "B")) {
00189         *info = -1;
00190     } else if (*n < 0) {
00191         *info = -2;
00192     } else if (*lda < maxMACRO(1,*n)) {
00193         *info = -4;
00194     } else if (*ldb < maxMACRO(1,*n)) {
00195         *info = -5;
00196     }
00197     if (*info != 0) {
00198         i__1 = -(*info);
00199         template_blas_erbla("GGBAL ", &i__1);
00200         return 0;
00201     }
00202 
00203     k = 1;
00204     l = *n;
00205 
00206 /*     Quick return if possible */
00207 
00208     if (*n == 0) {
00209         return 0;
00210     }
00211 
00212     if (template_blas_lsame(job, "N")) {
00213         *ilo = 1;
00214         *ihi = *n;
00215         i__1 = *n;
00216         for (i__ = 1; i__ <= i__1; ++i__) {
00217             lscale[i__] = 1.;
00218             rscale[i__] = 1.;
00219 /* L10: */
00220         }
00221         return 0;
00222     }
00223 
00224     if (k == l) {
00225         *ilo = 1;
00226         *ihi = 1;
00227         lscale[1] = 1.;
00228         rscale[1] = 1.;
00229         return 0;
00230     }
00231 
00232     if (template_blas_lsame(job, "S")) {
00233         goto L190;
00234     }
00235 
00236     goto L30;
00237 
00238 /*     Permute the matrices A and B to isolate the eigenvalues.   
00239 
00240        Find row with one nonzero in columns 1 through L */
00241 
00242 L20:
00243     l = lm1;
00244     if (l != 1) {
00245         goto L30;
00246     }
00247 
00248     rscale[1] = 1.;
00249     lscale[1] = 1.;
00250     goto L190;
00251 
00252 L30:
00253     lm1 = l - 1;
00254     for (i__ = l; i__ >= 1; --i__) {
00255         i__1 = lm1;
00256         for (j = 1; j <= i__1; ++j) {
00257             jp1 = j + 1;
00258             if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00259                 goto L50;
00260             }
00261 /* L40: */
00262         }
00263         j = l;
00264         goto L70;
00265 
00266 L50:
00267         i__1 = l;
00268         for (j = jp1; j <= i__1; ++j) {
00269             if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00270                 goto L80;
00271             }
00272 /* L60: */
00273         }
00274         j = jp1 - 1;
00275 
00276 L70:
00277         m = l;
00278         iflow = 1;
00279         goto L160;
00280 L80:
00281         ;
00282     }
00283     goto L100;
00284 
00285 /*     Find column with one nonzero in rows K through N */
00286 
00287 L90:
00288     ++k;
00289 
00290 L100:
00291     i__1 = l;
00292     for (j = k; j <= i__1; ++j) {
00293         i__2 = lm1;
00294         for (i__ = k; i__ <= i__2; ++i__) {
00295             ip1 = i__ + 1;
00296             if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00297                 goto L120;
00298             }
00299 /* L110: */
00300         }
00301         i__ = l;
00302         goto L140;
00303 L120:
00304         i__2 = l;
00305         for (i__ = ip1; i__ <= i__2; ++i__) {
00306             if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00307                 goto L150;
00308             }
00309 /* L130: */
00310         }
00311         i__ = ip1 - 1;
00312 L140:
00313         m = k;
00314         iflow = 2;
00315         goto L160;
00316 L150:
00317         ;
00318     }
00319     goto L190;
00320 
00321 /*     Permute rows M and I */
00322 
00323 L160:
00324     lscale[m] = (Treal) i__;
00325     if (i__ == m) {
00326         goto L170;
00327     }
00328     i__1 = *n - k + 1;
00329     template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
00330     i__1 = *n - k + 1;
00331     template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
00332 
00333 /*     Permute columns M and J */
00334 
00335 L170:
00336     rscale[m] = (Treal) j;
00337     if (j == m) {
00338         goto L180;
00339     }
00340     template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
00341     template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
00342 
00343 L180:
00344     switch (iflow) {
00345         case 1:  goto L20;
00346         case 2:  goto L90;
00347     }
00348 
00349 L190:
00350     *ilo = k;
00351     *ihi = l;
00352 
00353     if (*ilo == *ihi) {
00354         return 0;
00355     }
00356 
00357     if (template_blas_lsame(job, "P")) {
00358         return 0;
00359     }
00360 
00361 /*     Balance the submatrix in rows ILO to IHI. */
00362 
00363     nr = *ihi - *ilo + 1;
00364     i__1 = *ihi;
00365     for (i__ = *ilo; i__ <= i__1; ++i__) {
00366         rscale[i__] = 0.;
00367         lscale[i__] = 0.;
00368 
00369         work[i__] = 0.;
00370         work[i__ + *n] = 0.;
00371         work[i__ + (*n << 1)] = 0.;
00372         work[i__ + *n * 3] = 0.;
00373         work[i__ + (*n << 2)] = 0.;
00374         work[i__ + *n * 5] = 0.;
00375 /* L200: */
00376     }
00377 
00378 /*     Compute right side vector in resulting linear equations */
00379 
00380     basl = template_blas_lg10(&c_b34);
00381     i__1 = *ihi;
00382     for (i__ = *ilo; i__ <= i__1; ++i__) {
00383         i__2 = *ihi;
00384         for (j = *ilo; j <= i__2; ++j) {
00385             tb = b_ref(i__, j);
00386             ta = a_ref(i__, j);
00387             if (ta == 0.) {
00388                 goto L210;
00389             }
00390             d__1 = absMACRO(ta);
00391             ta = template_blas_lg10(&d__1) / basl;
00392 L210:
00393             if (tb == 0.) {
00394                 goto L220;
00395             }
00396             d__1 = absMACRO(tb);
00397             tb = template_blas_lg10(&d__1) / basl;
00398 L220:
00399             work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
00400             work[j + *n * 5] = work[j + *n * 5] - ta - tb;
00401 /* L230: */
00402         }
00403 /* L240: */
00404     }
00405 
00406     coef = 1. / (Treal) (nr << 1);
00407     coef2 = coef * coef;
00408     coef5 = coef2 * .5;
00409     nrp2 = nr + 2;
00410     beta = 0.;
00411     it = 1;
00412 
00413 /*     Start generalized conjugate gradient iteration */
00414 
00415 L250:
00416 
00417     gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
00418             , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
00419             n * 5], &c__1);
00420 
00421     ew = 0.;
00422     ewc = 0.;
00423     i__1 = *ihi;
00424     for (i__ = *ilo; i__ <= i__1; ++i__) {
00425         ew += work[i__ + (*n << 2)];
00426         ewc += work[i__ + *n * 5];
00427 /* L260: */
00428     }
00429 
00430 /* Computing 2nd power */
00431     d__1 = ew;
00432 /* Computing 2nd power */
00433     d__2 = ewc;
00434 /* Computing 2nd power */
00435     d__3 = ew - ewc;
00436     gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
00437             d__3 * d__3);
00438     if (gamma == 0.) {
00439         goto L350;
00440     }
00441     if (it != 1) {
00442         beta = gamma / pgamma;
00443     }
00444     t = coef5 * (ewc - ew * 3.);
00445     tc = coef5 * (ew - ewc * 3.);
00446 
00447     template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
00448     template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
00449 
00450     template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
00451             c__1);
00452     template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
00453 
00454     i__1 = *ihi;
00455     for (i__ = *ilo; i__ <= i__1; ++i__) {
00456         work[i__] += tc;
00457         work[i__ + *n] += t;
00458 /* L270: */
00459     }
00460 
00461 /*     Apply matrix to vector */
00462 
00463     i__1 = *ihi;
00464     for (i__ = *ilo; i__ <= i__1; ++i__) {
00465         kount = 0;
00466         sum = 0.;
00467         i__2 = *ihi;
00468         for (j = *ilo; j <= i__2; ++j) {
00469             if (a_ref(i__, j) == 0.) {
00470                 goto L280;
00471             }
00472             ++kount;
00473             sum += work[j];
00474 L280:
00475             if (b_ref(i__, j) == 0.) {
00476                 goto L290;
00477             }
00478             ++kount;
00479             sum += work[j];
00480 L290:
00481             ;
00482         }
00483         work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
00484 /* L300: */
00485     }
00486 
00487     i__1 = *ihi;
00488     for (j = *ilo; j <= i__1; ++j) {
00489         kount = 0;
00490         sum = 0.;
00491         i__2 = *ihi;
00492         for (i__ = *ilo; i__ <= i__2; ++i__) {
00493             if (a_ref(i__, j) == 0.) {
00494                 goto L310;
00495             }
00496             ++kount;
00497             sum += work[i__ + *n];
00498 L310:
00499             if (b_ref(i__, j) == 0.) {
00500                 goto L320;
00501             }
00502             ++kount;
00503             sum += work[i__ + *n];
00504 L320:
00505             ;
00506         }
00507         work[j + *n * 3] = (Treal) kount * work[j] + sum;
00508 /* L330: */
00509     }
00510 
00511     sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) 
00512             + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
00513     alpha = gamma / sum;
00514 
00515 /*     Determine correction to current iteration */
00516 
00517     cmax = 0.;
00518     i__1 = *ihi;
00519     for (i__ = *ilo; i__ <= i__1; ++i__) {
00520         cor = alpha * work[i__ + *n];
00521         if (absMACRO(cor) > cmax) {
00522             cmax = absMACRO(cor);
00523         }
00524         lscale[i__] += cor;
00525         cor = alpha * work[i__];
00526         if (absMACRO(cor) > cmax) {
00527             cmax = absMACRO(cor);
00528         }
00529         rscale[i__] += cor;
00530 /* L340: */
00531     }
00532     if (cmax < .5) {
00533         goto L350;
00534     }
00535 
00536     d__1 = -alpha;
00537     template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
00538             , &c__1);
00539     d__1 = -alpha;
00540     template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
00541             c__1);
00542 
00543     pgamma = gamma;
00544     ++it;
00545     if (it <= nrp2) {
00546         goto L250;
00547     }
00548 
00549 /*     End generalized conjugate gradient iteration */
00550 
00551 L350:
00552     sfmin = template_lapack_lamch("S", (Treal)0);
00553     sfmax = 1. / sfmin;
00554     lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
00555     lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
00556     i__1 = *ihi;
00557     for (i__ = *ilo; i__ <= i__1; ++i__) {
00558         i__2 = *n - *ilo + 1;
00559         irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
00560         rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00561         i__2 = *n - *ilo + 1;
00562         irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
00563 /* Computing MAX */
00564         d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00565         rab = maxMACRO(d__2,d__3);
00566         d__1 = rab + sfmin;
00567         lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00568         ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
00569 /* Computing MIN */
00570         i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
00571         ir = minMACRO(i__2,i__3);
00572         lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
00573         icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
00574         cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
00575         icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
00576 /* Computing MAX */
00577         d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
00578         cab = maxMACRO(d__2,d__3);
00579         d__1 = cab + sfmin;
00580         lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00581         jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
00582 /* Computing MIN */
00583         i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
00584         jc = minMACRO(i__2,i__3);
00585         rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
00586 /* L360: */
00587     }
00588 
00589 /*     Row scaling of matrices A and B */
00590 
00591     i__1 = *ihi;
00592     for (i__ = *ilo; i__ <= i__1; ++i__) {
00593         i__2 = *n - *ilo + 1;
00594         template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
00595         i__2 = *n - *ilo + 1;
00596         template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
00597 /* L370: */
00598     }
00599 
00600 /*     Column scaling of matrices A and B */
00601 
00602     i__1 = *ihi;
00603     for (j = *ilo; j <= i__1; ++j) {
00604         template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
00605         template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
00606 /* L380: */
00607     }
00608 
00609     return 0;
00610 
00611 /*     End of DGGBAL */
00612 
00613 } /* dggbal_ */
00614 
00615 #undef b_ref
00616 #undef a_ref
00617 
00618 
00619 #endif