DSDP
|
00001 #include "numchol.h" 00002 00003 static void UpdSnode(int m, 00004 int n, 00005 int s, 00006 double diaga[], 00007 double *a, 00008 int fira[], 00009 double diagb[], 00010 double *b, 00011 int firb[]) 00012 { 00013 int i,k,t,sze; 00014 double rtemp1, rtemp2, rtemp3, rtemp4, 00015 rtemp5, rtemp6, rtemp7, rtemp8, 00016 rtemp9, rtemp10,rtemp11,rtemp12, 00017 rtemp13,rtemp14,rtemp15,rtemp16, 00018 *a1,*a2, *a3, *a4, *a5, *a6, *a7, *a8, 00019 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16, 00020 *b00,*b1,*b0; 00021 00022 for(i=0; i+1<s; i+=2) { 00023 b00 = b+firb[i]; 00024 b0 = b00+1; 00025 b1 = b+firb[i+1]; 00026 sze = m-i-2; 00027 k = 0; 00028 00029 for(; k+3<n; k+=4) { 00030 a1 = a+(fira[k+0 ]+i); 00031 a2 = a+(fira[k+1 ]+i); 00032 a3 = a+(fira[k+2 ]+i); 00033 a4 = a+(fira[k+3 ]+i); 00034 00035 rtemp1 = a1[0]/diaga[k+0]; 00036 rtemp2 = a2[0]/diaga[k+1]; 00037 rtemp3 = a3[0]/diaga[k+2]; 00038 rtemp4 = a4[0]/diaga[k+3]; 00039 00040 diagb[i] -= rtemp1 * a1[0] 00041 + rtemp2 * a2[0] 00042 + rtemp3 * a3[0] 00043 + rtemp4 * a4[0]; 00044 00045 ++ a1; 00046 ++ a2; 00047 ++ a3; 00048 ++ a4; 00049 00050 rtemp5 = a1[0]/diaga[k+0]; 00051 rtemp6 = a2[0]/diaga[k+1]; 00052 rtemp7 = a3[0]/diaga[k+2]; 00053 rtemp8 = a4[0]/diaga[k+3]; 00054 00055 b00[0] -= rtemp1 * a1[0] 00056 + rtemp2 * a2[0] 00057 + rtemp3 * a3[0] 00058 + rtemp4 * a4[0]; 00059 00060 diagb[i+1] -= rtemp5 * a1[0] 00061 + rtemp6 * a2[0] 00062 + rtemp7 * a3[0] 00063 + rtemp8 * a4[0]; 00064 00065 ++ a1; 00066 ++ a2; 00067 ++ a3; 00068 ++ a4; 00069 00070 for(t=0; t<sze; ++t) { 00071 b0[t] -= rtemp1 * a1[t] 00072 + rtemp2 * a2[t] 00073 + rtemp3 * a3[t] 00074 + rtemp4 * a4[t]; 00075 00076 b1[t] -= rtemp5 * a1[t] 00077 + rtemp6 * a2[t] 00078 + rtemp7 * a3[t] 00079 + rtemp8 * a4[t]; 00080 } 00081 } 00082 00083 for(; k+1<n; k+=2) { 00084 a1 = a+(fira[k+0 ]+i); 00085 a2 = a+(fira[k+1 ]+i); 00086 00087 rtemp1 = a1[0] / diaga[k+0]; 00088 rtemp2 = a2[0] / diaga[k+1]; 00089 00090 diagb[i] -= a1[0] * rtemp1 00091 + a2[0] * rtemp2; 00092 00093 ++ a1; 00094 ++ a2; 00095 00096 rtemp5 = a1[0] / diaga[k+0]; 00097 rtemp6 = a2[0] / diaga[k+1]; 00098 00099 b00[0] -= a1[0] * rtemp1 00100 + a2[0] * rtemp2; 00101 00102 diagb[i+1] -= a1[0] * rtemp5 00103 + a2[0] * rtemp6; 00104 00105 00106 ++ a1; 00107 ++ a2; 00108 00109 for(t=0; t<sze; ++t) { 00110 b0[t] -= a1[t] * rtemp1 00111 + a2[t] * rtemp2; 00112 00113 b1[t] -= a1[t] * rtemp5 00114 + a2[t] * rtemp6; 00115 } 00116 } 00117 00118 for(; k<n; ++k) { 00119 a1 = a+(fira[k+0 ]+i); 00120 00121 rtemp1 = a1[0] / diaga[k+0]; 00122 00123 diagb[i] -= a1[0] * rtemp1; 00124 00125 ++ a1; 00126 00127 rtemp5 = a1[0] / diaga[k+0]; 00128 00129 diagb[i+1] -= a1[0] * rtemp5; 00130 00131 b00[0] -= a1[0] * rtemp1; 00132 00133 ++ a1; 00134 00135 for(t=0; t<sze; ++t) 00136 { 00137 b0[t] -= rtemp1 * a1[t]; 00138 b1[t] -= rtemp5 * a1[t]; 00139 } 00140 } 00141 } 00142 00143 for(; i<s; ++i) { 00144 00145 b0 = b+firb[i]; 00146 sze = m-i-1; 00147 k = 0; 00148 00149 for(; k+15<n; k+=16) { 00150 a1 = a+(fira[k+0 ]+i); 00151 a2 = a+(fira[k+1 ]+i); 00152 a3 = a+(fira[k+2 ]+i); 00153 a4 = a+(fira[k+3 ]+i); 00154 a5 = a+(fira[k+4 ]+i); 00155 a6 = a+(fira[k+5 ]+i); 00156 a7 = a+(fira[k+6 ]+i); 00157 a8 = a+(fira[k+7 ]+i); 00158 a9 = a+(fira[k+8 ]+i); 00159 a10 = a+(fira[k+9 ]+i); 00160 a11 = a+(fira[k+10]+i); 00161 a12 = a+(fira[k+11]+i); 00162 a13 = a+(fira[k+12]+i); 00163 a14 = a+(fira[k+13]+i); 00164 a15 = a+(fira[k+14]+i); 00165 a16 = a+(fira[k+15]+i); 00166 00167 rtemp1 = *a1/diaga[k+0]; 00168 rtemp2 = *a2/diaga[k+1]; 00169 rtemp3 = *a3/diaga[k+2]; 00170 rtemp4 = *a4/diaga[k+3]; 00171 rtemp5 = *a5/diaga[k+4]; 00172 rtemp6 = *a6/diaga[k+5]; 00173 rtemp7 = *a7/diaga[k+6]; 00174 rtemp8 = *a8/diaga[k+7]; 00175 rtemp9 = *a9/diaga[k+8]; 00176 rtemp10 = *a10/diaga[k+9]; 00177 rtemp11 = *a11/diaga[k+10]; 00178 rtemp12 = *a12/diaga[k+11]; 00179 rtemp13 = *a13/diaga[k+12]; 00180 rtemp14 = *a14/diaga[k+13]; 00181 rtemp15 = *a15/diaga[k+14]; 00182 rtemp16 = *a16/diaga[k+15]; 00183 00184 diagb[i] -= rtemp1 * (*a1) 00185 + rtemp2 * (*a2) 00186 + rtemp3 * (*a3) 00187 + rtemp4 * (*a4) 00188 + rtemp5 * (*a5) 00189 + rtemp6 * (*a6) 00190 + rtemp7 * (*a7) 00191 + rtemp8 * (*a8) 00192 + rtemp9 * (*a9) 00193 + rtemp10 * (*a10) 00194 + rtemp11 * (*a11) 00195 + rtemp12 * (*a12) 00196 + rtemp13 * (*a13) 00197 + rtemp14 * (*a14) 00198 + rtemp15 * (*a15) 00199 + rtemp16 * (*a16); 00200 00201 00202 ++ a1; 00203 ++ a2; 00204 ++ a3; 00205 ++ a4; 00206 ++ a5; 00207 ++ a6; 00208 ++ a7; 00209 ++ a8; 00210 ++ a9; 00211 ++ a10; 00212 ++ a11; 00213 ++ a12; 00214 ++ a13; 00215 ++ a14; 00216 ++ a15; 00217 ++ a16; 00218 00219 for(t=0; t<sze; ++t) 00220 b0[t] -= rtemp1 * a1[t] 00221 + rtemp2 * a2[t] 00222 + rtemp3 * a3[t] 00223 + rtemp4 * a4[t] 00224 + rtemp5 * a5[t] 00225 + rtemp6 * a6[t] 00226 + rtemp7 * a7[t] 00227 + rtemp8 * a8[t] 00228 + rtemp9 * a9[t] 00229 + rtemp10 * a10[t] 00230 + rtemp11 * a11[t] 00231 + rtemp12 * a12[t] 00232 + rtemp13 * a13[t] 00233 + rtemp14 * a14[t] 00234 + rtemp15 * a15[t] 00235 + rtemp16 * a16[t]; 00236 00237 } 00238 00239 for(; k+11<n; k+=12) { 00240 a1 = a+(fira[k+0 ]+i); 00241 a2 = a+(fira[k+1 ]+i); 00242 a3 = a+(fira[k+2 ]+i); 00243 a4 = a+(fira[k+3 ]+i); 00244 a5 = a+(fira[k+4 ]+i); 00245 a6 = a+(fira[k+5 ]+i); 00246 a7 = a+(fira[k+6 ]+i); 00247 a8 = a+(fira[k+7 ]+i); 00248 a9 = a+(fira[k+8 ]+i); 00249 a10 = a+(fira[k+9 ]+i); 00250 a11 = a+(fira[k+10]+i); 00251 a12 = a+(fira[k+11]+i); 00252 00253 rtemp1 = *a1/diaga[k+0]; 00254 rtemp2 = *a2/diaga[k+1]; 00255 rtemp3 = *a3/diaga[k+2]; 00256 rtemp4 = *a4/diaga[k+3]; 00257 rtemp5 = *a5/diaga[k+4]; 00258 rtemp6 = *a6/diaga[k+5]; 00259 rtemp7 = *a7/diaga[k+6]; 00260 rtemp8 = *a8/diaga[k+7]; 00261 rtemp9 = *a9/diaga[k+8]; 00262 rtemp10 = *a10/diaga[k+9]; 00263 rtemp11 = *a11/diaga[k+10]; 00264 rtemp12 = *a12/diaga[k+11]; 00265 00266 diagb[i] -= rtemp1 * (*a1) 00267 + rtemp2 * (*a2) 00268 + rtemp3 * (*a3) 00269 + rtemp4 * (*a4) 00270 + rtemp5 * (*a5) 00271 + rtemp6 * (*a6) 00272 + rtemp7 * (*a7) 00273 + rtemp8 * (*a8) 00274 + rtemp9 * (*a9) 00275 + rtemp10 * (*a10) 00276 + rtemp11 * (*a11) 00277 + rtemp12 * (*a12); 00278 00279 ++ a1; 00280 ++ a2; 00281 ++ a3; 00282 ++ a4; 00283 ++ a5; 00284 ++ a6; 00285 ++ a7; 00286 ++ a8; 00287 ++ a9; 00288 ++ a10; 00289 ++ a11; 00290 ++ a12; 00291 00292 for(t=0; t<sze; ++t) 00293 b0[t] -= rtemp1 * a1[t] 00294 + rtemp2 * a2[t] 00295 + rtemp3 * a3[t] 00296 + rtemp4 * a4[t] 00297 + rtemp5 * a5[t] 00298 + rtemp6 * a6[t] 00299 + rtemp7 * a7[t] 00300 + rtemp8 * a8[t] 00301 + rtemp9 * a9[t] 00302 + rtemp10 * a10[t] 00303 + rtemp11 * a11[t] 00304 + rtemp12 * a12[t]; 00305 00306 } 00307 00308 for(; k+7<n; k+=8) { 00309 a1 = a+(fira[k+0 ]+i); 00310 a2 = a+(fira[k+1 ]+i); 00311 a3 = a+(fira[k+2 ]+i); 00312 a4 = a+(fira[k+3 ]+i); 00313 a5 = a+(fira[k+4 ]+i); 00314 a6 = a+(fira[k+5 ]+i); 00315 a7 = a+(fira[k+6 ]+i); 00316 a8 = a+(fira[k+7 ]+i); 00317 00318 rtemp1 = *a1/diaga[k+0]; 00319 rtemp2 = *a2/diaga[k+1]; 00320 rtemp3 = *a3/diaga[k+2]; 00321 rtemp4 = *a4/diaga[k+3]; 00322 rtemp5 = *a5/diaga[k+4]; 00323 rtemp6 = *a6/diaga[k+5]; 00324 rtemp7 = *a7/diaga[k+6]; 00325 rtemp8 = *a8/diaga[k+7]; 00326 00327 diagb[i] -= rtemp1 * (*a1) 00328 + rtemp2 * (*a2) 00329 + rtemp3 * (*a3) 00330 + rtemp4 * (*a4) 00331 + rtemp5 * (*a5) 00332 + rtemp6 * (*a6) 00333 + rtemp7 * (*a7) 00334 + rtemp8 * (*a8); 00335 00336 ++ a1; 00337 ++ a2; 00338 ++ a3; 00339 ++ a4; 00340 ++ a5; 00341 ++ a6; 00342 ++ a7; 00343 ++ a8; 00344 00345 for(t=0; t<sze; ++t) 00346 b0[t] -= rtemp1 * a1[t] 00347 + rtemp2 * a2[t] 00348 + rtemp3 * a3[t] 00349 + rtemp4 * a4[t] 00350 + rtemp5 * a5[t] 00351 + rtemp6 * a6[t] 00352 + rtemp7 * a7[t] 00353 + rtemp8 * a8[t]; 00354 00355 } 00356 00357 for(; k+3<n; k+=4) { 00358 a1 = a+(fira[k+0 ]+i); 00359 a2 = a+(fira[k+1 ]+i); 00360 a3 = a+(fira[k+2 ]+i); 00361 a4 = a+(fira[k+3 ]+i); 00362 00363 rtemp1 = *a1/diaga[k+0]; 00364 rtemp2 = *a2/diaga[k+1]; 00365 rtemp3 = *a3/diaga[k+2]; 00366 rtemp4 = *a4/diaga[k+3]; 00367 00368 diagb[i] -= rtemp1 * (*a1) 00369 + rtemp2 * (*a2) 00370 + rtemp3 * (*a3) 00371 + rtemp4 * (*a4); 00372 00373 ++ a1; 00374 ++ a2; 00375 ++ a3; 00376 ++ a4; 00377 00378 for(t=0; t<sze; ++t) 00379 b0[t] -= rtemp1 * a1[t] 00380 + rtemp2 * a2[t] 00381 + rtemp3 * a3[t] 00382 + rtemp4 * a4[t]; 00383 00384 } 00385 00386 for(; k+1<n; k+=2) { 00387 a1 = a+(fira[k+0 ]+i); 00388 a2 = a+(fira[k+1 ]+i); 00389 00390 rtemp1 = *a1/diaga[k+0]; 00391 rtemp2 = *a2/diaga[k+1]; 00392 00393 diagb[i] -= rtemp1 * (*a1) 00394 + rtemp2 * (*a2); 00395 00396 ++ a1; 00397 ++ a2; 00398 00399 for(t=0; t<sze; ++t) 00400 b0[t] -= rtemp1 * a1[t] 00401 + rtemp2 * a2[t]; 00402 } 00403 00404 for(; k<n; ++k) { 00405 a1 = a+(fira[k+0 ]+i); 00406 00407 rtemp1 = *a1/diaga[k+0]; 00408 00409 diagb[i] -= rtemp1 * (*a1); 00410 00411 ++ a1; 00412 00413 for(t=0; t<sze; ++t) 00414 b0[t] -= rtemp1 * a1[t]; 00415 } 00416 } 00417 } /* UpdSnode */ 00418 00419 static void iUpdSnode(chfac *cf, 00420 int snde, 00421 int f, 00422 int l, 00423 int uf, 00424 int ul, 00425 int iw[]) 00426 { 00427 int k, 00428 *ujsze=cf->ujsze,*uhead=cf->uhead,*subg=cf->subg; 00429 double *diag=cf->diag,*uval=cf->uval; 00430 00431 if (f==l || uf==ul) 00432 return; 00433 00434 f += subg[snde]; 00435 l += subg[snde]; 00436 uf += subg[snde]; 00437 ul += subg[snde]; 00438 00439 for(k=f; k<l; ++k) 00440 iw[k-f] = uhead[k]+uf-k-1; 00441 00442 UpdSnode(1+ujsze[uf],l-f,ul-uf,diag+f,uval,iw,diag+uf,uval,uhead+uf); 00443 } /* iUpdSnode */ 00444 00445 static int DiagUpdate(double *dii, 00446 int mode) 00447 { 00448 int id=TRUE; 00449 00450 if (mode) { 00451 if (*dii<1.0e-13) 00452 return FALSE; 00453 } 00454 else { 00455 if (fabs(*dii)<1.0e-35) { 00456 printf(" diagonal nearly zero: %5.1e.\n",(*dii)); 00457 return FALSE; 00458 } 00459 } 00460 return id; 00461 } /* DiagUpdate */ 00462 00463 static int FacSnode(chfac *cf, 00464 int snde, 00465 int f, 00466 int l, 00467 int iw[], 00468 int mode) 00469 { 00470 int itemp,k; 00471 00472 if (f==l) 00473 return (CfcOk); 00474 00475 itemp = cf->subg[snde]+f; 00476 00477 if (!DiagUpdate(cf->diag+itemp, 00478 mode)) 00479 return (CfcIndef); 00480 00481 if (fabs(cf->diag[itemp])<=cf->tolpiv) { 00482 printf("Singular d[%d]=%e\n", 00483 cf->subg[snde]+f,cf->diag[cf->subg[snde]+f]); 00484 return (CfcIndef); 00485 } 00486 00487 for(k=f+1; k<l; ++k) { 00488 iUpdSnode(cf,snde,f,k,k,k+1,iw); 00489 00490 itemp = cf->subg[snde]+k; 00491 00492 if (!DiagUpdate(&cf->diag[itemp], 00493 mode)) 00494 return (CfcIndef); 00495 00496 if (fabs(cf->diag[itemp])<=cf->tolpiv) { 00497 printf(" singular d[%d]=%e\n", 00498 cf->subg[snde]+k,cf->diag[cf->subg[snde]+k]); 00499 00500 return (CfcIndef); 00501 } 00502 } 00503 00504 return CfcOk; 00505 } /* FacSnode */ 00506 00507 static void UpdSnodes(int m, 00508 int n, 00509 int s, 00510 double diaga[], 00511 double *a, 00512 int fira[], 00513 double diagb[], 00514 double *b, 00515 int firb[], 00516 int subb[]) 00517 { 00518 int i,j,k,t,u,sze,delay, 00519 *ls; 00520 double rtemp1,rtemp2,rtemp3,rtemp4, 00521 rtemp5,rtemp6,rtemp7,rtemp8, 00522 rtemp9,rtemp10,rtemp11,rtemp12, 00523 rtemp13,rtemp14,rtemp15,rtemp16, 00524 *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8, 00525 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16, 00526 *b0; 00527 00528 if (m<s) 00529 exit(0); 00530 00531 if (m==0 || n==0) 00532 return; 00533 00534 for(i=0; i<s; ++i) { 00535 j = subb[i]; 00536 u = j-subb[0]; 00537 00538 b0 = b+firb[u]; 00539 00540 delay = j+1; 00541 sze = m-i-1; 00542 ls = subb+i+1; 00543 00544 k = 0; 00545 00546 for(; k+15<n; k+=16) { 00547 a1 = a+(fira[k+0 ]+i); 00548 a2 = a+(fira[k+1 ]+i); 00549 a3 = a+(fira[k+2 ]+i); 00550 a4 = a+(fira[k+3 ]+i); 00551 a5 = a+(fira[k+4 ]+i); 00552 a6 = a+(fira[k+5 ]+i); 00553 a7 = a+(fira[k+6 ]+i); 00554 a8 = a+(fira[k+7 ]+i); 00555 a9 = a+(fira[k+8 ]+i); 00556 a10 = a+(fira[k+9 ]+i); 00557 a11 = a+(fira[k+10]+i); 00558 a12 = a+(fira[k+11]+i); 00559 a13 = a+(fira[k+12]+i); 00560 a14 = a+(fira[k+13]+i); 00561 a15 = a+(fira[k+14]+i); 00562 a16 = a+(fira[k+15]+i); 00563 00564 rtemp1 = *a1/diaga[k+0]; 00565 rtemp2 = *a2/diaga[k+1]; 00566 rtemp3 = *a3/diaga[k+2]; 00567 rtemp4 = *a4/diaga[k+3]; 00568 rtemp5 = *a5/diaga[k+4]; 00569 rtemp6 = *a6/diaga[k+5]; 00570 rtemp7 = *a7/diaga[k+6]; 00571 rtemp8 = *a8/diaga[k+7]; 00572 rtemp9 = *a9/diaga[k+8]; 00573 rtemp10 = *a10/diaga[k+9]; 00574 rtemp11 = *a11/diaga[k+10]; 00575 rtemp12 = *a12/diaga[k+11]; 00576 rtemp13 = *a13/diaga[k+12]; 00577 rtemp14 = *a14/diaga[k+13]; 00578 rtemp15 = *a15/diaga[k+14]; 00579 rtemp16 = *a16/diaga[k+15]; 00580 00581 diagb[u] -= rtemp1 * (*a1) 00582 + rtemp2 * (*a2) 00583 + rtemp3 * (*a3) 00584 + rtemp4 * (*a4) 00585 + rtemp5 * (*a5) 00586 + rtemp6 * (*a6) 00587 + rtemp7 * (*a7) 00588 + rtemp8 * (*a8) 00589 + rtemp9 * (*a9) 00590 + rtemp10 * (*a10) 00591 + rtemp11 * (*a11) 00592 + rtemp12 * (*a12) 00593 + rtemp13 * (*a13) 00594 + rtemp14 * (*a14) 00595 + rtemp15 * (*a15) 00596 + rtemp16 * (*a16); 00597 00598 00599 ++ a1; 00600 ++ a2; 00601 ++ a3; 00602 ++ a4; 00603 ++ a5; 00604 ++ a6; 00605 ++ a7; 00606 ++ a8; 00607 ++ a9; 00608 ++ a10; 00609 ++ a11; 00610 ++ a12; 00611 ++ a13; 00612 ++ a14; 00613 ++ a15; 00614 ++ a16; 00615 00616 for(t=0; t<sze; ++t) 00617 b0[ls[t]-delay] -= rtemp1 * a1[t] 00618 + rtemp2 * a2[t] 00619 + rtemp3 * a3[t] 00620 + rtemp4 * a4[t] 00621 + rtemp5 * a5[t] 00622 + rtemp6 * a6[t] 00623 + rtemp7 * a7[t] 00624 + rtemp8 * a8[t] 00625 + rtemp9 * a9[t] 00626 + rtemp10 * a10[t] 00627 + rtemp11 * a11[t] 00628 + rtemp12 * a12[t] 00629 + rtemp13 * a13[t] 00630 + rtemp14 * a14[t] 00631 + rtemp15 * a15[t] 00632 + rtemp16 * a16[t]; 00633 } 00634 00635 for(; k+11<n; k+=12) { 00636 a1 = a+(fira[k+0 ]+i); 00637 a2 = a+(fira[k+1 ]+i); 00638 a3 = a+(fira[k+2 ]+i); 00639 a4 = a+(fira[k+3 ]+i); 00640 a5 = a+(fira[k+4 ]+i); 00641 a6 = a+(fira[k+5 ]+i); 00642 a7 = a+(fira[k+6 ]+i); 00643 a8 = a+(fira[k+7 ]+i); 00644 a9 = a+(fira[k+8 ]+i); 00645 a10 = a+(fira[k+9 ]+i); 00646 a11 = a+(fira[k+10]+i); 00647 a12 = a+(fira[k+11]+i); 00648 00649 rtemp1 = *a1/diaga[k+0]; 00650 rtemp2 = *a2/diaga[k+1]; 00651 rtemp3 = *a3/diaga[k+2]; 00652 rtemp4 = *a4/diaga[k+3]; 00653 rtemp5 = *a5/diaga[k+4]; 00654 rtemp6 = *a6/diaga[k+5]; 00655 rtemp7 = *a7/diaga[k+6]; 00656 rtemp8 = *a8/diaga[k+7]; 00657 rtemp9 = *a9/diaga[k+8]; 00658 rtemp10 = *a10/diaga[k+9]; 00659 rtemp11 = *a11/diaga[k+10]; 00660 rtemp12 = *a12/diaga[k+11]; 00661 00662 diagb[u] -= rtemp1 * (*a1) 00663 + rtemp2 * (*a2) 00664 + rtemp3 * (*a3) 00665 + rtemp4 * (*a4) 00666 + rtemp5 * (*a5) 00667 + rtemp6 * (*a6) 00668 + rtemp7 * (*a7) 00669 + rtemp8 * (*a8) 00670 + rtemp9 * (*a9) 00671 + rtemp10 * (*a10) 00672 + rtemp11 * (*a11) 00673 + rtemp12 * (*a12); 00674 00675 ++ a1; 00676 ++ a2; 00677 ++ a3; 00678 ++ a4; 00679 ++ a5; 00680 ++ a6; 00681 ++ a7; 00682 ++ a8; 00683 ++ a9; 00684 ++ a10; 00685 ++ a11; 00686 ++ a12; 00687 00688 for(t=0; t<sze; ++t) 00689 b0[ls[t]-delay] -= rtemp1 * a1[t] 00690 + rtemp2 * a2[t] 00691 + rtemp3 * a3[t] 00692 + rtemp4 * a4[t] 00693 + rtemp5 * a5[t] 00694 + rtemp6 * a6[t] 00695 + rtemp7 * a7[t] 00696 + rtemp8 * a8[t] 00697 + rtemp9 * a9[t] 00698 + rtemp10 * a10[t] 00699 + rtemp11 * a11[t] 00700 + rtemp12 * a12[t]; 00701 } 00702 00703 for(; k+7<n; k+=8) { 00704 a1 = a+(fira[k+0 ]+i); 00705 a2 = a+(fira[k+1 ]+i); 00706 a3 = a+(fira[k+2 ]+i); 00707 a4 = a+(fira[k+3 ]+i); 00708 a5 = a+(fira[k+4 ]+i); 00709 a6 = a+(fira[k+5 ]+i); 00710 a7 = a+(fira[k+6 ]+i); 00711 a8 = a+(fira[k+7 ]+i); 00712 00713 rtemp1 = *a1/diaga[k+0]; 00714 rtemp2 = *a2/diaga[k+1]; 00715 rtemp3 = *a3/diaga[k+2]; 00716 rtemp4 = *a4/diaga[k+3]; 00717 rtemp5 = *a5/diaga[k+4]; 00718 rtemp6 = *a6/diaga[k+5]; 00719 rtemp7 = *a7/diaga[k+6]; 00720 rtemp8 = *a8/diaga[k+7]; 00721 00722 diagb[u] -= rtemp1 * (*a1) 00723 + rtemp2 * (*a2) 00724 + rtemp3 * (*a3) 00725 + rtemp4 * (*a4) 00726 + rtemp5 * (*a5) 00727 + rtemp6 * (*a6) 00728 + rtemp7 * (*a7) 00729 + rtemp8 * (*a8); 00730 00731 ++ a1; 00732 ++ a2; 00733 ++ a3; 00734 ++ a4; 00735 ++ a5; 00736 ++ a6; 00737 ++ a7; 00738 ++ a8; 00739 00740 for(t=0; t<sze; ++t) 00741 b0[ls[t]-delay] -= rtemp1 * a1[t] 00742 + rtemp2 * a2[t] 00743 + rtemp3 * a3[t] 00744 + rtemp4 * a4[t] 00745 + rtemp5 * a5[t] 00746 + rtemp6 * a6[t] 00747 + rtemp7 * a7[t] 00748 + rtemp8 * a8[t]; 00749 00750 } 00751 00752 for(; k+3<n; k+=4) { 00753 a1 = a+(fira[k+0 ]+i); 00754 a2 = a+(fira[k+1 ]+i); 00755 a3 = a+(fira[k+2 ]+i); 00756 a4 = a+(fira[k+3 ]+i); 00757 00758 rtemp1 = *a1/diaga[k+0]; 00759 rtemp2 = *a2/diaga[k+1]; 00760 rtemp3 = *a3/diaga[k+2]; 00761 rtemp4 = *a4/diaga[k+3]; 00762 00763 diagb[u]-= rtemp1 * (*a1) 00764 +rtemp2 * (*a2) 00765 +rtemp3 * (*a3) 00766 +rtemp4 * (*a4); 00767 00768 ++ a1; 00769 ++ a2; 00770 ++ a3; 00771 ++ a4; 00772 00773 for(t=0; t<sze; ++t) 00774 b0[ls[t]-delay] -= rtemp1 * a1[t] 00775 + rtemp2 * a2[t] 00776 + rtemp3 * a3[t] 00777 + rtemp4 * a4[t]; 00778 00779 } 00780 00781 for(; k+1<n; k+=2) { 00782 a1 = a+(fira[k+0 ]+i); 00783 a2 = a+(fira[k+1 ]+i); 00784 00785 rtemp1 = *a1/diaga[k+0]; 00786 rtemp2 = *a2/diaga[k+1]; 00787 00788 diagb[u] -= rtemp1 * (*a1) 00789 + rtemp2 * (*a2); 00790 00791 ++ a1; 00792 ++ a2; 00793 00794 for(t=0; t<sze; ++t) 00795 b0[ls[t]-delay] -= rtemp1 * a1[t] 00796 + rtemp2 * a2[t]; 00797 } 00798 00799 for(; k<n; ++k) { 00800 a1 = a+(fira[k+0 ]+i); 00801 00802 rtemp1 = *a1/diaga[k+0]; 00803 00804 diagb[u] -= rtemp1 * (*a1); 00805 00806 ++ a1; 00807 00808 for(t=0; t<sze; ++t) 00809 b0[ls[t]-delay] -= rtemp1 * a1[t]; 00810 } 00811 } 00812 } /* UpdSnodes */ 00813 00814 static void ExtUpdSnode(chfac *cf, 00815 int snde, 00816 int usnde, 00817 int f, 00818 int l, 00819 int start, 00820 int iw[]) 00821 { 00822 int k,sze, 00823 *ls, 00824 *subg=cf->subg, 00825 *ujsze=cf->ujsze,*usub=cf->usub,*ujbeg=cf->ujbeg,*uhead=cf->uhead; 00826 double *diag=cf->diag,*uval=cf->uval; 00827 00828 f += subg[snde]; 00829 l += subg[snde]; 00830 00831 if (usnde==cf->nsnds-1) { 00832 if (usub[ujbeg[f]+start]<subg[usnde]) { 00833 printf("\n Index error"); 00834 exit(0); 00835 } 00836 00837 if (cf->sdens) 00838 exit(0); 00839 00840 ls = usub+ujbeg[f]+start; 00841 sze = ujsze[f]-start; 00842 00843 for(k=f; k<l; ++k) 00844 iw[k-f] =uhead[k]+start-(k-f); 00845 00846 UpdSnodes(sze,l-f,sze,diag+f,uval,iw,diag+ls[0],uval,uhead+ls[0],ls); 00847 } 00848 else 00849 exit(0); 00850 } /* ExtUpdSnode */ 00851 00852 static void PushFward(chfac *cf, 00853 int snde, 00854 int f, 00855 int l, 00856 int iw[]) 00857 { 00858 int j,s,t,u,k,stops,offset,sze,itemp, 00859 *ls0,*ls1, 00860 *map=iw,*subg=cf->subg, 00861 *ujsze=cf->ujsze,*uhead=cf->uhead,*ujbeg=cf->ujbeg,*usub=cf->usub; 00862 double rtemp1,*l0,*l1, 00863 *diag=cf->diag,*uval=cf->uval; 00864 00865 if (f>subg[snde+1]-subg[snde]) { 00866 printf("\n PushFward"); 00867 exit(0); 00868 } 00869 00870 if (f==l) 00871 return; 00872 00873 f += subg[snde]; 00874 l += subg[snde]; 00875 00876 offset = subg[snde+1]-f-1; 00877 sze = ujsze[f] - offset; 00878 ls1 = usub+ujbeg[f]+offset; 00879 00880 if (f+1==l) { 00881 l1 = uval+uhead[f]+offset; 00882 00883 stops = sze; 00884 for(t=0; t<sze; ++t) { 00885 j = ls1[0]; 00886 00887 if (j>=subg[cf->nsnds-1]) 00888 break; 00889 00890 rtemp1 = l1[0]/diag[f]; 00891 diag[j] -= rtemp1*l1[0]; 00892 ++ l1; 00893 00894 l0 = uval+uhead[j]; 00895 ls0 = usub+ujbeg[j]; 00896 00897 ++ ls1; 00898 -- stops; 00899 00900 if (stops && ls1[stops-1]==ls0[stops-1]) { 00901 for(s=0; s<stops; ++s) 00902 l0[s] -= rtemp1 * l1[s]; 00903 } 00904 00905 else { 00906 for(s=0, u=0; s<stops; ++u) { 00907 if (ls0[u]==ls1[s]) { 00908 l0[u] -= rtemp1 * l1[s]; 00909 ++ s; 00910 } 00911 } 00912 } 00913 } 00914 00915 if (t<sze && !cf->sdens) 00916 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],l-subg[snde],t,iw); 00917 } 00918 00919 else { 00920 stops = sze; 00921 for(t=0; t<sze; ++t, ++offset) { 00922 j = ls1[0]; 00923 00924 if (j>=subg[cf->nsnds-1]) { 00925 if (!cf->sdens) 00926 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde], 00927 l-subg[snde],offset,iw); 00928 break; 00929 } 00930 00931 ls0 = usub+ujbeg[j]; 00932 l0 = uval+uhead[j]; 00933 00934 ++ ls1; 00935 -- stops; 00936 00937 k = f; 00938 itemp = offset+f; 00939 00940 if (stops && ls1[stops-1]==ls0[stops-1]) { 00941 for(k=f; k<l; ++k) 00942 map[k-f] = uhead[k]+itemp-k; 00943 00944 UpdSnode(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j); 00945 } 00946 00947 else { 00948 map[l] = 0; 00949 for(s=0, u=0; s<stops; ++u) { 00950 if (ls1[s]==ls0[u]) { 00951 map[1+l+s] = 1+u; 00952 ++ s; 00953 } 00954 } 00955 00956 for(k=f; k<l; ++k) 00957 map[k-f] = uhead[k]+itemp-k; 00958 00959 UpdSnodes(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j,map+l); 00960 } 00961 } 00962 } 00963 } /* PushFwardard */ 00964 00965 static int FacDenNode(chfac *cf, 00966 int iw[], 00967 double rw[], 00968 int mode) 00969 { 00970 int c,d,j,s,t,sncl,k,k0,m,cacsze,sze,offset, 00971 *subg=cf->subg,*ujsze=cf->ujsze, 00972 *ujbeg=cf->ujbeg,*uhead=cf->uhead, 00973 *usub=cf->usub,*dhead=cf->dhead, 00974 *dsub=cf->dsub,*dbeg=cf->dbeg,*ls; 00975 double *diag=cf->diag,*uval=cf->uval; 00976 int sresp; 00977 00978 cacsze = cf->cachesize*cf->cacheunit; 00979 00980 if (cf->sdens) { 00981 for(d=0; d<cf->ndens; ++d) { 00982 c = 0; 00983 for(k=dhead[d]; k<dhead[d+1]; ++k) { 00984 offset = dbeg[k]; 00985 s = dsub[k]; 00986 if (usub[ujbeg[subg[s]]+offset]<subg[cf->nsnds-1]) { 00987 printf("\nindex error1"); 00988 exit(0); 00989 } 00990 00991 for(j=subg[s]; j<subg[s+1]; ++c, ++j) { 00992 rw[c] = diag[j]; 00993 iw[c] = uhead[j]+offset-(j-subg[s]); 00994 00995 if (usub[ujbeg[j]+offset-(j-subg[s])]<subg[cf->nsnds-1]) { 00996 printf("\nindex error"); 00997 exit(0); 00998 } 00999 } 01000 } 01001 01002 if (c) { 01003 k = dhead[d]; 01004 s = dsub[k]; 01005 m = ujsze[subg[s]]-dbeg[k]; 01006 ls = usub+ujbeg[subg[s]]+dbeg[k]; 01007 if (m) { 01008 for(k=0; k<c;) { 01009 t = cacsze/(m*sizeof(double)); 01010 t = max(t,1); 01011 t = min(c-k,t); 01012 01013 UpdSnodes(m,t,m,rw+k,uval,iw+k, 01014 diag+ls[0],uval,uhead+ls[0],ls); 01015 k+=t; 01016 } 01017 } 01018 } 01019 } 01020 } 01021 01022 s = cf->nsnds-1; 01023 01024 sncl = cf->subg[s+1]-cf->subg[s]; 01025 for(k=0; k<sncl;) { 01026 k0 = k; 01027 for(sze=0; sze<cacsze && k<sncl; ++k) 01028 sze += ujsze[subg[s]+k] * sizeof(double); 01029 01030 if (k==k0) 01031 ++k; 01032 else if (k>=k0+2 && sze>cacsze) 01033 --k; 01034 01035 if (k>sncl) 01036 exit(0); 01037 01038 sresp = FacSnode(cf,s,k0,k,iw,mode); 01039 if (sresp!=CfcOk) 01040 return (sresp); 01041 01042 iUpdSnode(cf,s,k0,k,k,sncl,iw); 01043 01044 } 01045 return (CfcOk); 01046 } /* FacDenNode */ 01047 01048 int ChlFact(chfac *cf, 01049 int *iw, 01050 double *rw, 01051 int mode) 01052 { 01053 int s,sncl,k,k0,cacsze,sze, 01054 *subg=cf->subg,*ujsze=cf->ujsze; 01055 int cid; 01056 01057 cacsze=cf->cachesize*cf->cacheunit; 01058 01059 for(s=0; s+1<cf->nsnds; ++s) { 01060 sncl = cf->subg[s+1]-cf->subg[s]; 01061 01062 for(k=0; k<sncl;) { 01063 k0 = k; 01064 for(sze=0; sze<=cacsze && k<sncl; ++k) 01065 sze += ujsze[subg[s]+k]*sizeof(double); 01066 01067 if (k==k0) 01068 ++k; 01069 else if (k>=k0+2 && sze>cacsze) 01070 --k; 01071 01072 if (k>sncl) 01073 exit(0); 01074 01075 cid=FacSnode(cf,s,k0,k,iw,mode); 01076 if (cid!=CfcOk) 01077 return (cid); 01078 01079 iUpdSnode(cf,s,k0,k,k,sncl,iw); 01080 01081 PushFward(cf,s,k0,k,iw); 01082 } 01083 } 01084 01085 cid=FacDenNode(cf,iw,rw,mode); 01086 01087 for (k=0;k<cf->nrow;k++){ 01088 cf->sqrtdiag[k]=sqrt(fabs(cf->diag[k])); 01089 } 01090 01091 return cid; 01092 } /* ChlFact */