53#ifndef AMESOS2_SUPERLU_DEF_HPP
54#define AMESOS2_SUPERLU_DEF_HPP
56#include <Teuchos_Tuple.hpp>
57#include <Teuchos_ParameterList.hpp>
58#include <Teuchos_StandardParameterEntryValidators.hpp>
63#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
70#include "KokkosSparse_sptrsv_superlu.hpp"
76template <
class Matrix,
class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
82#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
83 , sptrsv_invert_diag_(true)
84 , sptrsv_invert_offdiag_ (false)
85 , sptrsv_u_in_csr_ (true)
86 , sptrsv_merge_supernodes_ (false)
87 , sptrsv_use_spmv_ (false)
89 , is_contiguous_(true)
90 , use_triangular_solves_(false)
92 , symmetrize_metis_(true)
99 SLU::set_default_options(&(data_.options));
101 data_.options.PrintStat = SLU::NO;
103 SLU::StatInit(&(data_.stat));
105 Kokkos::resize(data_.perm_r, this->globalNumRows_);
106 Kokkos::resize(data_.perm_c, this->globalNumCols_);
107 Kokkos::resize(data_.etree, this->globalNumCols_);
108 Kokkos::resize(data_.R, this->globalNumRows_);
109 Kokkos::resize(data_.C, this->globalNumCols_);
111 data_.relax = SLU::sp_ienv(2);
112 data_.panel_size = SLU::sp_ienv(1);
115 data_.rowequ =
false;
116 data_.colequ =
false;
117 data_.A.Store = NULL;
118 data_.L.Store = NULL;
119 data_.U.Store = NULL;
120 data_.X.Store = NULL;
121 data_.B.Store = NULL;
127template <
class Matrix,
class Vector>
135 SLU::StatFree( &(data_.stat) ) ;
138 if ( data_.A.Store != NULL ){
139 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
143 if ( data_.L.Store != NULL ){
144 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145 SLU::Destroy_CompCol_Matrix( &(data_.U) );
149template <
class Matrix,
class Vector>
153 std::ostringstream oss;
154 oss <<
"SuperLU solver interface";
156 oss <<
", \"ILUTP\" : {";
157 oss <<
"drop tol = " << data_.options.ILU_DropTol;
158 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
159 oss <<
", fill tol = " << data_.options.ILU_FillTol;
160 switch(data_.options.ILU_MILU) {
172 oss <<
", regular ILU";
174 switch(data_.options.ILU_Norm) {
183 oss <<
", infinity-norm";
187 oss <<
", direct solve";
203template<
class Matrix,
class Vector>
215 int permc_spec = data_.options.ColPerm;
216 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
217#ifdef HAVE_AMESOS2_TIMERS
218 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
221 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
228template <
class Matrix,
class Vector>
243#ifdef HAVE_AMESOS2_TIMERS
244 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
246 same_symbolic_ =
false;
247 data_.options.Fact = SLU::DOFACT;
250#ifdef HAVE_AMESOS2_TIMERS
251 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
252 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
254 size_t n = this->globalNumRows_;
255 size_t nz = this->globalNumNonZeros_;
256 host_size_type_array host_rowind_view_ = host_rows_view_;
257 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
260 if (symmetrize_metis_) {
267 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
268 &new_nz, &new_col_ptr, &new_row_ind);
273 Kokkos::resize (host_rowind_view_, nz);
274 Kokkos::realloc(host_colptr_view_, n+1);
275 for (
size_t i = 0; i <= n; i++) {
276 host_colptr_view_(i) = new_col_ptr[i];
278 for (
size_t i = 0; i < nz; i++) {
279 host_rowind_view_(i) = new_row_ind[i];
283 SLU::SUPERLU_FREE(new_col_ptr);
285 SLU::SUPERLU_FREE(new_row_ind);
290 using metis_int_array_type = host_ordinal_type_array;
291 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
292 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
296 Amesos2::Util::reorder(
297 host_colptr_view_, host_rowind_view_,
298 metis_perm, metis_iperm, new_nnz,
false);
300 for (
size_t i = 0; i < n; i++) {
301 data_.perm_r(i) = metis_iperm(i);
302 data_.perm_c(i) = metis_iperm(i);
306 data_.options.ColPerm = SLU::MY_PERMC;
307 data_.options.RowPerm = SLU::MY_PERMR;
315template <
class Matrix,
class Vector>
324 if ( !same_symbolic_ && data_.L.Store != NULL ){
325 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
326 SLU::Destroy_CompCol_Matrix( &(data_.U) );
327 data_.L.Store = NULL;
328 data_.U.Store = NULL;
331 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
336#ifdef HAVE_AMESOS2_DEBUG
337 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
339 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
340 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
342 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
345 if( data_.options.Equil == SLU::YES ){
346 magnitude_type rowcnd, colcnd, amax;
350 function_map::gsequ(&(data_.A), data_.R.data(),
351 data_.C.data(), &rowcnd, &colcnd,
353 TEUCHOS_TEST_FOR_EXCEPTION
354 (info2 < 0, std::runtime_error,
355 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
356 " This means that argument " << (-info2) <<
" given to the function"
357 " had an illegal value.");
359 if (info2 <= data_.A.nrow) {
360 TEUCHOS_TEST_FOR_EXCEPTION
361 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
362 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
363 <<
". This means that row " << info2 <<
" of A is exactly zero.");
365 else if (info2 > data_.A.ncol) {
366 TEUCHOS_TEST_FOR_EXCEPTION
367 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
368 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
369 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
370 "A is exactly zero.");
373 TEUCHOS_TEST_FOR_EXCEPTION
374 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
375 "with info = " << info2 <<
" > 0, but its value is not in the "
376 "range permitted by the documentation. This should never happen "
377 "(it appears to be SuperLU's fault).");
382 function_map::laqgs(&(data_.A), data_.R.data(),
383 data_.C.data(), rowcnd, colcnd,
384 amax, &(data_.equed));
387 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
388 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
393 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
394 data_.etree.data(), &(data_.AC));
397#ifdef HAVE_AMESOS2_TIMERS
398 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
401#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
402 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
403 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
404 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
405 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
408 if(ILU_Flag_==
false) {
409 function_map::gstrf(&(data_.options), &(data_.AC),
410 data_.relax, data_.panel_size, data_.etree.data(),
411 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
412 &(data_.L), &(data_.U),
413#ifdef HAVE_AMESOS2_SUPERLU5_API
416 &(data_.stat), &info);
419 function_map::gsitrf(&(data_.options), &(data_.AC),
420 data_.relax, data_.panel_size, data_.etree.data(),
421 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
422 &(data_.L), &(data_.U),
423#ifdef HAVE_AMESOS2_SUPERLU5_API
426 &(data_.stat), &info);
429 if (data_.options.ConditionNumber == SLU::YES) {
431 if (data_.options.Trans == SLU::NOTRANS) {
432 *(
unsigned char *)norm =
'1';
434 *(
unsigned char *)norm =
'I';
437 data_.anorm = function_map::langs(norm, &(data_.A));
438 function_map::gscon(norm, &(data_.L), &(data_.U),
439 data_.anorm, &(data_.rcond),
440 &(data_.stat), &info);
444 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
447 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
448 ((SLU::NCformat*)data_.U.Store)->nnz) );
452 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
454 global_size_type info_st = as<global_size_type>(info);
455 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
457 "Factorization complete, but matrix is singular. Division by zero eminent");
458 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
460 "Memory allocation failure in Superlu factorization");
462 data_.options.Fact = SLU::FACTORED;
463 same_symbolic_ =
true;
465 if(use_triangular_solves_) {
466#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
467 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
468 if (this->getComm()->getRank()) {
469 std::cout <<
" > Metis : " << (use_metis_ ?
"YES" :
"NO") << std::endl;
470 std::cout <<
" > Equil : " << (data_.options.Equil == SLU::YES ?
"YES" :
"NO") << std::endl;
471 std::cout <<
" > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ?
"YES" :
"NO") << std::endl;
472 std::cout <<
" > Invert diag : " << sptrsv_invert_diag_ << std::endl;
473 std::cout <<
" > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
474 std::cout <<
" > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
475 std::cout <<
" > Merge : " << sptrsv_merge_supernodes_ << std::endl;
476 std::cout <<
" > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
484#ifdef HAVE_AMESOS2_TIMERS
485 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
486 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
488 triangular_solve_factor();
495template <
class Matrix,
class Vector>
498 const Teuchos::Ptr<
const MultiVecAdapter<Vector> > B)
const
501#ifdef HAVE_AMESOS2_TIMERS
502 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
503 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
506 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
507 const size_t nrhs = X->getGlobalNumVectors();
509 bool bDidAssignX =
false;
510 bool bDidAssignB =
false;
512#ifdef HAVE_AMESOS2_TIMERS
513 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
514 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
519 const bool initialize_data =
true;
520 const bool do_not_initialize_data =
false;
521 if(use_triangular_solves_) {
522#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
523 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
524 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
527 this->rowIndexBase_);
528 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
529 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
532 this->rowIndexBase_);
536 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
537 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
540 this->rowIndexBase_);
541 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
542 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
545 this->rowIndexBase_);
553 if(bDidAssignB && data_.equed !=
'N') {
554 if(use_triangular_solves_) {
555#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
556 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
557 device_bValues_.extent(0), device_bValues_.extent(1));
558 Kokkos::deep_copy(copyB, device_bValues_);
559 device_bValues_ = copyB;
563 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
564 host_bValues_.extent(0), host_bValues_.extent(1));
565 Kokkos::deep_copy(copyB, host_bValues_);
566 host_bValues_ = copyB;
572 magnitude_type rpg, rcond;
577#ifdef HAVE_AMESOS2_TIMERS
578 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
581 if(use_triangular_solves_) {
585 Kokkos::resize(data_.ferr, nrhs);
586 Kokkos::resize(data_.berr, nrhs);
589 #ifdef HAVE_AMESOS2_TIMERS
590 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
592 SLU::Dtype_t dtype = type_map::dtype;
593 int i_ld_rhs = as<int>(ld_rhs);
594 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
595 convert_bValues_, host_bValues_, i_ld_rhs,
596 SLU::SLU_DN, dtype, SLU::SLU_GE);
597 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
598 convert_xValues_, host_xValues_, i_ld_rhs,
599 SLU::SLU_DN, dtype, SLU::SLU_GE);
602 if(ILU_Flag_==
false) {
603 function_map::gssvx(&(data_.options), &(data_.A),
604 data_.perm_c.data(), data_.perm_r.data(),
605 data_.etree.data(), &(data_.equed), data_.R.data(),
606 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
607 &(data_.X), &rpg, &rcond, data_.ferr.data(),
609 #ifdef HAVE_AMESOS2_SUPERLU5_API
612 &(data_.mem_usage), &(data_.stat), &ierr);
615 function_map::gsisx(&(data_.options), &(data_.A),
616 data_.perm_c.data(), data_.perm_r.data(),
617 data_.etree.data(), &(data_.equed), data_.R.data(),
618 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
619 &(data_.X), &rpg, &rcond,
620 #ifdef HAVE_AMESOS2_SUPERLU5_API
623 &(data_.mem_usage), &(data_.stat), &ierr);
627 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
628 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
629 data_.X.Store = NULL;
630 data_.B.Store = NULL;
637#ifdef HAVE_AMESOS2_TIMERS
638 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
640 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
641 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
645 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
646 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
647 data_.X.Store = NULL;
648 data_.B.Store = NULL;
652 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
654 global_size_type ierr_st = as<global_size_type>(ierr);
655 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
656 std::invalid_argument,
657 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
658 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
660 "Factorization complete, but U is exactly singular" );
661 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
663 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
664 "memory before allocation failure occured." );
671#ifdef HAVE_AMESOS2_TIMERS
672 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
675 if(use_triangular_solves_) {
676#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
677 Util::put_1d_data_helper_kokkos_view<
678 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
681 this->rowIndexBase_);
685 Util::put_1d_data_helper_kokkos_view<
686 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
689 this->rowIndexBase_);
697template <
class Matrix,
class Vector>
704 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
708template <
class Matrix,
class Vector>
713 using Teuchos::getIntegralValue;
714 using Teuchos::ParameterEntryValidator;
716 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
718 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
720 SLU::ilu_set_default_options(&(data_.options));
722 data_.options.PrintStat = SLU::NO;
725 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
727 if( parameterList->isParameter(
"Trans") ){
728 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
729 parameterList->getEntry(
"Trans").setValidator(trans_validator);
731 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
734 if( parameterList->isParameter(
"IterRefine") ){
735 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
736 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
738 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
741 if( parameterList->isParameter(
"ColPerm") ){
742 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
743 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
745 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
748 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
750 bool equil = parameterList->get<
bool>(
"Equil",
true);
751 data_.options.Equil = equil ? SLU::YES : SLU::NO;
753 bool condNum = parameterList->get<
bool>(
"ConditionNumber",
false);
754 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
756 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
757 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
760 if( parameterList->isParameter(
"RowPerm") ){
761 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
762 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
763 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
772 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
774 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
776 if( parameterList->isParameter(
"ILU_Norm") ) {
777 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
778 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
779 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
782 if( parameterList->isParameter(
"ILU_MILU") ) {
783 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
784 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
785 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
788 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
790 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
792 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
793 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
795 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
796 if(use_triangular_solves_) {
797#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
799 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
801 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
803 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
805 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
807 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
809 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
810 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
816template <
class Matrix,
class Vector>
817Teuchos::RCP<const Teuchos::ParameterList>
821 using Teuchos::tuple;
822 using Teuchos::ParameterList;
823 using Teuchos::EnhancedNumberValidator;
824 using Teuchos::setStringToIntegralParameter;
825 using Teuchos::stringToIntegralParameterEntryValidator;
827 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
829 if( is_null(valid_params) ){
830 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
832 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
833 "Solve for the transpose system or not",
834 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
835 tuple<string>(
"Solve with transpose",
836 "Do not solve with transpose",
837 "Solve with the conjugate transpose"),
838 tuple<SLU::trans_t>(SLU::TRANS,
843 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
844 "Type of iterative refinement to use",
845 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
846 tuple<string>(
"Do not use iterative refinement",
847 "Do single iterative refinement",
848 "Do double iterative refinement"),
849 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
855 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
856 "Specifies how to permute the columns of the "
857 "matrix for sparsity preservation",
858 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
859 "MMD_ATA",
"COLAMD"),
860 tuple<string>(
"Natural ordering",
861 "Minimum degree ordering on A^T + A",
862 "Minimum degree ordering on A^T A",
863 "Approximate minimum degree column ordering"),
864 tuple<SLU::colperm_t>(SLU::NATURAL,
870 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
871 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
872 pl->set(
"DiagPivotThresh", 1.0,
873 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
874 diag_pivot_thresh_validator);
876 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
877 pl->set(
"ConditionNumber",
false,
"Whether to approximate condition number");
879 pl->set(
"SymmetricMode",
false,
880 "Specifies whether to use the symmetric mode. "
881 "Gives preference to diagonal pivots and uses "
882 "an (A^T + A)-based column permutation.");
886 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
887 "Type of row permutation strategy to use",
888 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
889 tuple<string>(
"Use natural ordering",
890 "Use weighted bipartite matching algorithm",
891 "Use the ordering given in perm_r input"),
892 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
893#
if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
915 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
917 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
919 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
920 "Type of norm to use",
921 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
922 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
923 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
926 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
927 "Type of modified ILU to use",
928 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
929 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
930 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
934 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
936 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
938 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
941 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
942 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
944 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
945#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
946 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
947 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
948 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
949 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
950 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
960template <
class Matrix,
class Vector>
966#ifdef HAVE_AMESOS2_TIMERS
967 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
971 if( current_phase == SYMBFACT )
return false;
974 if( data_.A.Store != NULL ){
975 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
976 data_.A.Store = NULL;
981 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
982 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
983 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
988#ifdef HAVE_AMESOS2_TIMERS
989 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
992 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
994 "Row and column maps have different indexbase ");
996 if ( is_contiguous_ ==
true ) {
998 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
999 host_size_type_array>::do_get(this->matrixA_.ptr(),
1000 host_nzvals_view_, host_rows_view_,
1001 host_col_ptr_view_, nnz_ret,
ROOTED,
1003 this->rowIndexBase_);
1007 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
1008 host_size_type_array>::do_get(this->matrixA_.ptr(),
1009 host_nzvals_view_, host_rows_view_,
1012 this->rowIndexBase_);
1017 SLU::Dtype_t dtype = type_map::dtype;
1020 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
1022 "Did not get the expected number of non-zero vals");
1024 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
1025 this->globalNumRows_, this->globalNumCols_,
1027 convert_nzvals_, host_nzvals_view_,
1028 host_rows_view_.data(),
1029 host_col_ptr_view_.data(),
1030 SLU::SLU_NC, dtype, SLU::SLU_GE);
1036template <
class Matrix,
class Vector>
1040#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1041 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1044 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1045 int nsuper = 1 + Lstore->nsuper;
1046 Kokkos::resize(data_.parents, nsuper);
1047 for (
int s = 0; s < nsuper; s++) {
1048 int j = Lstore->sup_to_col[s+1]-1;
1049 if (data_.etree[j] ==
static_cast<int>(ld_rhs)) {
1050 data_.parents(s) = -1;
1052 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1056 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1057 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1058 if (data_.options.Equil == SLU::YES) {
1059 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1060 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1063 bool condition_flag =
false;
1064 if (data_.options.ConditionNumber == SLU::YES) {
1065 using STM = Teuchos::ScalarTraits<magnitude_type>;
1066 const magnitude_type eps = STM::eps ();
1068 SCformat *Lstore = (SCformat*)(data_.L.Store);
1069 int nsuper = 1 + Lstore->nsuper;
1070 int *nb = Lstore->sup_to_col;
1072 for (
int i = 0; i < nsuper; i++) {
1073 if (nb[i+1] - nb[i] > max_cols) {
1074 max_cols = nb[i+1] - nb[i];
1079 const magnitude_type multiply_fact (10.0);
1080 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1082#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1083 int n = data_.perm_r.extent(0);
1084 std::cout << this->getComm()->getRank()
1085 <<
" : anorm = " << data_.anorm <<
", rcond = " << data_.rcond <<
", n = " << n
1086 <<
", num super cols = " << nsuper <<
", max super cols = " << max_cols
1087 <<
" -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1088 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ?
" (okay)" :
" (warn)") << std::endl;
1093 if (sptrsv_use_spmv_ && !condition_flag) {
1094 device_khL_.create_sptrsv_handle(
1095 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1096 device_khU_.create_sptrsv_handle(
1097 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1099 device_khL_.create_sptrsv_handle(
1100 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1101 device_khU_.create_sptrsv_handle(
1102 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1106 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1109 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1110 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1113 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1114 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1117 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1118 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1121 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1122 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1125 device_khL_.set_sptrsv_etree(data_.parents.data());
1126 device_khU_.set_sptrsv_etree(data_.parents.data());
1129 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1130 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1132 int block_size = -1;
1133 if (block_size >= 0) {
1134 std::cout <<
" Block Size : " << block_size << std::endl;
1135 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1136 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1141#ifdef HAVE_AMESOS2_TIMERS
1142 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1143 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1145 KokkosSparse::Experimental::sptrsv_symbolic
1146 (&device_khL_, &device_khU_, data_.L, data_.U);
1151#ifdef HAVE_AMESOS2_TIMERS
1152 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1153 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1155 KokkosSparse::Experimental::sptrsv_compute
1156 (&device_khL_, &device_khU_, data_.L, data_.U);
1161template <
class Matrix,
class Vector>
1163Superlu<Matrix,Vector>::triangular_solve()
const
1165#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1166 size_t ld_rhs = device_xValues_.extent(0);
1167 size_t nrhs = device_xValues_.extent(1);
1169 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1170 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1173 auto local_device_bValues = device_bValues_;
1174 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1175 auto local_device_trsv_rhs = device_trsv_rhs_;
1178 auto local_device_trsv_R = device_trsv_R_;
1179 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1180 KOKKOS_LAMBDA(
size_t j) {
1181 for(
size_t k = 0; k < nrhs; ++k) {
1182 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1186 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1187 KOKKOS_LAMBDA(
size_t j) {
1188 for(
size_t k = 0; k < nrhs; ++k) {
1189 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1194 for(
size_t k = 0; k < nrhs; ++k) {
1195 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1196 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1200#ifdef HAVE_AMESOS2_TIMERS
1201 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1202 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1204 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1208#ifdef HAVE_AMESOS2_TIMERS
1209 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1210 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1212 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1217 auto local_device_xValues = device_xValues_;
1218 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1220 auto local_device_trsv_C = device_trsv_C_;
1221 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1222 KOKKOS_LAMBDA(
size_t j) {
1223 for(
size_t k = 0; k < nrhs; ++k) {
1224 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1228 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1229 KOKKOS_LAMBDA(
size_t j) {
1230 for(
size_t k = 0; k < nrhs; ++k) {
1231 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1238template<
class Matrix,
class Vector>
1239const char* Superlu<Matrix,Vector>::name =
"SuperLU";
Amesos2 Superlu declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:143
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:106
Amesos2 interface to the SuperLU package.
Definition Amesos2_Superlu_decl.hpp:77
~Superlu()
Destructor.
Definition Amesos2_Superlu_def.hpp:128
Superlu(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Superlu_def.hpp:77
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:65
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:652