42#ifndef STOKHOS_TPETRA_UTILITIES_HPP
43#define STOKHOS_TPETRA_UTILITIES_HPP
47#include "Tpetra_CrsMatrix.hpp"
55 template <
class ViewType>
63 mean_vals = ViewType(
"mean-values", vals.extent(0));
76 template <
class Storage,
class ... P>
88 typename Scalar::cijk_type mean_cijk =
89 Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
90 mean_vals = Kokkos::make_view<ViewType>(
"mean-values", mean_cijk, nnz, 1);
91 Kokkos::parallel_for( nnz, *
this );
94 KOKKOS_INLINE_FUNCTION
96 mean_vals(i) = vals(i).fastAccessCoeff(0);
109 template <
class Storage,
class ... P>
120 vals(vals_), vec_size(
Kokkos::dimension_scalar(vals))
124 Kokkos::parallel_for( nnz, *
this );
127 KOKKOS_INLINE_FUNCTION
130 typename Scalar::value_type s = 0.0;
132 s += vals(i).fastAccessCoeff(
j);
148 template <
class ViewType>
156 mean_vals = ViewType(
"mean-values", vals.extent(0));
169 template <
class Storage,
class ... P>
183 Kokkos::parallel_for( nnz, *
this );
186 KOKKOS_INLINE_FUNCTION
188 mean_vals(i) = vals(i).fastAccessCoeff(0);
201 template <
class Storage,
class ... P>
213 vals(vals_), vec_size(
Kokkos::dimension_scalar(vals))
217 Kokkos::parallel_for( nnz, *
this );
220 KOKKOS_INLINE_FUNCTION
223 typename Scalar::value_type s = 0.0;
225 s += vals(i).fastAccessCoeff(
j);
237 template <
typename Scalar,
typename LO,
typename GO,
typename N>
238 Teuchos::RCP< Tpetra::CrsMatrix<Scalar,LO,GO,N> >
243 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
244 typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
245 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
247 KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
248 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
249 const size_t ncols = kokkos_matrix.numCols();
250 typedef GetMeanValsFunc <KokkosMatrixValuesType > MeanFunc;
251 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
252 MeanFunc meanfunc(matrix_values);
253 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
258 RCP < MatrixType > mean_matrix =
259 rcp(
new MatrixType(A.getCrsGraph(), mean_matrix_values) );
260 mean_matrix->fillComplete();
264 template <
typename Scalar,
typename LO,
typename GO,
typename N>
265 Teuchos::RCP< Tpetra::CrsMatrix<typename Scalar::value_type,LO,GO,N> >
270 typedef typename Scalar::value_type BaseScalar;
271 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
272 typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
273 typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
274 typedef typename ScalarMatrixType::local_matrix_device_type ScalarKokkosMatrixType;
275 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
277 KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
278 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
279 typedef GetScalarMeanValsFunc <KokkosMatrixValuesType > MeanFunc;
280 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
281 MeanFunc meanfunc(matrix_values);
282 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
287 RCP < ScalarMatrixType > mean_matrix =
288 rcp(
new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
289 mean_matrix->fillComplete();
297 template <
typename ExecSpace>
300 template <
typename DstView,
typename SrcView>
305 template <
typename DstView,
typename SrcView>
306 void impl(
const DstView& dst,
const SrcView& src) {
307 typedef typename SrcView::non_const_value_type
Scalar;
308 const size_t m = src.extent(0);
309 const size_t n = src.extent(1);
311 Kokkos::RangePolicy<exec_space> policy(0,m);
312 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
314 for (
size_t j=0;
j<n; ++
j) {
316 for (
size_t k=0; k<p; ++k)
317 dst(i,
j*p+k) = s.fastAccessCoeff(k);
325 template <
typename ExecSpace>
329 template <
typename DstView,
typename SrcView>
334 template <
typename DstView,
typename SrcView>
335 void impl(
const DstView& dst,
const SrcView& src) {
336 typedef typename DstView::non_const_value_type
Scalar;
337 const size_t m = dst.extent(0);
338 const size_t n = dst.extent(1);
341 Kokkos::RangePolicy<exec_space> policy(0,m);
342 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
344 for (
size_t j=0;
j<n; ++
j) {
346 for (
size_t k=0; k<p; ++k)
347 d.fastAccessCoeff(k) = src(i,
j*p+k);
353#ifdef KOKKOS_ENABLE_CUDA
357 struct CopyPCE2Scalar<
Kokkos::Cuda> {
360 template <
typename DstView,
typename SrcView>
365 template <
typename DstView,
typename SrcView>
366 void impl(
const DstView& dst,
const SrcView& src) {
367 typedef typename DstView::non_const_value_type
Scalar;
368 typedef Kokkos::TeamPolicy<exec_space> Policy;
369 typedef typename Policy::member_type Member;
371 const size_t m = src.extent(0);
372 const size_t n = src.extent(1);
375 const size_t ChunkSize = 16;
376 const size_t M = (m+ChunkSize-1)/ChunkSize;
378 Policy policy(M,ChunkSize,ChunkSize);
379 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
381 __shared__
Scalar tmp[ChunkSize][ChunkSize];
382 const size_t i_block = blockIdx.x*ChunkSize;
384 for (
size_t j=0;
j<n; ++
j) {
385 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
391 size_t i = i_block + threadIdx.y;
392 size_t k = k_block + threadIdx.x;
394 tmp[threadIdx.y][threadIdx.x] = src(i,
j).fastAccessCoeff(k);
400 i = i_block + threadIdx.x;
401 k = k_block + threadIdx.y;
403 dst(i,
j*p+k) = tmp[threadIdx.x][threadIdx.y];
414 struct CopyScalar2PCE<
Kokkos::Cuda> {
417 template <
typename DstView,
typename SrcView>
422 template <
typename DstView,
typename SrcView>
423 void impl(
const DstView& dst,
const SrcView& src) {
424 typedef typename SrcView::non_const_value_type
Scalar;
425 typedef Kokkos::TeamPolicy<exec_space> Policy;
426 typedef typename Policy::member_type Member;
428 const size_t m = dst.extent(0);
429 const size_t n = dst.extent(1);
432 const size_t ChunkSize = 16;
433 const size_t M = (m+ChunkSize-1)/ChunkSize;
435 Policy policy(M,ChunkSize,ChunkSize);
436 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
438 __shared__
Scalar tmp[ChunkSize][ChunkSize];
439 const size_t i_block = blockIdx.x*ChunkSize;
441 for (
size_t j=0;
j<n; ++
j) {
442 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
448 size_t i = i_block + threadIdx.x;
449 size_t k = k_block + threadIdx.y;
451 tmp[threadIdx.x][threadIdx.y] = src(i,
j*p+k);
457 i = i_block + threadIdx.y;
458 k = k_block + threadIdx.x;
460 dst(i,
j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
471 template <
typename DstView,
typename SrcView>
477 template <
typename DstView,
typename SrcView>
485 template <
typename Scalar,
490 virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
497 typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_op_type;
504 virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
506 return mb_op->getDomainMap();
509 virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
511 return mb_op->getRangeMap();
515 apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
516 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
517 Teuchos::ETransp mode = Teuchos::NO_TRANS,
518 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
519 Scalar beta = Teuchos::ScalarTraits<Scalar>::zero())
const
521 typedef typename scalar_mv_type::device_type device_type;
523 auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
524 auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
526 if (
X_s == Teuchos::null ||
527 X_s->getNumVectors() != X.getNumVectors()*pce_size)
529 X.getNumVectors()*pce_size));
530 if (
Y_s == Teuchos::null ||
531 Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
533 Y.getNumVectors()*pce_size));
538 auto xv_s =
X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
539 auto yv_s =
Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
549 auto yv_s =
Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
555 return mb_op->hasTransposeApply();
560 typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_mv_type;
561 mutable Teuchos::RCP<scalar_mv_type>
X_s,
Y_s;
562 Teuchos::RCP<const scalar_op_type>
mb_op;
Kokkos::View< Scalar *, P... > ViewType
MeanViewType getMeanValues() const
ViewType::size_type size_type
GetMeanValsFunc(const ViewType &vals_)
Sacado::MP::Vector< Storage > Scalar
ViewType::execution_space execution_space
Sacado::UQ::PCE< Storage > Scalar
GetMeanValsFunc(const ViewType &vals_)
ViewType::execution_space execution_space
ViewType::size_type size_type
MeanViewType getMeanValues() const
Kokkos::View< Scalar *, P... > ViewType
Get mean values matrix for mean-based preconditioning.
GetMeanValsFunc(const ViewType &vals)
ViewType::execution_space execution_space
ViewType::size_type size_type
MeanViewType getMeanValues() const
Sacado::MP::Vector< Storage > Scalar
ViewType::execution_space execution_space
ViewType::size_type size_type
Kokkos::View< MeanScalar *, P... > MeanViewType
Kokkos::View< Scalar *, P... > ViewType
MeanViewType getMeanValues() const
Scalar::value_type MeanScalar
GetScalarMeanValsFunc(const ViewType &vals_)
Kokkos::View< MeanScalar *, P... > MeanViewType
Kokkos::View< Scalar *, P... > ViewType
GetScalarMeanValsFunc(const ViewType &vals_)
ViewType::execution_space execution_space
ViewType::size_type size_type
Scalar::value_type MeanScalar
Sacado::UQ::PCE< Storage > Scalar
MeanViewType getMeanValues() const
Get mean values matrix for mean-based preconditioning.
MeanViewType getMeanValues() const
ViewType::execution_space execution_space
GetScalarMeanValsFunc(const ViewType &vals)
ViewType::size_type size_type
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
scalar_type::value_type base_scalar_type
GlobalOrdinal global_ordinal_type
Teuchos::RCP< scalar_mv_type > X_s
virtual ~MeanBasedTpetraOperator()
LocalOrdinal local_ordinal_type
Teuchos::RCP< const scalar_op_type > mb_op
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
Teuchos::RCP< scalar_mv_type > Y_s
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
virtual bool hasTransposeApply() const
KokkosClassic::DefaultNode::DefaultNodeType Node
Stokhos::StandardStorage< int, double > Storage
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Top-level namespace for Stokhos classes and functions.
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
CopyScalar2PCE(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)