Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_MP_Vector.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
45#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
46
47//----------------------------------------------------------------------------
48// Specializations of Tpetra::MultiVector pack/unpack kernels for MP::Vector
49//----------------------------------------------------------------------------
50
51#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54
55namespace Tpetra {
56namespace KokkosRefactor {
57namespace Details {
58
59 // Functors for implementing packAndPrepare and unpackAndCombine
60 // through parallel_for
61
62 template <typename DstView, typename SrcView, typename IdxView>
63 struct PackArraySingleColumn<
64 DstView, SrcView, IdxView,
65 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
66 Kokkos::is_view_mp_vector<SrcView>::value >::type >
67 {
68 typedef typename DstView::execution_space execution_space;
69 typedef typename execution_space::size_type size_type;
70
71 DstView dst;
72 SrcView src;
73 IdxView idx;
74 size_t col;
75
76 PackArraySingleColumn(const DstView& dst_,
77 const SrcView& src_,
78 const IdxView& idx_,
79 size_t col_) :
80 dst(dst_), src(src_), idx(idx_), col(col_) {}
81
82 KOKKOS_INLINE_FUNCTION
83 void operator()( const size_type k ) const {
84 dst(k) = src(idx(k), col);
85 }
86
87 KOKKOS_INLINE_FUNCTION
88 void operator()( const size_type k, const size_type tidx ) const {
89 dst(k).fastAccessCoeff(tidx) = src(idx(k), col).fastAccessCoeff(tidx);
90 }
91
92 static void pack(const DstView& dst,
93 const SrcView& src,
94 const IdxView& idx,
95 size_t col) {
96 Kokkos::parallel_for(
98 idx.size(), Kokkos::dimension_scalar(dst) ),
99 PackArraySingleColumn(dst,src,idx,col) );
100 }
101 };
102
103 template <typename DstView, typename SrcView, typename IdxView>
104 struct PackArrayMultiColumn<
105 DstView, SrcView, IdxView,
106 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
107 Kokkos::is_view_mp_vector<SrcView>::value >::type >
108 {
109 typedef typename DstView::execution_space execution_space;
110 typedef typename execution_space::size_type size_type;
111
112 DstView dst;
113 SrcView src;
114 IdxView idx;
115 size_t numCols;
116
117 PackArrayMultiColumn(const DstView& dst_,
118 const SrcView& src_,
119 const IdxView& idx_,
120 size_t numCols_) :
121 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
122
123 KOKKOS_INLINE_FUNCTION
124 void operator()( const size_type k ) const {
125 const typename IdxView::value_type localRow = idx(k);
126 const size_t offset = k*numCols;
127 for (size_t j = 0; j < numCols; ++j)
128 dst(offset + j) = src(localRow, j);
129 }
130
131 KOKKOS_INLINE_FUNCTION
132 void operator()( const size_type k, const size_type tidx ) const {
133 const typename IdxView::value_type localRow = idx(k);
134 const size_t offset = k*numCols;
135 for (size_t j = 0; j < numCols; ++j)
136 dst(offset + j).fastAccessCoeff(tidx) =
137 src(localRow, j).fastAccessCoeff(tidx);
138 }
139
140 static void pack(const DstView& dst,
141 const SrcView& src,
142 const IdxView& idx,
143 size_t numCols) {
144 Kokkos::parallel_for(
146 PackArrayMultiColumn(dst,src,idx,numCols) );
147 }
148 };
149
150 template <typename DstView, typename SrcView, typename IdxView,
151 typename ColView>
152 struct PackArrayMultiColumnVariableStride<
153 DstView, SrcView, IdxView, ColView,
154 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
155 Kokkos::is_view_mp_vector<SrcView>::value >::type >
156 {
157 typedef typename DstView::execution_space execution_space;
158 typedef typename execution_space::size_type size_type;
159
160 DstView dst;
161 SrcView src;
162 IdxView idx;
163 ColView col;
164 size_t numCols;
165
167 const SrcView& src_,
168 const IdxView& idx_,
169 const ColView& col_,
170 size_t numCols_) :
171 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
172
173 KOKKOS_INLINE_FUNCTION
174 void operator()( const size_type k ) const {
175 const typename IdxView::value_type localRow = idx(k);
176 const size_t offset = k*numCols;
177 for (size_t j = 0; j < numCols; ++j)
178 dst(offset + j) = src(localRow, col(j));
179 }
180
181 KOKKOS_INLINE_FUNCTION
182 void operator()( const size_type k, const size_type tidx ) const {
183 const typename IdxView::value_type localRow = idx(k);
184 const size_t offset = k*numCols;
185 for (size_t j = 0; j < numCols; ++j)
186 dst(offset + j).fastAccessCoeff(tidx) =
187 src(localRow, col(j)).fastAccessCoeff(tidx);
188 }
189
190 static void pack(const DstView& dst,
191 const SrcView& src,
192 const IdxView& idx,
193 const ColView& col,
194 size_t numCols) {
195 Kokkos::parallel_for(
197 idx.size(), Kokkos::dimension_scalar(dst) ),
198 PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
199 }
200 };
201
202 template <typename ExecutionSpace,
203 typename DstView,
204 typename SrcView,
205 typename IdxView,
206 typename Op>
207 struct UnpackArrayMultiColumn<
208 ExecutionSpace, DstView, SrcView, IdxView, Op,
209 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
210 Kokkos::is_view_mp_vector<SrcView>::value >::type >
211 {
212 typedef typename ExecutionSpace::execution_space execution_space;
213 typedef typename execution_space::size_type size_type;
214
215 DstView dst;
216 SrcView src;
217 IdxView idx;
218 Op op;
219 size_t numCols;
220
221 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
222 const DstView& dst_,
223 const SrcView& src_,
224 const IdxView& idx_,
225 const Op& op_,
226 const size_t numCols_) :
227 dst (dst_),
228 src (src_),
229 idx (idx_),
230 op (op_),
231 numCols (numCols_)
232 {}
233
234 template <class TagType>
235 KOKKOS_INLINE_FUNCTION void
236 operator() (TagType tag, const size_type k) const
237 {
238 const typename IdxView::value_type localRow = idx(k);
239 const size_t offset = k*numCols;
240 for (size_t j = 0; j < numCols; ++j) {
241 op (tag, dst(localRow, j), src(offset+j));
242 }
243 }
244
245 template <class TagType>
246 KOKKOS_INLINE_FUNCTION void
247 operator() (TagType tag, const size_type k, const size_type tidx) const
248 {
249 const typename IdxView::value_type localRow = idx(k);
250 const size_t offset = k*numCols;
251 for (size_t j = 0; j < numCols; ++j) {
252 op (tag, dst(localRow, j).fastAccessCoeff(tidx),
253 src(offset+j).fastAccessCoeff(tidx));
254 }
255 }
256
257 static void
258 unpack (const ExecutionSpace& execSpace,
259 const DstView& dst,
260 const SrcView& src,
261 const IdxView& idx,
262 const Op& op,
263 const size_t numCols,
264 const bool use_atomic_updates)
265 {
266 if (use_atomic_updates) {
267 Kokkos::parallel_for
268 ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
270 idx.size (), Kokkos::dimension_scalar (dst)),
271 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
272 }
273 else {
274 Kokkos::parallel_for
275 ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
277 idx.size (), Kokkos::dimension_scalar (dst)),
278 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
279 }
280 }
281 };
282
283 template <typename ExecutionSpace,
284 typename DstView,
285 typename SrcView,
286 typename IdxView,
287 typename ColView,
288 typename Op>
289 struct UnpackArrayMultiColumnVariableStride<
290 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
291 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
292 Kokkos::is_view_mp_vector<SrcView>::value >::type >
293 {
294 typedef typename ExecutionSpace::execution_space execution_space;
295 typedef typename execution_space::size_type size_type;
296
297 DstView dst;
298 SrcView src;
299 IdxView idx;
300 ColView col;
301 Op op;
302 size_t numCols;
303
304 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
305 const DstView& dst_,
306 const SrcView& src_,
307 const IdxView& idx_,
308 const ColView& col_,
309 const Op& op_,
310 const size_t numCols_) :
311 dst (dst_),
312 src (src_),
313 idx (idx_),
314 col (col_),
315 op (op_),
316 numCols (numCols_)
317 {}
318
319 template <class TagType>
320 KOKKOS_INLINE_FUNCTION void
321 operator() (TagType tag, const size_type k) const
322 {
323 const typename IdxView::value_type localRow = idx(k);
324 const size_t offset = k*numCols;
325 for (size_t j = 0; j < numCols; ++j) {
326 op (tag, dst(localRow, col(j)), src(offset+j));
327 }
328 }
329
330 template <class TagType>
331 KOKKOS_INLINE_FUNCTION void
332 operator() (TagType tag, const size_type k, const size_type tidx) const
333 {
334 const typename IdxView::value_type localRow = idx(k);
335 const size_t offset = k*numCols;
336 for (size_t j = 0; j < numCols; ++j) {
337 op (tag, dst(localRow, col(j)).fastAccessCoeff(tidx),
338 src(offset+j).fastAccessCoeff(tidx));
339 }
340 }
341
342 static void
343 unpack (const ExecutionSpace& execSpace,
344 const DstView& dst,
345 const SrcView& src,
346 const IdxView& idx,
347 const ColView& col,
348 const Op& op,
349 const size_t numCols,
350 const bool use_atomic_updates)
351 {
352 if (use_atomic_updates) {
353 Kokkos::parallel_for
354 ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
356 idx.size (), Kokkos::dimension_scalar (dst)),
357 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
358 }
359 else {
360 Kokkos::parallel_for
361 ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
363 idx.size (), Kokkos::dimension_scalar (dst)),
364 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
365 }
366 }
367 };
368
369 template <typename DstView, typename SrcView,
370 typename DstIdxView, typename SrcIdxView, typename Op>
371 struct PermuteArrayMultiColumn<
372 DstView, SrcView, DstIdxView, SrcIdxView, Op,
373 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
374 Kokkos::is_view_mp_vector<SrcView>::value >::type >
375 {
376 typedef typename DstView::execution_space execution_space;
377 typedef typename execution_space::size_type size_type;
378
379 DstView dst;
380 SrcView src;
381 DstIdxView dst_idx;
382 SrcIdxView src_idx;
383 size_t numCols;
384 Op op;
385
386 PermuteArrayMultiColumn(const DstView& dst_,
387 const SrcView& src_,
388 const DstIdxView& dst_idx_,
389 const SrcIdxView& src_idx_,
390 size_t numCols_, const Op& op_) :
391 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
392 numCols(numCols_), op(op_) {}
393
394 KOKKOS_INLINE_FUNCTION
395 void operator()( const size_type k ) const {
396 const typename DstIdxView::value_type toRow = dst_idx(k);
397 const typename SrcIdxView::value_type fromRow = src_idx(k);
398 nonatomic_tag tag; // permute does not need atomics
399 for (size_t j = 0; j < numCols; ++j)
400 op(tag, dst(toRow, j),src(fromRow, j));
401 }
402
403 KOKKOS_INLINE_FUNCTION
404 void operator()( const size_type k, const size_type tidx ) const {
405 const typename DstIdxView::value_type toRow = dst_idx(k);
406 const typename SrcIdxView::value_type fromRow = src_idx(k);
407 nonatomic_tag tag; // permute does not need atomics
408 for (size_t j = 0; j < numCols; ++j)
409 op(tag, dst(toRow, j).fastAccessCoeff(tidx),
410 src(fromRow, j).fastAccessCoeff(tidx));
411 }
412
413 static void permute(const DstView& dst,
414 const SrcView& src,
415 const DstIdxView& dst_idx,
416 const SrcIdxView& src_idx,
417 size_t numCols,
418 const Op& op) {
419 const size_type n = std::min( dst_idx.size(), src_idx.size() );
420 Kokkos::parallel_for(
422 n, Kokkos::dimension_scalar(dst) ),
423 PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
424 }
425 };
426
427 template <typename DstView, typename SrcView,
428 typename DstIdxView, typename SrcIdxView,
429 typename DstColView, typename SrcColView, typename Op>
430 struct PermuteArrayMultiColumnVariableStride<
431 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
432 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
433 Kokkos::is_view_mp_vector<SrcView>::value >::type >
434 {
435 typedef typename DstView::execution_space execution_space;
436 typedef typename execution_space::size_type size_type;
437
438 DstView dst;
439 SrcView src;
440 DstIdxView dst_idx;
441 SrcIdxView src_idx;
442 DstColView dst_col;
443 SrcColView src_col;
444 size_t numCols;
445 Op op;
446
448 const SrcView& src_,
449 const DstIdxView& dst_idx_,
450 const SrcIdxView& src_idx_,
451 const DstColView& dst_col_,
452 const SrcColView& src_col_,
453 size_t numCols_,
454 const Op& op_) :
455 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
456 dst_col(dst_col_), src_col(src_col_),
457 numCols(numCols_), op(op_) {}
458
459 KOKKOS_INLINE_FUNCTION
460 void operator()( const size_type k ) const {
461 const typename DstIdxView::value_type toRow = dst_idx(k);
462 const typename SrcIdxView::value_type fromRow = src_idx(k);
463 nonatomic_tag tag; // permute does not need atomics
464 for (size_t j = 0; j < numCols; ++j)
465 op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
466 }
467
468 KOKKOS_INLINE_FUNCTION
469 void operator()( const size_type k, const size_type tidx ) const {
470 const typename DstIdxView::value_type toRow = dst_idx(k);
471 const typename SrcIdxView::value_type fromRow = src_idx(k);
472 nonatomic_tag tag; // permute does not need atomics
473 for (size_t j = 0; j < numCols; ++j)
474 op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(tidx),
475 src(fromRow, src_col(j)).fastAccessCoeff(tidx));
476 }
477
478 static void permute(const DstView& dst,
479 const SrcView& src,
480 const DstIdxView& dst_idx,
481 const SrcIdxView& src_idx,
482 const DstColView& dst_col,
483 const SrcColView& src_col,
484 size_t numCols,
485 const Op& op) {
486 const size_type n = std::min( dst_idx.size(), src_idx.size() );
487 Kokkos::parallel_for(
489 n, Kokkos::dimension_scalar(dst) ),
490 PermuteArrayMultiColumnVariableStride(
491 dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
492 }
493 };
494
495} // Details namespace
496} // KokkosRefactor namespace
497} // Tpetra namespace
498
499#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.