Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_TiledCrsProductTensor.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
43#define STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
44
45#include "Kokkos_Core.hpp"
46
47#include "Stokhos_Multiply.hpp"
51#include "Teuchos_ParameterList.hpp"
52#include "Stokhos_TinyVec.hpp"
53
54
55//----------------------------------------------------------------------------
56//----------------------------------------------------------------------------
57
58namespace Stokhos {
59
60template< typename ValueType, class ExecutionSpace >
62public:
63
64 typedef ExecutionSpace execution_space;
65 typedef int size_type;
66 typedef ValueType value_type;
67
68// Vectorsize used in multiply algorithm
69#if defined(__AVX__)
70 static const size_type host_vectorsize = 32/sizeof(value_type);
71 static const bool use_intrinsics = true;
72#elif defined(__MIC__)
73 static const size_type host_vectorsize = 16;
74 static const bool use_intrinsics = true;
75#else
76 static const size_type host_vectorsize = 2;
77 static const bool use_intrinsics = false;
78#endif
79 static const size_type cuda_vectorsize = 32;
80 static const bool is_cuda =
81#if defined( KOKKOS_ENABLE_CUDA )
82 std::is_same<ExecutionSpace,Kokkos::Cuda>::value;
83#else
84 false ;
85#endif
87
88 // Alignment in terms of number of entries of CRS rows
90
91private:
92
93 typedef Kokkos::LayoutRight layout_type;
94 typedef Kokkos::View< value_type[], execution_space > vec_type;
95 typedef Kokkos::View< size_type[], execution_space > coord_array_type;
96 typedef Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type;
97 typedef Kokkos::View< size_type[][3], execution_space > coord_offset_type;
98 typedef Kokkos::View< size_type[][3], execution_space > coord_range_type;
99 typedef Kokkos::View< value_type[], execution_space > value_array_type;
100 typedef Kokkos::View< size_type**, layout_type, execution_space > entry_array_type;
101 typedef Kokkos::View< size_type**, layout_type, execution_space > row_map_array_type;
102 typedef Kokkos::View< size_type[], execution_space > num_row_array_type;
103
118
119public:
120
121 inline
123
124 inline
126 m_coord(),
127 m_coord2(),
130 m_value(),
131 m_num_entry(),
132 m_row_map(),
133 m_num_rows(),
134 m_dimension(0),
135 m_tile_size(0),
136 m_entry_max(0),
138 m_nnz(0),
139 m_flops(0) {}
140
141 inline
143 m_coord( rhs.m_coord ),
144 m_coord2( rhs.m_coord2 ),
147 m_value( rhs.m_value ),
149 m_row_map( rhs.m_row_map ),
150 m_num_rows( rhs.m_num_rows ),
155 m_nnz( rhs.m_nnz ),
156 m_flops( rhs.m_flops ) {}
157
158 inline
160 {
161 m_coord = rhs.m_coord;
162 m_coord2 = rhs.m_coord2;
163 m_coord_offset = rhs.m_coord_offset;
164 m_coord_range = rhs.m_coord_range;
165 m_value = rhs.m_value;
166 m_num_entry = rhs.m_num_entry;
167 m_row_map = rhs.m_row_map;
168 m_num_rows = rhs.m_num_rows;
169 m_dimension = rhs.m_dimension;
170 m_tile_size = rhs.m_tile_size;
171 m_entry_max = rhs.m_entry_max;
172 m_max_num_rows = rhs.m_max_num_rows;
173 m_nnz = rhs.m_nnz;
174 m_flops = rhs.m_flops;
175 return *this;
176 }
177
179 KOKKOS_INLINE_FUNCTION
180 size_type dimension() const { return m_dimension; }
181
183 KOKKOS_INLINE_FUNCTION
185 { return m_coord.extent(0); }
186
188 KOKKOS_INLINE_FUNCTION
190 { return m_entry_max; }
191
193 KOKKOS_INLINE_FUNCTION
195 { return m_max_num_rows; }
196
198 KOKKOS_INLINE_FUNCTION
200 { return m_num_rows(tile); }
201
203 KOKKOS_INLINE_FUNCTION
204 const size_type& entry_begin( size_type tile, size_type i ) const
205 { return m_row_map(tile,i); }
206
208 KOKKOS_INLINE_FUNCTION
210 { return m_row_map(tile,i) + m_num_entry(tile,i); }
211
213 KOKKOS_INLINE_FUNCTION
214 const size_type* row_map_ptr() const
215 { return m_row_map.data(); }
216
218 KOKKOS_INLINE_FUNCTION
219 const size_type& num_entry( size_type tile, size_type i ) const
220 { return m_num_entry(tile,i); }
221
223 KOKKOS_INLINE_FUNCTION
224 const size_type& coord( const size_type entry, const size_type c ) const
225 { return m_coord2( entry, c ); }
226
228 KOKKOS_INLINE_FUNCTION
229 const size_type& coord( const size_type entry ) const
230 { return m_coord( entry ); }
231
233 KOKKOS_INLINE_FUNCTION
234 const value_type & value( const size_type entry ) const
235 { return m_value( entry ); }
236
238 KOKKOS_INLINE_FUNCTION
240 { return m_nnz; }
241
243 KOKKOS_INLINE_FUNCTION
245 { return m_flops; }
246
248 KOKKOS_INLINE_FUNCTION
250 { return m_tile_size; }
251
253 KOKKOS_INLINE_FUNCTION
255 { return m_coord_offset.extent(0); }
256
258 KOKKOS_INLINE_FUNCTION
259 const size_type& offset( const size_type entry, const size_type c ) const
260 { return m_coord_offset( entry, c ); }
261
263 KOKKOS_INLINE_FUNCTION
264 const size_type& range( const size_type entry, const size_type c ) const
265 { return m_coord_range( entry, c ); }
266
267 template <typename OrdinalType>
271 const Teuchos::ParameterList& params)
272 {
273 typedef Stokhos::CijkData<OrdinalType,ValueType> Cijk_Data_type;
274
275 const size_type tile_size = params.get<int>("Tile Size");
276 const size_type max_tiles = params.get<int>("Max Tiles");
277
278 // Build tensor data list
279 Teuchos::ArrayRCP<Cijk_Data_type> coordinate_list =
281
282 // Partition via RCB
283 typedef Stokhos::RCB<Cijk_Data_type> rcb_type;
284 typedef typename rcb_type::Box box_type;
285 rcb_type rcb(tile_size, max_tiles, coordinate_list());
286 Teuchos::RCP< Teuchos::Array< Teuchos::RCP<box_type> > > parts =
287 rcb.get_parts();
288 size_type num_parts = rcb.get_num_parts();
289
290 // Compute number of non-zeros for each row in each part
291 size_type total_num_rows = 0, max_num_rows = 0, entry_count = 0;
292 Teuchos::Array< Teuchos::Array<size_type> > coord_work( num_parts );
293 for (size_type part=0; part<num_parts; ++part) {
294 Teuchos::RCP<box_type> box = (*parts)[part];
295 size_type num_rows = box->delta_x;
296 total_num_rows += num_rows;
298 coord_work[part].resize(num_rows, 0);
299
300 size_type nc = box->coords.size();
301 for (size_type c=0; c<nc; ++c) {
302 size_type i = box->coords[c](0) - box->xmin;
303 ++(coord_work[part][i]);
304 ++entry_count;
305 }
306 }
307
308 // Pad each row to have size divisible by alignment size
309 for (size_type part=0; part<num_parts; ++part) {
310 size_type sz = coord_work[part].size();
311 for ( size_type i = 0; i < sz; ++i ) {
312 const size_t rem = coord_work[part][i] % tensor_align;
313 if (rem > 0) {
314 const size_t pad = tensor_align - rem;
315 coord_work[part][i] += pad;
316 entry_count += pad;
317 }
318 }
319 }
320
321 // Allocate tensor data
323 tensor.m_coord =
324 coord_array_type( "tensor_coord", entry_count );
325 tensor.m_coord2 =
326 coord2_array_type( "tensor_coord2", entry_count );
327 tensor.m_coord_offset =
328 coord_offset_type( "tensor_coord_offset", num_parts );
329 tensor.m_coord_range =
330 coord_range_type( "tensor_coord_range", num_parts );
331 tensor.m_value =
332 value_array_type( "tensor_value", entry_count );
333 tensor.m_num_entry =
334 entry_array_type( "tensor_num_entry", num_parts, max_num_rows );
335 tensor.m_row_map =
336 row_map_array_type( "tensor_row_map", num_parts, max_num_rows+1 );
337 tensor.m_num_rows =
338 num_row_array_type( "tensor_num_rows", num_parts );
339 tensor.m_dimension = basis.size();
340 tensor.m_tile_size = tile_size;
342
343 // Create mirror, is a view if is host memory
344 typename coord_array_type::HostMirror host_coord =
346 typename coord2_array_type::HostMirror host_coord2 =
348 typename coord_offset_type::HostMirror host_coord_offset =
350 typename coord_range_type::HostMirror host_coord_range =
352 typename value_array_type::HostMirror host_value =
354 typename entry_array_type::HostMirror host_num_entry =
356 typename row_map_array_type::HostMirror host_row_map =
358 typename num_row_array_type::HostMirror host_num_rows =
360
361 // Compute row map
362 size_type sum = 0;
363 for (size_type part=0; part<num_parts; ++part) {
364 size_type nc = coord_work[part].size();
365 host_row_map(part,0) = sum;
366 for (size_type t=0; t<nc; ++t) {
367 sum += coord_work[part][t];
368 host_row_map(part,t+1) = sum;
369 }
370 }
371
372 // Copy per part row offsets back into coord_work
373 for (size_type part=0; part<num_parts; ++part) {
374 size_type nc = coord_work[part].size();
375 for (size_type t=0; t<nc; ++t) {
376 coord_work[part][t] = host_row_map(part,t);
377 }
378 }
379
380 // Fill in coordinate and value arrays
381 for (size_type part=0; part<num_parts; ++part) {
382 Teuchos::RCP<box_type> box = (*parts)[part];
383
384 host_coord_offset(part,0) = box->xmin;
385 host_coord_offset(part,1) = box->ymin;
386 host_coord_offset(part,2) = box->zmin;
387
388 host_coord_range(part,0) = box->delta_x;
389 host_coord_range(part,1) = box->delta_y;
390 host_coord_range(part,2) = box->delta_z;
391
392 host_num_rows(part) = coord_work[part].size(); // also == box->delta_x
393
394 size_type nc = box->coords.size();
395 for (size_type t=0; t<nc; ++t) {
396 const size_type i = box->coords[t].i;
397 const size_type j = box->coords[t].j;
398 const size_type k = box->coords[t].k;
399 const value_type c = box->coords[t].c;
400
401 const size_type row = i - box->xmin;
402 const size_type n = coord_work[part][row];
403 ++coord_work[part][row];
404
405 host_value(n) = c;
406 host_coord2(n,0) = j - box->ymin;
407 host_coord2(n,1) = k - box->zmin;
408 host_coord(n) = ( host_coord2(n,1) << 16 ) | host_coord2(n,0);
409
410 ++host_num_entry(part,row);
411 ++tensor.m_nnz;
412 }
413 }
414
415 // Copy data to device if necessary
416 Kokkos::deep_copy( tensor.m_coord, host_coord );
417 Kokkos::deep_copy( tensor.m_coord2, host_coord2 );
418 Kokkos::deep_copy( tensor.m_coord_offset, host_coord_offset );
419 Kokkos::deep_copy( tensor.m_coord_range, host_coord_range );
420 Kokkos::deep_copy( tensor.m_value, host_value );
421 Kokkos::deep_copy( tensor.m_num_entry, host_num_entry );
422 Kokkos::deep_copy( tensor.m_row_map, host_row_map );
423 Kokkos::deep_copy( tensor.m_num_rows, host_num_rows );
424
425 tensor.m_entry_max = 0;
426 tensor.m_flops = 0;
427 for (size_type part=0; part<num_parts; ++part) {
428 for ( size_type i = 0; i < host_num_rows(part); ++i ) {
429 tensor.m_entry_max = std::max( tensor.m_entry_max,
430 host_num_entry(part,i) );
431 tensor.m_flops += 5*host_num_entry(part,i) + 1;
432 }
433 }
434
435 return tensor;
436 }
437};
438
439template< class Device, typename OrdinalType, typename ValueType >
440TiledCrsProductTensor<ValueType, Device>
444 const Teuchos::ParameterList& params)
445{
447 basis, Cijk, params );
448}
449
450template < typename ValueType, typename Device >
451class BlockMultiply< TiledCrsProductTensor< ValueType , Device > >
452{
453public:
454
455 typedef typename Device::size_type size_type ;
456 typedef TiledCrsProductTensor< ValueType , Device > tensor_type ;
457
458 template< typename MatrixValue , typename VectorValue >
459 KOKKOS_INLINE_FUNCTION
460 static void apply( const tensor_type & tensor ,
461 const MatrixValue * const a ,
462 const VectorValue * const x ,
463 VectorValue * const y )
464 {
465 const size_type block_size = 2;
466 typedef TinyVec<ValueType,block_size,false> TV;
467
468 const size_type n_tile = tensor.num_tiles();
469
470 for ( size_type tile = 0 ; tile < n_tile ; ++tile ) {
471
472 const size_type i_offset = tensor.offset(tile, 0);
473 const size_type j_offset = tensor.offset(tile, 1);
474 const size_type k_offset = tensor.offset(tile, 2);
475
476 const size_type n_row = tensor.num_rows(tile);
477
478 for ( size_type i = 0 ; i < n_row ; ++i ) {
479
480 const size_type nEntry = tensor.num_entry(tile,i);
481 const size_type iEntryBeg = tensor.entry_begin(tile,i);
482 const size_type iEntryEnd = iEntryBeg + nEntry;
483 size_type iEntry = iEntryBeg;
484
485 VectorValue ytmp = 0 ;
486
487 // Do entries with a blocked loop of size block_size
488 if (block_size > 1) {
489 const size_type nBlock = nEntry / block_size;
490 const size_type nEntryB = nBlock * block_size;
491 const size_type iEnd = iEntryBeg + nEntryB;
492
493 TV vy;
494 vy.zero();
495 int j[block_size], k[block_size];
496
497 for ( ; iEntry < iEnd ; iEntry += block_size ) {
498
499 for (size_type ii=0; ii<block_size; ++ii) {
500 j[ii] = tensor.coord(iEntry+ii,0) + j_offset;
501 k[ii] = tensor.coord(iEntry+ii,1) + k_offset;
502 }
503 TV aj(a, j), ak(a, k), xj(x, j), xk(x, k),
504 c(&(tensor.value(iEntry)));
505
506 // vy += c * ( aj * xk + ak * xj)
507 aj.times_equal(xk);
508 ak.times_equal(xj);
509 aj.plus_equal(ak);
510 c.times_equal(aj);
511 vy.plus_equal(c);
512
513 }
514
515 ytmp += vy.sum();
516 }
517
518 // Do remaining entries with a scalar loop
519 for ( ; iEntry<iEntryEnd; ++iEntry) {
520 const size_type j = tensor.coord(iEntry,0) + j_offset;
521 const size_type k = tensor.coord(iEntry,1) + k_offset;
522
523 ytmp += tensor.value(iEntry) * ( a[j] * x[k] + a[k] * x[j] );
524 }
525
526 y[i+i_offset] += ytmp ;
527 //y[i] += ytmp ;
528 }
529 }
530 }
531
532 KOKKOS_INLINE_FUNCTION
533 static size_type matrix_size( const tensor_type & tensor )
534 { return tensor.dimension(); }
535
536 KOKKOS_INLINE_FUNCTION
537 static size_type vector_size( const tensor_type & tensor )
538 { return tensor.dimension(); }
539};
540
541} /* namespace Stokhos */
542
543//----------------------------------------------------------------------------
544//----------------------------------------------------------------------------
545
546#endif /* #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP */
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
virtual ordinal_type size() const =0
Return total size of basis.
Abstract base class for multivariate orthogonal polynomials generated from tensor products of univari...
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number flop's per multiply-add.
Kokkos::View< value_type[], execution_space > value_array_type
KOKKOS_INLINE_FUNCTION const size_type & range(const size_type entry, const size_type c) const
Coordinate range.
Kokkos::View< size_type[][3], execution_space > coord_range_type
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type tile, size_type i) const
End entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const
Coordinates of an entry.
KOKKOS_INLINE_FUNCTION size_type num_tiles() const
Number tiles.
KOKKOS_INLINE_FUNCTION size_type num_rows(size_type tile) const
Number of rows in given tile.
KOKKOS_INLINE_FUNCTION size_type max_num_rows() const
Maximum number of rows in any tile.
Kokkos::View< size_type **, layout_type, execution_space > row_map_array_type
KOKKOS_INLINE_FUNCTION const size_type & entry_begin(size_type tile, size_type i) const
Begin entries with a coordinate 'i'.
Kokkos::View< size_type[], execution_space > coord_array_type
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const
Coordinates of an entry.
KOKKOS_INLINE_FUNCTION const size_type * row_map_ptr() const
Return row_map ptr.
KOKKOS_INLINE_FUNCTION size_type tile_size() const
Number tiles.
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const
Maximum sparse entries for any coordinate.
KOKKOS_INLINE_FUNCTION const size_type & num_entry(size_type tile, size_type i) const
Number of entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION const size_type & offset(const size_type entry, const size_type c) const
Coordinate offset.
Kokkos::View< size_type **, layout_type, execution_space > entry_array_type
Kokkos::View< size_type[], execution_space > num_row_array_type
Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type
Kokkos::View< size_type[][3], execution_space > coord_offset_type
KOKKOS_INLINE_FUNCTION size_type entry_count() const
Number of sparse entries.
static TiledCrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zero's.
TiledCrsProductTensor(const TiledCrsProductTensor &rhs)
TiledCrsProductTensor & operator=(const TiledCrsProductTensor &rhs)
Kokkos::View< value_type[], execution_space > vec_type
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Value of an entry.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Top-level namespace for Stokhos classes and functions.
TiledCrsProductTensor< ValueType, Device > create_tiled_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
Teuchos::ArrayRCP< CijkData< ordinal_type, scalar_type > > build_cijk_coordinate_list(const Sparse3Tensor< ordinal_type, scalar_type > &Cijk, CijkSymmetryType symmetry_type)