Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
TestMeanMultiply.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#include <iostream>
43
44// PCE scalar type
46
47// Kokkos CrsMatrix
48#include "KokkosSparse_CrsMatrix.hpp"
49#include "KokkosSparse_spmv.hpp"
52
53// Stokhos
57
58// Utilities
59#include "Kokkos_Timer.hpp"
60
61template< typename IntType >
62inline
63IntType map_fem_graph_coord( const IntType & N ,
64 const IntType & i ,
65 const IntType & j ,
66 const IntType & k )
67{
68 return k + N * ( j + N * i );
69}
70
71inline
72size_t generate_fem_graph( size_t N ,
73 std::vector< std::vector<size_t> > & graph )
74{
75 graph.resize( N * N * N , std::vector<size_t>() );
76
77 size_t total = 0 ;
78
79 for ( int i = 0 ; i < (int) N ; ++i ) {
80 for ( int j = 0 ; j < (int) N ; ++j ) {
81 for ( int k = 0 ; k < (int) N ; ++k ) {
82
83 const size_t row = map_fem_graph_coord((int)N,i,j,k);
84
85 graph[row].reserve(27);
86
87 for ( int ii = -1 ; ii < 2 ; ++ii ) {
88 for ( int jj = -1 ; jj < 2 ; ++jj ) {
89 for ( int kk = -1 ; kk < 2 ; ++kk ) {
90 if ( 0 <= i + ii && i + ii < (int) N &&
91 0 <= j + jj && j + jj < (int) N &&
92 0 <= k + kk && k + kk < (int) N ) {
93 size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
94
95 graph[row].push_back(col);
96 }
97 }}}
98 total += graph[row].size();
99 }}}
100
101 return total ;
102}
103
104template <typename ScalarType, typename OrdinalType, typename Device>
105void
106test_mean_multiply(const OrdinalType order,
107 const OrdinalType dim,
108 const OrdinalType nGrid,
109 const OrdinalType iterCount,
110 std::vector<double>& scalar_perf,
111 std::vector<double>& block_left_perf,
112 std::vector<double>& block_right_perf,
113 std::vector<double>& pce_perf,
114 std::vector<double>& block_pce_perf)
115{
116 typedef ScalarType value_type;
117 typedef OrdinalType ordinal_type;
118 typedef Device execution_space;
119
122
123 typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space > scalar_vector_type;
124 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > scalar_left_multi_vector_type;
125 typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > scalar_right_multi_vector_type;
126 typedef Kokkos::View< pce_type*, Kokkos::LayoutLeft, execution_space > pce_vector_type;
127 typedef Kokkos::View< pce_type**, Kokkos::LayoutLeft, execution_space > pce_multi_vector_type;
128
129 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > scalar_matrix_type;
130 typedef KokkosSparse::CrsMatrix< pce_type, ordinal_type, execution_space > pce_matrix_type;
131 typedef typename scalar_matrix_type::StaticCrsGraphType matrix_graph_type;
132 typedef typename scalar_matrix_type::values_type scalar_matrix_values_type;
133 typedef typename pce_matrix_type::values_type pce_matrix_values_type;
134
140 typedef typename pce_type::cijk_type kokkos_cijk_type;
141
142 using Teuchos::rcp;
143 using Teuchos::RCP;
144 using Teuchos::Array;
145
146 // Number of columns for PCE multi-vector apply
147 const ordinal_type num_pce_col = 5;
148
149 // Create Stochastic Galerkin basis and expansion
150 Array< RCP<const abstract_basis_type> > bases(dim);
151 for (ordinal_type i=0; i<dim; ++i) {
152 bases[i] = Teuchos::rcp(new basis_type(order, true));
153 }
154 RCP<const product_basis_type> basis = rcp(new product_basis_type(bases));
155 RCP<cijk_type> cijk = basis->computeTripleProductTensor();
156 kokkos_cijk_type kokkos_cijk =
157 Stokhos::create_product_tensor<execution_space>(*basis, *cijk);
158 Kokkos::setGlobalCijkTensor(kokkos_cijk);
159
160 //------------------------------
161 // Generate graph for "FEM" box structure:
162
163 std::vector< std::vector<size_t> > fem_graph;
164 const size_t fem_length = nGrid * nGrid * nGrid;
165 const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
166
167 //------------------------------
168 // Generate input vectors:
169
170 ordinal_type pce_size = basis->size();
171 scalar_left_multi_vector_type xl(Kokkos::ViewAllocateWithoutInitializing("scalar left x"), fem_length, pce_size);
172 scalar_left_multi_vector_type yl(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
173 scalar_right_multi_vector_type xr(Kokkos::ViewAllocateWithoutInitializing("scalar right x"), fem_length, pce_size);
174 scalar_right_multi_vector_type yr(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
175 std::vector<scalar_vector_type> x_col(pce_size), y_col(pce_size);
176 for (ordinal_type i=0; i<pce_size; ++i) {
177 x_col[i] = scalar_vector_type (Kokkos::ViewAllocateWithoutInitializing("scalar x col"), fem_length);
178 y_col[i] = scalar_vector_type(Kokkos::ViewAllocateWithoutInitializing("scalar y col"), fem_length);
179 Kokkos::deep_copy( x_col[i] , value_type(1.0) );
180 Kokkos::deep_copy( y_col[i] , value_type(0.0) );
181 }
182 pce_vector_type x_pce =
183 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce x"),
184 kokkos_cijk, fem_length, pce_size);
185 pce_vector_type y_pce =
186 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce y"),
187 kokkos_cijk, fem_length, pce_size);
188 pce_multi_vector_type x_multi_pce =
189 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi x"),
190 kokkos_cijk, fem_length,
191 num_pce_col, pce_size);
192 pce_multi_vector_type y_multi_pce =
193 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi y"),
194 kokkos_cijk, fem_length,
195 num_pce_col, pce_size);
196
197 Kokkos::deep_copy( xl , value_type(1.0) );
198 Kokkos::deep_copy( yl , value_type(0.0) );
199 Kokkos::deep_copy( xr , value_type(1.0) );
200 Kokkos::deep_copy( yr , value_type(0.0) );
201 Kokkos::deep_copy( x_pce , value_type(1.0) );
202 Kokkos::deep_copy( y_pce , value_type(0.0) );
203 Kokkos::deep_copy( x_multi_pce , value_type(1.0) );
204 Kokkos::deep_copy( y_multi_pce , value_type(0.0) );
205
206 //------------------------------
207 // Generate matrix
208
209 matrix_graph_type matrix_graph =
210 Kokkos::create_staticcrsgraph<matrix_graph_type>(
211 std::string("test crs graph"), fem_graph);
212 scalar_matrix_values_type scalar_matrix_values =
213 scalar_matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("scalar matrix"), graph_length);
214 pce_matrix_values_type pce_matrix_values =
215 Kokkos::make_view<pce_matrix_values_type>(Kokkos::ViewAllocateWithoutInitializing("pce matrix"), kokkos_cijk, graph_length, 1);
216 scalar_matrix_type scalar_matrix("scalar matrix", fem_length,
217 scalar_matrix_values, matrix_graph);
218 pce_matrix_type pce_matrix("pce matrix", fem_length,
219 pce_matrix_values, matrix_graph);
220
221 Kokkos::deep_copy( scalar_matrix_values , value_type(1.0) );
222 Kokkos::deep_copy( pce_matrix_values , value_type(1.0) );
223
224 //------------------------------
225 // Scalar multiply
226
227 {
228 // warm up
229 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
230 for (ordinal_type col=0; col<pce_size; ++col) {
231 // scalar_vector_type xc =
232 // Kokkos::subview(x, Kokkos::ALL(), col);
233 // scalar_vector_type yc =
234 // Kokkos::subview(y, Kokkos::ALL(), col);
235 // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
236 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
237 }
238 }
239
240 execution_space().fence();
241 Kokkos::Timer clock ;
242 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
243 for (ordinal_type col=0; col<pce_size; ++col) {
244 // scalar_vector_type xc =
245 // Kokkos::subview(x, Kokkos::ALL(), col);
246 // scalar_vector_type yc =
247 // Kokkos::subview(y, Kokkos::ALL(), col);
248 // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
249 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
250 }
251 }
252 execution_space().fence();
253
254 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
255 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
256
257 scalar_perf.resize(5);
258 scalar_perf[0] = fem_length;
259 scalar_perf[1] = pce_size;
260 scalar_perf[2] = graph_length;
261 scalar_perf[3] = seconds_per_iter;
262 scalar_perf[4] = flops / seconds_per_iter;
263 }
264
265 //------------------------------
266 // Block-left multiply
267
268 {
269 // warm up
270 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
271 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
272 }
273
274 execution_space().fence();
275 Kokkos::Timer clock ;
276 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
277 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
278 }
279 execution_space().fence();
280
281 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
282 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
283
284 block_left_perf.resize(5);
285 block_left_perf[0] = fem_length;
286 block_left_perf[1] = pce_size;
287 block_left_perf[2] = graph_length;
288 block_left_perf[3] = seconds_per_iter;
289 block_left_perf[4] = flops / seconds_per_iter;
290 }
291
292 //------------------------------
293 // Block-right multiply
294
295 {
296 // warm up
297 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
298 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
299 }
300
301 execution_space().fence();
302 Kokkos::Timer clock ;
303 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
304 KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
305 }
306 execution_space().fence();
307
308 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
309 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
310
311 block_right_perf.resize(5);
312 block_right_perf[0] = fem_length;
313 block_right_perf[1] = pce_size;
314 block_right_perf[2] = graph_length;
315 block_right_perf[3] = seconds_per_iter;
316 block_right_perf[4] = flops / seconds_per_iter;
317 }
318
319 //------------------------------
320 // PCE multiply
321
322 {
323 // warm up
324 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
325 KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
326 }
327
328 execution_space().fence();
329 Kokkos::Timer clock ;
330 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
331 KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
332 }
333 execution_space().fence();
334
335 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
336 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
337
338 pce_perf.resize(5);
339 pce_perf[0] = fem_length;
340 pce_perf[1] = pce_size;
341 pce_perf[2] = graph_length;
342 pce_perf[3] = seconds_per_iter;
343 pce_perf[4] = flops / seconds_per_iter;
344 }
345
346 //------------------------------
347 // PCE multi-vector multiply
348
349 {
350 // warm up
351 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
352 KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
353 }
354
355 execution_space().fence();
356 Kokkos::Timer clock ;
357 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
358 KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
359 }
360 execution_space().fence();
361
362 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
363 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size * num_pce_col;
364
365 block_pce_perf.resize(5);
366 block_pce_perf[0] = fem_length;
367 block_pce_perf[1] = pce_size;
368 block_pce_perf[2] = graph_length;
369 block_pce_perf[3] = seconds_per_iter;
370 block_pce_perf[4] = flops / seconds_per_iter;
371 }
372
373}
374
375template <typename Scalar, typename Ordinal, typename Device>
377 const Ordinal nIter,
378 const Ordinal order,
379 const Ordinal min_var,
380 const Ordinal max_var )
381{
382 std::cout.precision(8);
383 std::cout << std::endl
384 << "\"Grid Size\" , "
385 << "\"FEM Size\" , "
386 << "\"FEM Graph Size\" , "
387 << "\"Dimension\" , "
388 << "\"Order\" , "
389 << "\"PCE Size\" , "
390 << "\"Scalar SpMM Time\" , "
391 << "\"Scalar SpMM Speedup\" , "
392 << "\"Scalar SpMM GFLOPS\" , "
393 << "\"Block-Left SpMM Speedup\" , "
394 << "\"Block-Left SpMM GFLOPS\" , "
395 << "\"Block-Right SpMM Speedup\" , "
396 << "\"Block-Right SpMM GFLOPS\" , "
397 << "\"PCE SpMM Speedup\" , "
398 << "\"PCE SpMM GFLOPS\" , "
399 << "\"Block PCE SpMM Speedup\" , "
400 << "\"Block PCE SpMM GFLOPS\" , "
401 << std::endl;
402
403 std::vector<double> perf_scalar, perf_block_left, perf_block_right,
404 perf_pce, perf_block_pce;
405 for (Ordinal dim=min_var; dim<=max_var; ++dim) {
406
407 test_mean_multiply<Scalar,Ordinal,Device>(
408 order, dim, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right,
409 perf_pce, perf_block_pce );
410
411 std::cout << nGrid << " , "
412 << perf_scalar[0] << " , "
413 << perf_scalar[2] << " , "
414 << dim << " , "
415 << order << " , "
416 << perf_scalar[1] << " , "
417 << perf_scalar[3] << " , "
418 << perf_scalar[4] / perf_scalar[4] << " , "
419 << perf_scalar[4] << " , "
420 << perf_block_left[4]/ perf_scalar[4] << " , "
421 << perf_block_left[4] << " , "
422 << perf_block_right[4]/ perf_scalar[4] << " , "
423 << perf_block_right[4] << " , "
424 << perf_pce[4]/ perf_scalar[4] << " , "
425 << perf_pce[4] << " , "
426 << perf_block_pce[4]/ perf_scalar[4] << " , "
427 << perf_block_pce[4] << " , "
428 << std::endl;
429
430 }
431}
432
433#define INST_PERF_DRIVER(SCALAR, ORDINAL, DEVICE) \
434 template void performance_test_driver< SCALAR, ORDINAL, DEVICE >( \
435 const ORDINAL nGrid, const ORDINAL nIter, const ORDINAL order, \
436 const ORDINAL min_var, const ORDINAL max_var);
Stokhos::StandardStorage< int, double > storage_type
Kokkos::DefaultExecutionSpace execution_space
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal order, const Ordinal min_var, const Ordinal max_var)
void test_mean_multiply(const OrdinalType order, const OrdinalType dim, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf, std::vector< double > &pce_perf, std::vector< double > &block_pce_perf)
Legendre polynomial basis.
A comparison functor implementing a strict weak ordering based lexographic ordering.
Abstract base class for 1-D orthogonal polynomials.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Sacado::ETPCE::OrthogPoly< double, Stokhos::StandardStorage< int, double > > pce_type
Stokhos::LegendreBasis< int, double > basis_type
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
void setGlobalCijkTensor(const cijk_type &cijk)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)