102 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm ,
103 const int use_print ,
104 const int use_trials ,
105 const int use_nodes[] ,
107 Kokkos::View< Scalar* , Kokkos::LayoutLeft, Device >& nodal_residual)
111 using Teuchos::rcpFromRef;
112 using Teuchos::arrayView;
113 using Teuchos::ParameterList;
119 typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
127 ElementComputationType ;
132 typedef typename ElementComputationType::vector_type VectorType ;
135 typename FixtureType::comm_list_type ,
136 typename FixtureType::send_nodeid_type ,
137 VectorType > ImportType ;
141 const int print_flag = use_print && std::is_same< Kokkos::HostSpace , typename Device::memory_space >::value ;
143 const int comm_rank = comm->getRank();
144 const int comm_size = comm->getSize();
148 const double bubble_x = 1.0 ;
149 const double bubble_y = 1.0 ;
150 const double bubble_z = 1.0 ;
153 comm_size , comm_rank ,
154 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
155 bubble_x , bubble_y , bubble_z );
159 const ImportType comm_nodal_import(
161 fixture.recv_node() ,
162 fixture.send_node() ,
163 fixture.send_nodeid() ,
164 fixture.node_count_owned() ,
165 fixture.node_count() - fixture.node_count_owned() );
172 CoeffFunctionType diffusion_coefficient( 1.0, 0.1, 1.0, 5 );
178 std::cout <<
"ElemNode {" << std::endl ;
179 for (
unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
180 std::cout <<
" elem[" << ielem <<
"]{" ;
181 for (
unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
182 std::cout <<
" " << fixture.elem_node(ielem,inode);
184 std::cout <<
" }" << std::endl ;
186 std::cout <<
"}" << std::endl ;
191 Kokkos::Timer wall_clock ;
195 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
199 perf.global_elem_count = fixture.elem_count_global();
200 perf.global_node_count = fixture.node_count_global();
207 typename NodeNodeGraphType::Times graph_times;
208 const NodeNodeGraphType
209 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
213 LocalMatrixType jacobian( mesh_to_graph.graph );
218 VectorType nodal_solution(
"nodal_solution" , fixture.node_count() );
219 nodal_residual = VectorType(
"nodal_residual" , fixture.node_count_owned() );
227 const ElementComputationType elemcomp( fixture , diffusion_coefficient ,
229 mesh_to_graph.elem_graph ,
230 jacobian , nodal_residual ,
247 comm_nodal_import( nodal_solution );
250 perf.import_time =
maximum( comm , wall_clock.seconds() );
268 perf.fill_time =
maximum( comm , wall_clock.seconds() );
272 perf_stats.increment(perf);
281 const EnsembleViewType& ensemble_residual)
283 const double tol = 1e-14;
285 Teuchos::RCP<Teuchos::FancyOStream> out =
286 Teuchos::VerboseObjectBase::getDefaultOStream();
287 std::stringstream buf;
288 Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,
false));
290 typename ScalarViewType::HostMirror host_scalar_residual =
292 typename EnsembleViewType::HostMirror host_ensemble_residual =
297 TEUCHOS_TEST_EQUALITY( host_scalar_residual.extent(0),
298 host_ensemble_residual.extent(0), fbuf, success );
300 const size_t num_node = host_scalar_residual.extent(0);
302 for (
size_t i=0; i<num_node; ++i) {
303 for (
size_t j=0;
j<num_ensemble; ++
j) {
304 TEUCHOS_TEST_FLOATING_EQUALITY(
306 tol, fbuf, success );
407 const int use_print ,
408 const int use_trials ,
409 const int use_nodes[] ,
413 if (comm->getRank() == 0) {
414 std::cout.precision(8);
415 std::cout << std::endl
416 <<
"\"Grid Size\" , "
418 <<
"\"Ensemble Size\" , "
419 <<
"\"Scalar Import Time\" , "
420 <<
"\"Ensemble Import Time\" , "
421 <<
"\"Ensemble Import Speedup\" , "
422 <<
"\"Scalar Fill Time\" , "
423 <<
"\"Ensemble Fill Time\" , "
424 <<
"\"Ensemble Fill Speedup\" , "
429 typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
430 PerformanceDriverOp<Storage,Method> op(comm, use_print, use_trials,
431 use_nodes,
check, dev_config);
432 Sacado::mpl::for_each_no_kokkos<Range>
f(op);
Perf fenl_assembly(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], Kokkos::Example::FENL::DeviceConfig dev_config, Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > &nodal_residual)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)