97 const OrdinalType nGrid,
98 const OrdinalType iterCount,
99 std::vector<double>& scalar_perf,
100 std::vector<double>& block_left_perf,
101 std::vector<double>& block_right_perf)
103 typedef ScalarType value_type;
104 typedef OrdinalType ordinal_type;
106 typedef Kokkos::View< value_type*, execution_space > vector_type;
107 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
109 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
110 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
111 typedef typename matrix_type::values_type matrix_values_type;
116 std::vector< std::vector<size_t> > fem_graph;
117 const size_t fem_length = nGrid * nGrid * nGrid;
123 std::vector<vector_type> x(ensemble_length);
124 std::vector<vector_type> y(ensemble_length);
125 for (ordinal_type e=0; e<ensemble_length; ++e) {
126 x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"x"), fem_length);
127 y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"y"), fem_length);
132 left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing(
"xl"), fem_length, ensemble_length);
133 left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing(
"yl"), fem_length, ensemble_length);
144 matrix_graph_type matrix_graph =
145 Kokkos::create_staticcrsgraph<matrix_graph_type>(
146 std::string(
"test crs graph"), fem_graph);
147 matrix_values_type matrix_values =
148 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length);
149 matrix_type matrix(
"matrix", fem_length, matrix_values, matrix_graph);
157 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158 for (ordinal_type e=0; e<ensemble_length; ++e) {
164 Kokkos::Timer clock ;
165 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
166 for (ordinal_type e=0; e<ensemble_length; ++e) {
172 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
173 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
175 scalar_perf.resize(5);
176 scalar_perf[0] = fem_length;
177 scalar_perf[1] = ensemble_length;
178 scalar_perf[2] = graph_length;
179 scalar_perf[3] = seconds_per_iter;
180 scalar_perf[4] = flops / seconds_per_iter;
188 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
193 Kokkos::Timer clock ;
194 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
199 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
200 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
202 block_left_perf.resize(5);
203 block_left_perf[0] = fem_length;
204 block_left_perf[1] = ensemble_length;
205 block_left_perf[2] = graph_length;
206 block_left_perf[3] = seconds_per_iter;
207 block_left_perf[4] = flops / seconds_per_iter;
216 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
221 Kokkos::Timer clock ;
222 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
227 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
228 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
230 block_right_perf.resize(5);
231 block_right_perf[0] = fem_length;
232 block_right_perf[1] = ensemble_length;
233 block_right_perf[2] = graph_length;
234 block_right_perf[3] = seconds_per_iter;
235 block_right_perf[4] = flops / seconds_per_iter;