Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
advection/advection_hierarchical.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30#define SACADO_VIEW_CUDA_HIERARCHICAL 1
31#define SACADO_ALIGN_SFAD 1
32
33#include "Sacado.hpp"
35#include "common.hpp"
36
37#include "Kokkos_Timer.hpp"
38
39template<typename FluxView, typename WgbView, typename SrcView,
40 typename WbsView, typename ResidualView>
41void run_fad_hierarchical_flat(const FluxView& flux, const WgbView& wgb,
42 const SrcView& src, const WbsView& wbs,
43 const ResidualView& residual)
44{
45 typedef typename ResidualView::execution_space execution_space;
46 typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
47 typedef Kokkos::TeamPolicy<execution_space> policy_type;
48 typedef typename policy_type::member_type team_member;
49
50 const size_t num_cells = wgb.extent(0);
51 const int num_basis = wgb.extent(1);
52 const int num_points = wgb.extent(2);
53 const int num_dim = wgb.extent(3);
54
55 const bool is_cuda = is_cuda_space<execution_space>::value;
56 const int vector_size = is_cuda ? 32 : 1;
57 const int team_size = is_cuda ? 256/vector_size : 1;
58 const size_t range = (num_cells+team_size-1)/team_size;
59
60 policy_type policy(range,team_size,vector_size);
61 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
62 {
63 const size_t cell = team.league_rank()*team_size + team.team_rank();
64 local_scalar_type value, value2;
65 for (int basis=0; basis<num_basis; ++basis) {
66 value = 0.0;
67 value2 = 0.0;
68 for (int qp=0; qp<num_points; ++qp) {
69 for (int dim=0; dim<num_dim; ++dim)
70 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
71 value2 += src(cell,qp)*wbs(cell,basis,qp);
72 }
73 residual(cell,basis) = value+value2;
74 }
75 });
76}
77
78template<typename FluxView, typename WgbView, typename SrcView,
79 typename WbsView, typename ResidualView>
80void run_fad_hierarchical_team(const FluxView& flux, const WgbView& wgb,
81 const SrcView& src, const WbsView& wbs,
82 const ResidualView& residual)
83{
84 typedef typename ResidualView::execution_space execution_space;
85 typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
86 typedef Kokkos::TeamPolicy<execution_space> policy_type;
87 typedef typename policy_type::member_type team_member;
88
89 const size_t num_cells = wgb.extent(0);
90 const int num_basis = wgb.extent(1);
91 const int num_points = wgb.extent(2);
92 const int num_dim = wgb.extent(3);
93
94 const bool is_cuda = is_cuda_space<execution_space>::value;
95 const int vector_size = is_cuda ? 32 : 1;
96 const int team_size = is_cuda ? 256/vector_size : 1;
97
98 policy_type policy(num_cells,team_size,vector_size);
99 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
100 {
101 const int team_rank = team.team_rank();
102 const size_t cell = team.league_rank();
103 local_scalar_type value, value2;
104 for (int basis=team_rank; basis<num_basis; basis+=team_size) {
105 value = 0.0;
106 value2 = 0.0;
107 for (int qp=0; qp<num_points; ++qp) {
108 for (int dim=0; dim<num_dim; ++dim)
109 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
110 value2 += src(cell,qp)*wbs(cell,basis,qp);
111 }
112 residual(cell,basis) = value+value2;
113 }
114 });
115}
116
117template <typename FadType, int N, typename ExecSpace>
118double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points,
119 int ndim, int ntrial, bool check)
120{
121 static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
122#if defined(SACADO_ALIGN_SFAD)
123 static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
124 typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
125#else
126 typedef FadType AlignedFadType;
127#endif
128
129 typedef typename ExecSpace::array_layout DefaultLayout;
131 typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
132 typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
133 typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
134
135 t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
136 t_3DView flux("",ncells,num_points,ndim,N+1);
137 t_3DView wbs("",ncells,num_basis,num_points,N+1);
138 t_2DView src("",ncells,num_points,N+1);
139 t_2DView residual("",ncells,num_basis,N+1);
140 init_fad(wgb, wbs, flux, src, residual);
141
142 // Run once to warm up, complete any UVM transfers
143 run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
144
145 // Time execution
146 Kokkos::fence();
147 Kokkos::Timer timer;
148 for (int i=0; i<ntrial; ++i)
149 run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
150 Kokkos::fence();
151 double time = timer.seconds() / ntrial / ncells;
152
153 // Check result
154 if (check)
155 check_residual(flux, wgb, src, wbs, residual);
156
157 return time;
158}
159
160template <typename FadType, int N, typename ExecSpace>
161double time_fad_hierarchical_team(int ncells, int num_basis, int num_points,
162 int ndim, int ntrial, bool check)
163{
164 static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
165#if defined(SACADO_ALIGN_SFAD)
166 static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
167 typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
168#else
169 typedef FadType AlignedFadType;
170#endif
171
172 typedef typename ExecSpace::array_layout DefaultLayout;
174 typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
175 typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
176 typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
177
178 t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
179 t_3DView flux("",ncells,num_points,ndim,N+1);
180 t_3DView wbs("",ncells,num_basis,num_points,N+1);
181 t_2DView src("",ncells,num_points,N+1);
182 t_2DView residual("",ncells,num_basis,N+1);
183 init_fad(wgb, wbs, flux, src, residual);
184
185 // Run once to warm up, complete any UVM transfers
186 run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
187
188 // Time execution
189 Kokkos::fence();
190 Kokkos::Timer timer;
191 for (int i=0; i<ntrial; ++i)
192 run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
193 Kokkos::fence();
194 double time = timer.seconds() / ntrial / ncells;
195
196 // Check result
197 if (check)
198 check_residual(flux, wgb, src, wbs, residual);
199
200 return time;
201}
202
203#define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
204 template double time_fad_hierarchical_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
205 template double time_fad_hierarchical_team< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
206
207#define INST_FUNC_DEV(DEV) \
208 INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \
209 INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV )
210
211#ifdef KOKKOS_ENABLE_SERIAL
212INST_FUNC_DEV(Kokkos::Serial)
213#endif
214
215#ifdef KOKKOS_ENABLE_OPENMP
216INST_FUNC_DEV(Kokkos::OpenMP)
217#endif
218
219#ifdef KOKKOS_ENABLE_THREADS
220INST_FUNC_DEV(Kokkos::Threads)
221#endif
222
223#ifdef KOKKOS_ENABLE_CUDA
224INST_FUNC_DEV(Kokkos::Cuda)
225#endif
const int N
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
#define INST_FUNC_DEV(DEV)
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)