74 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
75 LO& numNonAggregatedNodes)
const {
77 int minNodesPerAggregate = params.get<
int> (
"aggregation: min agg size");
78 int maxNodesPerAggregate = params.get<
int> (
"aggregation: max agg size");
80 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
82 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
89 if(params.get<
bool>(
"aggregation: deterministic"))
91 Monitor m(*
this,
"BuildAggregatesDeterministic");
92 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
93 aggregates, aggStat, numNonAggregatedNodes);
95 Monitor m(*
this,
"BuildAggregatesRandom");
96 BuildAggregatesRandom(maxNodesPerAggregate, graph,
97 aggregates, aggStat, numNonAggregatedNodes);
// NOTE(review): this excerpt begins inside a parameter list; the function name and its leading
// parameters are not visible. The body uses graph, aggregates and maxAggSize, which are presumably
// declared in that missing part - confirm against the full file.
// aggStat is the per-node status view read and updated by the kernels below;
// numNonAggregatedNodes is decremented in place once the kernels have run.
107 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
108 LO& numNonAggregatedNodes)
const
// Row count that bounds every kernel range, and the rank written into procWinner for claimed nodes.
110 const LO numRows = graph.GetNodeNumVertices();
111 const int myRank = graph.GetComm()->getRank();
// Read-write device views of the node-to-aggregate map and the owner map, plus the graph colors
// and the local graph used for neighbor lookups.
114 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
115 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
116 auto colors = aggregates.GetGraphColors();
118 auto lclLWGraph = graph.getLocalLWGraph();
// numAggregatedNodes accumulates the nodes claimed in this pass; the aggregate count starts from
// whatever the aggregates object already holds.
120 LO numAggregatedNodes = 0;
121 LO numLocalAggregates = aggregates.GetNumAggregates();
// Rank-0 device view used as a shared counter, seeded with the current aggregate count.
122 Kokkos::View<LO, device_type> aggCount(
"aggCount");
123 Kokkos::deep_copy(aggCount, numLocalAggregates);
// Kernel 1: every READY node of color 1 takes the next aggregate id from the counter.
// atomic_fetch_add returns the value held before the increment, so new ids continue from the
// seeded count; which node gets which id depends on the order in which threads hit the counter.
124 Kokkos::parallel_for(
"Aggregation Phase 1: initial reduction over color == 1",
125 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
126 KOKKOS_LAMBDA (
const LO nodeIdx) {
127 if(colors(nodeIdx) == 1 && aggStat(nodeIdx) ==
READY) {
128 const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129 vertex2AggId(nodeIdx, 0) = aggIdx;
131 procWinner(nodeIdx, 0) = myRank;
// NOTE(review): the remainder of this kernel and its closing lines are not visible in this excerpt.
// Taken together, the next three statements add (updated count - previous count) to
// numAggregatedNodes, i.e. one node per aggregate created by the kernel above, while also
// refreshing numLocalAggregates from the device counter.
137 numAggregatedNodes -= numLocalAggregates;
138 Kokkos::deep_copy(numLocalAggregates, aggCount);
139 numAggregatedNodes += numLocalAggregates;
// One size slot per aggregate. The ScatterView lets kernel threads accumulate into the sizes
// concurrently; contribute() below folds the accumulated values back into aggSizesView.
145 Kokkos::View<LO*, device_type> aggSizesView(
"aggSizes", numLocalAggregates);
149 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
// Kernel 2: tally the current size of every aggregate.
150 Kokkos::parallel_for(
"Aggregation Phase 1: compute initial aggregates size",
151 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
152 KOKKOS_LAMBDA (
const LO nodeIdx) {
153 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
// Only nodes with a non-negative aggregate id are counted.
154 if(vertex2AggId(nodeIdx, 0) >= 0)
155 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
157 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
// Kernel 3: nodes that are not color 1 and whose status is READY or NOTSEL scan their neighbors
// for an aggregate to join; the reduction result counts the nodes attached this way.
160 LO tmpNumAggregatedNodes = 0;
161 Kokkos::parallel_reduce(
"Aggregation Phase 1: main parallel_reduce over aggSizes",
162 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
163 KOKKOS_LAMBDA (
const size_t nodeIdx, LO & lNumAggregatedNodes) {
164 if(colors(nodeIdx) != 1
165 && (aggStat(nodeIdx) ==
READY || aggStat(nodeIdx) ==
NOTSEL)) {
168 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
169 for(LO j = 0; j < neighbors.length; ++j) {
170 auto nei = neighbors.colidx(j);
// Candidates are local neighbors of color 1.
// NOTE(review): the tail of this condition is not visible in the excerpt.
171 if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1
176 LO agg = vertex2AggId(nei, 0);
// atomic_fetch_add yields the aggregate size as it was before this thread's update.
// NOTE(review): the increment argument is cut off here - presumably 1; confirm.
177 const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
// Join only when the aggregate was still below maxAggSize before the update.
179 if(aggSize < maxAggSize) {
181 vertex2AggId(nodeIdx, 0) = agg;
182 procWinner(nodeIdx, 0) = myRank;
184 ++lNumAggregatedNodes;
// NOTE(review): presumably the branch taken when the aggregate was already full, undoing the
// size update above; the enclosing else is not visible in this excerpt.
188 Kokkos::atomic_decrement(&aggSizesView(agg));
// A node whose status is NOTSEL at this point is switched back to READY.
195 if(aggStat(nodeIdx) ==
NOTSEL) { aggStat(nodeIdx) =
READY; }
197 }, tmpNumAggregatedNodes);
// Add the nodes attached by the reduction, then report the totals through the caller's counter
// and the aggregates object.
198 numAggregatedNodes += tmpNumAggregatedNodes;
199 numNonAggregatedNodes -= numAggregatedNodes;
202 aggregates.SetNumAggregates(numLocalAggregates);
// NOTE(review): this excerpt begins inside a parameter list; the function name and its leading
// parameters are not visible. The body uses graph, aggregates and maxAggSize, which are presumably
// declared in that missing part - confirm against the full file.
// aggStat is the per-node status view read by the kernels below;
// numNonAggregatedNodes sizes the root list and is decremented in place at the end.
210 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
211 LO& numNonAggregatedNodes)
const
// Row count that bounds the root-collection kernel, and the rank written into procWinner.
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
// Read-write device views of the node-to-aggregate map and the owner map, plus the graph colors
// and the local graph used for neighbor lookups.
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
220 auto lclLWGraph = graph.getLocalLWGraph();
// Copy the current aggregate count into a rank-0 device view (through a host mirror) so the
// aggregation kernel can read it as the base for new aggregate ids.
222 LO numLocalAggregates = aggregates.GetNumAggregates();
223 Kokkos::View<LO, device_type> numLocalAggregatesView(
"Num aggregates");
225 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
226 h_nla() = numLocalAggregates;
227 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
// newRoots has one slot per currently non-aggregated node; numNewRoots counts how many slots the
// kernel below actually fills.
230 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
231 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
232 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Kernel 1: append every READY node of color 1 to newRoots. The slot comes from an atomic
// counter, so the order of entries depends on the order in which threads hit the counter.
235 Kokkos::parallel_for(
"Aggregation Phase 1: building list of new roots",
236 Kokkos::RangePolicy<execution_space>(0, numRows),
237 KOKKOS_LAMBDA(
const LO i)
239 if(colors(i) == 1 && aggStat(i) ==
READY)
242 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
// Bring the root count to the host, then sort the range [0, h_numNewRoots()) of newRoots.
// Aggregate ids below are derived from a root's position in this list, so sorting removes the
// dependence on the append order above.
245 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
247 Kokkos::sort(newRoots, 0, h_numNewRoots());
// Kernel 2: one iteration per root; each root founds the aggregate with id
// (previous aggregate count + position in the sorted list) and pulls in READY local neighbors.
248 LO numAggregated = 0;
249 Kokkos::parallel_reduce(
"Aggregation Phase 1: aggregating nodes",
250 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
251 KOKKOS_LAMBDA(
const LO rootIndex, LO& lnumAggregated)
253 LO root = newRoots(rootIndex);
254 LO aggID = numLocalAggregatesView() + rootIndex;
256 vertex2AggId(root, 0) = aggID;
257 procWinner(root, 0) = myRank;
259 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
260 for(LO n = 0; n < neighOfRoot.length; n++)
262 LO neigh = neighOfRoot(n);
// Only local neighbors whose status is READY are assigned to the root's aggregate.
263 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY)
266 vertex2AggId(neigh, 0) = aggID;
267 procWinner(neigh, 0) = myRank;
// NOTE(review): aggSize is declared and updated on lines not visible in this excerpt; presumably
// it counts the nodes placed in this aggregate and this check stops at maxAggSize - confirm.
270 if(aggSize == maxAggSize)
// Each root contributes its aggSize to the reduction.
277 lnumAggregated += aggSize;
// NOTE(review): the closing of the parallel_reduce (where numAggregated would be passed as the
// result) is not visible in this excerpt.
// Report the totals: remove the newly aggregated nodes from the caller's counter and add one
// aggregate per root to the stored aggregate count.
279 numNonAggregatedNodes -= numAggregated;
281 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());