Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraphTransposer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11#define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12
13#include "Tpetra_CrsGraph.hpp"
14#include "Tpetra_Export.hpp"
16#include "Tpetra_Details_makeColMap.hpp"
18#include "Teuchos_ParameterList.hpp"
19#include "Teuchos_TimeMonitor.hpp"
20#include "KokkosSparse_Utils.hpp"
21#include "KokkosKernels_Handle.hpp"
22#include "KokkosSparse_spadd.hpp"
23
24namespace Tpetra {
25
26template <typename GO,
27 typename LocalIndicesType,
28 typename GlobalIndicesType,
29 typename ColMapType>
30struct ConvertLocalToGlobalFunctor {
31 ConvertLocalToGlobalFunctor(
32 const LocalIndicesType& colindsOrig_,
33 const GlobalIndicesType& colindsConverted_,
34 const ColMapType& colmap_)
35 : colindsOrig(colindsOrig_)
36 , colindsConverted(colindsConverted_)
37 , colmap(colmap_) {}
38 KOKKOS_INLINE_FUNCTION void
39 operator()(const GO i) const {
40 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
41 }
42 LocalIndicesType colindsOrig;
43 GlobalIndicesType colindsConverted;
44 ColMapType colmap;
45};
46
47template <class LO, class GO, class LOView, class GOView, class LocalMap>
48struct ConvertGlobalToLocalFunctor {
49 ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
50 : lids(lids_)
51 , gids(gids_)
52 , localColMap(localColMap_) {}
53
54 KOKKOS_FUNCTION void operator()(const GO i) const {
55 lids(i) = localColMap.getLocalElement(gids(i));
56 }
57
58 LOView lids;
59 const GOView gids;
60 const LocalMap localColMap;
61};
62
63template <typename size_type, typename ordinal_type,
64 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
65 typename AcolindsT, typename BcolindsT, typename CcolindsT>
66struct SortedNumericIndicesOnlyFunctor {
67 SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
68 const BrowptrsT& Browptrs_,
69 const CrowptrsT& Crowptrs_,
70 const AcolindsT& Acolinds_,
71 const BcolindsT& Bcolinds_,
72 const CcolindsT& Ccolinds_)
73 : Arowptrs(Arowptrs_)
74 , Browptrs(Browptrs_)
75 , Crowptrs(Crowptrs_)
76 , Acolinds(Acolinds_)
77 , Bcolinds(Bcolinds_)
78 , Ccolinds(Ccolinds_) {}
79
80 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
81 const ordinal_type ORDINAL_MAX = KokkosKernels::ArithTraits<ordinal_type>::max();
82
83 // count the union of nonzeros in Arow and Brow
84 size_type ai = 0;
85 size_type bi = 0;
86 size_type Arowstart = Arowptrs(i);
87 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
88 size_type Browstart = Browptrs(i);
89 size_type Browlen = Browptrs(i + 1) - Browstart;
90 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
91 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
92 size_type Coffset = Crowptrs(i);
93 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX) {
94 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
95 while (Acol == Ccol) {
96 ai++;
97 if (ai == Arowlen)
98 Acol = ORDINAL_MAX;
99 else
100 Acol = Acolinds(Arowstart + ai);
101 }
102 while (Bcol == Ccol) {
103 bi++;
104 if (bi == Browlen)
105 Bcol = ORDINAL_MAX;
106 else
107 Bcol = Bcolinds(Browstart + bi);
108 }
109 Ccolinds(Coffset) = Ccol;
110 Coffset++;
111 }
112 }
113
114 const ArowptrsT Arowptrs;
115 const BrowptrsT Browptrs;
116 const CrowptrsT Crowptrs;
117 const AcolindsT Acolinds;
118 const BcolindsT Bcolinds;
119 CcolindsT Ccolinds;
120};
121
122template <typename size_type, typename ordinal_type,
123 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
124 typename AcolindsT, typename BcolindsT, typename CcolindsT>
125struct UnsortedNumericIndicesOnlyFunctor {
126 UnsortedNumericIndicesOnlyFunctor(
127 const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
128 const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
129 const CcolindsT Apos_, const CcolindsT Bpos_)
130 : Arowptrs(Arowptrs_)
131 , Browptrs(Browptrs_)
132 , Crowptrs(Crowptrs_)
133 , Acolinds(Acolinds_)
134 , Bcolinds(Bcolinds_)
135 , Ccolinds(Ccolinds_)
136 , Apos(Apos_)
137 , Bpos(Bpos_) {}
138
139 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
140 size_type CrowStart = Crowptrs(i);
141 size_type ArowStart = Arowptrs(i);
142 size_type ArowEnd = Arowptrs(i + 1);
143 size_type BrowStart = Browptrs(i);
144 size_type BrowEnd = Browptrs(i + 1);
145 // add in A entries, while setting C colinds
146 for (size_type j = ArowStart; j < ArowEnd; j++) {
147 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
148 }
149 // add in B entries, while setting C colinds
150 for (size_type j = BrowStart; j < BrowEnd; j++) {
151 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
152 }
153 }
154 const ArowptrsT Arowptrs;
155 const BrowptrsT Browptrs;
156 const CrowptrsT Crowptrs;
157 const AcolindsT Acolinds;
158 const BcolindsT Bcolinds;
159 CcolindsT Ccolinds;
160 const CcolindsT Apos;
161 const CcolindsT Bpos;
162};
163
164template <class LocalOrdinal,
165 class GlobalOrdinal,
166 class Node>
168 CrsGraphTransposer(const Teuchos::RCP<const crs_graph_type>& origGraph)
169 : origGraph_(origGraph) {}
170
171template <class LocalOrdinal,
172 class GlobalOrdinal,
173 class Node>
174Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
176 symmetrize(const Teuchos::RCP<Teuchos::ParameterList>& params) {
177 using Teuchos::RCP;
178 using device_type = typename Node::device_type;
179 using execution_space = typename device_type::execution_space;
180 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
181 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
182 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
183 using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type;
184 using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
185 using local_map_type = typename map_type::local_map_type;
186 using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
187
188 auto graph = origGraph_;
189 auto domain_map = graph->getDomainMap();
190 auto range_map = graph->getRangeMap();
191 auto row_map = graph->getRowMap();
192 auto col_map = graph->getColMap();
195
197 TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
198
199 // Do the transpose
200 RCP<crs_graph_type> graphT = createTranspose(params);
201
202 auto col_map_T = graphT->getColMap();
203 TEUCHOS_ASSERT(!col_map_T.is_null());
204 TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
205
206 bool graphSorted = graph->isSorted();
207 bool graphTSorted = graphT->isSorted();
209 bool matchingColMaps = col_map->isSameAs(*col_map_T);
210
211 auto lclGraph = graph->getLocalGraphDevice();
212 auto lclGraphT = graphT->getLocalGraphDevice();
213
214 using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
215 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
216 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
217 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
218
219 auto rowptrs = lclGraph.row_map;
220 auto rowptrsT = lclGraphT.row_map;
221 auto colinds = lclGraph.entries;
222 auto colindsT = lclGraphT.entries;
223
224 auto nrows = rowptrs.extent(0) - 1;
225 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
226
227 col_inds_array colindsSym;
228
229 if (!matchingColMaps) {
230 // convert indices of local graph to GlobalOrdinal
231 auto lclColmap = col_map->getLocalMap();
232 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
234 Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
235
236 // convert indices of local graphT to GlobalOrdinal
237 auto lclColmapT = col_map_T->getLocalMap();
238 global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
240 Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
241
242 // sum graph and graphT in GlobalOrdinal
243 KKH_GO handle;
244 handle.create_spadd_handle(false);
245 auto addHandle = handle.get_spadd_handle();
246
247 global_col_inds_array globalColindsSym;
248
249 KokkosSparse::spadd_symbolic(&handle,
250 nrows, graph->getGlobalNumCols(),
251 rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
252 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
253
254 UnsortedNumericIndicesOnlyFunctor<
256 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
257 typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
259 colindsConverted, colindsTConverted, globalColindsSym,
260 addHandle->get_a_pos(), addHandle->get_b_pos());
261 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
262 range_type(0, nrows), unsortedNumeric);
263
264 // build column map for graphSym
265 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>(col_map_sym, domain_map, globalColindsSym);
266
267 // convert indices of local graphSym to LocalOrdinal
268 auto lclColmapSym = col_map_sym->getLocalMap();
269 colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
271 Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
272
273 } else {
274 // sum graph and graphT in LocalOrdinal
275 KKH_LO handle;
276 handle.create_spadd_handle(sorted);
277 auto addHandle = handle.get_spadd_handle();
278
279 KokkosSparse::spadd_symbolic(&handle,
280 nrows, graph->getGlobalNumCols(),
282 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
283
284 if (sorted) {
285 SortedNumericIndicesOnlyFunctor<
287 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
288 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
291 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
292 range_type(0, nrows), sortedNumeric);
293
294 } else {
295 UnsortedNumericIndicesOnlyFunctor<
297 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
298 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
301 addHandle->get_a_pos(), addHandle->get_b_pos());
302 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
303 range_type(0, nrows), unsortedNumeric);
304 }
305
306 // column map for graphSym is graph's column map
308 importer = graph->getImporter();
309 }
310
311 bool sort = true;
312 if (sort)
313 KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
314
315 local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
316
318 if (!sort) {
319 graphParams = rcp(new Teuchos::ParameterList);
320 graphParams->set("sorted", false);
321 }
322
323 return rcp(new crs_graph_type(lclGraphSym,
324 row_map,
327 range_map,
328 importer,
329 Teuchos::null,
330 graphParams));
331}
332
333template <class LocalOrdinal,
334 class GlobalOrdinal,
335 class Node>
336Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
338 createTranspose(const Teuchos::RCP<Teuchos::ParameterList>& params) {
339 using Teuchos::RCP;
340 // Do the local transpose
342
343 Tpetra::Details::ProfilingRegion MM("Tpetra: Transpose TAFC");
344
345 // If transGraphWithSharedRows has an exporter, that's what we
346 // want. If it doesn't, the rows aren't actually shared, and we're
347 // done!
350 transGraphWithSharedRows->getExporter();
351 if (exporter.is_null()) {
353 } else {
354 Teuchos::ParameterList labelList;
355 if (!params.is_null()) {
356 const char paramName[] = "compute global constants";
357 labelList.set(paramName, params->get(paramName, true));
358 }
359 // Use the Export object to do a fused Export and fillComplete.
360 // This always sorts the local graph after communication, so
361 // no need to set "sorted = false" in parameters.
363 Teuchos::null, Teuchos::rcpFromRef(labelList));
364 }
365}
366
367template <class LocalOrdinal,
368 class GlobalOrdinal,
369 class Node>
370Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
372 createTransposeLocal(const Teuchos::RCP<Teuchos::ParameterList>& params) {
373 using Teuchos::RCP;
374 using Teuchos::rcp;
375 using Teuchos::rcp_dynamic_cast;
376 using LO = LocalOrdinal;
377 using GO = GlobalOrdinal;
378 using import_type = Tpetra::Import<LO, GO, Node>;
379 using export_type = Tpetra::Export<LO, GO, Node>;
380
381 Tpetra::Details::ProfilingRegion MM("Tpetra: Transpose Local");
382
383 const bool sort = [&]() {
384 constexpr bool sortDefault = true; // see #4607 discussion
385 const char sortParamName[] = "sort";
386 return params.get() == nullptr ? sortDefault : params->get(sortParamName, sortDefault);
387 }();
388
389 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
390 local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice();
391
392 // Allocate views and call the other version of transpose_graph
393 using c_rowmap_t = typename local_graph_device_type::row_map_type;
394 using c_entries_t = typename local_graph_device_type::entries_type;
395 using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
396 using entries_t = typename local_graph_device_type::entries_type::non_const_type;
397 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
398 rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
400 Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
401 KokkosSparse::Impl::transpose_graph<
404 rowmap_t, typename local_graph_device_type::execution_space>(
405 lclGraph.numRows(), numCols,
406 lclGraph.row_map, lclGraph.entries,
408
409 if (sort)
410 KokkosSparse::sort_crs_graph<
411 typename local_graph_device_type::execution_space,
415
416 // And construct the transpose local_graph_device_type
417 local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
418
419 // Prebuild the importers and exporters the no-communication way,
420 // flipping the importers and exporters around.
421 const auto origExport = origGraph_->getExporter();
422 RCP<const import_type> myImport = origExport.is_null() ? Teuchos::null : rcp(new import_type(*origExport));
423 const auto origImport = origGraph_->getImporter();
424 RCP<const export_type> myExport = origImport.is_null() ? Teuchos::null : rcp(new export_type(*origImport));
425
427 if (!sort) {
428 graphParams = rcp(new Teuchos::ParameterList);
429 graphParams->set("sorted", false);
430 }
431
432 return rcp(new crs_graph_type(lclGraphT,
433 origGraph_->getColMap(),
434 origGraph_->getRowMap(),
435 origGraph_->getRangeMap(),
436 origGraph_->getDomainMap(),
438}
439
440//
441// Explicit instantiation macro
442//
443// Must be expanded from within the Tpetra namespace!
444//
445
446#define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO, GO, NODE) \
447 template class CrsGraphTransposer<LO, GO, NODE>;
448
449} // namespace Tpetra
450
451#endif
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph)
Constructor that takes the graph to transpose.
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.