Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraphTransposer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11#define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12
13#include "Tpetra_CrsGraph.hpp"
14#include "Tpetra_Export.hpp"
16#include "Tpetra_Details_makeColMap.hpp"
18#include "Teuchos_ParameterList.hpp"
19#include "Teuchos_TimeMonitor.hpp"
20#include "KokkosSparse_Utils.hpp"
21#include "KokkosKernels_Handle.hpp"
22#include "KokkosSparse_spadd.hpp"
23
24namespace Tpetra {
25
26template <typename GO,
27 typename LocalIndicesType,
28 typename GlobalIndicesType,
29 typename ColMapType>
30struct ConvertLocalToGlobalFunctor {
31 ConvertLocalToGlobalFunctor(
32 const LocalIndicesType& colindsOrig_,
33 const GlobalIndicesType& colindsConverted_,
34 const ColMapType& colmap_)
35 : colindsOrig(colindsOrig_)
36 , colindsConverted(colindsConverted_)
37 , colmap(colmap_) {}
38 KOKKOS_INLINE_FUNCTION void
39 operator()(const GO i) const {
40 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
41 }
42 LocalIndicesType colindsOrig;
43 GlobalIndicesType colindsConverted;
44 ColMapType colmap;
45};
46
47template <class LO, class GO, class LOView, class GOView, class LocalMap>
48struct ConvertGlobalToLocalFunctor {
49 ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
50 : lids(lids_)
51 , gids(gids_)
52 , localColMap(localColMap_) {}
53
54 KOKKOS_FUNCTION void operator()(const GO i) const {
55 lids(i) = localColMap.getLocalElement(gids(i));
56 }
57
58 LOView lids;
59 const GOView gids;
60 const LocalMap localColMap;
61};
62
63template <typename size_type, typename ordinal_type,
64 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
65 typename AcolindsT, typename BcolindsT, typename CcolindsT>
66struct SortedNumericIndicesOnlyFunctor {
67 SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
68 const BrowptrsT& Browptrs_,
69 const CrowptrsT& Crowptrs_,
70 const AcolindsT& Acolinds_,
71 const BcolindsT& Bcolinds_,
72 const CcolindsT& Ccolinds_)
73 : Arowptrs(Arowptrs_)
74 , Browptrs(Browptrs_)
75 , Crowptrs(Crowptrs_)
76 , Acolinds(Acolinds_)
77 , Bcolinds(Bcolinds_)
78 , Ccolinds(Ccolinds_) {}
79
80 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
81#if KOKKOS_VERSION >= 40799
82 const ordinal_type ORDINAL_MAX = KokkosKernels::ArithTraits<ordinal_type>::max();
83#else
84 const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
85#endif
86
87 // count the union of nonzeros in Arow and Brow
88 size_type ai = 0;
89 size_type bi = 0;
90 size_type Arowstart = Arowptrs(i);
91 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
92 size_type Browstart = Browptrs(i);
93 size_type Browlen = Browptrs(i + 1) - Browstart;
94 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
95 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
96 size_type Coffset = Crowptrs(i);
97 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX) {
98 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
99 while (Acol == Ccol) {
100 ai++;
101 if (ai == Arowlen)
102 Acol = ORDINAL_MAX;
103 else
104 Acol = Acolinds(Arowstart + ai);
105 }
106 while (Bcol == Ccol) {
107 bi++;
108 if (bi == Browlen)
109 Bcol = ORDINAL_MAX;
110 else
111 Bcol = Bcolinds(Browstart + bi);
112 }
113 Ccolinds(Coffset) = Ccol;
114 Coffset++;
115 }
116 }
117
118 const ArowptrsT Arowptrs;
119 const BrowptrsT Browptrs;
120 const CrowptrsT Crowptrs;
121 const AcolindsT Acolinds;
122 const BcolindsT Bcolinds;
123 CcolindsT Ccolinds;
124};
125
126template <typename size_type, typename ordinal_type,
127 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
128 typename AcolindsT, typename BcolindsT, typename CcolindsT>
129struct UnsortedNumericIndicesOnlyFunctor {
130 UnsortedNumericIndicesOnlyFunctor(
131 const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
132 const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
133 const CcolindsT Apos_, const CcolindsT Bpos_)
134 : Arowptrs(Arowptrs_)
135 , Browptrs(Browptrs_)
136 , Crowptrs(Crowptrs_)
137 , Acolinds(Acolinds_)
138 , Bcolinds(Bcolinds_)
139 , Ccolinds(Ccolinds_)
140 , Apos(Apos_)
141 , Bpos(Bpos_) {}
142
143 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
144 size_type CrowStart = Crowptrs(i);
145 size_type ArowStart = Arowptrs(i);
146 size_type ArowEnd = Arowptrs(i + 1);
147 size_type BrowStart = Browptrs(i);
148 size_type BrowEnd = Browptrs(i + 1);
149 // add in A entries, while setting C colinds
150 for (size_type j = ArowStart; j < ArowEnd; j++) {
151 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
152 }
153 // add in B entries, while setting C colinds
154 for (size_type j = BrowStart; j < BrowEnd; j++) {
155 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
156 }
157 }
158 const ArowptrsT Arowptrs;
159 const BrowptrsT Browptrs;
160 const CrowptrsT Crowptrs;
161 const AcolindsT Acolinds;
162 const BcolindsT Bcolinds;
163 CcolindsT Ccolinds;
164 const CcolindsT Apos;
165 const CcolindsT Bpos;
166};
167
168template <class LocalOrdinal,
169 class GlobalOrdinal,
170 class Node>
172 CrsGraphTransposer(const Teuchos::RCP<const crs_graph_type>& origGraph,
173 const std::string& label)
174 : origGraph_(origGraph)
175 , label_(label) {}
176
177template <class LocalOrdinal,
178 class GlobalOrdinal,
179 class Node>
180Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
182 symmetrize(const Teuchos::RCP<Teuchos::ParameterList>& params) {
183 using Teuchos::RCP;
184 using device_type = typename Node::device_type;
185 using execution_space = typename device_type::execution_space;
186 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
187 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
188 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
189 using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type;
190 using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
191 using local_map_type = typename map_type::local_map_type;
192 using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
193
194 auto graph = origGraph_;
195 auto domain_map = graph->getDomainMap();
196 auto range_map = graph->getRangeMap();
197 auto row_map = graph->getRowMap();
198 auto col_map = graph->getColMap();
201
203 TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
204
205 // Do the transpose
206 RCP<crs_graph_type> graphT = createTranspose(params);
207
208 auto col_map_T = graphT->getColMap();
209 TEUCHOS_ASSERT(!col_map_T.is_null());
210 TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
211
212 bool graphSorted = graph->isSorted();
213 bool graphTSorted = graphT->isSorted();
215 bool matchingColMaps = col_map->isSameAs(*col_map_T);
216
217 auto lclGraph = graph->getLocalGraphDevice();
218 auto lclGraphT = graphT->getLocalGraphDevice();
219
220 using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
221 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
222 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
223 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
224
225 auto rowptrs = lclGraph.row_map;
226 auto rowptrsT = lclGraphT.row_map;
227 auto colinds = lclGraph.entries;
228 auto colindsT = lclGraphT.entries;
229
230 auto nrows = rowptrs.extent(0) - 1;
231 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
232
233 col_inds_array colindsSym;
234
235 if (!matchingColMaps) {
236 // convert indices of local graph to GlobalOrdinal
237 auto lclColmap = col_map->getLocalMap();
238 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
240 Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
241
242 // convert indices of local graphT to GlobalOrdinal
243 auto lclColmapT = col_map_T->getLocalMap();
244 global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
246 Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
247
248 // sum graph and graphT in GlobalOrdinal
249 KKH_GO handle;
250 handle.create_spadd_handle(false);
251 auto addHandle = handle.get_spadd_handle();
252
253 global_col_inds_array globalColindsSym;
254
255 KokkosSparse::spadd_symbolic(&handle,
256 nrows, graph->getGlobalNumCols(),
257 rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
258 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
259
260 UnsortedNumericIndicesOnlyFunctor<
262 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
263 typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
265 colindsConverted, colindsTConverted, globalColindsSym,
266 addHandle->get_a_pos(), addHandle->get_b_pos());
267 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
268 range_type(0, nrows), unsortedNumeric);
269
270 // build column map for graphSym
271 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>(col_map_sym, domain_map, globalColindsSym);
272
273 // convert indices of local graphSym to LocalOrdinal
274 auto lclColmapSym = col_map_sym->getLocalMap();
275 colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
277 Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
278
279 } else {
280 // sum graph and graphT in LocalOrdinal
281 KKH_LO handle;
282 handle.create_spadd_handle(sorted);
283 auto addHandle = handle.get_spadd_handle();
284
285 KokkosSparse::spadd_symbolic(&handle,
286 nrows, graph->getGlobalNumCols(),
288 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
289
290 if (sorted) {
291 SortedNumericIndicesOnlyFunctor<
293 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
294 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
297 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
298 range_type(0, nrows), sortedNumeric);
299
300 } else {
301 UnsortedNumericIndicesOnlyFunctor<
303 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
304 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
307 addHandle->get_a_pos(), addHandle->get_b_pos());
308 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
309 range_type(0, nrows), unsortedNumeric);
310 }
311
312 // column map for graphSym is graph's column map
314 importer = graph->getImporter();
315 }
316
317 bool sort = true;
318 if (sort)
319 KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
320
321 local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
322
324 if (!sort) {
325 graphParams = rcp(new Teuchos::ParameterList);
326 graphParams->set("sorted", false);
327 }
328
329 return rcp(new crs_graph_type(lclGraphSym,
330 row_map,
333 range_map,
334 importer,
335 Teuchos::null,
336 graphParams));
337}
338
339template <class LocalOrdinal,
340 class GlobalOrdinal,
341 class Node>
342Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
344 createTranspose(const Teuchos::RCP<Teuchos::ParameterList>& params) {
345 using Teuchos::RCP;
346 // Do the local transpose
348
349#ifdef HAVE_TPETRA_MMM_TIMINGS
350 const std::string prefix = std::string("Tpetra ") + label_ + ": ";
351 using Teuchos::TimeMonitor;
352 TimeMonitor MM(*TimeMonitor::getNewTimer(prefix + "Transpose TAFC"));
353#endif
354
355 // If transGraphWithSharedRows has an exporter, that's what we
356 // want. If it doesn't, the rows aren't actually shared, and we're
357 // done!
360 transGraphWithSharedRows->getExporter();
361 if (exporter.is_null()) {
363 } else {
364 Teuchos::ParameterList labelList;
365#ifdef HAVE_TPETRA_MMM_TIMINGS
366 labelList.set("Timer Label", label_);
367#endif
368 if (!params.is_null()) {
369 const char paramName[] = "compute global constants";
370 labelList.set(paramName, params->get(paramName, true));
371 }
372 // Use the Export object to do a fused Export and fillComplete.
373 // This always sorts the local graph after communication, so
374 // no need to set "sorted = false" in parameters.
376 Teuchos::null, Teuchos::rcpFromRef(labelList));
377 }
378}
379
380template <class LocalOrdinal,
381 class GlobalOrdinal,
382 class Node>
383Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
385 createTransposeLocal(const Teuchos::RCP<Teuchos::ParameterList>& params) {
386 using Teuchos::RCP;
387 using Teuchos::rcp;
388 using Teuchos::rcp_dynamic_cast;
389 using LO = LocalOrdinal;
390 using GO = GlobalOrdinal;
391 using import_type = Tpetra::Import<LO, GO, Node>;
392 using export_type = Tpetra::Export<LO, GO, Node>;
393
394#ifdef HAVE_TPETRA_MMM_TIMINGS
395 std::string prefix = std::string("Tpetra ") + label_ + ": ";
396 using Teuchos::TimeMonitor;
397 TimeMonitor MM(*TimeMonitor::getNewTimer(prefix + "Transpose Local"));
398#endif
399
400 const bool sort = [&]() {
401 constexpr bool sortDefault = true; // see #4607 discussion
402 const char sortParamName[] = "sort";
403 return params.get() == nullptr ? sortDefault : params->get(sortParamName, sortDefault);
404 }();
405
406 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
407 local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice();
408
409 // Allocate views and call the other version of transpose_graph
410 using c_rowmap_t = typename local_graph_device_type::row_map_type;
411 using c_entries_t = typename local_graph_device_type::entries_type;
412 using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
413 using entries_t = typename local_graph_device_type::entries_type::non_const_type;
414 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
415 rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
417 Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
418 KokkosSparse::Impl::transpose_graph<
421 rowmap_t, typename local_graph_device_type::execution_space>(
422 lclGraph.numRows(), numCols,
423 lclGraph.row_map, lclGraph.entries,
425
426 if (sort)
427 KokkosSparse::sort_crs_graph<
428 typename local_graph_device_type::execution_space,
432
433 // And construct the transpose local_graph_device_type
434 local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
435
436 // Prebuild the importers and exporters the no-communication way,
437 // flipping the importers and exporters around.
438 const auto origExport = origGraph_->getExporter();
439 RCP<const import_type> myImport = origExport.is_null() ? Teuchos::null : rcp(new import_type(*origExport));
440 const auto origImport = origGraph_->getImporter();
441 RCP<const export_type> myExport = origImport.is_null() ? Teuchos::null : rcp(new export_type(*origImport));
442
444 if (!sort) {
445 graphParams = rcp(new Teuchos::ParameterList);
446 graphParams->set("sorted", false);
447 }
448
449 return rcp(new crs_graph_type(lclGraphT,
450 origGraph_->getColMap(),
451 origGraph_->getRowMap(),
452 origGraph_->getRangeMap(),
453 origGraph_->getDomainMap(),
455}
456
457//
458// Explicit instantiation macro
459//
460// Must be expanded from within the Tpetra namespace!
461//
462
463#define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO, GO, NODE) \
464 template class CrsGraphTransposer<LO, GO, NODE>;
465
466} // namespace Tpetra
467
468#endif
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.