Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraphTransposer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11#define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12
13#include "Tpetra_CrsGraph.hpp"
14#include "Tpetra_Export.hpp"
16#include "Tpetra_Details_makeColMap.hpp"
18#include "Teuchos_ParameterList.hpp"
19#include "Teuchos_TimeMonitor.hpp"
20#include "KokkosSparse_Utils.hpp"
21#include "KokkosKernels_Handle.hpp"
22#include "KokkosSparse_spadd.hpp"
24
25namespace Tpetra {
26
27template <typename GO,
28 typename LocalIndicesType,
29 typename GlobalIndicesType,
30 typename ColMapType>
31struct ConvertLocalToGlobalFunctor {
32 ConvertLocalToGlobalFunctor(
33 const LocalIndicesType& colindsOrig_,
34 const GlobalIndicesType& colindsConverted_,
35 const ColMapType& colmap_)
36 : colindsOrig(colindsOrig_)
37 , colindsConverted(colindsConverted_)
38 , colmap(colmap_) {}
39 KOKKOS_INLINE_FUNCTION void
40 operator()(const GO i) const {
41 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
42 }
43 LocalIndicesType colindsOrig;
44 GlobalIndicesType colindsConverted;
45 ColMapType colmap;
46};
47
48template <class LO, class GO, class LOView, class GOView, class LocalMap>
49struct ConvertGlobalToLocalFunctor {
50 ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
51 : lids(lids_)
52 , gids(gids_)
53 , localColMap(localColMap_) {}
54
55 KOKKOS_FUNCTION void operator()(const GO i) const {
56 lids(i) = localColMap.getLocalElement(gids(i));
57 }
58
59 LOView lids;
60 const GOView gids;
61 const LocalMap localColMap;
62};
63
64template <typename size_type, typename ordinal_type,
65 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
66 typename AcolindsT, typename BcolindsT, typename CcolindsT>
67struct SortedNumericIndicesOnlyFunctor {
68 SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
69 const BrowptrsT& Browptrs_,
70 const CrowptrsT& Crowptrs_,
71 const AcolindsT& Acolinds_,
72 const BcolindsT& Bcolinds_,
73 const CcolindsT& Ccolinds_)
74 : Arowptrs(Arowptrs_)
75 , Browptrs(Browptrs_)
76 , Crowptrs(Crowptrs_)
77 , Acolinds(Acolinds_)
78 , Bcolinds(Bcolinds_)
79 , Ccolinds(Ccolinds_) {}
80
81 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
82 const ordinal_type ORDINAL_MAX = KokkosKernels::ArithTraits<ordinal_type>::max();
83
84 // count the union of nonzeros in Arow and Brow
85 size_type ai = 0;
86 size_type bi = 0;
87 size_type Arowstart = Arowptrs(i);
88 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
89 size_type Browstart = Browptrs(i);
90 size_type Browlen = Browptrs(i + 1) - Browstart;
91 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
92 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
93 size_type Coffset = Crowptrs(i);
94 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX) {
95 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
96 while (Acol == Ccol) {
97 ai++;
98 if (ai == Arowlen)
99 Acol = ORDINAL_MAX;
100 else
101 Acol = Acolinds(Arowstart + ai);
102 }
103 while (Bcol == Ccol) {
104 bi++;
105 if (bi == Browlen)
106 Bcol = ORDINAL_MAX;
107 else
108 Bcol = Bcolinds(Browstart + bi);
109 }
110 Ccolinds(Coffset) = Ccol;
111 Coffset++;
112 }
113 }
114
115 const ArowptrsT Arowptrs;
116 const BrowptrsT Browptrs;
117 const CrowptrsT Crowptrs;
118 const AcolindsT Acolinds;
119 const BcolindsT Bcolinds;
120 CcolindsT Ccolinds;
121};
122
123template <typename size_type, typename ordinal_type,
124 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
125 typename AcolindsT, typename BcolindsT, typename CcolindsT>
126struct UnsortedNumericIndicesOnlyFunctor {
127 UnsortedNumericIndicesOnlyFunctor(
128 const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
129 const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
130 const CcolindsT Apos_, const CcolindsT Bpos_)
131 : Arowptrs(Arowptrs_)
132 , Browptrs(Browptrs_)
133 , Crowptrs(Crowptrs_)
134 , Acolinds(Acolinds_)
135 , Bcolinds(Bcolinds_)
136 , Ccolinds(Ccolinds_)
137 , Apos(Apos_)
138 , Bpos(Bpos_) {}
139
140 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
141 size_type CrowStart = Crowptrs(i);
142 size_type ArowStart = Arowptrs(i);
143 size_type ArowEnd = Arowptrs(i + 1);
144 size_type BrowStart = Browptrs(i);
145 size_type BrowEnd = Browptrs(i + 1);
146 // add in A entries, while setting C colinds
147 for (size_type j = ArowStart; j < ArowEnd; j++) {
148 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
149 }
150 // add in B entries, while setting C colinds
151 for (size_type j = BrowStart; j < BrowEnd; j++) {
152 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
153 }
154 }
155 const ArowptrsT Arowptrs;
156 const BrowptrsT Browptrs;
157 const CrowptrsT Crowptrs;
158 const AcolindsT Acolinds;
159 const BcolindsT Bcolinds;
160 CcolindsT Ccolinds;
161 const CcolindsT Apos;
162 const CcolindsT Bpos;
163};
164
165template <class LocalOrdinal,
166 class GlobalOrdinal,
167 class Node>
169 CrsGraphTransposer(const Teuchos::RCP<const crs_graph_type>& origGraph)
170 : origGraph_(origGraph) {}
171
172template <class LocalOrdinal,
173 class GlobalOrdinal,
174 class Node>
175Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
177 symmetrize(const Teuchos::RCP<Teuchos::ParameterList>& params) {
178 using Teuchos::RCP;
179 using device_type = typename Node::device_type;
180 using execution_space = typename device_type::execution_space;
181 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
182 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
183 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
184 using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type;
185 using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
186 using local_map_type = typename map_type::local_map_type;
187 using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
188
189 auto graph = origGraph_;
190 auto domain_map = graph->getDomainMap();
191 auto range_map = graph->getRangeMap();
192 auto row_map = graph->getRowMap();
193 auto col_map = graph->getColMap();
196
198 TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
199
200 // Do the transpose
201 RCP<crs_graph_type> graphT = createTranspose(params);
202
203 auto col_map_T = graphT->getColMap();
204 TEUCHOS_ASSERT(!col_map_T.is_null());
205 TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
206
207 bool graphSorted = graph->isSorted();
208 bool graphTSorted = graphT->isSorted();
210 bool matchingColMaps = col_map->isSameAs(*col_map_T);
211
212 auto lclGraph = graph->getLocalGraphDevice();
213 auto lclGraphT = graphT->getLocalGraphDevice();
214
215 using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
216 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
217 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
218 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
219
220 auto rowptrs = lclGraph.row_map;
221 auto rowptrsT = lclGraphT.row_map;
222 auto colinds = lclGraph.entries;
223 auto colindsT = lclGraphT.entries;
224
225 auto nrows = rowptrs.extent(0) - 1;
226 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
227
228 col_inds_array colindsSym;
229
230 if (!matchingColMaps) {
231 // convert indices of local graph to GlobalOrdinal
232 auto lclColmap = col_map->getLocalMap();
233 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
235 Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
236
237 // convert indices of local graphT to GlobalOrdinal
238 auto lclColmapT = col_map_T->getLocalMap();
239 global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
241 Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
242
243 // sum graph and graphT in GlobalOrdinal
244 KKH_GO handle;
245 handle.create_spadd_handle(false);
246 auto addHandle = handle.get_spadd_handle();
247
248 global_col_inds_array globalColindsSym;
249
250 KokkosSparse::spadd_symbolic(&handle,
251 nrows, graph->getGlobalNumCols(),
252 rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
253 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
254
255 UnsortedNumericIndicesOnlyFunctor<
257 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
258 typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
260 colindsConverted, colindsTConverted, globalColindsSym,
261 addHandle->get_a_pos(), addHandle->get_b_pos());
262 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
263 range_type(0, nrows), unsortedNumeric);
264
265 // build column map for graphSym
266 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>(col_map_sym, domain_map, globalColindsSym);
267
268 // convert indices of local graphSym to LocalOrdinal
269 auto lclColmapSym = col_map_sym->getLocalMap();
270 colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
272 Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
273
274 } else {
275 // sum graph and graphT in LocalOrdinal
276 KKH_LO handle;
277 handle.create_spadd_handle(sorted);
278 auto addHandle = handle.get_spadd_handle();
279
280 KokkosSparse::spadd_symbolic(&handle,
281 nrows, graph->getGlobalNumCols(),
283 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
284
285 if (sorted) {
286 SortedNumericIndicesOnlyFunctor<
288 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
289 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
292 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
293 range_type(0, nrows), sortedNumeric);
294
295 } else {
296 UnsortedNumericIndicesOnlyFunctor<
298 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
299 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
302 addHandle->get_a_pos(), addHandle->get_b_pos());
303 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
304 range_type(0, nrows), unsortedNumeric);
305 }
306
307 // column map for graphSym is graph's column map
309 importer = graph->getImporter();
310 }
311
312 bool sort = true;
313 if (sort)
314 Import_Util::sortCrsEntries(rowptrsSym, colindsSym);
315
316 local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
317
319 if (!sort) {
320 graphParams = rcp(new Teuchos::ParameterList);
321 graphParams->set("sorted", false);
322 }
323
324 return rcp(new crs_graph_type(lclGraphSym,
325 row_map,
328 range_map,
329 importer,
330 Teuchos::null,
331 graphParams));
332}
333
334template <class LocalOrdinal,
335 class GlobalOrdinal,
336 class Node>
337Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
339 createTranspose(const Teuchos::RCP<Teuchos::ParameterList>& params) {
340 using Teuchos::RCP;
341 // Do the local transpose
343
344 Tpetra::Details::ProfilingRegion MM("Tpetra: Transpose TAFC");
345
346 // If transGraphWithSharedRows has an exporter, that's what we
347 // want. If it doesn't, the rows aren't actually shared, and we're
348 // done!
351 transGraphWithSharedRows->getExporter();
352 if (exporter.is_null()) {
354 } else {
355 Teuchos::ParameterList labelList;
356 if (!params.is_null()) {
357 const char paramName[] = "compute global constants";
358 labelList.set(paramName, params->get(paramName, true));
359 }
360 // Use the Export object to do a fused Export and fillComplete.
361 // This always sorts the local graph after communication, so
362 // no need to set "sorted = false" in parameters.
364 Teuchos::null, Teuchos::rcpFromRef(labelList));
365 }
366}
367
368template <class LocalOrdinal,
369 class GlobalOrdinal,
370 class Node>
371Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
373 createTransposeLocal(const Teuchos::RCP<Teuchos::ParameterList>& params) {
374 using Teuchos::RCP;
375 using Teuchos::rcp;
376 using Teuchos::rcp_dynamic_cast;
377 using LO = LocalOrdinal;
378 using GO = GlobalOrdinal;
379 using import_type = Tpetra::Import<LO, GO, Node>;
380 using export_type = Tpetra::Export<LO, GO, Node>;
381
382 Tpetra::Details::ProfilingRegion MM("Tpetra: Transpose Local");
383
384 const bool sort = [&]() {
385 constexpr bool sortDefault = true; // see #4607 discussion
386 const char sortParamName[] = "sort";
387 return params.get() == nullptr ? sortDefault : params->get(sortParamName, sortDefault);
388 }();
389
390 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
391 local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice();
392
393 // Allocate views and call the other version of transpose_graph
394 using c_rowmap_t = typename local_graph_device_type::row_map_type;
395 using c_entries_t = typename local_graph_device_type::entries_type;
396 using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
397 using entries_t = typename local_graph_device_type::entries_type::non_const_type;
398 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
399 rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
401 Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
402 KokkosSparse::Impl::transpose_graph<
405 rowmap_t, typename local_graph_device_type::execution_space>(
406 lclGraph.numRows(), numCols,
407 lclGraph.row_map, lclGraph.entries,
409
410 if (sort)
411 Import_Util::sortCrsEntries(lclGraphT_rowmap, lclGraphT_entries);
412
413 // And construct the transpose local_graph_device_type
414 local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
415
416 // Prebuild the importers and exporters the no-communication way,
417 // flipping the importers and exporters around.
418 const auto origExport = origGraph_->getExporter();
419 RCP<const import_type> myImport = origExport.is_null() ? Teuchos::null : rcp(new import_type(*origExport));
420 const auto origImport = origGraph_->getImporter();
421 RCP<const export_type> myExport = origImport.is_null() ? Teuchos::null : rcp(new export_type(*origImport));
422
424 if (!sort) {
425 graphParams = rcp(new Teuchos::ParameterList);
426 graphParams->set("sorted", false);
427 }
428
429 return rcp(new crs_graph_type(lclGraphT,
430 origGraph_->getColMap(),
431 origGraph_->getRowMap(),
432 origGraph_->getRangeMap(),
433 origGraph_->getDomainMap(),
435}
436
437//
438// Explicit instantiation macro
439//
440// Must be expanded from within the Tpetra namespace!
441//
442
443#define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO, GO, NODE) \
444 template class CrsGraphTransposer<LO, GO, NODE>;
445
446} // namespace Tpetra
447
448#endif
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
Utility functions for packing and unpacking sparse matrix entries.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph)
Constructor that takes the graph to transpose.
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.