Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_CRSUTILS_HPP
11#define TPETRA_DETAILS_CRSUTILS_HPP
12#include <numeric>
13#include <type_traits>
14
15#include "TpetraCore_config.h"
16#include "Kokkos_Core.hpp"
18#include "Tpetra_Details_CrsPadding.hpp"
19#include "Tpetra_Details_WrappedDualView.hpp"
20#include <iostream>
21#include <memory>
22#include <unordered_map>
23
29
30namespace Tpetra {
31namespace Details {
32
33namespace impl {
34
35template <class ViewType>
36ViewType
37make_uninitialized_view(
38 const std::string& name,
39 const size_t size,
40 const bool verbose,
41 const std::string* const prefix) {
42 if (verbose) {
43 std::ostringstream os;
44 os << *prefix << "Allocate Kokkos::View " << name
45 << ": " << size << std::endl;
46 std::cerr << os.str();
47 }
48 using Kokkos::view_alloc;
49 using Kokkos::WithoutInitializing;
50 return ViewType(view_alloc(name, WithoutInitializing), size);
51}
52
53template <class ViewType>
54ViewType
55make_initialized_view(
56 const std::string& name,
57 const size_t size,
58 const bool verbose,
59 const std::string* const prefix) {
60 if (verbose) {
61 std::ostringstream os;
62 os << *prefix << "Allocate & initialize Kokkos::View "
63 << name << ": " << size << std::endl;
64 std::cerr << os.str();
65 }
66 return ViewType(name, size);
67}
68
69template <class OutViewType, class InViewType>
70void assign_to_view(OutViewType& out,
71 const InViewType& in,
72 const char viewName[],
73 const bool verbose,
74 const std::string* const prefix) {
75 if (verbose) {
76 std::ostringstream os;
77 os << *prefix << "Assign to Kokkos::View " << viewName
78 << ": Old size: " << out.extent(0)
79 << ", New size: " << in.extent(0) << std::endl;
80 std::cerr << os.str();
81 }
82 out = in;
83}
84
85template <class MemorySpace, class ViewType>
86auto create_mirror_view(
87 const MemorySpace& memSpace,
88 const ViewType& view,
89 const bool verbose,
90 const std::string* const prefix) -> decltype(Kokkos::create_mirror_view(memSpace, view)) {
91 if (verbose) {
92 std::ostringstream os;
93 os << *prefix << "Create mirror view: "
94 << "view.extent(0): " << view.extent(0) << std::endl;
95 std::cerr << os.str();
96 }
97 return Kokkos::create_mirror_view(memSpace, view);
98}
99
100enum class PadCrsAction {
101 INDICES_ONLY,
102 INDICES_AND_VALUES
103};
104
113template <class RowPtr, class Indices, class Values, class Padding>
115 const PadCrsAction action,
116 const RowPtr& row_ptr_beg,
117 const RowPtr& row_ptr_end,
120 const Padding& padding,
121 const int my_rank,
122 const bool verbose) {
123 using execution_space = typename Indices::t_dev::execution_space;
124 using Kokkos::view_alloc;
125 using Kokkos::WithoutInitializing;
126 using std::endl;
127 std::unique_ptr<std::string> prefix;
128
129 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
130 if (verbose) {
131 std::ostringstream os;
132 os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: ";
133 prefix = std::unique_ptr<std::string>(new std::string(os.str()));
134 os << "Start" << endl;
135 std::cerr << os.str();
136 }
137 Kokkos::HostSpace hostSpace;
138
139 if (verbose) {
140 std::ostringstream os;
141 os << *prefix << "On input: ";
142 auto row_ptr_beg_h =
143 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
144 // DEEP_COPY REVIEW - NOT TESTED
145 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
146 verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan",
148 os << ", ";
149 auto row_ptr_end_h =
150 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
151 // DEEP_COPY REVIEW - NOT TESTED
152 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
153 verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan",
155 os << ", indices.extent(0): " << indices_wdv.extent(0)
156 << ", values.extent(0): " << values_wdv.extent(0)
157 << ", padding: ";
158 padding.print(os);
159 os << endl;
160 std::cerr << os.str();
161 }
162
163 if (row_ptr_beg.size() == 0) {
164 if (verbose) {
165 std::ostringstream os;
166 os << *prefix << "Done; local matrix has no rows" << endl;
167 std::cerr << os.str();
168 }
169 return; // nothing to do
170 }
171
172 const size_t lclNumRows(row_ptr_beg.size() - 1);
175 verbose, prefix.get());
176 if (verbose) {
177 std::ostringstream os;
178 os << *prefix << "Fill newAllocPerRow & compute increase" << endl;
179 std::cerr << os.str();
180 }
181 size_t increase = 0;
182 {
183 // Must do on host because padding uses std::map
184 execution_space exec_space_instance = execution_space();
185 auto row_ptr_end_h = create_mirror_view(
186 hostSpace, row_ptr_end, verbose, prefix.get());
187 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
188 Kokkos::deep_copy(exec_space_instance, row_ptr_end_h, row_ptr_end);
189 auto row_ptr_beg_h = create_mirror_view(
190 hostSpace, row_ptr_beg, verbose, prefix.get());
191 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
192 Kokkos::deep_copy(exec_space_instance, row_ptr_beg_h, row_ptr_beg);
193
194 // lbv 03/15/23: The execution space deep_copy does an asynchronous
195 // copy so we really want to fence that space before touching the
196 // host data as it is not guarenteed to have arrived by the time we
197 // start the parallel_reduce below which might use a different
198 // execution space, see:
199 // https://kokkos.github.io/kokkos-core-wiki/API/core/view/deep_copy.html#semantics
200 exec_space_instance.fence();
201
202 auto newAllocPerRow_h = create_mirror_view(
203 hostSpace, newAllocPerRow, verbose, prefix.get());
204 using host_range_type = Kokkos::RangePolicy<
205 Kokkos::DefaultHostExecutionSpace, size_t>;
206 Kokkos::parallel_reduce(
207 "Tpetra::CrsGraph: Compute new allocation size per row",
209 [&](const size_t lclRowInd, size_t& lclIncrease) {
210 const size_t start = row_ptr_beg_h[lclRowInd];
211 const size_t end = row_ptr_beg_h[lclRowInd + 1];
212 TEUCHOS_ASSERT(end >= start);
213 const size_t oldAllocSize = end - start;
214 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
216
217 // This is not a pack routine. Do not shrink! Shrinking now
218 // to fit the number of entries would ignore users' hint for
219 // the max number of entries in each row. Also, CrsPadding
220 // only counts entries and ignores any available free space.
221
222 auto result = padding.get_result(lclRowInd);
223 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
224 if (newNumEnt > oldAllocSize) {
227 } else {
229 }
230 },
231 increase);
232
233 if (verbose) {
234 std::ostringstream os;
235 os << *prefix << "increase: " << increase << ", ";
236 verbosePrintArray(os, newAllocPerRow_h, "newAllocPerRow",
238 os << endl;
239 std::cerr << os.str();
240 }
241
242 if (increase == 0) {
243 return;
244 }
245 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
246 Kokkos::deep_copy(execution_space(), newAllocPerRow, newAllocPerRow_h);
247 }
248
249 using inds_value_type =
250 typename Indices::t_dev::non_const_value_type;
251 using vals_value_type = typename Values::t_dev::non_const_value_type;
252
253 {
254 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
255 const size_t newIndsSize = size_t(indices_old.size()) + increase;
257 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
258 prefix.get());
259
260 typename Values::t_dev values_new;
261 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
262 if (action == PadCrsAction::INDICES_AND_VALUES) {
263 const size_t newValsSize = newIndsSize;
264 // NOTE (mfh 10 Feb 2020) If we don't initialize values_new here,
265 // then the CrsMatrix tests fail.
267 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
268 }
269
270 if (verbose) {
271 std::ostringstream os;
272 os << *prefix << "Repack" << endl;
273 std::cerr << os.str();
274 }
275
276 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
277 Kokkos::parallel_scan(
278 "Tpetra::CrsGraph or CrsMatrix repack",
279 range_type(size_t(0), size_t(lclNumRows + 1)),
280 KOKKOS_LAMBDA(const size_t lclRow, size_t& newRowBeg,
281 const bool finalPass) {
282 // row_ptr_beg has lclNumRows + 1 entries.
283 // row_ptr_end has lclNumRows entries.
284 // newAllocPerRow has lclNumRows entries.
285 const size_t row_beg = row_ptr_beg[lclRow];
286 const size_t row_end =
288 const size_t numEnt = row_end - row_beg;
289 const size_t newRowAllocSize =
291 if (finalPass) {
292 if (lclRow < lclNumRows) {
293 const Kokkos::pair<size_t, size_t> oldRange(
295 const Kokkos::pair<size_t, size_t> newRange(
297 auto oldColInds = Kokkos::subview(indices_old, oldRange);
298 auto newColInds = Kokkos::subview(indices_new, newRange);
299 // memcpy works fine on device; the next step is to
300 // introduce two-level parallelism and use team copy.
301 memcpy(newColInds.data(), oldColInds.data(),
302 numEnt * sizeof(inds_value_type));
303 if (action == PadCrsAction::INDICES_AND_VALUES) {
304 auto oldVals =
305 Kokkos::subview(values_old, oldRange);
306 auto newVals = Kokkos::subview(values_new, newRange);
307 memcpy((void*)newVals.data(), oldVals.data(),
308 numEnt * sizeof(vals_value_type));
309 }
310 }
311 // It's the final pass, so we can modify these arrays.
313 if (lclRow < lclNumRows) {
315 }
316 }
318 });
319
320 if (verbose) {
321 std::ostringstream os;
322
323 os << *prefix;
324 auto row_ptr_beg_h =
325 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
326 // DEEP_COPY REVIEW - NOT TESTED
327 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
328 verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan",
330 os << endl;
331
332 os << *prefix;
333 auto row_ptr_end_h =
334 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
335 // DEEP_COPY REVIEW - NOT TESTED
336 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
337 verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan",
339 os << endl;
340
341 std::cout << os.str();
342 }
343
346 }
347
348 if (verbose) {
349 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
350 auto values_h = values_wdv.getHostView(Access::ReadOnly);
351 std::ostringstream os;
352 os << "On output: ";
354 os << ", ";
356 os << ", padding: ";
357 padding.print(os);
358 os << endl;
359 }
360
361 if (verbose) {
362 std::ostringstream os;
363 os << *prefix << "Done" << endl;
364 std::cerr << os.str();
365 }
366}
367
369template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
370size_t
372 typename Pointers::value_type const row,
373 Pointers const& row_ptrs,
375 size_t& num_assigned,
376 InIndices const& new_indices,
377 IndexMap&& map,
378 std::function<void(size_t const, size_t const, size_t const)> cb) {
379 if (new_indices.size() == 0) {
380 return 0;
381 }
382
383 if (cur_indices.size() == 0) {
384 // No room to insert new indices
385 return Teuchos::OrdinalTraits<size_t>::invalid();
386 }
387
388 using offset_type = typename std::decay<decltype(row_ptrs[0])>::type;
389 using ordinal_type = typename std::decay<decltype(cur_indices[0])>::type;
390
391 const offset_type start = row_ptrs[row];
392 offset_type end = start + static_cast<offset_type>(num_assigned);
393 const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t(0) : row_ptrs[row + 1] - end;
394 const size_t num_new_indices = static_cast<size_t>(new_indices.size());
395 size_t num_inserted = 0;
396
398
399 // Threshold determined from test/Utils/insertCrsIndicesThreshold.cpp
400 const size_t useLookUpTableThreshold = 400;
401
403 // For rows with few nonzeros, can use a serial search to find duplicates
404 // Or if inserting only one index, serial search is as fast as anything else
405 for (size_t k = 0; k < num_new_indices; ++k) {
406 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
407 offset_type row_offset = start;
408 for (; row_offset < end; ++row_offset) {
409 if (idx == cur_indices[row_offset]) {
410 break;
411 }
412 }
413
414 if (row_offset == end) {
415 if (num_inserted >= num_avail) { // not enough room
416 return Teuchos::OrdinalTraits<size_t>::invalid();
417 }
418 // This index is not yet in indices
419 cur_indices[end++] = idx;
420 num_inserted++;
421 }
422 if (cb) {
423 cb(k, start, row_offset - start);
424 }
425 }
426 } else {
427 // For rows with many nonzeros, use a lookup table to find duplicates
428 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
429
430 // Put existing indices into the lookup table
431 for (size_t k = 0; k < num_assigned; k++) {
432 idxLookup[cur_indices[start + k]] = start + k;
433 }
434
435 // Check for new indices in table; insert if not there yet
436 for (size_t k = 0; k < num_new_indices; k++) {
437 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
438 offset_type row_offset;
439
440 auto it = idxLookup.find(idx);
441 if (it == idxLookup.end()) {
442 if (num_inserted >= num_avail) { // not enough room
443 return Teuchos::OrdinalTraits<size_t>::invalid();
444 }
445 // index not found; insert it
446 row_offset = end;
447 cur_indices[end++] = idx;
448 idxLookup[idx] = row_offset;
449 num_inserted++;
450 } else {
451 // index found; note its position
452 row_offset = it->second;
453 }
454 if (cb) {
455 cb(k, start, row_offset - start);
456 }
457 }
458 }
460 return num_inserted;
461}
462
464template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
465size_t
467 typename Pointers::value_type const row,
468 Pointers const& row_ptrs,
469 const size_t curNumEntries,
470 Indices1 const& cur_indices,
471 Indices2 const& new_indices,
472 IndexMap&& map,
473 Callback&& cb) {
474 if (new_indices.size() == 0)
475 return 0;
476
477 using ordinal =
478 typename std::remove_const<typename Indices1::value_type>::type;
479 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
480
481 const size_t start = static_cast<size_t>(row_ptrs[row]);
482 const size_t end = start + curNumEntries;
483 size_t num_found = 0;
484 for (size_t k = 0; k < new_indices.size(); k++) {
485 auto row_offset = start;
486 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
487 if (idx == invalid_ordinal)
488 continue;
489 for (; row_offset < end; row_offset++) {
490 if (idx == cur_indices[row_offset]) {
491 std::forward<Callback>(cb)(k, start, row_offset - start);
492 num_found++;
493 }
494 }
495 }
496 return num_found;
497}
498
499} // namespace impl
500
518template <class RowPtr, class Indices, class Padding>
520 const RowPtr& rowPtrBeg,
521 const RowPtr& rowPtrEnd,
523 const Padding& padding,
524 const int my_rank,
525 const bool verbose) {
526 using impl::pad_crs_arrays;
527 // send empty values array
530 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
532}
533
534template <class RowPtr, class Indices, class Values, class Padding>
535void padCrsArrays(
536 const RowPtr& rowPtrBeg,
537 const RowPtr& rowPtrEnd,
540 const Padding& padding,
541 const int my_rank,
542 const bool verbose) {
543 using impl::pad_crs_arrays;
545 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
547}
548
594template <class Pointers, class InOutIndices, class InIndices>
595size_t
597 typename Pointers::value_type const row,
598 Pointers const& rowPtrs,
600 size_t& numAssigned,
601 InIndices const& newIndices,
602 std::function<void(const size_t, const size_t, const size_t)> cb =
603 std::function<void(const size_t, const size_t, const size_t)>()) {
604 static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
605 typename std::remove_const<typename InIndices::value_type>::type>::value,
606 "Expected views to have same value type");
607
608 // Provide a unit map for the more general insert_indices
609 using ordinal = typename InOutIndices::value_type;
610 auto numInserted = impl::insert_crs_indices(
611 row, rowPtrs, curIndices,
612 numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
613 return numInserted;
614}
615
616template <class Pointers, class InOutIndices, class InIndices>
617size_t
619 typename Pointers::value_type const row,
620 Pointers const& rowPtrs,
622 size_t& numAssigned,
623 InIndices const& newIndices,
624 std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
625 std::function<void(const size_t, const size_t, const size_t)> cb =
626 std::function<void(const size_t, const size_t, const size_t)>()) {
627 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
629 return numInserted;
630}
631
661template <class Pointers, class Indices1, class Indices2, class Callback>
662size_t
664 typename Pointers::value_type const row,
665 Pointers const& rowPtrs,
666 const size_t curNumEntries,
667 Indices1 const& curIndices,
668 Indices2 const& newIndices,
669 Callback&& cb) {
670 static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
671 typename std::remove_const<typename Indices2::value_type>::type>::value,
672 "Expected views to have same value type");
673 // Provide a unit map for the more general find_crs_indices
674 using ordinal = typename Indices2::value_type;
675 auto numFound = impl::find_crs_indices(
677 [=](ordinal ind) { return ind; }, cb);
678 return numFound;
679}
680
681template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
682size_t
684 typename Pointers::value_type const row,
685 Pointers const& rowPtrs,
686 const size_t curNumEntries,
687 Indices1 const& curIndices,
688 Indices2 const& newIndices,
689 IndexMap&& map,
690 Callback&& cb) {
691 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
692}
693
694} // namespace Details
695} // namespace Tpetra
696
697#endif // TPETRA_DETAILS_CRSUTILS_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void pad_crs_arrays(const PadCrsAction action, const RowPtr &row_ptr_beg, const RowPtr &row_ptr_end, Indices &indices_wdv, Values &values_wdv, const Padding &padding, const int my_rank, const bool verbose)
Implementation of padCrsArrays.
size_t find_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, const size_t curNumEntries, Indices1 const &cur_indices, Indices2 const &new_indices, IndexMap &&map, Callback &&cb)
Implementation of findCrsIndices.
size_t insert_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, InOutIndices &cur_indices, size_t &num_assigned, InIndices const &new_indices, IndexMap &&map, std::function< void(size_t const, size_t const, size_t const)> cb)
Implementation of insertCrsIndices.
Struct that holds views of the contents of a CrsMatrix.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
Namespace Tpetra contains the class and methods constituting the Tpetra library.