Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_CRSUTILS_HPP
11#define TPETRA_DETAILS_CRSUTILS_HPP
12#include <numeric>
13#include <type_traits>
14
15#include "TpetraCore_config.h"
16#include "Kokkos_Core.hpp"
18#include "Tpetra_Details_CrsPadding.hpp"
19#include "Tpetra_Details_WrappedDualView.hpp"
20#include <iostream>
21#include <memory>
22#include <unordered_map>
23#include <algorithm>
24
30
31namespace Tpetra {
32namespace Details {
33
34namespace impl {
35
36template <class ViewType>
37ViewType
38make_uninitialized_view(
39 const std::string& name,
40 const size_t size,
41 const bool verbose,
42 const std::string* const prefix) {
43 if (verbose) {
44 std::ostringstream os;
45 os << *prefix << "Allocate Kokkos::View " << name
46 << ": " << size << std::endl;
47 std::cerr << os.str();
48 }
49 using Kokkos::view_alloc;
50 using Kokkos::WithoutInitializing;
51 return ViewType(view_alloc(name, WithoutInitializing), size);
52}
53
54template <class ViewType>
55ViewType
56make_initialized_view(
57 const std::string& name,
58 const size_t size,
59 const bool verbose,
60 const std::string* const prefix) {
61 if (verbose) {
62 std::ostringstream os;
63 os << *prefix << "Allocate & initialize Kokkos::View "
64 << name << ": " << size << std::endl;
65 std::cerr << os.str();
66 }
67 return ViewType(name, size);
68}
69
70template <class OutViewType, class InViewType>
71void assign_to_view(OutViewType& out,
72 const InViewType& in,
73 const char viewName[],
74 const bool verbose,
75 const std::string* const prefix) {
76 if (verbose) {
77 std::ostringstream os;
78 os << *prefix << "Assign to Kokkos::View " << viewName
79 << ": Old size: " << out.extent(0)
80 << ", New size: " << in.extent(0) << std::endl;
81 std::cerr << os.str();
82 }
83 out = in;
84}
85
86template <class MemorySpace, class ViewType>
87auto create_mirror_view(
88 const MemorySpace& memSpace,
89 const ViewType& view,
90 const bool verbose,
91 const std::string* const prefix) -> decltype(Kokkos::create_mirror_view(memSpace, view)) {
92 if (verbose) {
93 std::ostringstream os;
94 os << *prefix << "Create mirror view: "
95 << "view.extent(0): " << view.extent(0) << std::endl;
96 std::cerr << os.str();
97 }
98 return Kokkos::create_mirror_view(memSpace, view);
99}
100
101enum class PadCrsAction {
102 INDICES_ONLY,
103 INDICES_AND_VALUES
104};
105
119template <class RowPtr, class Indices, class Values, class Padding>
121 const PadCrsAction action,
122 const RowPtr& row_ptr_beg,
123 const RowPtr& row_ptr_end,
126 const Padding& padding,
127 const int my_rank,
128 const bool verbose) {
129 using execution_space = typename Indices::t_dev::execution_space;
130 using Kokkos::view_alloc;
131 using Kokkos::WithoutInitializing;
132 using std::endl;
133 std::unique_ptr<std::string> prefix;
134
135 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
136 if (verbose) {
137 std::ostringstream os;
138 os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: ";
139 prefix = std::unique_ptr<std::string>(new std::string(os.str()));
140 os << "Start" << endl;
141 std::cerr << os.str();
142 }
143 Kokkos::HostSpace hostSpace;
144
145 if (verbose) {
146 std::ostringstream os;
147 os << *prefix << "On input: ";
148 auto row_ptr_beg_h =
149 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
150 // DEEP_COPY REVIEW - NOT TESTED
151 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
152 verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan",
154 os << ", ";
155 auto row_ptr_end_h =
156 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
157 // DEEP_COPY REVIEW - NOT TESTED
158 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
159 verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan",
161 os << ", indices.extent(0): " << indices_wdv.extent(0)
162 << ", values.extent(0): " << values_wdv.extent(0)
163 << ", padding: ";
164 padding.print(os);
165 os << endl;
166 std::cerr << os.str();
167 }
168
169 if (row_ptr_beg.size() == 0) {
170 if (verbose) {
171 std::ostringstream os;
172 os << *prefix << "Done; local matrix has no rows" << endl;
173 std::cerr << os.str();
174 }
175 return; // nothing to do
176 }
177
178 const size_t lclNumRows(row_ptr_beg.size() - 1);
181 verbose, prefix.get());
182 if (verbose) {
183 std::ostringstream os;
184 os << *prefix << "Fill newAllocPerRow & compute increase" << endl;
185 std::cerr << os.str();
186 }
187 size_t increase = 0;
188 {
189 // Must do on host because padding uses std::map
190 execution_space exec_space_instance = execution_space();
191 auto row_ptr_end_h = create_mirror_view(
192 hostSpace, row_ptr_end, verbose, prefix.get());
193 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
194 Kokkos::deep_copy(exec_space_instance, row_ptr_end_h, row_ptr_end);
195 auto row_ptr_beg_h = create_mirror_view(
196 hostSpace, row_ptr_beg, verbose, prefix.get());
197 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
198 Kokkos::deep_copy(exec_space_instance, row_ptr_beg_h, row_ptr_beg);
199
200 // lbv 03/15/23: The execution space deep_copy does an asynchronous
201 // copy so we really want to fence that space before touching the
202 // host data as it is not guarenteed to have arrived by the time we
203 // start the parallel_reduce below which might use a different
204 // execution space, see:
205 // https://kokkos.github.io/kokkos-core-wiki/API/core/view/deep_copy.html#semantics
206 exec_space_instance.fence();
207
208 auto newAllocPerRow_h = create_mirror_view(
209 hostSpace, newAllocPerRow, verbose, prefix.get());
210 using host_range_type = Kokkos::RangePolicy<
211 Kokkos::DefaultHostExecutionSpace, size_t>;
212 Kokkos::parallel_reduce(
213 "Tpetra::CrsGraph: Compute new allocation size per row",
215 [&](const size_t lclRowInd, size_t& lclIncrease) {
216 const size_t start = row_ptr_beg_h[lclRowInd];
217 const size_t end = row_ptr_beg_h[lclRowInd + 1];
218 TEUCHOS_ASSERT(end >= start);
219 const size_t oldAllocSize = end - start;
220 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
222
223 // This is not a pack routine. Do not shrink! Shrinking now
224 // to fit the number of entries would ignore users' hint for
225 // the max number of entries in each row. Also, CrsPadding
226 // only counts entries and ignores any available free space.
227
228 auto result = padding.get_result(lclRowInd);
229 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
230 if (newNumEnt > oldAllocSize) {
233 } else {
235 }
236 },
237 increase);
238
239 if (verbose) {
240 std::ostringstream os;
241 os << *prefix << "increase: " << increase << ", ";
242 verbosePrintArray(os, newAllocPerRow_h, "newAllocPerRow",
244 os << endl;
245 std::cerr << os.str();
246 }
247
248 if (increase == 0) {
249 return;
250 }
251 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
252 Kokkos::deep_copy(execution_space(), newAllocPerRow, newAllocPerRow_h);
253 }
254
255 using inds_value_type =
256 typename Indices::t_dev::non_const_value_type;
257 using vals_value_type = typename Values::t_dev::non_const_value_type;
258
259 {
260 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
261 const size_t newIndsSize = size_t(indices_old.size()) + increase;
263 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
264 prefix.get());
265
266 typename Values::t_dev values_new;
267 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
268 if (action == PadCrsAction::INDICES_AND_VALUES) {
269 const size_t newValsSize = newIndsSize;
270 // NOTE (mfh 10 Feb 2020) If we don't initialize values_new here,
271 // then the CrsMatrix tests fail.
273 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
274 }
275
276 if (verbose) {
277 std::ostringstream os;
278 os << *prefix << "Repack" << endl;
279 std::cerr << os.str();
280 }
281
282 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
283 Kokkos::parallel_scan(
284 "Tpetra::CrsGraph or CrsMatrix repack",
285 range_type(size_t(0), size_t(lclNumRows + 1)),
286 KOKKOS_LAMBDA(const size_t lclRow, size_t& newRowBeg,
287 const bool finalPass) {
288 // row_ptr_beg has lclNumRows + 1 entries.
289 // row_ptr_end has lclNumRows entries.
290 // newAllocPerRow has lclNumRows entries.
291 const size_t row_beg = row_ptr_beg[lclRow];
292 const size_t row_end =
294 const size_t numEnt = row_end - row_beg;
295 const size_t newRowAllocSize =
297 if (finalPass) {
298 if (lclRow < lclNumRows) {
299 const Kokkos::pair<size_t, size_t> oldRange(
301 const Kokkos::pair<size_t, size_t> newRange(
303 auto oldColInds = Kokkos::subview(indices_old, oldRange);
304 auto newColInds = Kokkos::subview(indices_new, newRange);
305 // memcpy works fine on device; the next step is to
306 // introduce two-level parallelism and use team copy.
307 memcpy(newColInds.data(), oldColInds.data(),
308 numEnt * sizeof(inds_value_type));
309 if (action == PadCrsAction::INDICES_AND_VALUES) {
310 auto oldVals =
311 Kokkos::subview(values_old, oldRange);
312 auto newVals = Kokkos::subview(values_new, newRange);
313 memcpy((void*)newVals.data(), oldVals.data(),
314 numEnt * sizeof(vals_value_type));
315 }
316 }
317 // It's the final pass, so we can modify these arrays.
319 if (lclRow < lclNumRows) {
321 }
322 }
324 });
325
326 if (verbose) {
327 std::ostringstream os;
328
329 os << *prefix;
330 auto row_ptr_beg_h =
331 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
332 // DEEP_COPY REVIEW - NOT TESTED
333 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
334 verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan",
336 os << endl;
337
338 os << *prefix;
339 auto row_ptr_end_h =
340 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
341 // DEEP_COPY REVIEW - NOT TESTED
342 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
343 verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan",
345 os << endl;
346
347 std::cout << os.str();
348 }
349
352 }
353
354 if (verbose) {
355 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
356 auto values_h = values_wdv.getHostView(Access::ReadOnly);
357 std::ostringstream os;
358 os << "On output: ";
360 os << ", ";
362 os << ", padding: ";
363 padding.print(os);
364 os << endl;
365 }
366
367 if (verbose) {
368 std::ostringstream os;
369 os << *prefix << "Done" << endl;
370 std::cerr << os.str();
371 }
372}
373
375template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
376size_t
378 typename Pointers::value_type const row,
379 Pointers const& row_ptrs,
381 size_t& num_assigned,
382 InIndices const& new_indices,
383 IndexMap&& map,
384 std::function<void(size_t const, size_t const, size_t const)> cb) {
385 if (new_indices.size() == 0) {
386 return 0;
387 }
388
389 if (cur_indices.size() == 0) {
390 // No room to insert new indices
391 return Teuchos::OrdinalTraits<size_t>::invalid();
392 }
393
394 using offset_type = typename std::decay<decltype(row_ptrs[0])>::type;
395 using ordinal_type = typename std::decay<decltype(cur_indices[0])>::type;
396
397 const offset_type start = row_ptrs[row];
398 offset_type end = start + static_cast<offset_type>(num_assigned);
399 const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t(0) : row_ptrs[row + 1] - end;
400 const size_t num_new_indices = static_cast<size_t>(new_indices.size());
401 size_t num_inserted = 0;
402
404
405 // Threshold determined from test/Utils/insertCrsIndicesThreshold.cpp
406 const size_t useLookUpTableThreshold = 400;
407
409 // For rows with few nonzeros, can use a serial search to find duplicates
410 // Or if inserting only one index, serial search is as fast as anything else
411 for (size_t k = 0; k < num_new_indices; ++k) {
412 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
413 offset_type row_offset = start;
414 for (; row_offset < end; ++row_offset) {
415 if (idx == cur_indices[row_offset]) {
416 break;
417 }
418 }
419
420 if (row_offset == end) {
421 if (num_inserted >= num_avail) { // not enough room
422 return Teuchos::OrdinalTraits<size_t>::invalid();
423 }
424 // This index is not yet in indices
425 cur_indices[end++] = idx;
426 num_inserted++;
427 }
428 if (cb) {
429 cb(k, start, row_offset - start);
430 }
431 }
432 } else {
433 // For rows with many nonzeros, use a lookup table to find duplicates
434 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
435
436 // Put existing indices into the lookup table
437 for (size_t k = 0; k < num_assigned; k++) {
438 idxLookup[cur_indices[start + k]] = start + k;
439 }
440
441 // Check for new indices in table; insert if not there yet
442 for (size_t k = 0; k < num_new_indices; k++) {
443 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
444 offset_type row_offset;
445
446 auto it = idxLookup.find(idx);
447 if (it == idxLookup.end()) {
448 if (num_inserted >= num_avail) { // not enough room
449 return Teuchos::OrdinalTraits<size_t>::invalid();
450 }
451 // index not found; insert it
452 row_offset = end;
453 cur_indices[end++] = idx;
454 idxLookup[idx] = row_offset;
455 num_inserted++;
456 } else {
457 // index found; note its position
458 row_offset = it->second;
459 }
460 if (cb) {
461 cb(k, start, row_offset - start);
462 }
463 }
464 }
466 return num_inserted;
467}
468
470template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
471size_t
473 typename Pointers::value_type const row,
474 Pointers const& row_ptrs,
475 const size_t curNumEntries,
476 Indices1 const& cur_indices,
477 Indices2 const& new_indices,
478 IndexMap&& map,
479 Callback&& cb) {
480 if (new_indices.size() == 0)
481 return 0;
482
483 using ordinal =
484 typename std::remove_const<typename Indices1::value_type>::type;
485 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
486
487 const size_t start = static_cast<size_t>(row_ptrs[row]);
488 const size_t end = start + curNumEntries;
489 size_t num_found = 0;
490 for (size_t k = 0; k < new_indices.size(); k++) {
491 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
492 if (idx == invalid_ordinal)
493 continue;
494 for (size_t row_offset = start; row_offset < end; row_offset++) {
495 size_t off = row_offset - start;
497 if (idx == lidx) {
498 std::forward<Callback>(cb)(k, start, off);
499 num_found++;
500 }
501 }
502 }
503 return num_found;
504}
505
507template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
509 typename Pointers::value_type const row,
510 Pointers const& row_ptrs,
511 const size_t curNumEntries,
512 Indices1 const& cur_indices,
513 Indices2 const& new_indices,
514 IndexMap&& map,
515 Callback&& cb) {
516 if (new_indices.size() == 0)
517 return 0;
518
519 using ordinal = typename std::remove_const<typename Indices1::value_type>::type;
520 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
521
522 const size_t start = static_cast<size_t>(row_ptrs[row]);
523 size_t num_found = 0;
524 for (size_t k = 0; k < new_indices.size(); k++) {
525 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
526 if (idx == invalid_ordinal)
527 continue;
528
529 // FIXME use kokkos findRelOffset
530 auto first = &cur_indices[start];
531 auto first0 = first;
532 auto last = first + curNumEntries * cur_indices.stride(0);
533 first = std::lower_bound(first, last, idx);
534 size_t off = first - first0;
535 if (first != last && !(idx < *first)) {
536 std::forward<Callback>(cb)(k, start, off);
537 num_found++;
538 }
539 }
540 return num_found;
541}
542
543} // namespace impl
544
564template <class RowPtr, class Indices, class Padding>
566 const RowPtr& rowPtrBeg,
567 const RowPtr& rowPtrEnd,
569 const Padding& padding,
570 const int my_rank,
571 const bool verbose) {
572 using impl::pad_crs_arrays;
573 // send empty values array
576 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
578}
579
580template <class RowPtr, class Indices, class Values, class Padding>
581void padCrsArrays(
582 const RowPtr& rowPtrBeg,
583 const RowPtr& rowPtrEnd,
586 const Padding& padding,
587 const int my_rank,
588 const bool verbose) {
589 using impl::pad_crs_arrays;
591 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
593}
594
639template <class Pointers, class InOutIndices, class InIndices>
640size_t
642 typename Pointers::value_type const row,
643 Pointers const& rowPtrs,
645 size_t& numAssigned,
646 InIndices const& newIndices,
647 std::function<void(const size_t, const size_t, const size_t)> cb =
648 std::function<void(const size_t, const size_t, const size_t)>()) {
649 static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
650 typename std::remove_const<typename InIndices::value_type>::type>::value,
651 "Expected views to have same value type");
652
653 // Provide a unit map for the more general insert_indices
654 using ordinal = typename InOutIndices::value_type;
655 auto numInserted = impl::insert_crs_indices(
656 row, rowPtrs, curIndices,
657 numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
658 return numInserted;
659}
660
661template <class Pointers, class InOutIndices, class InIndices>
662size_t
664 typename Pointers::value_type const row,
665 Pointers const& rowPtrs,
667 size_t& numAssigned,
668 InIndices const& newIndices,
669 std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
670 std::function<void(const size_t, const size_t, const size_t)> cb =
671 std::function<void(const size_t, const size_t, const size_t)>()) {
672 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
674 return numInserted;
675}
676
706template <class Pointers, class Indices1, class Indices2, class Callback>
707size_t
709 typename Pointers::value_type const row,
710 Pointers const& rowPtrs,
711 const size_t curNumEntries,
712 Indices1 const& curIndices,
713 Indices2 const& newIndices,
714 Callback&& cb) {
715 static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
716 typename std::remove_const<typename Indices2::value_type>::type>::value,
717 "Expected views to have same value type");
718 // Provide a unit map for the more general find_crs_indices
719 using ordinal = typename Indices2::value_type;
720 auto numFound = impl::find_crs_indices(
722 [=](ordinal ind) { return ind; }, cb);
723 return numFound;
724}
725
726template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
727size_t
729 typename Pointers::value_type const row,
730 Pointers const& rowPtrs,
731 const size_t curNumEntries,
732 Indices1 const& curIndices,
733 Indices2 const& newIndices,
734 IndexMap&& map,
735 Callback&& cb) {
736 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
737}
738
739template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
740size_t findCrsIndicesSorted(
741 typename Pointers::value_type const row,
742 Pointers const& rowPtrs,
743 const size_t curNumEntries,
744 Indices1 const& curIndices,
745 Indices2 const& newIndices,
746 IndexMap&& map,
747 Callback&& cb) {
748 return impl::find_crs_indices_sorted(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
749}
750
751} // namespace Details
752} // namespace Tpetra
753
754#endif // TPETRA_DETAILS_CRSUTILS_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void pad_crs_arrays(const PadCrsAction action, const RowPtr &row_ptr_beg, const RowPtr &row_ptr_end, Indices &indices_wdv, Values &values_wdv, const Padding &padding, const int my_rank, const bool verbose)
Implementation of padCrsArrays.
size_t find_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, const size_t curNumEntries, Indices1 const &cur_indices, Indices2 const &new_indices, IndexMap &&map, Callback &&cb)
Implementation of findCrsIndices.
size_t find_crs_indices_sorted(typename Pointers::value_type const row, Pointers const &row_ptrs, const size_t curNumEntries, Indices1 const &cur_indices, Indices2 const &new_indices, IndexMap &&map, Callback &&cb)
Implementation of findCrsIndices.
size_t insert_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, InOutIndices &cur_indices, size_t &num_assigned, InIndices const &new_indices, IndexMap &&map, std::function< void(size_t const, size_t const, size_t const)> cb)
Implementation of insertCrsIndices.
Struct that holds views of the contents of a CrsMatrix.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
Namespace Tpetra contains the class and methods constituting the Tpetra library.