Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_crsMatrixAssembleElement.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11#define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
12
13#include "KokkosSparse_CrsMatrix.hpp"
15#include <type_traits>
16
17namespace Tpetra {
18namespace Details {
19
57template <class SparseMatrixType,
58 class ValsViewType>
59KOKKOS_FUNCTION
60 typename SparseMatrixType::ordinal_type
62 const typename SparseMatrixType::ordinal_type lclRow,
63 const typename SparseMatrixType::ordinal_type lclColInds[],
64 const typename SparseMatrixType::ordinal_type sortPerm[],
65 const ValsViewType& vals,
66 const typename SparseMatrixType::ordinal_type numEntInInput,
67 const bool forceAtomic =
69 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
70#else // NOT KOKKOS_ENABLE_SERIAL
71 false,
72#endif // KOKKOS_ENABLE_SERIAL
73 const bool checkInputIndices = true) {
74 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
75 matrix_scalar_type;
76 static_assert(std::is_same<matrix_scalar_type,
77 typename SparseMatrixType::value_type>::value,
78 "The matrix's entries must have a nonconst type.");
79 // static_assert (std::is_assignable<matrix_scalar_type,
80 // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
81 // "The result of adding a matrix entry and an entry of vals "
82 // "MUST be assignable to a matrix entry.");
83 typedef typename SparseMatrixType::ordinal_type LO;
84 static_assert(std::is_integral<LO>::value,
85 "SparseMatrixType::ordinal_type "
86 "must be a built-in integer type.");
87
88 // If lclRow is NOT a valid row index, this will return a view of
89 // zero entries. If checkInputIndices is true, thus, then none of
90 // the input indices will be valid in that case.
91 auto row_view = A.row(lclRow);
92 const LO numEntInRow = static_cast<LO>(row_view.length);
93 // Number of valid local column indices found, that is, the number
94 // of input indices that are valid column indices found in row
95 // lclRow of the matrix. If not checking, we just return the number
96 // of input indices.
97 LO numValid = checkInputIndices ? static_cast<LO>(0) : numEntInRow;
98
99 // Since both the matrix row and the input (after permutation) are
100 // sorted, we only need to pass once over the matrix row. 'offset'
101 // tells us the current search position in the matrix row.
102 LO offset = 0;
103 for (LO j = 0; j < numEntInInput; ++j) {
104 const LO perm_index = sortPerm[j];
105 const LO lclColInd = lclColInds[perm_index];
106 // Search linearly in the matrix row for the current index.
107 // If we ever want binary search, this would be the place.
108 while (row_view.colidx(offset) != lclColInd) {
109 ++offset;
110 }
111
112 // If we could make checkInputIndices a compile-time constant,
113 // then the compiler might not need to insert a branch here. This
114 // should help vectorization, if vectorization is possible.
115 if (checkInputIndices) {
116 if (offset != numEntInRow) {
117 // If we could make forceAtomic a compile-time constant, then
118 // the compiler might not need to insert a branch here. This
119 // should help vectorization, if vectorization is possible.
120 if (forceAtomic) {
121 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
122 } else {
123 row_view.value(offset) += vals[perm_index];
124 }
125 ++numValid;
126 }
127 } else { // don't check input indices; assume they are in the row
128 // See above note on forceAtomic.
129 if (forceAtomic) {
130 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
131 } else {
132 row_view.value(offset) += vals[perm_index];
133 }
134 }
135 }
136
137 return numValid;
138}
139
178template <class SparseMatrixType,
179 class ValsViewType>
181 typename SparseMatrixType::ordinal_type
183 const typename SparseMatrixType::ordinal_type lclRow,
184 const typename SparseMatrixType::ordinal_type lclColInds[],
185 const typename SparseMatrixType::ordinal_type sortPerm[],
186 const ValsViewType& vals,
187 const typename SparseMatrixType::ordinal_type numEntInInput,
188 const bool forceAtomic =
190 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
191#else // NOT KOKKOS_ENABLE_SERIAL
192 false,
193#endif // KOKKOS_ENABLE_SERIAL
194 const bool checkInputIndices = true) {
195 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
196 matrix_scalar_type;
197 static_assert(std::is_same<matrix_scalar_type,
198 typename SparseMatrixType::value_type>::value,
199 "The matrix's entries must have a nonconst type.");
200 static_assert(std::is_assignable<matrix_scalar_type,
201 typename std::decay<decltype(A.values[0] + vals[0])>::type>::value,
202 "The result of adding a matrix entry and an entry of vals "
203 "MUST be assignable to a matrix entry.");
204 typedef typename SparseMatrixType::ordinal_type LO;
205 static_assert(std::is_integral<LO>::value,
206 "SparseMatrixType::ordinal_type "
207 "must be a built-in integer type.");
208
209 // If lclRow is NOT a valid row index, this will return a view of
210 // zero entries. If checkInputIndices is true, thus, then none of
211 // the input indices will be valid in that case.
212 auto row_view = A.row(lclRow);
213 const LO numEntInRow = static_cast<LO>(row_view.length);
214 // Number of valid local column indices found, that is, the number
215 // of input indices that are valid column indices found in row
216 // lclRow of the matrix. If not checking, we just return the number
217 // of input indices.
218 LO numValid = checkInputIndices ? static_cast<LO>(0) : numEntInRow;
219
220 // Since both the matrix row and the input (after permutation) are
221 // sorted, we only need to pass once over the matrix row. 'offset'
222 // tells us the current search position in the matrix row.
223 LO offset = 0;
224 for (LO j = 0; j < numEntInInput; ++j) {
225 const LO perm_index = sortPerm[j];
226 const LO lclColInd = lclColInds[perm_index];
227 // Search linearly in the matrix row for the current index.
228 // If we ever want binary search, this would be the place.
229 while (row_view.colidx(offset) != lclColInd) {
230 ++offset;
231 }
232
233 // If checkInputIndices were a compile-time constant, then the
234 // compiler might not need to insert a branch here. This should
235 // help vectorization, if vectorization is possible at all.
236 if (checkInputIndices) {
237 if (offset != numEntInRow) {
238 // If forceAtomic were a compile-time constant, then the
239 // compiler might not need to insert a branch here. This
240 // could help vectorization, if vectorization is possible.
241 if (forceAtomic) {
242 Kokkos::atomic_store(&(row_view.value(offset)), vals[perm_index]);
243 } else {
244 row_view.value(offset) += vals[perm_index];
245 }
246 ++numValid;
247 }
248 } else { // don't check input indices; assume they are in the row
249 // See above note on forceAtomic.
250 if (forceAtomic) {
251 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
252 } else {
253 row_view.value(offset) += vals[perm_index];
254 }
255 }
256 }
257
258 return numValid;
259}
260
312template <class SparseMatrixType,
313 class VectorViewType,
314 class RhsViewType,
315 class LhsViewType>
317 typename SparseMatrixType::ordinal_type
319 const VectorViewType& x,
320 typename SparseMatrixType::ordinal_type lids[],
321 typename SparseMatrixType::ordinal_type sortPerm[],
322 const RhsViewType& rhs,
323 const LhsViewType& lhs,
324 const bool forceAtomic =
326 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
327#else // NOT KOKKOS_ENABLE_SERIAL
328 false,
329#endif // KOKKOS_ENABLE_SERIAL
330 const bool checkInputIndices = true) {
331 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
332 matrix_scalar_type;
333 typedef typename std::remove_const<typename VectorViewType::value_type>::type
335 static_assert(std::is_same<matrix_scalar_type,
336 typename SparseMatrixType::value_type>::value,
337 "The sparse output matrix A's entries must have a nonconst type.");
338 static_assert(std::is_same<vector_scalar_type,
339 typename VectorViewType::value_type>::value,
340 "The dense output vector x's entries must have a nonconst type.");
341 // static_assert (std::is_assignable<matrix_scalar_type,
342 // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
343 // "The result of adding a sparse matrix entry and an entry of "
344 // "lhs (the dense element matrix) "
345 // "MUST be assignable to a matrix entry.");
346 // static_assert (std::is_assignable<vector_scalar_type,
347 // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
348 // "The result of adding a vector entry and an entry of "
349 // "rhs (the dense element vector) "
350 // "MUST be assignable to a vector entry.");
351 typedef typename SparseMatrixType::ordinal_type LO;
352 static_assert(std::is_integral<LO>::value,
353 "SparseMatrixType::ordinal_type "
354 "must be a built-in integer type.");
355
356 const LO eltDim = rhs.extent(0);
357
358 // Generate sort permutation
359 for (LO i = 0; i < eltDim; ++i) {
360 sortPerm[i] = i;
361 }
363
364 LO totalNumValid = 0;
365 for (LO r = 0; r < eltDim; ++r) {
366 const LO lid = lids[r];
367 // auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
368 auto lhs_r = Kokkos::subview(lhs, r, Kokkos::ALL());
369
370 // This assumes that lid is always a valid row in the sparse
371 // matrix, and that the local indices in each row of the matrix
372 // are always sorted.
373 const LO curNumValid =
377 if (forceAtomic) {
378 Kokkos::atomic_add(&x(lid), rhs(sortPerm[r]));
379 } else {
380 x(lid) += rhs(sortPerm[r]);
381 }
383 }
384 return totalNumValid;
385}
386
387} // namespace Details
388} // namespace Tpetra
389
390#endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
Namespace Tpetra contains the class and methods constituting the Tpetra library.