Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_BLAS.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_BLAS.hpp"
12
13/* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
14the appropriate declaration of one will need to be added back into
15functions that include the macro:
16*/
17
18namespace {
19#if defined (INTEL_CXML)
20 unsigned int one=1;
21#endif
22} // namespace
23
24#ifdef CHAR_MACRO
25#undef CHAR_MACRO
26#endif
27#if defined (INTEL_CXML)
28#define CHAR_MACRO(char_var) &char_var, one
29#else
30#define CHAR_MACRO(char_var) &char_var
31#endif
32
33
34const char Teuchos::ESideChar[] = {'L' , 'R' };
35const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
36const char Teuchos::EUploChar[] = {'U' , 'L' };
37const char Teuchos::EDiagChar[] = {'U' , 'N' };
38const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
39//const char Teuchos::EFactChar[] = {'F', 'N' };
40//const char Teuchos::ENormChar[] = {'O', 'I' };
41//const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
42//const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
43//const char Teuchos::EJobSChar[] = {'E', 'S' };
44//const char Teuchos::EJobVSChar[] = {'V', 'N' };
45//const char Teuchos::EHowmnyChar[] = {'A', 'S' };
46//const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
47//const char Teuchos::ESortChar[] = {'N', 'S'};
48
49
50namespace {
51
52
53template<typename Scalar>
54Scalar generic_dot(const int& n, const Scalar* x, const int& incx,
55 const Scalar* y, const int& incy)
56{
58 Scalar dot = 0.0;
59 if (incx==1 && incy==1) {
60 for (int i = 0; i < n; ++i)
61 dot += (*x++)*ST::conjugate(*y++);
62 }
63 else {
64 if (incx < 0)
65 x = x - incx*(n-1);
66 if (incy < 0)
67 y = y - incy*(n-1);
68 for (int i = 0; i < n; ++i, x+=incx, y+=incy)
69 dot += (*x)*ST::conjugate(*y);
70 }
71 return dot;
72}
73
74
75} // namespace
76
77
78namespace Teuchos {
79
80//Explicitly instantiating these templates for windows due to an issue with
81//resolving them when linking dlls.
82#ifdef _MSC_VER
83# ifdef HAVE_TEUCHOS_COMPLEX
84 template class BLAS<long int, std::complex<float> >;
85 template class BLAS<long int, std::complex<double> >;
86# endif
87 template class BLAS<long int, float>;
88 template class BLAS<long int, double>;
89#endif
90
91 // *************************** BLAS<int,float> DEFINITIONS ******************************
92
93 void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
94 { SROTG_F77(da, db, c, s ); }
95
96 void BLAS<int, float>::ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const
97 { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
98
99
100 float BLAS<int, float>::ASUM(const int& n, const float* x, const int& incx) const
101 {
102#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
103 return cblas_sasum(n, x, incx);
104#elif defined(HAVE_TEUCHOS_BLASFLOAT)
105 float tmp = SASUM_F77(&n, x, &incx);
106 return tmp;
107#else
108 typedef ScalarTraits<float> ST;
109 float sum = 0.0;
110 if (incx == 1) {
111 for (int i = 0; i < n; ++i)
112 sum += ST::magnitude(*x++);
113 }
114 else {
115 for (int i = 0; i < n; ++i, x+=incx)
116 sum += ST::magnitude(*x);
117 }
118 return sum;
119#endif
120 }
121
122 void BLAS<int, float>::AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const
123 { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
124
125 void BLAS<int, float>::COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const
126 { SCOPY_F77(&n, x, &incx, y, &incy); }
127
128 float BLAS<int, float>::DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const
129 {
130#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
131 return cblas_sdot(n, x, incx, y, incy);
132#elif defined(HAVE_TEUCHOS_BLASFLOAT)
133 return SDOT_F77(&n, x, &incx, y, &incy);
134#else
135 return generic_dot(n, x, incx, y, incy);
136#endif
137 }
138
139 int BLAS<int, float>::IAMAX(const int& n, const float* x, const int& incx) const
140 { return ISAMAX_F77(&n, x, &incx); }
141
142 float BLAS<int, float>::NRM2(const int& n, const float* x, const int& incx) const
143 {
144#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
145 return cblas_snrm2(n, x, incx);
146#elif defined(HAVE_TEUCHOS_BLASFLOAT)
147 return SNRM2_F77(&n, x, &incx);
148#else
149 return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
150#endif
151 }
152
153 void BLAS<int, float>::SCAL(const int& n, const float& alpha, float* x, const int& incx) const
154 { SSCAL_F77(&n, &alpha, x, &incx); }
155
156 void BLAS<int, float>::GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const
157 { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
158
159 void BLAS<int, float>::GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const
160 { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
161
162 void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const
163 { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
164
165 void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
166 { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
167
168 void BLAS<int, float>::SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const
169 {
170 SSWAP_F77 (&n, x, &incx, y, &incy);
171 }
172
173 void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
174 { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
175
176 void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
177 { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
178
179 void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
180 { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
181
182 void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
183 { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
184
185 void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
186 { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
187
188 // *************************** BLAS<int,double> DEFINITIONS ******************************
189
190 void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
191 { DROTG_F77(da, db, c, s); }
192
193 void BLAS<int, double>::ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const
194 { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
195
196 double BLAS<int, double>::ASUM(const int& n, const double* x, const int& incx) const
197 { return DASUM_F77(&n, x, &incx); }
198
199 void BLAS<int, double>::AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const
200 { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
201
202 void BLAS<int, double>::COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const
203 { DCOPY_F77(&n, x, &incx, y, &incy); }
204
205 double BLAS<int, double>::DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const
206 {
207 return DDOT_F77(&n, x, &incx, y, &incy);
208 }
209
210 int BLAS<int, double>::IAMAX(const int& n, const double* x, const int& incx) const
211 { return IDAMAX_F77(&n, x, &incx); }
212
213 double BLAS<int, double>::NRM2(const int& n, const double* x, const int& incx) const
214 { return DNRM2_F77(&n, x, &incx); }
215
216 void BLAS<int, double>::SCAL(const int& n, const double& alpha, double* x, const int& incx) const
217 { DSCAL_F77(&n, &alpha, x, &incx); }
218
219 void BLAS<int, double>::GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const
220 { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
221
222 void BLAS<int, double>::GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const
223 { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
224
225 void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const
226 { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
227
228 void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
229 { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
230
231 void BLAS<int, double>::SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const
232 {
233 DSWAP_F77 (&n, x, &incx, y, &incy);
234 }
235
236 void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
237 { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
238
239 void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
240 { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
241
242 void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
243 { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
244
245 void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
246 { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
247
248 void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
249 { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
250
251#ifdef HAVE_TEUCHOS_COMPLEX
252
253 // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
254
255 void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
256 { CROTG_F77(da, db, c, s ); }
257
258 void BLAS<int, std::complex<float> >::ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const
259 { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
260
261 float BLAS<int, std::complex<float> >::ASUM(const int& n, const std::complex<float>* x, const int& incx) const
262 {
263#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
264 return cblas_scasum(n, x, incx);
265#elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
266 return (float) SCASUM_F77(&n, x, &incx);
267#elif defined(HAVE_TEUCHOS_BLASFLOAT)
268 return SCASUM_F77(&n, x, &incx);
269#else // Wow, you just plain don't have this routine.
270 // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
271 // I've enhanced this by accumulating in double precision.
272 double result = 0;
273 if (incx == 1) {
274 for (int i = 0; i < n; ++i) {
275 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
276 }
277 } else {
278 const int nincx = n * incx;
279 for (int i = 0; i < nincx; i += incx) {
280 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
281 }
282 }
283 return static_cast<float> (result);
284#endif
285 }
286
287 void BLAS<int, std::complex<float> >::AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
288 { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
289
290 void BLAS<int, std::complex<float> >::COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
291 { CCOPY_F77(&n, x, &incx, y, &incy); }
292
293 std::complex<float> BLAS<int, std::complex<float> >::DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const
294 {
295#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296 std::complex<float> z;
297 cblas_cdotc_sub(n,x,incx,y,incy,&z);
298 return z;
299#elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
300 std::complex<float> z;
301 CDOT_F77(&z, &n, x, &incx, y, &incy);
302 return z;
303#elif defined(HAVE_TEUCHOS_BLASFLOAT)
304 Teuchos_Complex_float_type_name z = CDOT_F77(&n, x, &incx, y, &incy);
305 return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(float, z);
306#else // Wow, you just plain don't have this routine.
307 // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
308 // I've enhanced this by accumulating in double precision.
309 std::complex<double> result (0, 0);
310 if (n >= 0) {
311 if (incx == 1 && incy == 1) {
312 for (int i = 0; i < n; ++i) {
313 result += std::conj (x[i]) * y[i];
314 }
315 } else {
316 int ix = 0;
317 int iy = 0;
318 if (incx < 0) {
319 ix = (1-n) * incx;
320 }
321 if (incy < 0) {
322 iy = (1-n) * incy;
323 }
324 for (int i = 0; i < n; ++i) {
325 result += std::conj (x[ix]) * y[iy];
326 ix += incx;
327 iy += incy;
328 }
329 }
330 }
331 return static_cast<std::complex<float> > (result);
332#endif
333 }
334
335 int BLAS<int, std::complex<float> >::IAMAX(const int& n, const std::complex<float>* x, const int& incx) const
336 { return ICAMAX_F77(&n, x, &incx); }
337
338 float BLAS<int, std::complex<float> >::NRM2(const int& n, const std::complex<float>* x, const int& incx) const
339 {
340#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
341 return cblas_scnrm2(n, x, incx);
342#elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
343 return (float) SCNRM2_F77(&n, x, &incx);
344#elif defined(HAVE_TEUCHOS_BLASFLOAT)
345 return SCNRM2_F77(&n, x, &incx);
346#else // Wow, you just plain don't have this routine.
347 // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
348 // I've enhanced this by accumulating in double precision.
349 if (n < 1 || incx < 1) {
350 return 0;
351 } else {
352 double scale = 0;
353 double ssq = 1;
354
355 const int upper = 1 + (n-1)*incx;
356 for (int ix = 0; ix < upper; ix += incx) {
357 // The reference BLAS implementation cleverly scales the
358 // intermediate result. so that even if the square of the norm
359 // would overflow, computing the norm itself does not. Hence,
360 // "ssq" for "scaled square root."
361 if (std::real (x[ix]) != 0) {
362 const double temp = std::abs (std::real (x[ix]));
363 if (scale < temp) {
364 const double scale_over_temp = scale / temp;
365 ssq = 1 + ssq * scale_over_temp*scale_over_temp;
366 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
367 scale = temp;
368 } else {
369 const double temp_over_scale = temp / scale;
370 ssq = ssq + temp_over_scale*temp_over_scale;
371 }
372 }
373 if (std::imag (x[ix]) != 0) {
374 const double temp = std::abs (std::imag (x[ix]));
375 if (scale < temp) {
376 const double scale_over_temp = scale / temp;
377 ssq = 1 + ssq * scale_over_temp*scale_over_temp;
378 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
379 scale = temp;
380 } else {
381 const double temp_over_scale = temp / scale;
382 ssq = ssq + temp_over_scale*temp_over_scale;
383 }
384 }
385 }
386 return static_cast<float> (scale * std::sqrt (ssq));
387 }
388#endif
389 }
390
391 void BLAS<int, std::complex<float> >::SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const
392 { CSCAL_F77(&n, &alpha, x, &incx); }
393
394 void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const
395 { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
396
397 void BLAS<int, std::complex<float> >::GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const
398 { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
399
400 void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const
401 { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
402
403 void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
404 { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
405
406 void BLAS<int, std::complex<float> >::SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const
407 {
408 CSWAP_F77 (&n, x, &incx, y, &incy);
409 }
410
411 void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
412 { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
413
414 void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
415 { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
416
417 void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
418 { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
419
420 void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
421 { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
422
423 void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
424 { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
425
426 // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
427
428 void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
429 { ZROTG_F77(da, db, c, s); }
430
431 void BLAS<int, std::complex<double> >::ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const
432 { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
433
434 double BLAS<int, std::complex<double> >::ASUM(const int& n, const std::complex<double>* x, const int& incx) const
435 { return ZASUM_F77(&n, x, &incx); }
436
437 void BLAS<int, std::complex<double> >::AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
438 { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
439
440 void BLAS<int, std::complex<double> >::COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
441 { ZCOPY_F77(&n, x, &incx, y, &incy); }
442
443 std::complex<double> BLAS<int, std::complex<double> >::DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const
444 {
445#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
446 std::complex<double> z;
447 cblas_zdotc_sub(n,x,incx,y,incy,&z);
448 return z;
449#elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
450# if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
451 std::complex<double> z;
452 ZDOT_F77(&z, &n, x, &incx, y, &incy);
453 return z;
454# else
455 // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
456 // doesn't have the easy workaround. I'll just reimplement the
457 // missing routine here. See www.netlib.org/blas/zdotc.f.
458 std::complex<double> ztemp (0, 0);
459 if (n > 0) {
460 if (incx == 1 && incy == 1) {
461 for (int i = 0; i < n; ++i) {
462 ztemp += std::conj (x[i]) * y[i];
463 }
464 } else {
465 int ix = 0;
466 int iy = 0;
467 if (incx < 0) {
468 ix = (1-n)*incx;
469 }
470 if (incy < 0) {
471 iy = (1-n)*incy;
472 }
473 for (int i = 0; i < n; ++i) {
474 ztemp += std::conj (x[ix]) * y[iy];
475 ix += incx;
476 iy += incy;
477 }
478 }
479 }
480 return ztemp;
481
482# endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
483#else
484 Teuchos_Complex_double_type_name z = ZDOT_F77(&n, x, &incx, y, &incy);
485 return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(double, z);
486#endif
487 }
488
489 int BLAS<int, std::complex<double> >::IAMAX(const int& n, const std::complex<double>* x, const int& incx) const
490 { return IZAMAX_F77(&n, x, &incx); }
491
492 double BLAS<int, std::complex<double> >::NRM2(const int& n, const std::complex<double>* x, const int& incx) const
493 { return ZNRM2_F77(&n, x, &incx); }
494
495 void BLAS<int, std::complex<double> >::SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const
496 { ZSCAL_F77(&n, &alpha, x, &incx); }
497
498 void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const
499 { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
500
501 void BLAS<int, std::complex<double> >::GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const
502 { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
503
504 void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const
505 { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
506
507 void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
508 { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
509
510 void BLAS<int, std::complex<double> >::SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const
511 {
512 ZSWAP_F77 (&n, x, &incx, y, &incy);
513 }
514
515 void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const
516 { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
517
518 void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
519 { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
520
521 void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
522 { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
523
524 void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
525 { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
526
527 void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
528 { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
529
530#endif // HAVE_TEUCHOS_COMPLEX
531
532}
Templated interface class to BLAS routines.
The Templated BLAS wrappers.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y <- y+alpha*x.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices,...
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C,...
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a ge...
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/l...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
Smart reference counting pointer class for automatic garbage collection.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.