Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_BLAS.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// Kris
11// 06.16.03 -- Start over from scratch
12// 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
13// 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
14// -- Added warning messages for generic calls
15// 07.08.03 -- Move into Teuchos package/namespace
16// 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
17// * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
18// * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
19// * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
20// * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
21// * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
22// -- Removed warning messages for generic calls
23// 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
24// 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
25// 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
26
27#ifndef _TEUCHOS_BLAS_HPP_
28#define _TEUCHOS_BLAS_HPP_
29
41#include "Teuchos_Assert.hpp"
42
75namespace Teuchos
76{
77 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[];
78 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[];
79 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[];
80 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[];
81 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[];
82
83 // Forward declaration
84 namespace details {
85 template<typename ScalarType,
87 class GivensRotator;
88 }
89
91
96 template<typename OrdinalType, typename ScalarType>
98 {
99
100 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
101
102 public:
104
105
107 inline DefaultBLASImpl(void) {}
108
111
113 inline virtual ~DefaultBLASImpl(void) {}
115
117
118
120
122 typedef typename details::GivensRotator<ScalarType>::c_type rotg_c_type;
123
126
128 void ROT(const OrdinalType& n, ScalarType* dx, const OrdinalType& incx, ScalarType* dy, const OrdinalType& incy, MagnitudeType* c, ScalarType* s) const;
129
131 void SCAL(const OrdinalType& n, const ScalarType& alpha, ScalarType* x, const OrdinalType& incx) const;
132
134 void COPY(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const;
135
137 template <typename alpha_type, typename x_type>
138 void AXPY(const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const;
139
141 typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
142
144 template <typename x_type, typename y_type>
145 ScalarType DOT(const OrdinalType& n, const x_type* x, const OrdinalType& incx, const y_type* y, const OrdinalType& incy) const;
146
148 typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
149
151 OrdinalType IAMAX(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
153
155
156
158 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
159 void GEMV(ETransp trans, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A,
160 const OrdinalType& lda, const x_type* x, const OrdinalType& incx, const beta_type beta, ScalarType* y, const OrdinalType& incy) const;
161
163 template <typename A_type>
164 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType& n, const A_type* A,
165 const OrdinalType& lda, ScalarType* x, const OrdinalType& incx) const;
166
169 template <typename alpha_type, typename x_type, typename y_type>
170 void GER(const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx,
171 const y_type* y, const OrdinalType& incy, ScalarType* A, const OrdinalType& lda) const;
173
175
176
183 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
184 void GEMM(ETransp transa, ETransp transb, const OrdinalType& m, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
185
187 void
188 SWAP (const OrdinalType& n, ScalarType* const x, const OrdinalType& incx,
189 ScalarType* const y, const OrdinalType& incy) const;
190
192 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
193 void SYMM(ESide side, EUplo uplo, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
194
196 template <typename alpha_type, typename A_type, typename beta_type>
197 void SYRK(EUplo uplo, ETransp trans, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
198
200 template <typename alpha_type, typename A_type>
202 const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const;
203
205 template <typename alpha_type, typename A_type>
207 const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const;
209 };
210
211 template<typename OrdinalType, typename ScalarType>
212 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
213 {
214
216
217 public:
219
220
222 inline BLAS(void) {}
223
225 inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
226
228 inline virtual ~BLAS(void) {}
230 };
231
232//------------------------------------------------------------------------------------------
233// LEVEL 1 BLAS ROUTINES
234//------------------------------------------------------------------------------------------
235
243 namespace details {
244
245 // Compute magnitude.
246 template<typename ScalarType, bool isComplex>
247 class MagValue {
248 public:
249 void
250 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
251 };
252
253 // Complex-arithmetic specialization.
254 template<typename ScalarType>
255 class MagValue<ScalarType, true> {
256 public:
257 void
258 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
259 };
260
261 // Real-arithmetic specialization.
262 template<typename ScalarType>
263 class MagValue<ScalarType, false> {
264 public:
265 void
266 blas_dabs1(const ScalarType* a, ScalarType* ret) const;
267 };
268
269 template<typename ScalarType, bool isComplex>
270 class GivensRotator {};
271
272 // Complex-arithmetic specialization.
273 template<typename ScalarType>
274 class GivensRotator<ScalarType, true> {
275 public:
276 typedef typename ScalarTraits<ScalarType>::magnitudeType c_type;
277 void
278 ROTG (ScalarType* ca,
279 ScalarType* cb,
280 typename ScalarTraits<ScalarType>::magnitudeType* c,
281 ScalarType* s) const;
282 };
283
284 // Real-arithmetic specialization.
285 template<typename ScalarType>
286 class GivensRotator<ScalarType, false> {
287 public:
288 typedef ScalarType c_type;
289 void
290 ROTG (ScalarType* da,
291 ScalarType* db,
292 ScalarType* c,
293 ScalarType* s) const;
294
307 ScalarType SIGN (const ScalarType& x, const ScalarType& y) const {
308 typedef ScalarTraits<ScalarType> STS;
309
310 if (y > STS::zero()) {
311 return STS::magnitude (x);
312 } else if (y < STS::zero()) {
313 return -STS::magnitude (x);
314 } else { // y == STS::zero()
315 // Suppose that ScalarType& implements signed zero, as IEEE
316 // 754 - compliant floating-point numbers should. You can't
317 // use == to test for signed zero, since +0 == -0. However,
318 // 1/0 = Inf > 0 and 1/-0 = -Inf < 0. Let's hope ScalarType
319 // supports Inf... we don't need to test for Inf, just see
320 // if it's greater than or less than zero.
321 //
322 // NOTE: This ONLY works if ScalarType is real. Complex
323 // infinity doesn't have a sign, so we can't compare it with
324 // zero. That's OK, because finite complex numbers don't
325 // have a sign either; they have an angle.
326 ScalarType signedInfinity = STS::one() / y;
327 if (signedInfinity > STS::zero()) {
328 return STS::magnitude (x);
329 } else {
330 // Even if ScalarType doesn't implement signed zero,
331 // Fortran's SIGN intrinsic returns -ABS(X) if the second
332 // argument Y is zero. We imitate this behavior here.
333 return -STS::magnitude (x);
334 }
335 }
336 }
337 };
338
339 // Implementation of complex-arithmetic specialization.
340 template<typename ScalarType>
341 void
342 GivensRotator<ScalarType, true>::
343 ROTG (ScalarType* ca,
344 ScalarType* cb,
345 typename ScalarTraits<ScalarType>::magnitudeType* c,
346 ScalarType* s) const
347 {
348 typedef ScalarTraits<ScalarType> STS;
349 typedef typename STS::magnitudeType MagnitudeType;
350 typedef ScalarTraits<MagnitudeType> STM;
351
352 // This is a straightforward translation into C++ of the
353 // reference BLAS' implementation of ZROTG. You can get
354 // the Fortran 77 source code of ZROTG here:
355 //
356 // http://www.netlib.org/blas/zrotg.f
357 //
358 // I used the following rules to translate Fortran types and
359 // intrinsic functions into C++:
360 //
361 // DOUBLE PRECISION -> MagnitudeType
362 // DOUBLE COMPLEX -> ScalarType
363 // CDABS -> STS::magnitude
364 // DCMPLX -> ScalarType constructor (assuming that ScalarType
365 // is std::complex<MagnitudeType>)
366 // DCONJG -> STS::conjugate
367 // DSQRT -> STM::squareroot
368 ScalarType alpha;
369 MagnitudeType norm, scale;
370
371 if (STS::magnitude (*ca) == STM::zero()) {
372 *c = STM::zero();
373 *s = STS::one();
374 *ca = *cb;
375 } else {
376 scale = STS::magnitude (*ca) + STS::magnitude (*cb);
377 { // I introduced temporaries into the translated BLAS code in
378 // order to make the expression easier to read and also save a
379 // few floating-point operations.
380 const MagnitudeType ca_scaled =
381 STS::magnitude (*ca / ScalarType(scale, STM::zero()));
382 const MagnitudeType cb_scaled =
383 STS::magnitude (*cb / ScalarType(scale, STM::zero()));
384 norm = scale *
385 STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
386 }
387 alpha = *ca / STS::magnitude (*ca);
388 *c = STS::magnitude (*ca) / norm;
389 *s = alpha * STS::conjugate (*cb) / norm;
390 *ca = alpha * norm;
391 }
392 }
393
394 // Implementation of real-arithmetic specialization.
395 template<typename ScalarType>
396 void
397 GivensRotator<ScalarType, false>::
398 ROTG (ScalarType* da,
399 ScalarType* db,
400 ScalarType* c,
401 ScalarType* s) const
402 {
403 typedef ScalarTraits<ScalarType> STS;
404
405 // This is a straightforward translation into C++ of the
406 // reference BLAS' implementation of DROTG. You can get
407 // the Fortran 77 source code of DROTG here:
408 //
409 // http://www.netlib.org/blas/drotg.f
410 //
411 // I used the following rules to translate Fortran types and
412 // intrinsic functions into C++:
413 //
414 // DOUBLE PRECISION -> ScalarType
415 // DABS -> STS::magnitude
416 // DSQRT -> STM::squareroot
417 // DSIGN -> SIGN (see below)
418 //
419 // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of
420 // the Fortran type-generic SIGN intrinsic) required special
421 // translation, which we did in a separate utility function in
422 // the specializaton of GivensRotator for real arithmetic.
423 // (ROTG for complex arithmetic doesn't require this function.)
424 // C99 provides a copysign() math library function, but we are
425 // not able to rely on the existence of C99 functions here.
426 ScalarType r, roe, scale, z;
427
428 roe = *db;
429 if (STS::magnitude (*da) > STS::magnitude (*db)) {
430 roe = *da;
431 }
432 scale = STS::magnitude (*da) + STS::magnitude (*db);
433 if (scale == STS::zero()) {
434 *c = STS::one();
435 *s = STS::zero();
436 r = STS::zero();
437 z = STS::zero();
438 } else {
439 // I introduced temporaries into the translated BLAS code in
440 // order to make the expression easier to read and also save
441 // a few floating-point& operations.
442 const ScalarType da_scaled = *da / scale;
443 const ScalarType db_scaled = *db / scale;
444 r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
445 r = SIGN (STS::one(), roe) * r;
446 *c = *da / r;
447 *s = *db / r;
448 z = STS::one();
449 if (STS::magnitude (*da) > STS::magnitude (*db)) {
450 z = *s;
451 }
452 if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
453 z = STS::one() / *c;
454 }
455 }
456
457 *da = r;
458 *db = z;
459 }
460
461 // Real-valued implementation of MagValue
462 template<typename ScalarType>
463 void
464 MagValue<ScalarType, false>::
465 blas_dabs1(const ScalarType* a, ScalarType* ret) const
466 {
468 }
469
470 // Complex-valued implementation of MagValue
471 template<typename ScalarType>
472 void
473 MagValue<ScalarType, true>::
474 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const
475 {
476 *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real());
477 *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag());
478 }
479
480 } // namespace details
481
482 template<typename OrdinalType, typename ScalarType>
483 void
486 ScalarType* db,
487 rotg_c_type* c,
488 ScalarType* s) const
489 {
490 details::GivensRotator<ScalarType> rotator;
491 rotator.ROTG (da, db, c, s);
492 }
493
494 template<typename OrdinalType, typename ScalarType>
496 {
499 if (n <= 0) return;
500 if (incx==1 && incy==1) {
501 for(OrdinalType i=0; i<n; ++i) {
502 ScalarType temp = *c*dx[i] + *s*dy[i];
503 dy[i] = *c*dy[i] - sconj*dx[i];
504 dx[i] = temp;
505 }
506 }
507 else {
508 OrdinalType ix = 0, iy = 0;
509 if (incx < izero) ix = (-n+1)*incx;
510 if (incy < izero) iy = (-n+1)*incy;
511 for(OrdinalType i=0; i<n; ++i) {
512 ScalarType temp = *c*dx[ix] + *s*dy[iy];
513 dy[iy] = *c*dy[iy] - sconj*dx[ix];
514 dx[ix] = temp;
515 ix += incx;
516 iy += incy;
517 }
518 }
519 }
520
521 template<typename OrdinalType, typename ScalarType>
523 {
527
528 if ( n < ione || incx < ione )
529 return;
530
531 // Scale the vector.
532 for(i = izero; i < n; i++)
533 {
534 x[ix] *= alpha;
535 ix += incx;
536 }
537 } /* end SCAL */
538
539 template<typename OrdinalType, typename ScalarType>
541 {
545 if ( n > izero ) {
546 // Set the initial indices (ix, iy).
547 if (incx < izero) { ix = (-n+ione)*incx; }
548 if (incy < izero) { iy = (-n+ione)*incy; }
549
550 for(i = izero; i < n; i++)
551 {
552 y[iy] = x[ix];
553 ix += incx;
554 iy += incy;
555 }
556 }
557 } /* end COPY */
558
559 template<typename OrdinalType, typename ScalarType>
560 template <typename alpha_type, typename x_type>
562 {
567 {
568 // Set the initial indices (ix, iy).
569 if (incx < izero) { ix = (-n+ione)*incx; }
570 if (incy < izero) { iy = (-n+ione)*incy; }
571
572 for(i = izero; i < n; i++)
573 {
574 y[iy] += alpha * x[ix];
575 ix += incx;
576 iy += incy;
577 }
578 }
579 } /* end AXPY */
580
581 template<typename OrdinalType, typename ScalarType>
583 {
589
590 if ( n < ione || incx < ione )
591 return result;
592
593 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
594 for (i = izero; i < n; i++)
595 {
596 mval.blas_dabs1( &x[ix], &temp );
597 result += temp;
598 ix += incx;
599 }
600
601 return result;
602 } /* end ASUM */
603
604 template<typename OrdinalType, typename ScalarType>
605 template <typename x_type, typename y_type>
607 {
612 if( n > izero )
613 {
614 // Set the initial indices (ix,iy).
615 if (incx < izero) { ix = (-n+ione)*incx; }
616 if (incy < izero) { iy = (-n+ione)*incy; }
617
618 for(i = izero; i < n; i++)
619 {
621 ix += incx;
622 iy += incy;
623 }
624 }
625 return result;
626 } /* end DOT */
627
628 template<typename OrdinalType, typename ScalarType>
648
649 template<typename OrdinalType, typename ScalarType>
651 {
659
660 if ( n < ione || incx < ione )
661 return result;
662
663 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
664
665 mval.blas_dabs1( &x[ix], &maxval );
666 ix += incx;
667 for(i = ione; i < n; i++)
668 {
669 mval.blas_dabs1( &x[ix], &curval );
670 if(curval > maxval)
671 {
672 result = i;
673 maxval = curval;
674 }
675 ix += incx;
676 }
677
678 return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
679 } /* end IAMAX */
680
681//------------------------------------------------------------------------------------------
682// LEVEL 2 BLAS ROUTINES
683//------------------------------------------------------------------------------------------
684 template<typename OrdinalType, typename ScalarType>
685 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
687 {
695 bool noConj = true;
696 bool BadArgument = false;
697
698 // Quick return if there is nothing to do!
699 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
700
701 // Otherwise, we need to check the argument list.
702 if( m < izero ) {
703 std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
704 BadArgument = true;
705 }
706 if( n < izero ) {
707 std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
708 BadArgument = true;
709 }
710 if( lda < m ) {
711 std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
712 BadArgument = true;
713 }
714 if( incx == izero ) {
715 std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
716 BadArgument = true;
717 }
718 if( incy == izero ) {
719 std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
720 BadArgument = true;
721 }
722
723 if(!BadArgument) {
724 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
727
728 // Determine the lengths of the vectors x and y.
729 if(ETranspChar[trans] == 'N') {
730 lenx = n;
731 leny = m;
732 } else {
733 lenx = m;
734 leny = n;
735 }
736
737 // Determine if this is a conjugate tranpose
738 noConj = (ETranspChar[trans] == 'T');
739
740 // Set the starting pointers for the vectors x and y if incx/y < 0.
741 if (incx < izero ) { kx = (ione - lenx)*incx; }
742 if (incy < izero ) { ky = (ione - leny)*incy; }
743
744 // Form y = beta*y
745 ix = kx; iy = ky;
746 if(beta != beta_one) {
747 if (incy == ione) {
748 if (beta == beta_zero) {
749 for(i = izero; i < leny; i++) { y[i] = y_zero; }
750 } else {
751 for(i = izero; i < leny; i++) { y[i] *= beta; }
752 }
753 } else {
754 if (beta == beta_zero) {
755 for(i = izero; i < leny; i++) {
756 y[iy] = y_zero;
757 iy += incy;
758 }
759 } else {
760 for(i = izero; i < leny; i++) {
761 y[iy] *= beta;
762 iy += incy;
763 }
764 }
765 }
766 }
767
768 // Return if we don't have to do anything more.
769 if(alpha == alpha_zero) { return; }
770
771 if( ETranspChar[trans] == 'N' ) {
772 // Form y = alpha*A*y
773 jx = kx;
774 if (incy == ione) {
775 for(j = izero; j < n; j++) {
776 if (x[jx] != x_zero) {
777 temp = alpha*x[jx];
778 for(i = izero; i < m; i++) {
779 y[i] += temp*A[j*lda + i];
780 }
781 }
782 jx += incx;
783 }
784 } else {
785 for(j = izero; j < n; j++) {
786 if (x[jx] != x_zero) {
787 temp = alpha*x[jx];
788 iy = ky;
789 for(i = izero; i < m; i++) {
790 y[iy] += temp*A[j*lda + i];
791 iy += incy;
792 }
793 }
794 jx += incx;
795 }
796 }
797 } else {
798 jy = ky;
799 if (incx == ione) {
800 for(j = izero; j < n; j++) {
801 temp = y_zero;
802 if ( noConj ) {
803 for(i = izero; i < m; i++) {
804 temp += A[j*lda + i]*x[i];
805 }
806 } else {
807 for(i = izero; i < m; i++) {
809 }
810 }
811 y[jy] += alpha*temp;
812 jy += incy;
813 }
814 } else {
815 for(j = izero; j < n; j++) {
816 temp = y_zero;
817 ix = kx;
818 if ( noConj ) {
819 for (i = izero; i < m; i++) {
820 temp += A[j*lda + i]*x[ix];
821 ix += incx;
822 }
823 } else {
824 for (i = izero; i < m; i++) {
826 ix += incx;
827 }
828 }
829 y[jy] += alpha*temp;
830 jy += incy;
831 }
832 }
833 }
834 } /* if (!BadArgument) */
835 } /* end GEMV */
836
837 template<typename OrdinalType, typename ScalarType>
838 template <typename A_type>
840 {
844 bool BadArgument = false;
845 bool noConj = true;
846
847 // Quick return if there is nothing to do!
848 if( n == izero ){ return; }
849
850 // Otherwise, we need to check the argument list.
851 if( n < izero ) {
852 std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
853 BadArgument = true;
854 }
855 if( lda < n ) {
856 std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
857 BadArgument = true;
858 }
859 if( incx == izero ) {
860 std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
861 BadArgument = true;
862 }
863
864 if(!BadArgument) {
865 OrdinalType i, j, ix, jx, kx = izero;
867 bool noUnit = (EDiagChar[diag] == 'N');
868
869 // Determine if this is a conjugate tranpose
870 noConj = (ETranspChar[trans] == 'T');
871
872 // Set the starting pointer for the vector x if incx < 0.
873 if (incx < izero) { kx = (-n+ione)*incx; }
874
875 // Start the operations for a nontransposed triangular matrix
876 if (ETranspChar[trans] == 'N') {
877 /* Compute x = A*x */
878 if (EUploChar[uplo] == 'U') {
879 /* A is an upper triangular matrix */
880 if (incx == ione) {
881 for (j=izero; j<n; j++) {
882 if (x[j] != zero) {
883 temp = x[j];
884 for (i=izero; i<j; i++) {
885 x[i] += temp*A[j*lda + i];
886 }
887 if ( noUnit )
888 x[j] *= A[j*lda + j];
889 }
890 }
891 } else {
892 jx = kx;
893 for (j=izero; j<n; j++) {
894 if (x[jx] != zero) {
895 temp = x[jx];
896 ix = kx;
897 for (i=izero; i<j; i++) {
898 x[ix] += temp*A[j*lda + i];
899 ix += incx;
900 }
901 if ( noUnit )
902 x[jx] *= A[j*lda + j];
903 }
904 jx += incx;
905 }
906 } /* if (incx == ione) */
907 } else { /* A is a lower triangular matrix */
908 if (incx == ione) {
909 for (j=n-ione; j>-ione; j--) {
910 if (x[j] != zero) {
911 temp = x[j];
912 for (i=n-ione; i>j; i--) {
913 x[i] += temp*A[j*lda + i];
914 }
915 if ( noUnit )
916 x[j] *= A[j*lda + j];
917 }
918 }
919 } else {
920 kx += (n-ione)*incx;
921 jx = kx;
922 for (j=n-ione; j>-ione; j--) {
923 if (x[jx] != zero) {
924 temp = x[jx];
925 ix = kx;
926 for (i=n-ione; i>j; i--) {
927 x[ix] += temp*A[j*lda + i];
928 ix -= incx;
929 }
930 if ( noUnit )
931 x[jx] *= A[j*lda + j];
932 }
933 jx -= incx;
934 }
935 }
936 } /* if (EUploChar[uplo]=='U') */
937 } else { /* A is transposed/conjugated */
938 /* Compute x = A'*x */
939 if (EUploChar[uplo]=='U') {
940 /* A is an upper triangular matrix */
941 if (incx == ione) {
942 for (j=n-ione; j>-ione; j--) {
943 temp = x[j];
944 if ( noConj ) {
945 if ( noUnit )
946 temp *= A[j*lda + j];
947 for (i=j-ione; i>-ione; i--) {
948 temp += A[j*lda + i]*x[i];
949 }
950 } else {
951 if ( noUnit )
953 for (i=j-ione; i>-ione; i--) {
955 }
956 }
957 x[j] = temp;
958 }
959 } else {
960 jx = kx + (n-ione)*incx;
961 for (j=n-ione; j>-ione; j--) {
962 temp = x[jx];
963 ix = jx;
964 if ( noConj ) {
965 if ( noUnit )
966 temp *= A[j*lda + j];
967 for (i=j-ione; i>-ione; i--) {
968 ix -= incx;
969 temp += A[j*lda + i]*x[ix];
970 }
971 } else {
972 if ( noUnit )
974 for (i=j-ione; i>-ione; i--) {
975 ix -= incx;
977 }
978 }
979 x[jx] = temp;
980 jx -= incx;
981 }
982 }
983 } else {
984 /* A is a lower triangular matrix */
985 if (incx == ione) {
986 for (j=izero; j<n; j++) {
987 temp = x[j];
988 if ( noConj ) {
989 if ( noUnit )
990 temp *= A[j*lda + j];
991 for (i=j+ione; i<n; i++) {
992 temp += A[j*lda + i]*x[i];
993 }
994 } else {
995 if ( noUnit )
997 for (i=j+ione; i<n; i++) {
999 }
1000 }
1001 x[j] = temp;
1002 }
1003 } else {
1004 jx = kx;
1005 for (j=izero; j<n; j++) {
1006 temp = x[jx];
1007 ix = jx;
1008 if ( noConj ) {
1009 if ( noUnit )
1010 temp *= A[j*lda + j];
1011 for (i=j+ione; i<n; i++) {
1012 ix += incx;
1013 temp += A[j*lda + i]*x[ix];
1014 }
1015 } else {
1016 if ( noUnit )
1018 for (i=j+ione; i<n; i++) {
1019 ix += incx;
1021 }
1022 }
1023 x[jx] = temp;
1024 jx += incx;
1025 }
1026 }
1027 } /* if (EUploChar[uplo]=='U') */
1028 } /* if (ETranspChar[trans]=='N') */
1029 } /* if (!BadArgument) */
1030 } /* end TRMV */
1031
1032 template<typename OrdinalType, typename ScalarType>
1033 template <typename alpha_type, typename x_type, typename y_type>
1035 {
1040 bool BadArgument = false;
1041
1042 // Quick return if there is nothing to do!
1043 if( m == izero || n == izero || alpha == alpha_zero ){ return; }
1044
1045 // Otherwise, we need to check the argument list.
1046 if( m < izero ) {
1047 std::cout << "BLAS::GER Error: M == " << m << std::endl;
1048 BadArgument = true;
1049 }
1050 if( n < izero ) {
1051 std::cout << "BLAS::GER Error: N == " << n << std::endl;
1052 BadArgument = true;
1053 }
1054 if( lda < m ) {
1055 std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1056 BadArgument = true;
1057 }
1058 if( incx == 0 ) {
1059 std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
1060 BadArgument = true;
1061 }
1062 if( incy == 0 ) {
1063 std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
1064 BadArgument = true;
1065 }
1066
1067 if(!BadArgument) {
1068 OrdinalType i, j, ix, jy = izero, kx = izero;
1070
1071 // Set the starting pointers for the vectors x and y if incx/y < 0.
1072 if (incx < izero) { kx = (-m+ione)*incx; }
1073 if (incy < izero) { jy = (-n+ione)*incy; }
1074
1075 // Start the operations for incx == 1
1076 if( incx == ione ) {
1077 for( j=izero; j<n; j++ ) {
1078 if ( y[jy] != y_zero ) {
1079 temp = alpha*y[jy];
1080 for ( i=izero; i<m; i++ ) {
1081 A[j*lda + i] += x[i]*temp;
1082 }
1083 }
1084 jy += incy;
1085 }
1086 }
1087 else { // Start the operations for incx != 1
1088 for( j=izero; j<n; j++ ) {
1089 if ( y[jy] != y_zero ) {
1090 temp = alpha*y[jy];
1091 ix = kx;
1092 for( i=izero; i<m; i++ ) {
1093 A[j*lda + i] += x[ix]*temp;
1094 ix += incx;
1095 }
1096 }
1097 jy += incy;
1098 }
1099 }
1100 } /* if(!BadArgument) */
1101 } /* end GER */
1102
1103//------------------------------------------------------------------------------------------
1104// LEVEL 3 BLAS ROUTINES
1105//------------------------------------------------------------------------------------------
1106
1107 template<typename OrdinalType, typename ScalarType>
1108 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1110 {
1111
1118 OrdinalType i, j, p;
1119 OrdinalType NRowA = m, NRowB = k;
1121 bool BadArgument = false;
1122 bool conjA = false, conjB = false;
1123
1124 // Change dimensions of matrix if either matrix is transposed
1125 if( !(ETranspChar[transa]=='N') ) {
1126 NRowA = k;
1127 }
1128 if( !(ETranspChar[transb]=='N') ) {
1129 NRowB = n;
1130 }
1131
1132 // Quick return if there is nothing to do!
1133 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
1134 if( m < izero ) {
1135 std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
1136 BadArgument = true;
1137 }
1138 if( n < izero ) {
1139 std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
1140 BadArgument = true;
1141 }
1142 if( k < izero ) {
1143 std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
1144 BadArgument = true;
1145 }
1146 if( lda < NRowA ) {
1147 std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1148 BadArgument = true;
1149 }
1150 if( ldb < NRowB ) {
1151 std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1152 BadArgument = true;
1153 }
1154 if( ldc < m ) {
1155 std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1156 BadArgument = true;
1157 }
1158
1159 if(!BadArgument) {
1160
1161 // Determine if this is a conjugate tranpose
1162 conjA = (ETranspChar[transa] == 'C');
1163 conjB = (ETranspChar[transb] == 'C');
1164
1165 // Only need to scale the resulting matrix C.
1166 if( alpha == alpha_zero ) {
1167 if( beta == beta_zero ) {
1168 for (j=izero; j<n; j++) {
1169 for (i=izero; i<m; i++) {
1170 C[j*ldc + i] = C_zero;
1171 }
1172 }
1173 } else {
1174 for (j=izero; j<n; j++) {
1175 for (i=izero; i<m; i++) {
1176 C[j*ldc + i] *= beta;
1177 }
1178 }
1179 }
1180 return;
1181 }
1182 //
1183 // Now start the operations.
1184 //
1185 if ( ETranspChar[transb]=='N' ) {
1186 if ( ETranspChar[transa]=='N' ) {
1187 // Compute C = alpha*A*B + beta*C
1188 for (j=izero; j<n; j++) {
1189 if( beta == beta_zero ) {
1190 for (i=izero; i<m; i++){
1191 C[j*ldc + i] = C_zero;
1192 }
1193 } else if( beta != beta_one ) {
1194 for (i=izero; i<m; i++){
1195 C[j*ldc + i] *= beta;
1196 }
1197 }
1198 for (p=izero; p<k; p++){
1199 if (B[j*ldb + p] != B_zero ){
1200 temp = alpha*B[j*ldb + p];
1201 for (i=izero; i<m; i++) {
1202 C[j*ldc + i] += temp*A[p*lda + i];
1203 }
1204 }
1205 }
1206 }
1207 } else if ( conjA ) {
1208 // Compute C = alpha*conj(A')*B + beta*C
1209 for (j=izero; j<n; j++) {
1210 for (i=izero; i<m; i++) {
1211 temp = C_zero;
1212 for (p=izero; p<k; p++) {
1214 }
1215 if (beta == beta_zero) {
1216 C[j*ldc + i] = alpha*temp;
1217 } else {
1218 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1219 }
1220 }
1221 }
1222 } else {
1223 // Compute C = alpha*A'*B + beta*C
1224 for (j=izero; j<n; j++) {
1225 for (i=izero; i<m; i++) {
1226 temp = C_zero;
1227 for (p=izero; p<k; p++) {
1228 temp += A[i*lda + p]*B[j*ldb + p];
1229 }
1230 if (beta == beta_zero) {
1231 C[j*ldc + i] = alpha*temp;
1232 } else {
1233 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1234 }
1235 }
1236 }
1237 }
1238 } else if ( ETranspChar[transa]=='N' ) {
1239 if ( conjB ) {
1240 // Compute C = alpha*A*conj(B') + beta*C
1241 for (j=izero; j<n; j++) {
1242 if (beta == beta_zero) {
1243 for (i=izero; i<m; i++) {
1244 C[j*ldc + i] = C_zero;
1245 }
1246 } else if ( beta != beta_one ) {
1247 for (i=izero; i<m; i++) {
1248 C[j*ldc + i] *= beta;
1249 }
1250 }
1251 for (p=izero; p<k; p++) {
1252 if (B[p*ldb + j] != B_zero) {
1254 for (i=izero; i<m; i++) {
1255 C[j*ldc + i] += temp*A[p*lda + i];
1256 }
1257 }
1258 }
1259 }
1260 } else {
1261 // Compute C = alpha*A*B' + beta*C
1262 for (j=izero; j<n; j++) {
1263 if (beta == beta_zero) {
1264 for (i=izero; i<m; i++) {
1265 C[j*ldc + i] = C_zero;
1266 }
1267 } else if ( beta != beta_one ) {
1268 for (i=izero; i<m; i++) {
1269 C[j*ldc + i] *= beta;
1270 }
1271 }
1272 for (p=izero; p<k; p++) {
1273 if (B[p*ldb + j] != B_zero) {
1274 temp = alpha*B[p*ldb + j];
1275 for (i=izero; i<m; i++) {
1276 C[j*ldc + i] += temp*A[p*lda + i];
1277 }
1278 }
1279 }
1280 }
1281 }
1282 } else if ( conjA ) {
1283 if ( conjB ) {
1284 // Compute C = alpha*conj(A')*conj(B') + beta*C
1285 for (j=izero; j<n; j++) {
1286 for (i=izero; i<m; i++) {
1287 temp = C_zero;
1288 for (p=izero; p<k; p++) {
1291 }
1292 if (beta == beta_zero) {
1293 C[j*ldc + i] = alpha*temp;
1294 } else {
1295 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1296 }
1297 }
1298 }
1299 } else {
1300 // Compute C = alpha*conj(A')*B' + beta*C
1301 for (j=izero; j<n; j++) {
1302 for (i=izero; i<m; i++) {
1303 temp = C_zero;
1304 for (p=izero; p<k; p++) {
1306 * B[p*ldb + j];
1307 }
1308 if (beta == beta_zero) {
1309 C[j*ldc + i] = alpha*temp;
1310 } else {
1311 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1312 }
1313 }
1314 }
1315 }
1316 } else {
1317 if ( conjB ) {
1318 // Compute C = alpha*A'*conj(B') + beta*C
1319 for (j=izero; j<n; j++) {
1320 for (i=izero; i<m; i++) {
1321 temp = C_zero;
1322 for (p=izero; p<k; p++) {
1323 temp += A[i*lda + p]
1325 }
1326 if (beta == beta_zero) {
1327 C[j*ldc + i] = alpha*temp;
1328 } else {
1329 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1330 }
1331 }
1332 }
1333 } else {
1334 // Compute C = alpha*A'*B' + beta*C
1335 for (j=izero; j<n; j++) {
1336 for (i=izero; i<m; i++) {
1337 temp = C_zero;
1338 for (p=izero; p<k; p++) {
1339 temp += A[i*lda + p]*B[p*ldb + j];
1340 }
1341 if (beta == beta_zero) {
1342 C[j*ldc + i] = alpha*temp;
1343 } else {
1344 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1345 }
1346 }
1347 }
1348 } // end if (ETranspChar[transa]=='N') ...
1349 } // end if (ETranspChar[transb]=='N') ...
1350 } // end if (!BadArgument) ...
1351 } // end of GEMM
1352
1353
1354 template<typename OrdinalType, typename ScalarType>
1356 SWAP (const OrdinalType& n, ScalarType* const x, const OrdinalType& incx,
1357 ScalarType* const y, const OrdinalType& incy) const
1358 {
1359 if (n <= 0) {
1360 return;
1361 }
1362
1363 if (incx == 1 && incy == 1) {
1364 for (int i = 0; i < n; ++i) {
1365 ScalarType tmp = x[i];
1366 x[i] = y[i];
1367 y[i] = tmp;
1368 }
1369 return;
1370 }
1371
1372 int ix = 1;
1373 int iy = 1;
1374 if (incx < 0) {
1375 ix = (1-n) * incx + 1;
1376 }
1377 if (incy < 0) {
1378 iy = (1-n) * incy + 1;
1379 }
1380
1381 for (int i = 1; i <= n; ++i) {
1382 ScalarType tmp = x[ix - 1];
1383 x[ix - 1] = y[iy - 1];
1384 y[iy - 1] = tmp;
1385 ix += incx;
1386 iy += incy;
1387 }
1388 }
1389
1390
1391 template<typename OrdinalType, typename ScalarType>
1392 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1394 {
1401 OrdinalType i, j, k, NRowA = m;
1403 bool BadArgument = false;
1404 bool Upper = (EUploChar[uplo] == 'U');
1405 if (ESideChar[side] == 'R') { NRowA = n; }
1406
1407 // Quick return.
1408 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
1409 if( m < izero ) {
1410 std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
1411 BadArgument = true; }
1412 if( n < izero ) {
1413 std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
1414 BadArgument = true; }
1415 if( lda < NRowA ) {
1416 std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1417 BadArgument = true; }
1418 if( ldb < m ) {
1419 std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1420 BadArgument = true; }
1421 if( ldc < m ) {
1422 std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1423 BadArgument = true; }
1424
1425 if(!BadArgument) {
1426
1427 // Only need to scale C and return.
1428 if (alpha == alpha_zero) {
1429 if (beta == beta_zero ) {
1430 for (j=izero; j<n; j++) {
1431 for (i=izero; i<m; i++) {
1432 C[j*ldc + i] = C_zero;
1433 }
1434 }
1435 } else {
1436 for (j=izero; j<n; j++) {
1437 for (i=izero; i<m; i++) {
1438 C[j*ldc + i] *= beta;
1439 }
1440 }
1441 }
1442 return;
1443 }
1444
1445 if ( ESideChar[side] == 'L') {
1446 // Compute C = alpha*A*B + beta*C
1447
1448 if (Upper) {
1449 // The symmetric part of A is stored in the upper triangular part of the matrix.
1450 for (j=izero; j<n; j++) {
1451 for (i=izero; i<m; i++) {
1452 temp1 = alpha*B[j*ldb + i];
1453 temp2 = C_zero;
1454 for (k=izero; k<i; k++) {
1455 C[j*ldc + k] += temp1*A[i*lda + k];
1456 temp2 += B[j*ldb + k]*A[i*lda + k];
1457 }
1458 if (beta == beta_zero) {
1459 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1460 } else {
1461 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1462 }
1463 }
1464 }
1465 } else {
1466 // The symmetric part of A is stored in the lower triangular part of the matrix.
1467 for (j=izero; j<n; j++) {
1468 for (i=m-ione; i>-ione; i--) {
1469 temp1 = alpha*B[j*ldb + i];
1470 temp2 = C_zero;
1471 for (k=i+ione; k<m; k++) {
1472 C[j*ldc + k] += temp1*A[i*lda + k];
1473 temp2 += B[j*ldb + k]*A[i*lda + k];
1474 }
1475 if (beta == beta_zero) {
1476 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1477 } else {
1478 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1479 }
1480 }
1481 }
1482 }
1483 } else {
1484 // Compute C = alpha*B*A + beta*C.
1485 for (j=izero; j<n; j++) {
1486 temp1 = alpha*A[j*lda + j];
1487 if (beta == beta_zero) {
1488 for (i=izero; i<m; i++) {
1489 C[j*ldc + i] = temp1*B[j*ldb + i];
1490 }
1491 } else {
1492 for (i=izero; i<m; i++) {
1493 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1494 }
1495 }
1496 for (k=izero; k<j; k++) {
1497 if (Upper) {
1498 temp1 = alpha*A[j*lda + k];
1499 } else {
1500 temp1 = alpha*A[k*lda + j];
1501 }
1502 for (i=izero; i<m; i++) {
1503 C[j*ldc + i] += temp1*B[k*ldb + i];
1504 }
1505 }
1506 for (k=j+ione; k<n; k++) {
1507 if (Upper) {
1508 temp1 = alpha*A[k*lda + j];
1509 } else {
1510 temp1 = alpha*A[j*lda + k];
1511 }
1512 for (i=izero; i<m; i++) {
1513 C[j*ldc + i] += temp1*B[k*ldb + i];
1514 }
1515 }
1516 }
1517 } // end if (ESideChar[side]=='L') ...
1518 } // end if(!BadArgument) ...
1519} // end SYMM
1520
1521 template<typename OrdinalType, typename ScalarType>
1522 template <typename alpha_type, typename A_type, typename beta_type>
1524 {
1527
1534 OrdinalType i, j, l, NRowA = n;
1535 bool BadArgument = false;
1536 bool Upper = (EUploChar[uplo] == 'U');
1537
1540 && (trans == CONJ_TRANS),
1541 std::logic_error,
1542 "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
1543 " does not support CONJ_TRANS for complex data types."
1544 );
1545
1546 // Change dimensions of A matrix is transposed
1547 if( !(ETranspChar[trans]=='N') ) {
1548 NRowA = k;
1549 }
1550
1551 // Quick return.
1552 if ( n==izero ) { return; }
1553 if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) { return; }
1554 if( n < izero ) {
1555 std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
1556 BadArgument = true; }
1557 if( k < izero ) {
1558 std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
1559 BadArgument = true; }
1560 if( lda < NRowA ) {
1561 std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1562 BadArgument = true; }
1563 if( ldc < n ) {
1564 std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1565 BadArgument = true; }
1566
1567 if(!BadArgument) {
1568
1569 // Scale C when alpha is zero
1570 if (alpha == alpha_zero) {
1571 if (Upper) {
1572 if (beta==beta_zero) {
1573 for (j=izero; j<n; j++) {
1574 for (i=izero; i<=j; i++) {
1575 C[j*ldc + i] = C_zero;
1576 }
1577 }
1578 }
1579 else {
1580 for (j=izero; j<n; j++) {
1581 for (i=izero; i<=j; i++) {
1582 C[j*ldc + i] *= beta;
1583 }
1584 }
1585 }
1586 }
1587 else {
1588 if (beta==beta_zero) {
1589 for (j=izero; j<n; j++) {
1590 for (i=j; i<n; i++) {
1591 C[j*ldc + i] = C_zero;
1592 }
1593 }
1594 }
1595 else {
1596 for (j=izero; j<n; j++) {
1597 for (i=j; i<n; i++) {
1598 C[j*ldc + i] *= beta;
1599 }
1600 }
1601 }
1602 }
1603 return;
1604 }
1605
1606 // Now we can start the computation
1607
1608 if ( ETranspChar[trans]=='N' ) {
1609
1610 // Form C <- alpha*A*A' + beta*C
1611 if (Upper) {
1612 for (j=izero; j<n; j++) {
1613 if (beta==beta_zero) {
1614 for (i=izero; i<=j; i++) {
1615 C[j*ldc + i] = C_zero;
1616 }
1617 }
1618 else if (beta!=beta_one) {
1619 for (i=izero; i<=j; i++) {
1620 C[j*ldc + i] *= beta;
1621 }
1622 }
1623 for (l=izero; l<k; l++) {
1624 if (A[l*lda + j] != A_zero) {
1625 temp = alpha*A[l*lda + j];
1626 for (i = izero; i <=j; i++) {
1627 C[j*ldc + i] += temp*A[l*lda + i];
1628 }
1629 }
1630 }
1631 }
1632 }
1633 else {
1634 for (j=izero; j<n; j++) {
1635 if (beta==beta_zero) {
1636 for (i=j; i<n; i++) {
1637 C[j*ldc + i] = C_zero;
1638 }
1639 }
1640 else if (beta!=beta_one) {
1641 for (i=j; i<n; i++) {
1642 C[j*ldc + i] *= beta;
1643 }
1644 }
1645 for (l=izero; l<k; l++) {
1646 if (A[l*lda + j] != A_zero) {
1647 temp = alpha*A[l*lda + j];
1648 for (i=j; i<n; i++) {
1649 C[j*ldc + i] += temp*A[l*lda + i];
1650 }
1651 }
1652 }
1653 }
1654 }
1655 }
1656 else {
1657
1658 // Form C <- alpha*A'*A + beta*C
1659 if (Upper) {
1660 for (j=izero; j<n; j++) {
1661 for (i=izero; i<=j; i++) {
1662 temp = A_zero;
1663 for (l=izero; l<k; l++) {
1664 temp += A[i*lda + l]*A[j*lda + l];
1665 }
1666 if (beta==beta_zero) {
1667 C[j*ldc + i] = alpha*temp;
1668 }
1669 else {
1670 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1671 }
1672 }
1673 }
1674 }
1675 else {
1676 for (j=izero; j<n; j++) {
1677 for (i=j; i<n; i++) {
1678 temp = A_zero;
1679 for (l=izero; l<k; ++l) {
1680 temp += A[i*lda + l]*A[j*lda + l];
1681 }
1682 if (beta==beta_zero) {
1683 C[j*ldc + i] = alpha*temp;
1684 }
1685 else {
1686 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1687 }
1688 }
1689 }
1690 }
1691 }
1692 } /* if (!BadArgument) */
1693 } /* END SYRK */
1694
1695 template<typename OrdinalType, typename ScalarType>
1696 template <typename alpha_type, typename A_type>
1698 {
1705 OrdinalType i, j, k, NRowA = m;
1707 bool BadArgument = false;
1708 bool LSide = (ESideChar[side] == 'L');
1709 bool noUnit = (EDiagChar[diag] == 'N');
1710 bool Upper = (EUploChar[uplo] == 'U');
1711 bool noConj = (ETranspChar[transa] == 'T');
1712
1713 if(!LSide) { NRowA = n; }
1714
1715 // Quick return.
1716 if (n==izero || m==izero) { return; }
1717 if( m < izero ) {
1718 std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
1719 BadArgument = true; }
1720 if( n < izero ) {
1721 std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
1722 BadArgument = true; }
1723 if( lda < NRowA ) {
1724 std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1725 BadArgument = true; }
1726 if( ldb < m ) {
1727 std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1728 BadArgument = true; }
1729
1730 if(!BadArgument) {
1731
1732 // B only needs to be zeroed out.
1733 if( alpha == alpha_zero ) {
1734 for( j=izero; j<n; j++ ) {
1735 for( i=izero; i<m; i++ ) {
1736 B[j*ldb + i] = B_zero;
1737 }
1738 }
1739 return;
1740 }
1741
1742 // Start the computations.
1743 if ( LSide ) {
1744 // A is on the left side of B.
1745
1746 if ( ETranspChar[transa]=='N' ) {
1747 // Compute B = alpha*A*B
1748
1749 if ( Upper ) {
1750 // A is upper triangular
1751 for( j=izero; j<n; j++ ) {
1752 for( k=izero; k<m; k++) {
1753 if ( B[j*ldb + k] != B_zero ) {
1754 temp = alpha*B[j*ldb + k];
1755 for( i=izero; i<k; i++ ) {
1756 B[j*ldb + i] += temp*A[k*lda + i];
1757 }
1758 if ( noUnit )
1759 temp *=A[k*lda + k];
1760 B[j*ldb + k] = temp;
1761 }
1762 }
1763 }
1764 } else {
1765 // A is lower triangular
1766 for( j=izero; j<n; j++ ) {
1767 for( k=m-ione; k>-ione; k-- ) {
1768 if( B[j*ldb + k] != B_zero ) {
1769 temp = alpha*B[j*ldb + k];
1770 B[j*ldb + k] = temp;
1771 if ( noUnit )
1772 B[j*ldb + k] *= A[k*lda + k];
1773 for( i=k+ione; i<m; i++ ) {
1774 B[j*ldb + i] += temp*A[k*lda + i];
1775 }
1776 }
1777 }
1778 }
1779 }
1780 } else {
1781 // Compute B = alpha*A'*B or B = alpha*conj(A')*B
1782 if( Upper ) {
1783 for( j=izero; j<n; j++ ) {
1784 for( i=m-ione; i>-ione; i-- ) {
1785 temp = B[j*ldb + i];
1786 if ( noConj ) {
1787 if( noUnit )
1788 temp *= A[i*lda + i];
1789 for( k=izero; k<i; k++ ) {
1790 temp += A[i*lda + k]*B[j*ldb + k];
1791 }
1792 } else {
1793 if( noUnit )
1795 for( k=izero; k<i; k++ ) {
1797 }
1798 }
1799 B[j*ldb + i] = alpha*temp;
1800 }
1801 }
1802 } else {
1803 for( j=izero; j<n; j++ ) {
1804 for( i=izero; i<m; i++ ) {
1805 temp = B[j*ldb + i];
1806 if ( noConj ) {
1807 if( noUnit )
1808 temp *= A[i*lda + i];
1809 for( k=i+ione; k<m; k++ ) {
1810 temp += A[i*lda + k]*B[j*ldb + k];
1811 }
1812 } else {
1813 if( noUnit )
1815 for( k=i+ione; k<m; k++ ) {
1817 }
1818 }
1819 B[j*ldb + i] = alpha*temp;
1820 }
1821 }
1822 }
1823 }
1824 } else {
1825 // A is on the right hand side of B.
1826
1827 if( ETranspChar[transa] == 'N' ) {
1828 // Compute B = alpha*B*A
1829
1830 if( Upper ) {
1831 // A is upper triangular.
1832 for( j=n-ione; j>-ione; j-- ) {
1833 temp = alpha;
1834 if( noUnit )
1835 temp *= A[j*lda + j];
1836 for( i=izero; i<m; i++ ) {
1837 B[j*ldb + i] *= temp;
1838 }
1839 for( k=izero; k<j; k++ ) {
1840 if( A[j*lda + k] != A_zero ) {
1841 temp = alpha*A[j*lda + k];
1842 for( i=izero; i<m; i++ ) {
1843 B[j*ldb + i] += temp*B[k*ldb + i];
1844 }
1845 }
1846 }
1847 }
1848 } else {
1849 // A is lower triangular.
1850 for( j=izero; j<n; j++ ) {
1851 temp = alpha;
1852 if( noUnit )
1853 temp *= A[j*lda + j];
1854 for( i=izero; i<m; i++ ) {
1855 B[j*ldb + i] *= temp;
1856 }
1857 for( k=j+ione; k<n; k++ ) {
1858 if( A[j*lda + k] != A_zero ) {
1859 temp = alpha*A[j*lda + k];
1860 for( i=izero; i<m; i++ ) {
1861 B[j*ldb + i] += temp*B[k*ldb + i];
1862 }
1863 }
1864 }
1865 }
1866 }
1867 } else {
1868 // Compute B = alpha*B*A' or B = alpha*B*conj(A')
1869
1870 if( Upper ) {
1871 for( k=izero; k<n; k++ ) {
1872 for( j=izero; j<k; j++ ) {
1873 if( A[k*lda + j] != A_zero ) {
1874 if ( noConj )
1875 temp = alpha*A[k*lda + j];
1876 else
1878 for( i=izero; i<m; i++ ) {
1879 B[j*ldb + i] += temp*B[k*ldb + i];
1880 }
1881 }
1882 }
1883 temp = alpha;
1884 if( noUnit ) {
1885 if ( noConj )
1886 temp *= A[k*lda + k];
1887 else
1889 }
1890 if( temp != one ) {
1891 for( i=izero; i<m; i++ ) {
1892 B[k*ldb + i] *= temp;
1893 }
1894 }
1895 }
1896 } else {
1897 for( k=n-ione; k>-ione; k-- ) {
1898 for( j=k+ione; j<n; j++ ) {
1899 if( A[k*lda + j] != A_zero ) {
1900 if ( noConj )
1901 temp = alpha*A[k*lda + j];
1902 else
1904 for( i=izero; i<m; i++ ) {
1905 B[j*ldb + i] += temp*B[k*ldb + i];
1906 }
1907 }
1908 }
1909 temp = alpha;
1910 if( noUnit ) {
1911 if ( noConj )
1912 temp *= A[k*lda + k];
1913 else
1915 }
1916 if( temp != one ) {
1917 for( i=izero; i<m; i++ ) {
1918 B[k*ldb + i] *= temp;
1919 }
1920 }
1921 }
1922 }
1923 } // end if( ETranspChar[transa] == 'N' ) ...
1924 } // end if ( LSide ) ...
1925 } // end if (!BadArgument)
1926 } // end TRMM
1927
1928 template<typename OrdinalType, typename ScalarType>
1929 template <typename alpha_type, typename A_type>
1931 {
1941 bool BadArgument = false;
1942 bool noUnit = (EDiagChar[diag]=='N');
1943 bool noConj = (ETranspChar[transa] == 'T');
1944
1945 if (!(ESideChar[side] == 'L')) { NRowA = n; }
1946
1947 // Quick return.
1948 if (n == izero || m == izero) { return; }
1949 if( m < izero ) {
1950 std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
1951 BadArgument = true; }
1952 if( n < izero ) {
1953 std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
1954 BadArgument = true; }
1955 if( lda < NRowA ) {
1956 std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1957 BadArgument = true; }
1958 if( ldb < m ) {
1959 std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1960 BadArgument = true; }
1961
1962 if(!BadArgument)
1963 {
1964 int i, j, k;
1965 // Set the solution to the zero vector.
1966 if(alpha == alpha_zero) {
1967 for(j = izero; j < n; j++) {
1968 for( i = izero; i < m; i++) {
1969 B[j*ldb+i] = B_zero;
1970 }
1971 }
1972 }
1973 else
1974 { // Start the operations.
1975 if(ESideChar[side] == 'L') {
1976 //
1977 // Perform computations for OP(A)*X = alpha*B
1978 //
1979 if(ETranspChar[transa] == 'N') {
1980 //
1981 // Compute B = alpha*inv( A )*B
1982 //
1983 if(EUploChar[uplo] == 'U') {
1984 // A is upper triangular.
1985 for(j = izero; j < n; j++) {
1986 // Perform alpha*B if alpha is not 1.
1987 if(alpha != alpha_one) {
1988 for( i = izero; i < m; i++) {
1989 B[j*ldb+i] *= alpha;
1990 }
1991 }
1992 // Perform a backsolve for column j of B.
1993 for(k = (m - ione); k > -ione; k--) {
1994 // If this entry is zero, we don't have to do anything.
1995 if (B[j*ldb + k] != B_zero) {
1996 if ( noUnit ) {
1997 B[j*ldb + k] /= A[k*lda + k];
1998 }
1999 for(i = izero; i < k; i++) {
2000 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2001 }
2002 }
2003 }
2004 }
2005 }
2006 else
2007 { // A is lower triangular.
2008 for(j = izero; j < n; j++) {
2009 // Perform alpha*B if alpha is not 1.
2010 if(alpha != alpha_one) {
2011 for( i = izero; i < m; i++) {
2012 B[j*ldb+i] *= alpha;
2013 }
2014 }
2015 // Perform a forward solve for column j of B.
2016 for(k = izero; k < m; k++) {
2017 // If this entry is zero, we don't have to do anything.
2018 if (B[j*ldb + k] != B_zero) {
2019 if ( noUnit ) {
2020 B[j*ldb + k] /= A[k*lda + k];
2021 }
2022 for(i = k+ione; i < m; i++) {
2023 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2024 }
2025 }
2026 }
2027 }
2028 } // end if (uplo == 'U')
2029 } // if (transa =='N')
2030 else {
2031 //
2032 // Compute B = alpha*inv( A' )*B
2033 // or B = alpha*inv( conj(A') )*B
2034 //
2035 if(EUploChar[uplo] == 'U') {
2036 // A is upper triangular.
2037 for(j = izero; j < n; j++) {
2038 for( i = izero; i < m; i++) {
2039 temp = alpha*B[j*ldb+i];
2040 if ( noConj ) {
2041 for(k = izero; k < i; k++) {
2042 temp -= A[i*lda + k] * B[j*ldb + k];
2043 }
2044 if ( noUnit ) {
2045 temp /= A[i*lda + i];
2046 }
2047 } else {
2048 for(k = izero; k < i; k++) {
2050 * B[j*ldb + k];
2051 }
2052 if ( noUnit ) {
2054 }
2055 }
2056 B[j*ldb + i] = temp;
2057 }
2058 }
2059 }
2060 else
2061 { // A is lower triangular.
2062 for(j = izero; j < n; j++) {
2063 for(i = (m - ione) ; i > -ione; i--) {
2064 temp = alpha*B[j*ldb+i];
2065 if ( noConj ) {
2066 for(k = i+ione; k < m; k++) {
2067 temp -= A[i*lda + k] * B[j*ldb + k];
2068 }
2069 if ( noUnit ) {
2070 temp /= A[i*lda + i];
2071 }
2072 } else {
2073 for(k = i+ione; k < m; k++) {
2075 * B[j*ldb + k];
2076 }
2077 if ( noUnit ) {
2079 }
2080 }
2081 B[j*ldb + i] = temp;
2082 }
2083 }
2084 }
2085 }
2086 } // if (side == 'L')
2087 else {
2088 // side == 'R'
2089 //
2090 // Perform computations for X*OP(A) = alpha*B
2091 //
2092 if (ETranspChar[transa] == 'N') {
2093 //
2094 // Compute B = alpha*B*inv( A )
2095 //
2096 if(EUploChar[uplo] == 'U') {
2097 // A is upper triangular.
2098 // Perform a backsolve for column j of B.
2099 for(j = izero; j < n; j++) {
2100 // Perform alpha*B if alpha is not 1.
2101 if(alpha != alpha_one) {
2102 for( i = izero; i < m; i++) {
2103 B[j*ldb+i] *= alpha;
2104 }
2105 }
2106 for(k = izero; k < j; k++) {
2107 // If this entry is zero, we don't have to do anything.
2108 if (A[j*lda + k] != A_zero) {
2109 for(i = izero; i < m; i++) {
2110 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2111 }
2112 }
2113 }
2114 if ( noUnit ) {
2115 temp = B_one/A[j*lda + j];
2116 for(i = izero; i < m; i++) {
2117 B[j*ldb + i] *= temp;
2118 }
2119 }
2120 }
2121 }
2122 else
2123 { // A is lower triangular.
2124 for(j = (n - ione); j > -ione; j--) {
2125 // Perform alpha*B if alpha is not 1.
2126 if(alpha != alpha_one) {
2127 for( i = izero; i < m; i++) {
2128 B[j*ldb+i] *= alpha;
2129 }
2130 }
2131 // Perform a forward solve for column j of B.
2132 for(k = j+ione; k < n; k++) {
2133 // If this entry is zero, we don't have to do anything.
2134 if (A[j*lda + k] != A_zero) {
2135 for(i = izero; i < m; i++) {
2136 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2137 }
2138 }
2139 }
2140 if ( noUnit ) {
2141 temp = B_one/A[j*lda + j];
2142 for(i = izero; i < m; i++) {
2143 B[j*ldb + i] *= temp;
2144 }
2145 }
2146 }
2147 } // end if (uplo == 'U')
2148 } // if (transa =='N')
2149 else {
2150 //
2151 // Compute B = alpha*B*inv( A' )
2152 // or B = alpha*B*inv( conj(A') )
2153 //
2154 if(EUploChar[uplo] == 'U') {
2155 // A is upper triangular.
2156 for(k = (n - ione); k > -ione; k--) {
2157 if ( noUnit ) {
2158 if ( noConj )
2159 temp = B_one/A[k*lda + k];
2160 else
2162 for(i = izero; i < m; i++) {
2163 B[k*ldb + i] *= temp;
2164 }
2165 }
2166 for(j = izero; j < k; j++) {
2167 if (A[k*lda + j] != A_zero) {
2168 if ( noConj )
2169 temp = A[k*lda + j];
2170 else
2172 for(i = izero; i < m; i++) {
2173 B[j*ldb + i] -= temp*B[k*ldb + i];
2174 }
2175 }
2176 }
2177 if (alpha != alpha_one) {
2178 for (i = izero; i < m; i++) {
2179 B[k*ldb + i] *= alpha;
2180 }
2181 }
2182 }
2183 }
2184 else
2185 { // A is lower triangular.
2186 for(k = izero; k < n; k++) {
2187 if ( noUnit ) {
2188 if ( noConj )
2189 temp = B_one/A[k*lda + k];
2190 else
2192 for (i = izero; i < m; i++) {
2193 B[k*ldb + i] *= temp;
2194 }
2195 }
2196 for(j = k+ione; j < n; j++) {
2197 if(A[k*lda + j] != A_zero) {
2198 if ( noConj )
2199 temp = A[k*lda + j];
2200 else
2202 for(i = izero; i < m; i++) {
2203 B[j*ldb + i] -= temp*B[k*ldb + i];
2204 }
2205 }
2206 }
2207 if (alpha != alpha_one) {
2208 for (i = izero; i < m; i++) {
2209 B[k*ldb + i] *= alpha;
2210 }
2211 }
2212 }
2213 }
2214 }
2215 }
2216 }
2217 }
2218 }
2219
2220 // Explicit instantiation for template<int,float>
2221
2222 template <>
2223 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, float>
2224 {
2225 public:
2226 inline BLAS(void) {}
2227 inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
2228 inline virtual ~BLAS(void) {}
2229 void ROTG(float* da, float* db, float* c, float* s) const;
2230 void ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const;
2231 float ASUM(const int& n, const float* x, const int& incx) const;
2232 void AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const;
2233 void COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const;
2234 float DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const;
2235 float NRM2(const int& n, const float* x, const int& incx) const;
2236 void SCAL(const int& n, const float& alpha, float* x, const int& incx) const;
2237 int IAMAX(const int& n, const float* x, const int& incx) const;
2238 void GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const;
2239 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const;
2240 void GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const;
2241 void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const;
2242 void SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const;
2243 void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const;
2244 void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const;
2245 void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const;
2246 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const;
2247 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const;
2248 };
2249
2250 // Explicit instantiation for template<int,double>
2251
2252 template<>
2253 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
2254 {
2255 public:
2256 inline BLAS(void) {}
2257 inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
2258 inline virtual ~BLAS(void) {}
2259 void ROTG(double* da, double* db, double* c, double* s) const;
2260 void ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const;
2261 double ASUM(const int& n, const double* x, const int& incx) const;
2262 void AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const;
2263 void COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const;
2264 double DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const;
2265 double NRM2(const int& n, const double* x, const int& incx) const;
2266 void SCAL(const int& n, const double& alpha, double* x, const int& incx) const;
2267 int IAMAX(const int& n, const double* x, const int& incx) const;
2268 void GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const;
2269 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const;
2270 void GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const;
2271 void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const;
2272 void SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const;
2273 void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const;
2274 void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const;
2275 void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const;
2276 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const;
2277 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const;
2278 };
2279
2280 // Explicit instantiation for template<int,complex<float> >
2281
2282 template<>
2283 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
2284 {
2285 public:
2286 inline BLAS(void) {}
2287 inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
2288 inline virtual ~BLAS(void) {}
2289 void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
2290 void ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const;
2291 float ASUM(const int& n, const std::complex<float>* x, const int& incx) const;
2292 void AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const;
2293 void COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const;
2294 std::complex<float> DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const;
2295 float NRM2(const int& n, const std::complex<float>* x, const int& incx) const;
2296 void SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const;
2297 int IAMAX(const int& n, const std::complex<float>* x, const int& incx) const;
2298 void GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const;
2299 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const;
2300 void GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const;
2301 void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2302 void SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const;
2303 void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> *B, const int& ldb, const std::complex<float> beta, std::complex<float> *C, const int& ldc) const;
2304 void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2305 void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2306 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const;
2307 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const;
2308 };
2309
2310 // Explicit instantiation for template<int,complex<double> >
2311
2312 template<>
2313 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
2314 {
2315 public:
2316 inline BLAS(void) {}
2317 inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
2318 inline virtual ~BLAS(void) {}
2319 void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
2320 void ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const;
2321 double ASUM(const int& n, const std::complex<double>* x, const int& incx) const;
2322 void AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const;
2323 void COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const;
2324 std::complex<double> DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const;
2325 double NRM2(const int& n, const std::complex<double>* x, const int& incx) const;
2326 void SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const;
2327 int IAMAX(const int& n, const std::complex<double>* x, const int& incx) const;
2328 void GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const;
2329 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const;
2330 void GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const;
2331 void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2332 void SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const;
2333 void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const;
2334 void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2335 void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2336 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const;
2337 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const;
2338 };
2339
2340} // namespace Teuchos
2341
2342#endif // _TEUCHOS_BLAS_HPP_
Enumerated types for BLAS input characters.
Teuchos header file which uses auto-configuration information to include necessary C++ headers.
Defines basic traits for the ordinal field type.
Defines basic traits for the scalar field type.
Templated BLAS wrapper.
virtual ~BLAS(void)
Destructor.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
BLAS(void)
Default constructor.
Default implementation for BLAS routines.
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y <- y+alpha*x.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices,...
virtual ~DefaultBLASImpl(void)
Destructor.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C,...
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
DefaultBLASImpl(void)
Default constructor.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a ge...
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/l...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
Teuchos implementation details.
static T one()
Returns representation of one for this ordinal type.
static T zero()
Returns representation of zero for this ordinal type.
This structure defines some basic traits for a scalar field type.
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
static T one()
Returns representation of one for this scalar type.
T magnitudeType
Mandatory typedef for result of magnitude.
static T zero()
Returns representation of zero for this scalar type.
static T conjugate(T a)
Returns the conjugate of the scalar type a.