Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_DefaultMpiComm.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TEUCHOS_MPI_COMM_HPP
11#define TEUCHOS_MPI_COMM_HPP
12
17
19
20// If MPI is not enabled, disable the contents of this file.
21#ifdef HAVE_TEUCHOS_MPI
22
23#include "Teuchos_Comm.hpp"
24#include "Teuchos_CommUtilities.hpp"
26#include "Teuchos_OpaqueWrapper.hpp"
28#include "Teuchos_SerializationTraitsHelpers.hpp"
29#include "Teuchos_Workspace.hpp"
31#include "Teuchos_as.hpp"
32#include "Teuchos_Assert.hpp"
33#include <mpi.h>
34#include <iterator>
35
36// This must be defined globally for the whole program!
37//#define TEUCHOS_MPI_COMM_DUMP
38
39#ifdef TEUCHOS_MPI_COMM_DUMP
40# include "Teuchos_VerboseObject.hpp"
41#endif
42
43namespace Teuchos {
44
46TEUCHOSCOMM_LIB_DLL_EXPORT std::string
47mpiErrorCodeToString (const int err);
48
49namespace details {
63 TEUCHOSCOMM_LIB_DLL_EXPORT void safeCommFree (MPI_Comm* comm);
64
69 TEUCHOSCOMM_LIB_DLL_EXPORT int setCommErrhandler (MPI_Comm comm, MPI_Errhandler handler);
70
71} // namespace details
72
73#ifdef TEUCHOS_MPI_COMM_DUMP
74template<typename Ordinal, typename T>
75void dumpBuffer(
76 const std::string &funcName, const std::string &buffName
77 ,const Ordinal bytes, const T buff[]
78 )
79{
82 Teuchos::OSTab tab(out);
83 *out
84 << "\n" << funcName << "::" << buffName << ":\n";
85 tab.incrTab();
86 for( Ordinal i = 0; i < bytes; ++i ) {
87 *out << buffName << "[" << i << "] = '" << buff[i] << "'\n";
88 }
89 *out << "\n";
90}
91#endif // TEUCHOS_MPI_COMM_DUMP
92
104template<class OrdinalType>
105class MpiCommStatus : public CommStatus<OrdinalType> {
106public:
107 MpiCommStatus (MPI_Status status) : status_ (status) {}
108
110 virtual ~MpiCommStatus() {}
111
113 OrdinalType getSourceRank () { return status_.MPI_SOURCE; }
114
116 OrdinalType getTag () { return status_.MPI_TAG; }
117
119 OrdinalType getError () { return status_.MPI_ERROR; }
120
121private:
123 MpiCommStatus ();
124
126 MPI_Status status_;
127};
128
132template<class OrdinalType>
133inline RCP<MpiCommStatus<OrdinalType> >
134mpiCommStatus (MPI_Status rawMpiStatus)
135{
136 return rcp (new MpiCommStatus<OrdinalType> (rawMpiStatus));
137}
138
154template<class OrdinalType>
155class MpiCommRequestBase : public CommRequest<OrdinalType> {
156public:
158 MpiCommRequestBase () :
159 rawMpiRequest_ (MPI_REQUEST_NULL)
160 {}
161
163 MpiCommRequestBase (MPI_Request rawMpiRequest) :
164 rawMpiRequest_ (rawMpiRequest)
165 {}
166
174 MPI_Request releaseRawMpiRequest()
175 {
176 MPI_Request tmp_rawMpiRequest = rawMpiRequest_;
177 rawMpiRequest_ = MPI_REQUEST_NULL;
178 return tmp_rawMpiRequest;
179 }
180
182 bool isNull() const {
183 return rawMpiRequest_ == MPI_REQUEST_NULL;
184 }
185
186 bool isReady() {
187 MPI_Status rawMpiStatus;
188 int flag = 0;
189
190 MPI_Test(&rawMpiRequest_, &flag, &rawMpiStatus);
191
192 return (flag != 0);
193 }
194
200 RCP<CommStatus<OrdinalType> > wait () {
201 MPI_Status rawMpiStatus;
202 // Whether this function satisfies the strong exception guarantee
203 // depends on whether MPI_Wait modifies its input request on error.
204 const int err = MPI_Wait (&rawMpiRequest_, &rawMpiStatus);
206 err != MPI_SUCCESS, std::runtime_error,
207 "Teuchos: MPI_Wait() failed with error \""
208 << mpiErrorCodeToString (err));
209 // MPI_Wait sets the MPI_Request to MPI_REQUEST_NULL on success.
210 return mpiCommStatus<OrdinalType> (rawMpiStatus);
211 }
212
217 RCP<CommStatus<OrdinalType> > cancel () {
218 if (rawMpiRequest_ == MPI_REQUEST_NULL) {
219 return null;
220 }
221 else {
222 int err = MPI_Cancel (&rawMpiRequest_);
224 err != MPI_SUCCESS, std::runtime_error,
225 "Teuchos: MPI_Cancel failed with the following error: "
226 << mpiErrorCodeToString (err));
227
228 // Wait on the request. If successful, MPI_Wait will set the
229 // MPI_Request to MPI_REQUEST_NULL. The returned status may
230 // still be useful; for example, one may call MPI_Test_cancelled
231 // to test an MPI_Status from a nonblocking send.
232 MPI_Status status;
233 err = MPI_Wait (&rawMpiRequest_, &status);
234 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
235 "Teuchos::MpiCommStatus::cancel: MPI_Wait failed with the following "
236 "error: " << mpiErrorCodeToString (err));
237 return mpiCommStatus<OrdinalType> (status);
238 }
239 }
240
242 virtual ~MpiCommRequestBase () {
243 if (rawMpiRequest_ != MPI_REQUEST_NULL) {
244 // We're in a destructor, so don't throw errors. However, if
245 // MPI_Cancel fails, it's probably a bad idea to call MPI_Wait.
246 const int err = MPI_Cancel (&rawMpiRequest_);
247 if (err == MPI_SUCCESS) {
248 // The MPI_Cancel succeeded. Now wait on the request. Ignore
249 // any reported error, since we can't do anything about those
250 // in the destructor (other than kill the program). If
251 // successful, MPI_Wait will set the MPI_Request to
252 // MPI_REQUEST_NULL. We ignore the returned MPI_Status, since
253 // if the user let the request fall out of scope, she must not
254 // care about the status.
255 //
256 // mfh 21 Oct 2012: The MPI standard requires completing a
257 // canceled request by calling a function like MPI_Wait,
258 // MPI_Test, or MPI_Request_free. MPI_Wait on a canceled
259 // request behaves like a local operation (it does not
260 // communicate or block waiting for communication). One could
261 // also call MPI_Request_free instead of MPI_Wait, but
262 // MPI_Request_free is intended more for persistent requests
263 // (created with functions like MPI_Recv_init).
264 (void) MPI_Wait (&rawMpiRequest_, MPI_STATUS_IGNORE);
265 }
266 }
267 }
268
269private:
271 MPI_Request rawMpiRequest_;
272};
273
289template<class OrdinalType>
290class MpiCommRequest : public MpiCommRequestBase<OrdinalType> {
291public:
293 MpiCommRequest () :
294 MpiCommRequestBase<OrdinalType> (MPI_REQUEST_NULL),
295 numBytes_ (0)
296 {}
297
299 MpiCommRequest (MPI_Request rawMpiRequest,
300 const ArrayView<char>::size_type numBytesInMessage) :
301 MpiCommRequestBase<OrdinalType> (rawMpiRequest),
302 numBytes_ (numBytesInMessage)
303 {}
304
310 ArrayView<char>::size_type numBytes () const {
311 return numBytes_;
312 }
313
315 virtual ~MpiCommRequest () {}
316
317private:
319 ArrayView<char>::size_type numBytes_;
320};
321
330template<class OrdinalType>
331inline RCP<MpiCommRequest<OrdinalType> >
332mpiCommRequest (MPI_Request rawMpiRequest,
333 const ArrayView<char>::size_type numBytes)
334{
335 return rcp (new MpiCommRequest<OrdinalType> (rawMpiRequest, numBytes));
336}
337
353template<typename Ordinal>
354class MpiComm : public Comm<Ordinal> {
355public:
357
358
379 explicit MpiComm (MPI_Comm rawMpiComm);
380
395 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm);
396
414 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
415 const int defaultTag);
416
433 MpiComm (const MpiComm<Ordinal>& other);
434
436 RCP<const OpaqueWrapper<MPI_Comm> > getRawMpiComm () const {
437 return rawMpiComm_;
438 }
439
504 void setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler);
505
507
509
511 virtual int getRank() const;
512
514 virtual int getSize() const;
515
517 virtual void barrier() const;
518
520 virtual void broadcast(
521 const int rootRank, const Ordinal bytes, char buffer[]
522 ) const;
523
525 virtual void
526 gather (const Ordinal sendBytes, const char sendBuffer[],
527 const Ordinal recvBytes, char recvBuffer[],
528 const int root) const;
530 virtual void gatherAll(
531 const Ordinal sendBytes, const char sendBuffer[]
532 ,const Ordinal recvBytes, char recvBuffer[]
533 ) const;
535 virtual void reduceAll(
536 const ValueTypeReductionOp<Ordinal,char> &reductOp
537 ,const Ordinal bytes, const char sendBuffer[], char globalReducts[]
538 ) const;
540 virtual void scan(
541 const ValueTypeReductionOp<Ordinal,char> &reductOp
542 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
543 ) const;
545 virtual void send(
546 const Ordinal bytes, const char sendBuffer[], const int destRank
547 ) const;
549 virtual void
550 send (const Ordinal bytes,
551 const char sendBuffer[],
552 const int destRank,
553 const int tag) const;
555 virtual void ssend(
556 const Ordinal bytes, const char sendBuffer[], const int destRank
557 ) const;
559 virtual void
560 ssend (const Ordinal bytes,
561 const char sendBuffer[],
562 const int destRank,
563 const int tag) const;
565 virtual int receive(
566 const int sourceRank, const Ordinal bytes, char recvBuffer[]
567 ) const;
569 virtual void readySend(
570 const ArrayView<const char> &sendBuffer,
571 const int destRank
572 ) const;
574 virtual void
575 readySend (const Ordinal bytes,
576 const char sendBuffer[],
577 const int destRank,
578 const int tag) const;
580 virtual RCP<CommRequest<Ordinal> > isend(
581 const ArrayView<const char> &sendBuffer,
582 const int destRank
583 ) const;
585 virtual RCP<CommRequest<Ordinal> >
586 isend (const ArrayView<const char> &sendBuffer,
587 const int destRank,
588 const int tag) const;
590 virtual RCP<CommRequest<Ordinal> > ireceive(
591 const ArrayView<char> &Buffer,
592 const int sourceRank
593 ) const;
595 virtual RCP<CommRequest<Ordinal> >
596 ireceive (const ArrayView<char> &Buffer,
597 const int sourceRank,
598 const int tag) const;
600 virtual void waitAll(
601 const ArrayView<RCP<CommRequest<Ordinal> > > &requests
602 ) const;
604 virtual void
605 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
606 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const;
608 virtual RCP<CommStatus<Ordinal> >
609 wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const;
611 virtual RCP< Comm<Ordinal> > duplicate() const;
613 virtual RCP< Comm<Ordinal> > split(const int color, const int key) const;
615 virtual RCP< Comm<Ordinal> > createSubcommunicator(
616 const ArrayView<const int>& ranks) const;
617
619
621
623 std::string description() const;
624
626
627 // These should be private but the PGI compiler requires them be public
628
629 static int const minTag_ = 26000; // These came from Teuchos::MpiComm???
630 static int const maxTag_ = 26099; // ""
631
637 int getTag () const { return tag_; }
638
639private:
640
644 void setupMembersFromComm();
645 static int tagCounter_;
646
654 RCP<const OpaqueWrapper<MPI_Comm> > rawMpiComm_;
655
657 int rank_;
658
660 int size_;
661
669 int tag_;
670
672 RCP<const OpaqueWrapper<MPI_Errhandler> > customErrorHandler_;
673
674 void assertRank(const int rank, const std::string &rankName) const;
675
676 // Not defined and not to be called!
677 MpiComm();
678
679#ifdef TEUCHOS_MPI_COMM_DUMP
680public:
681 static bool show_dump;
682#endif // TEUCHOS_MPI_COMM_DUMP
683
684};
685
686
700template<typename Ordinal>
701RCP<MpiComm<Ordinal> >
702createMpiComm(
703 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
704 );
705
706
720template<typename Ordinal>
721RCP<MpiComm<Ordinal> >
722createMpiComm(
723 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
724 const int defaultTag
725 );
726
727
755template<typename Ordinal>
756MPI_Comm
757getRawMpiComm(const Comm<Ordinal> &comm);
758
759
760// ////////////////////////
761// Implementations
762
763
764// Static members
765
766
767template<typename Ordinal>
768int MpiComm<Ordinal>::tagCounter_ = MpiComm<Ordinal>::minTag_;
769
770
771// Constructors
772
773
774template<typename Ordinal>
775MpiComm<Ordinal>::
776MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm)
777{
779 rawMpiComm.get () == NULL, std::invalid_argument,
780 "Teuchos::MpiComm constructor: The input RCP is null.");
782 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
783 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
784
785 rawMpiComm_ = rawMpiComm;
786
787 // mfh 09 Jul 2013: Please resist the temptation to modify the given
788 // MPI communicator's error handler here. See Bug 5943. Note that
789 // an MPI communicator's default error handler is
790 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
791 // returning an error code from the MPI function). Users who want
792 // MPI functions instead to return an error code if they encounter
793 // an error, should set the error handler to MPI_ERRORS_RETURN. DO
794 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
795 // always check the error code returned by an MPI function,
796 // regardless of the error handler. Users who want to set the error
797 // handler on an MpiComm may call its setErrorHandler method.
798
799 setupMembersFromComm ();
800}
801
802
803template<typename Ordinal>
804MpiComm<Ordinal>::
805MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
806 const int defaultTag)
807{
809 rawMpiComm.get () == NULL, std::invalid_argument,
810 "Teuchos::MpiComm constructor: The input RCP is null.");
812 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
813 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
814
815 rawMpiComm_ = rawMpiComm;
816 // Set size_ (the number of processes in the communicator).
817 int err = MPI_Comm_size (*rawMpiComm_, &size_);
818 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
819 "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
820 "error \"" << mpiErrorCodeToString (err) << "\".");
821 // Set rank_ (the calling process' rank).
822 err = MPI_Comm_rank (*rawMpiComm_, &rank_);
823 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
824 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
825 "error \"" << mpiErrorCodeToString (err) << "\".");
826 tag_ = defaultTag; // set the default message tag
827}
828
829
830template<typename Ordinal>
831MpiComm<Ordinal>::MpiComm (MPI_Comm rawMpiComm)
832{
833 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiComm == MPI_COMM_NULL,
834 std::invalid_argument, "Teuchos::MpiComm constructor: The given MPI_Comm "
835 "is MPI_COMM_NULL.");
836 // We don't supply a "free" function here, since this version of the
837 // constructor makes the caller responsible for freeing rawMpiComm
838 // after use if necessary.
839 rawMpiComm_ = opaqueWrapper<MPI_Comm> (rawMpiComm);
840
841 // mfh 09 Jul 2013: Please resist the temptation to modify the given
842 // MPI communicator's error handler here. See Bug 5943. Note that
843 // an MPI communicator's default error handler is
844 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
845 // returning an error code from the MPI function). Users who want
846 // MPI functions instead to return an error code if they encounter
847 // an error, should set the error handler to MPI_ERRORS_RETURN. DO
848 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
849 // always check the error code returned by an MPI function,
850 // regardless of the error handler. Users who want to set the error
851 // handler on an MpiComm may call its setErrorHandler method.
852
853 setupMembersFromComm ();
854}
855
856
857template<typename Ordinal>
858MpiComm<Ordinal>::MpiComm (const MpiComm<Ordinal>& other) :
859 rawMpiComm_ (opaqueWrapper<MPI_Comm> (MPI_COMM_NULL)) // <- This will be set below
860{
861 // These are logic errors, since they violate MpiComm's invariants.
862 RCP<const OpaqueWrapper<MPI_Comm> > origCommPtr = other.getRawMpiComm ();
863 TEUCHOS_TEST_FOR_EXCEPTION(origCommPtr == null, std::logic_error,
864 "Teuchos::MpiComm copy constructor: "
865 "The input's getRawMpiComm() method returns null.");
866 MPI_Comm origComm = *origCommPtr;
867 TEUCHOS_TEST_FOR_EXCEPTION(origComm == MPI_COMM_NULL, std::logic_error,
868 "Teuchos::MpiComm copy constructor: "
869 "The input's raw MPI_Comm is MPI_COMM_NULL.");
870
871 // mfh 19 Oct 2012: Don't change the behavior of MpiComm's copy
872 // constructor for now. Later, we'll switch to the version that
873 // calls MPI_Comm_dup. For now, we just copy other's handle over.
874 // Note that the new MpiComm's tag is still different than the input
875 // MpiComm's tag. See Bug 5740.
876 if (true) {
877 rawMpiComm_ = origCommPtr;
878 }
879 else { // false (not run)
880 MPI_Comm newComm;
881 const int err = MPI_Comm_dup (origComm, &newComm);
882 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
883 "Teuchos::MpiComm copy constructor: MPI_Comm_dup failed with "
884 "the following error: " << mpiErrorCodeToString (err));
885 // No side effects until after everything has succeeded.
886 rawMpiComm_ = opaqueWrapper (newComm, details::safeCommFree);
887 }
888
889 setupMembersFromComm ();
890}
891
892
893template<typename Ordinal>
894void MpiComm<Ordinal>::setupMembersFromComm ()
895{
896 int err = MPI_Comm_size (*rawMpiComm_, &size_);
897 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
898 "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
899 "error \"" << mpiErrorCodeToString (err) << "\".");
900 err = MPI_Comm_rank (*rawMpiComm_, &rank_);
901 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
902 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
903 "error \"" << mpiErrorCodeToString (err) << "\".");
904
905 // Set the default tag to make unique across all communicators
906 if (tagCounter_ > maxTag_) {
907 tagCounter_ = minTag_;
908 }
909 tag_ = tagCounter_++;
910 // Ensure that the same tag is used on all processes.
911 //
912 // FIXME (mfh 09 Jul 2013) This would not be necessary if MpiComm
913 // were just to call MPI_Comm_dup (as every library should) when
914 // given its communicator. Of course, MPI_Comm_dup may also be
915 // implemented as a collective, and may even be more expensive than
916 // a broadcast. If we do decide to use MPI_Comm_dup, we can get rid
917 // of the broadcast below, and also get rid of tag_, tagCounter_,
918 // minTag_, and maxTag_.
919 MPI_Bcast (&tag_, 1, MPI_INT, 0, *rawMpiComm_);
920}
921
922
923template<typename Ordinal>
924void
925MpiComm<Ordinal>::
926setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler)
927{
928 if (! is_null (errHandler)) {
929 const int err = details::setCommErrhandler (*getRawMpiComm (), *errHandler);
930 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
931 "Teuchos::MpiComm: Setting the MPI_Comm's error handler failed with "
932 "error \"" << mpiErrorCodeToString (err) << "\".");
933 }
934 // Wait to set this until the end, in case setting the error handler
935 // doesn't succeed.
936 customErrorHandler_ = errHandler;
937}
938
939//
940// Overridden from Comm
941//
942
943template<typename Ordinal>
944int MpiComm<Ordinal>::getRank() const
945{
946 return rank_;
947}
948
949
950template<typename Ordinal>
951int MpiComm<Ordinal>::getSize() const
952{
953 return size_;
954}
955
956
957template<typename Ordinal>
958void MpiComm<Ordinal>::barrier() const
959{
960 TEUCHOS_COMM_TIME_MONITOR(
961 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::barrier()"
962 );
963 const int err = MPI_Barrier (*rawMpiComm_);
964 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
965 "Teuchos::MpiComm::barrier: MPI_Barrier failed with error \""
966 << mpiErrorCodeToString (err) << "\".");
967}
968
969
970template<typename Ordinal>
971void MpiComm<Ordinal>::broadcast(
972 const int rootRank, const Ordinal bytes, char buffer[]
973 ) const
974{
975 TEUCHOS_COMM_TIME_MONITOR(
976 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::broadcast(...)"
977 );
978 const int err = MPI_Bcast (buffer, bytes, MPI_CHAR, rootRank, *rawMpiComm_);
979 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
980 "Teuchos::MpiComm::broadcast: MPI_Bcast failed with error \""
981 << mpiErrorCodeToString (err) << "\".");
982}
983
984
985template<typename Ordinal>
986void MpiComm<Ordinal>::gatherAll(
987 const Ordinal sendBytes, const char sendBuffer[],
988 const Ordinal recvBytes, char recvBuffer[]
989 ) const
990{
991 TEUCHOS_COMM_TIME_MONITOR(
992 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gatherAll(...)"
993 );
994 TEUCHOS_ASSERT_EQUALITY((sendBytes*size_), recvBytes );
995 const int err =
996 MPI_Allgather (const_cast<char *>(sendBuffer), sendBytes, MPI_CHAR,
997 recvBuffer, sendBytes, MPI_CHAR, *rawMpiComm_);
998 // NOTE: 'sendBytes' is being sent above for the MPI arg recvcount (which is
999 // very confusing in the MPI documentation) for MPI_Allgether(...).
1000
1001 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1002 "Teuchos::MpiComm::gatherAll: MPI_Allgather failed with error \""
1003 << mpiErrorCodeToString (err) << "\".");
1004}
1005
1006
1007template<typename Ordinal>
1008void
1009MpiComm<Ordinal>::gather (const Ordinal sendBytes,
1010 const char sendBuffer[],
1011 const Ordinal recvBytes,
1012 char recvBuffer[],
1013 const int root) const
1014{
1015 (void) recvBytes; // silence compile warning for "unused parameter"
1016
1017 TEUCHOS_COMM_TIME_MONITOR(
1018 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gather(...)"
1019 );
1020 const int err =
1021 MPI_Gather (const_cast<char *> (sendBuffer), sendBytes, MPI_CHAR,
1022 recvBuffer, sendBytes, MPI_CHAR, root, *rawMpiComm_);
1023 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1024 "Teuchos::MpiComm::gather: MPI_Gather failed with error \""
1025 << mpiErrorCodeToString (err) << "\".");
1026}
1027
1028
1029template<typename Ordinal>
1030void
1031MpiComm<Ordinal>::
1032reduceAll (const ValueTypeReductionOp<Ordinal,char> &reductOp,
1033 const Ordinal bytes,
1034 const char sendBuffer[],
1035 char globalReducts[]) const
1036{
1037 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::reduceAll(...)" );
1038 int err = MPI_SUCCESS;
1039
1040 if (bytes == 0) return;
1041
1042 Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1043 MPI_Op op = Details::setMpiReductionOp (opWrap);
1044
1045 // FIXME (mfh 23 Nov 2014) Ross decided to mash every type into
1046 // char. This can cause correctness issues if we're actually doing
1047 // a reduction over, say, double. Thus, he creates a custom
1048 // MPI_Datatype here that represents a contiguous block of char, so
1049 // that MPI doesn't split up the reduction type and thus do the sum
1050 // wrong. It's a hack but it works.
1051
1052 MPI_Datatype char_block;
1053 err = MPI_Type_contiguous (bytes, MPI_CHAR, &char_block);
1055 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1056 "MPI_Type_contiguous failed with error \"" << mpiErrorCodeToString (err)
1057 << "\".");
1058 err = MPI_Type_commit (&char_block);
1060 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1061 "MPI_Type_commit failed with error \"" << mpiErrorCodeToString (err)
1062 << "\".");
1063
1064 if (sendBuffer == globalReducts) {
1065 // NOTE (mfh 31 May 2017) This is only safe if the communicator is
1066 // NOT an intercomm. The usual case is that communicators are
1067 // intracomms.
1068 err = MPI_Allreduce (MPI_IN_PLACE, globalReducts, 1,
1069 char_block, op, *rawMpiComm_);
1070 }
1071 else {
1072 err = MPI_Allreduce (const_cast<char*> (sendBuffer), globalReducts, 1,
1073 char_block, op, *rawMpiComm_);
1074 }
1075 if (err != MPI_SUCCESS) {
1076 // Don't throw until we release the type resources we allocated
1077 // above. If freeing fails for some reason, let the memory leak
1078 // go; we already have more serious problems if MPI_Allreduce
1079 // doesn't work.
1080 (void) MPI_Type_free (&char_block);
1082 true, std::runtime_error, "Teuchos::reduceAll (MPI, custom op): "
1083 "MPI_Allreduce failed with error \"" << mpiErrorCodeToString (err)
1084 << "\".");
1085 }
1086 err = MPI_Type_free (&char_block);
1088 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1089 "MPI_Type_free failed with error \"" << mpiErrorCodeToString (err)
1090 << "\".");
1091}
1092
1093
1094template<typename Ordinal>
1095void MpiComm<Ordinal>::scan(
1096 const ValueTypeReductionOp<Ordinal,char> &reductOp
1097 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
1098 ) const
1099{
1100 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::scan(...)" );
1101
1102 Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1103 MPI_Op op = Details::setMpiReductionOp (opWrap);
1104 const int err =
1105 MPI_Scan (const_cast<char*> (sendBuffer), scanReducts, bytes, MPI_CHAR,
1106 op, *rawMpiComm_);
1107 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1108 "Teuchos::MpiComm::scan: MPI_Scan() failed with error \""
1109 << mpiErrorCodeToString (err) << "\".");
1110}
1111
1112
1113template<typename Ordinal>
1114void
1115MpiComm<Ordinal>::send (const Ordinal bytes,
1116 const char sendBuffer[],
1117 const int destRank) const
1118{
1119 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1120
1121#ifdef TEUCHOS_MPI_COMM_DUMP
1122 if(show_dump) {
1123 dumpBuffer<Ordinal,char>(
1124 "Teuchos::MpiComm<Ordinal>::send(...)"
1125 ,"sendBuffer", bytes, sendBuffer
1126 );
1127 }
1128#endif // TEUCHOS_MPI_COMM_DUMP
1129
1130 const int err = MPI_Send (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1131 destRank, tag_, *rawMpiComm_);
1132 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1133 "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1134 << mpiErrorCodeToString (err) << "\".");
1135}
1136
1137
1138template<typename Ordinal>
1139void
1140MpiComm<Ordinal>::send (const Ordinal bytes,
1141 const char sendBuffer[],
1142 const int destRank,
1143 const int tag) const
1144{
1145 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1146 const int err = MPI_Send (const_cast<char*> (sendBuffer), bytes, MPI_CHAR,
1147 destRank, tag, *rawMpiComm_);
1148 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1149 "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1150 << mpiErrorCodeToString (err) << "\".");
1151}
1152
1153
1154template<typename Ordinal>
1155void
1156MpiComm<Ordinal>::ssend (const Ordinal bytes,
1157 const char sendBuffer[],
1158 const int destRank) const
1159{
1160 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1161
1162#ifdef TEUCHOS_MPI_COMM_DUMP
1163 if(show_dump) {
1164 dumpBuffer<Ordinal,char>(
1165 "Teuchos::MpiComm<Ordinal>::send(...)"
1166 ,"sendBuffer", bytes, sendBuffer
1167 );
1168 }
1169#endif // TEUCHOS_MPI_COMM_DUMP
1170
1171 const int err = MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1172 destRank, tag_, *rawMpiComm_);
1173 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1174 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1175 << mpiErrorCodeToString (err) << "\".");
1176}
1177
1178template<typename Ordinal>
1179void
1180MpiComm<Ordinal>::ssend (const Ordinal bytes,
1181 const char sendBuffer[],
1182 const int destRank,
1183 const int tag) const
1184{
1185 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1186 const int err =
1187 MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1188 destRank, tag, *rawMpiComm_);
1189 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1190 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1191 << mpiErrorCodeToString (err) << "\".");
1192}
1193
1194template<typename Ordinal>
1195void MpiComm<Ordinal>::readySend(
1196 const ArrayView<const char> &sendBuffer,
1197 const int destRank
1198 ) const
1199{
1200 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1201
1202#ifdef TEUCHOS_MPI_COMM_DUMP
1203 if(show_dump) {
1204 dumpBuffer<Ordinal,char>(
1205 "Teuchos::MpiComm<Ordinal>::readySend(...)"
1206 ,"sendBuffer", bytes, sendBuffer
1207 );
1208 }
1209#endif // TEUCHOS_MPI_COMM_DUMP
1210
1211 const int err =
1212 MPI_Rsend (const_cast<char*>(sendBuffer.getRawPtr()), static_cast<int>(sendBuffer.size()),
1213 MPI_CHAR, destRank, tag_, *rawMpiComm_);
1214 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1215 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1216 << mpiErrorCodeToString (err) << "\".");
1217}
1218
1219
1220template<typename Ordinal>
1221void MpiComm<Ordinal>::
1222readySend (const Ordinal bytes,
1223 const char sendBuffer[],
1224 const int destRank,
1225 const int tag) const
1226{
1227 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1228 const int err =
1229 MPI_Rsend (const_cast<char*> (sendBuffer), bytes,
1230 MPI_CHAR, destRank, tag, *rawMpiComm_);
1231 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1232 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1233 << mpiErrorCodeToString (err) << "\".");
1234}
1235
1236
1237template<typename Ordinal>
1238int
1239MpiComm<Ordinal>::receive (const int sourceRank,
1240 const Ordinal bytes,
1241 char recvBuffer[]) const
1242{
1243 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::receive(...)" );
1244
1245 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1246 // will take an incoming message from any process, as long as the
1247 // tag matches.
1248 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1249
1250 MPI_Status status;
1251 const int err = MPI_Recv (recvBuffer, bytes, MPI_CHAR, theSrcRank, tag_,
1252 *rawMpiComm_, &status);
1253 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1254 "Teuchos::MpiComm::receive: MPI_Recv() failed with error \""
1255 << mpiErrorCodeToString (err) << "\".");
1256
1257#ifdef TEUCHOS_MPI_COMM_DUMP
1258 if (show_dump) {
1259 dumpBuffer<Ordinal,char> ("Teuchos::MpiComm<Ordinal>::receive(...)",
1260 "recvBuffer", bytes, recvBuffer);
1261 }
1262#endif // TEUCHOS_MPI_COMM_DUMP
1263
1264 // Returning the source rank is useful in the MPI_ANY_SOURCE case.
1265 return status.MPI_SOURCE;
1266}
1267
1268
1269template<typename Ordinal>
1270RCP<CommRequest<Ordinal> >
1271MpiComm<Ordinal>::isend (const ArrayView<const char> &sendBuffer,
1272 const int destRank) const
1273{
1274 using Teuchos::as;
1275 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1276
1277 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1278 const int err =
1279 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1280 as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1281 destRank, tag_, *rawMpiComm_, &rawMpiRequest);
1282 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1283 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1284 << mpiErrorCodeToString (err) << "\".");
1285
1286 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1287}
1288
1289
1290template<typename Ordinal>
1291RCP<CommRequest<Ordinal> >
1292MpiComm<Ordinal>::
1293isend (const ArrayView<const char> &sendBuffer,
1294 const int destRank,
1295 const int tag) const
1296{
1297 using Teuchos::as;
1298 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1299
1300 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1301 const int err =
1302 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1303 as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1304 destRank, tag, *rawMpiComm_, &rawMpiRequest);
1305 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1306 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1307 << mpiErrorCodeToString (err) << "\".");
1308
1309 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1310}
1311
1312
1313template<typename Ordinal>
1314RCP<CommRequest<Ordinal> >
1315MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1316 const int sourceRank) const
1317{
1318 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1319
1320 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1321 // will take an incoming message from any process, as long as the
1322 // tag matches.
1323 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1324
1325 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1326 const int err =
1327 MPI_Irecv (const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(),
1328 MPI_CHAR, theSrcRank, tag_, *rawMpiComm_, &rawMpiRequest);
1329 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1330 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1331 << mpiErrorCodeToString (err) << "\".");
1332
1333 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size());
1334}
1335
1336template<typename Ordinal>
1337RCP<CommRequest<Ordinal> >
1338MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1339 const int sourceRank,
1340 const int tag) const
1341{
1342 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1343
1344 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1345 // will take an incoming message from any process, as long as the
1346 // tag matches.
1347 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1348
1349 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1350 const int err =
1351 MPI_Irecv (const_cast<char*> (recvBuffer.getRawPtr ()), recvBuffer.size (),
1352 MPI_CHAR, theSrcRank, tag, *rawMpiComm_, &rawMpiRequest);
1353 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1354 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1355 << mpiErrorCodeToString (err) << "\".");
1356
1357 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size ());
1358}
1359
1360namespace {
1361 // Called by the two-argument MpiComm::waitAll() variant.
1362 template<typename Ordinal>
1363 void
1364 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1365 const ArrayView<MPI_Status>& rawMpiStatuses)
1366 {
1367 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1368 const size_type count = requests.size();
1369 // waitAllImpl() is not meant to be called by users, so it's a bug
1370 // for the two views to have different lengths.
1371 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiStatuses.size() != count,
1372 std::logic_error, "Teuchos::MpiComm's waitAllImpl: rawMpiStatus.size() = "
1373 << rawMpiStatuses.size() << " != requests.size() = " << requests.size()
1374 << ". Please report this bug to the Tpetra developers.");
1375 if (count == 0) {
1376 return; // No requests on which to wait
1377 }
1378
1379 // MpiComm wraps MPI and can't expose any MPI structs or opaque
1380 // objects. Thus, we have to unpack requests into a separate array.
1381 // If that's too slow, then your code should just call into MPI
1382 // directly.
1383 //
1384 // Pull out the raw MPI requests from the wrapped requests.
1385 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, but
1386 // we keep track just to inform the user.
1387 bool someNullRequests = false;
1388 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1389 for (int i = 0; i < count; ++i) {
1390 RCP<CommRequest<Ordinal> > request = requests[i];
1391 if (! is_null (request)) {
1392 RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1393 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1394 // releaseRawMpiRequest() sets the MpiCommRequest's raw
1395 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1396 // satisfy the strong exception guarantee. That's OK because
1397 // MPI_Waitall() doesn't promise that it satisfies the strong
1398 // exception guarantee, and we would rather conservatively
1399 // invalidate the handles than leave dangling requests around
1400 // and risk users trying to wait on the same request twice.
1401 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest();
1402 }
1403 else { // Null requests map to MPI_REQUEST_NULL
1404 rawMpiRequests[i] = MPI_REQUEST_NULL;
1405 someNullRequests = true;
1406 }
1407 }
1408
1409 // This is the part where we've finally peeled off the wrapper and
1410 // we can now interact with MPI directly.
1411 //
1412 // One option in the one-argument version of waitAll() is to ignore
1413 // the statuses completely. MPI lets you pass in the named constant
1414 // MPI_STATUSES_IGNORE for the MPI_Status array output argument in
1415 // MPI_Waitall(), which would tell MPI not to bother with the
1416 // statuses. However, we want the statuses because we can use them
1417 // for detailed error diagnostics in case something goes wrong.
1418 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1419 rawMpiStatuses.getRawPtr());
1420
1421 // In MPI_Waitall(), an error indicates that one or more requests
1422 // failed. In that case, there could be requests that completed
1423 // (their MPI_Status' error field is MPI_SUCCESS), and other
1424 // requests that have not completed yet but have not necessarily
1425 // failed (MPI_PENDING). We make no attempt here to wait on the
1426 // pending requests. It doesn't make sense for us to do so, because
1427 // in general Teuchos::Comm doesn't attempt to provide robust
1428 // recovery from failed messages.
1429 if (err != MPI_SUCCESS) {
1430 if (err == MPI_ERR_IN_STATUS) {
1431 //
1432 // When MPI_Waitall returns MPI_ERR_IN_STATUS (a standard error
1433 // class), it's telling us to check the error codes in the
1434 // returned statuses. In that case, we do so and generate a
1435 // detailed exception message.
1436 //
1437 // Figure out which of the requests failed.
1438 Array<std::pair<size_type, int> > errorLocationsAndCodes;
1439 for (size_type k = 0; k < rawMpiStatuses.size(); ++k) {
1440 const int curErr = rawMpiStatuses[k].MPI_ERROR;
1441 if (curErr != MPI_SUCCESS) {
1442 errorLocationsAndCodes.push_back (std::make_pair (k, curErr));
1443 }
1444 }
1445 const size_type numErrs = errorLocationsAndCodes.size();
1446 if (numErrs > 0) {
1447 // There was at least one error. Assemble a detailed
1448 // exception message reporting which requests failed,
1449 // their error codes, and their source
1450 std::ostringstream os;
1451 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1452 << mpiErrorCodeToString (err) << "\". Of the " << count
1453 << " total request" << (count != 1 ? "s" : "") << ", " << numErrs
1454 << " failed. Here are the indices of the failed requests, and the "
1455 "error codes extracted from their returned MPI_Status objects:"
1456 << std::endl;
1457 for (size_type k = 0; k < numErrs; ++k) {
1458 const size_type errInd = errorLocationsAndCodes[k].first;
1459 os << "Request " << errInd << ": MPI_ERROR = "
1460 << mpiErrorCodeToString (rawMpiStatuses[errInd].MPI_ERROR)
1461 << std::endl;
1462 }
1463 if (someNullRequests) {
1464 os << " On input to MPI_Waitall, there was at least one MPI_"
1465 "Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1466 "normally fail in that case, but we thought we should let you know "
1467 "regardless.";
1468 }
1469 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1470 }
1471 // If there were no actual errors in the returned statuses,
1472 // well, then I guess everything is OK. Just keep going.
1473 }
1474 else {
1475 std::ostringstream os;
1476 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1477 << mpiErrorCodeToString (err) << "\".";
1478 if (someNullRequests) {
1479 os << " On input to MPI_Waitall, there was at least one MPI_Request "
1480 "that was MPI_REQUEST_NULL. MPI_Waitall should not normally fail in "
1481 "that case, but we thought we should let you know regardless.";
1482 }
1483 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1484 }
1485 }
1486
1487 // Invalidate the input array of requests by setting all entries
1488 // to null.
1489 std::fill (requests.begin(), requests.end(), null);
1490 }
1491
1492
1493
1494 // Called by the one-argument MpiComm::waitAll() variant.
1495 template<typename Ordinal>
1496 void
1497 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests)
1498 {
1499 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1500 const size_type count = requests.size ();
1501 if (count == 0) {
1502 return; // No requests on which to wait
1503 }
1504
1505 // MpiComm wraps MPI and can't expose any MPI structs or opaque
1506 // objects. Thus, we have to unpack requests into a separate
1507 // array. If that's too slow, then your code should just call
1508 // into MPI directly.
1509 //
1510 // Pull out the raw MPI requests from the wrapped requests.
1511 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL,
1512 // but we keep track just to inform the user.
1513 bool someNullRequests = false;
1514 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1515 for (int i = 0; i < count; ++i) {
1516 RCP<CommRequest<Ordinal> > request = requests[i];
1517 if (! request.is_null ()) {
1518 RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1519 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1520 // releaseRawMpiRequest() sets the MpiCommRequest's raw
1521 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1522 // satisfy the strong exception guarantee. That's OK because
1523 // MPI_Waitall() doesn't promise that it satisfies the strong
1524 // exception guarantee, and we would rather conservatively
1525 // invalidate the handles than leave dangling requests around
1526 // and risk users trying to wait on the same request twice.
1527 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest ();
1528 }
1529 else { // Null requests map to MPI_REQUEST_NULL
1530 rawMpiRequests[i] = MPI_REQUEST_NULL;
1531 someNullRequests = true;
1532 }
1533 }
1534
1535 // This is the part where we've finally peeled off the wrapper and
1536 // we can now interact with MPI directly.
1537 //
1538 // MPI lets us pass in the named constant MPI_STATUSES_IGNORE for
1539 // the MPI_Status array output argument in MPI_Waitall(), which
1540 // tells MPI not to bother writing out the statuses.
1541 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1542 MPI_STATUSES_IGNORE);
1543
1544 // In MPI_Waitall(), an error indicates that one or more requests
1545 // failed. In that case, there could be requests that completed
1546 // (their MPI_Status' error field is MPI_SUCCESS), and other
1547 // requests that have not completed yet but have not necessarily
1548 // failed (MPI_PENDING). We make no attempt here to wait on the
1549 // pending requests. It doesn't make sense for us to do so,
1550 // because in general Teuchos::Comm doesn't attempt to provide
1551 // robust recovery from failed messages.
1552 if (err != MPI_SUCCESS) {
1553 std::ostringstream os;
1554 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1555 << mpiErrorCodeToString (err) << "\".";
1556 if (someNullRequests) {
1557 os << std::endl << "On input to MPI_Waitall, there was at least one "
1558 "MPI_Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1559 "normally fail in that case, but we thought we should let you know "
1560 "regardless.";
1561 }
1562 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1563 }
1564
1565 // Invalidate the input array of requests by setting all entries
1566 // to null. We delay this until the end, since some
1567 // implementations of CommRequest might hold the only reference to
1568 // the communication buffer, and we don't want that to go away
1569 // until we've waited on the communication operation.
1570 std::fill (requests.begin(), requests.end(), null);
1571 }
1572
1573} // namespace (anonymous)
1574
1575
1576
1577template<typename Ordinal>
1578void
1579MpiComm<Ordinal>::
1580waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) const
1581{
1582 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests)" );
1583 // Call the one-argument version of waitAllImpl, to avoid overhead
1584 // of handling statuses (which the user didn't want anyway).
1585 waitAllImpl<Ordinal> (requests);
1586}
1587
1588
1589template<typename Ordinal>
1590void
1591MpiComm<Ordinal>::
1592waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1593 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const
1594{
1595 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests, statuses)" );
1596
1597 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1598 const size_type count = requests.size();
1599
1600 TEUCHOS_TEST_FOR_EXCEPTION(count != statuses.size(),
1601 std::invalid_argument, "Teuchos::MpiComm::waitAll: requests.size() = "
1602 << count << " != statuses.size() = " << statuses.size() << ".");
1603
1604 Array<MPI_Status> rawMpiStatuses (count);
1605 waitAllImpl<Ordinal> (requests, rawMpiStatuses());
1606
1607 // Repackage the raw MPI_Status structs into the wrappers.
1608 for (size_type i = 0; i < count; ++i) {
1609 statuses[i] = mpiCommStatus<Ordinal> (rawMpiStatuses[i]);
1610 }
1611}
1612
1613
1614template<typename Ordinal>
1615RCP<CommStatus<Ordinal> >
1616MpiComm<Ordinal>::wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const
1617{
1618 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::wait(...)" );
1619
1620 if (is_null (*request)) {
1621 return null; // Nothing to wait on ...
1622 }
1623 else {
1624 RCP<CommStatus<Ordinal> > status = (*request)->wait ();
1625 // mfh 22 Oct 2012: The unit tests expect waiting on the
1626 // CommRequest to invalidate it by setting it to null.
1627 *request = null;
1628 return status;
1629 }
1630}
1631
1632template<typename Ordinal>
1633RCP< Comm<Ordinal> >
1634MpiComm<Ordinal>::duplicate() const
1635{
1636 MPI_Comm origRawComm = *rawMpiComm_;
1637 MPI_Comm newRawComm = MPI_COMM_NULL;
1638 const int err = MPI_Comm_dup (origRawComm, &newRawComm);
1639 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "Teuchos"
1640 "::MpiComm::duplicate: MPI_Comm_dup failed with the following error: "
1641 << mpiErrorCodeToString (err));
1642
1643 // Wrap the raw communicator, and pass the (const) wrapped
1644 // communicator to MpiComm's constructor. We created the raw comm,
1645 // so we have to supply a function that frees it after use.
1646 RCP<OpaqueWrapper<MPI_Comm> > wrapped =
1647 opaqueWrapper<MPI_Comm> (newRawComm, details::safeCommFree);
1648 // Since newComm's raw MPI_Comm is the result of an MPI_Comm_dup,
1649 // its messages cannot collide with those of any other MpiComm.
1650 // This means we can assign its tag without an MPI_Bcast.
1651 RCP<MpiComm<Ordinal> > newComm =
1652 rcp (new MpiComm<Ordinal> (wrapped.getConst (), minTag_));
1653 return rcp_implicit_cast<Comm<Ordinal> > (newComm);
1654}
1655
1656
1657template<typename Ordinal>
1658RCP< Comm<Ordinal> >
1659MpiComm<Ordinal>::split(const int color, const int key) const
1660{
1661 MPI_Comm newComm;
1662 const int splitReturn =
1663 MPI_Comm_split (*rawMpiComm_,
1664 color < 0 ? MPI_UNDEFINED : color,
1665 key,
1666 &newComm);
1668 splitReturn != MPI_SUCCESS,
1669 std::logic_error,
1670 "Teuchos::MpiComm::split: Failed to create communicator with color "
1671 << color << "and key " << key << ". MPI_Comm_split failed with error \""
1672 << mpiErrorCodeToString (splitReturn) << "\".");
1673 if (newComm == MPI_COMM_NULL) {
1674 return RCP< Comm<Ordinal> >();
1675 } else {
1676 RCP<const OpaqueWrapper<MPI_Comm> > wrapped =
1677 opaqueWrapper<MPI_Comm> (newComm, details::safeCommFree);
1678 // Since newComm's raw MPI_Comm is the result of an
1679 // MPI_Comm_split, its messages cannot collide with those of any
1680 // other MpiComm. This means we can assign its tag without an
1681 // MPI_Bcast.
1682 return rcp (new MpiComm<Ordinal> (wrapped, minTag_));
1683 }
1684}
1685
1686
1687template<typename Ordinal>
1688RCP< Comm<Ordinal> >
1689MpiComm<Ordinal>::createSubcommunicator(const ArrayView<const int> &ranks) const
1690{
1691 int err = MPI_SUCCESS; // For error codes returned by MPI functions
1692
1693 // Get the group that this communicator is in.
1694 MPI_Group thisGroup;
1695 err = MPI_Comm_group (*rawMpiComm_, &thisGroup);
1696 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1697 "Failed to obtain the current communicator's group. "
1698 "MPI_Comm_group failed with error \""
1699 << mpiErrorCodeToString (err) << "\".");
1700
1701 // Create a new group with the specified members.
1702 MPI_Group newGroup;
1703 // It's rude to cast away const, but MPI functions demand it.
1704 //
1705 // NOTE (mfh 14 Aug 2012) Please don't ask for &ranks[0] unless you
1706 // know that ranks.size() > 0. That's why I'm using getRawPtr().
1707 err = MPI_Group_incl (thisGroup, ranks.size(),
1708 const_cast<int*> (ranks.getRawPtr ()), &newGroup);
1709 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1710 "Failed to create subgroup. MPI_Group_incl failed with error \""
1711 << mpiErrorCodeToString (err) << "\".");
1712
1713 // Create a new communicator from the new group.
1714 MPI_Comm newComm;
1715 try {
1716 err = MPI_Comm_create (*rawMpiComm_, newGroup, &newComm);
1717 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1718 "Failed to create subcommunicator. MPI_Comm_create failed with error \""
1719 << mpiErrorCodeToString (err) << "\".");
1720 } catch (...) {
1721 // Attempt to free the new group before rethrowing. If
1722 // successful, this will prevent a memory leak due to the "lost"
1723 // group that was allocated successfully above. Since we're
1724 // throwing std::logic_error anyway, we can only promise
1725 // best-effort recovery; thus, we don't check the error code.
1726 (void) MPI_Group_free (&newGroup);
1727 (void) MPI_Group_free (&thisGroup);
1728 throw;
1729 }
1730
1731 // We don't need the group any more, so free it.
1732 err = MPI_Group_free (&newGroup);
1733 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1734 "Failed to free subgroup. MPI_Group_free failed with error \""
1735 << mpiErrorCodeToString (err) << "\".");
1736 err = MPI_Group_free (&thisGroup);
1737 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1738 "Failed to free subgroup. MPI_Group_free failed with error \""
1739 << mpiErrorCodeToString (err) << "\".");
1740
1741 if (newComm == MPI_COMM_NULL) {
1742 return RCP<Comm<Ordinal> > ();
1743 } else {
1744 using Teuchos::details::safeCommFree;
1745 typedef OpaqueWrapper<MPI_Comm> ow_type;
1746 RCP<const ow_type> wrapper =
1747 rcp_implicit_cast<const ow_type> (opaqueWrapper (newComm, safeCommFree));
1748 // Since newComm's raw MPI_Comm is the result of an
1749 // MPI_Comm_create, its messages cannot collide with those of any
1750 // other MpiComm. This means we can assign its tag without an
1751 // MPI_Bcast.
1752 return rcp (new MpiComm<Ordinal> (wrapper, minTag_));
1753 }
1754}
1755
1756
1757// Overridden from Describable
1758
1759
1760template<typename Ordinal>
1761std::string MpiComm<Ordinal>::description() const
1762{
1763 std::ostringstream oss;
1764 oss
1765 << typeName(*this)
1766 << "{"
1767 << "size="<<size_
1768 << ",rank="<<rank_
1769 << ",rawMpiComm="<<static_cast<MPI_Comm>(*rawMpiComm_)
1770 <<"}";
1771 return oss.str();
1772}
1773
1774
#ifdef TEUCHOS_MPI_COMM_DUMP
// Definition of the static show_dump flag; it only exists when
// TEUCHOS_MPI_COMM_DUMP is defined, and starts out false.
template<typename Ordinal>
bool MpiComm<Ordinal>::show_dump = false;
#endif
1779
1780
1781// private
1782
1783
1784template<typename Ordinal>
1785void MpiComm<Ordinal>::assertRank(const int rank, const std::string &rankName) const
1786{
1788 ! ( 0 <= rank && rank < size_ ), std::logic_error
1789 ,"Error, "<<rankName<<" = " << rank << " is not < 0 or is not"
1790 " in the range [0,"<<size_-1<<"]!"
1791 );
1792}
1793
1794
1795} // namespace Teuchos
1796
1797
1798template<typename Ordinal>
1801 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
1802 )
1803{
1804 if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1805 return rcp(new MpiComm<Ordinal>(rawMpiComm));
1806 return Teuchos::null;
1807}
1808
1809
1810template<typename Ordinal>
1813 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
1814 const int defaultTag
1815 )
1816{
1817 if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1818 return rcp(new MpiComm<Ordinal>(rawMpiComm, defaultTag));
1819 return Teuchos::null;
1820}
1821
1822
1823template<typename Ordinal>
1824MPI_Comm
1825Teuchos::getRawMpiComm(const Comm<Ordinal> &comm)
1826{
1827 return *(
1828 dyn_cast<const MpiComm<Ordinal> >(comm).getRawMpiComm()
1829 );
1830}
1831
1832
1833#endif // HAVE_TEUCHOS_MPI
1834#endif // TEUCHOS_MPI_COMM_HPP
1835
Teuchos header file which uses auto-configuration information to include necessary C++ headers.
Implementation detail of Teuchos' MPI wrapper.
Defines basic traits for the ordinal field type.
Defines basic traits returning the name of a type in a portable and readable way.
Definition of Teuchos::as, for conversions between types.
Ordinal size_type
Type representing the number of elements in an ArrayRCP or view thereof.
Smart reference counting pointer class for automatic garbage collection.
static RCP< FancyOStream > getDefaultOStream()
Get the default output stream object.
Tabbing class for helping to create formatted, indented output for a basic_FancyOStream object.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
This macro checks that two numbers are equal and if not then throws an exception with a good error ...
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
std::string typeName(const T &t)
Template function for returning the concrete type name of a passed-in object.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
void ssend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of ssend() that takes a tag (and restores the correct order of arguments).
void readySend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of readySend() that accepts a message tag.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
RCP< CommRequest< Ordinal > > ireceive(const ArrayRCP< Packet > &recvBuffer, const int sourceRank, const int tag, const Comm< Ordinal > &comm)
Variant of ireceive that takes a tag argument (and restores the correct order of arguments).
void send(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of send() that takes a tag (and restores the correct order of arguments).
Teuchos implementation details.