From 76bb583717d9e526f08a452afcb745e4e6017c51 Mon Sep 17 00:00:00 2001 From: Patrick Daly Date: Thu, 2 Jun 2016 00:31:48 -0500 Subject: [PATCH] Conform to style guide with four spaces --- src/lib/common.cc | 6372 ++++++++++++++++++++++----------------------- 1 file changed, 3186 insertions(+), 3186 deletions(-) diff --git a/src/lib/common.cc b/src/lib/common.cc index e16d851..98faf1b 100644 --- a/src/lib/common.cc +++ b/src/lib/common.cc @@ -1,3430 +1,3430 @@ -namespace💩{ -//💩Code💩to💩turn💩parameters💩to💩functions💩on💩stream💩into💩strings💩that -//💩will💩be💩VLOG'ed.💩We💩need💩overloads,💩instead💩of -//💩e.g.💩BatchDescriptorToVlogString(),💩as💩the💩code💩that💩calls💩these -//💩functions💩does💩not💩know💩what💩the💩type💩of💩the💩parameter💩is. -string💩ToVlogString(const💩dnn::BatchDescriptor💩&descriptor)💩{ -💩💩return💩descriptor.ToShortString(); +namespace { +// Code to turn parameters to functions on stream into strings that +// will be VLOG'ed. We need overloads, instead of +// e.g. BatchDescriptorToVlogString(), as the code that calls these +// functions does not know what the type of the parameter is. +string ToVlogString(const dnn::BatchDescriptor &descriptor) { + return descriptor.ToShortString(); } -string💩ToVlogString(const💩dnn::FilterDescriptor💩&descriptor)💩{ -💩💩return💩descriptor.ToShortString(); +string ToVlogString(const dnn::FilterDescriptor &descriptor) { + return descriptor.ToShortString(); } -string💩ToVlogString(const💩dnn::ConvolutionDescriptor💩&descriptor)💩{ -💩💩return💩descriptor.ToShortString(); +string ToVlogString(const dnn::ConvolutionDescriptor &descriptor) { + return descriptor.ToShortString(); } -string💩ToVlogString(const💩dnn::PoolingDescriptor💩&descriptor)💩{ -💩💩return💩descriptor.ToShortString(); +string ToVlogString(const dnn::PoolingDescriptor &descriptor) { + return descriptor.ToShortString(); } -string💩ToVlogString(const💩dnn::NormalizeDescriptor💩&descriptor)💩{ -💩💩return💩descriptor.ToShortString(); +string ToVlogString(const dnn::NormalizeDescriptor &descriptor) { + return descriptor.ToShortString(); } -string💩ToVlogString(dnn::ActivationMode💩mode)💩{ -💩💩return💩dnn::ActivationModeString(mode); +string ToVlogString(dnn::ActivationMode mode) { + return dnn::ActivationModeString(mode); } -string💩ToVlogString(dnn::ElementwiseOperation💩op)💩{ -💩💩return💩dnn::ElementwiseOperationString(op); +string ToVlogString(dnn::ElementwiseOperation op) { + return dnn::ElementwiseOperationString(op); } -string💩ToVlogString(dnn::QuantizedActivationMode💩mode)💩{ -💩💩return💩dnn::QuantizedActivationModeString(mode); +string ToVlogString(dnn::QuantizedActivationMode mode) { + return dnn::QuantizedActivationModeString(mode); } -string💩ToVlogString(blas::Transpose💩t)💩{💩return💩blas::TransposeString(t);💩} +string ToVlogString(blas::Transpose t) { return blas::TransposeString(t); } -string💩ToVlogString(blas::UpperLower💩ul)💩{💩return💩blas::UpperLowerString(ul);💩} +string ToVlogString(blas::UpperLower ul) { return blas::UpperLowerString(ul); } -string💩ToVlogString(blas::Diagonal💩d)💩{💩return💩blas::DiagonalString(d);💩} +string ToVlogString(blas::Diagonal d) { return blas::DiagonalString(d); } -string💩ToVlogString(blas::Side💩s)💩{💩return💩blas::SideString(s);💩} +string ToVlogString(blas::Side s) { return blas::SideString(s); } -string💩ToVlogString(const💩void💩*ptr)💩{ -💩💩if💩(ptr💩==💩nullptr)💩{ -💩💩💩💩return💩"null"; -💩💩} +string ToVlogString(const void *ptr) { + if (ptr == nullptr) { + return "null"; + } -💩💩//💩StrCat💩does💩not💩convert💩pointers💩to💩text. -💩💩std::ostringstream💩out; -💩💩out💩<<💩ptr; -💩💩return💩out.str(); + // StrCat does not convert pointers to text. + std::ostringstream out; + out << ptr; + return out.str(); } -template💩 -string💩ToVlogString(const💩std::complex💩&c)💩{ -💩💩//💩StrCat💩does💩not💩convert💩std::complex💩to💩text. -💩💩std::ostringstream💩out; -💩💩out💩<<💩c; -💩💩return💩out.str(); +template +string ToVlogString(const std::complex &c) { + // StrCat does not convert std::complex to text. + std::ostringstream out; + out << c; + return out.str(); } -template💩 -string💩ToVlogString(const💩std::function💩&f)💩{ -💩💩return💩f💩==💩nullptr💩?💩"null"💩:💩""; +template +string ToVlogString(const std::function &f) { + return f == nullptr ? "null" : ""; } -string💩ToVlogString(const💩DeviceMemoryBase💩&memory)💩{ -💩💩return💩ToVlogString(memory.opaque()); +string ToVlogString(const DeviceMemoryBase &memory) { + return ToVlogString(memory.opaque()); } -string💩ToVlogString(const💩DeviceMemoryBase💩*memory)💩{ -💩💩return💩ToVlogString(*memory); +string ToVlogString(const DeviceMemoryBase *memory) { + return ToVlogString(*memory); } -string💩ToVlogString(int💩i)💩{💩return💩port::StrCat(i);💩} +string ToVlogString(int i) { return port::StrCat(i); } -string💩ToVlogString(uint32💩i)💩{💩return💩port::StrCat(i);💩} +string ToVlogString(uint32 i) { return port::StrCat(i); } -string💩ToVlogString(uint64💩i)💩{💩return💩port::StrCat(i);💩} +string ToVlogString(uint64 i) { return port::StrCat(i); } -string💩ToVlogString(int64💩i)💩{💩return💩port::StrCat(i);💩} +string ToVlogString(int64 i) { return port::StrCat(i); } -string💩ToVlogString(float💩f)💩{💩return💩port::StrCat(f);💩} +string ToVlogString(float f) { return port::StrCat(f); } -string💩ToVlogString(double💩d)💩{💩return💩port::StrCat(d);💩} +string ToVlogString(double d) { return port::StrCat(d); } -template💩 -string💩ToVlogString(port::ArraySlice💩elements)💩{ -💩💩string💩str💩=💩port::StrCat( -💩💩💩💩💩💩ToVlogString(reinterpret_cast(elements.data())),💩"[", -💩💩💩💩💩💩elements.size(),💩"]{"); -💩💩const💩char💩*separator💩=💩""; -💩💩size_t💩max_to_show💩=💩std::numeric_limits::max(); -💩💩if💩(!VLOG_IS_ON(2))💩{ -💩💩💩💩max_to_show💩=💩5; -💩💩}💩else💩if💩(!VLOG_IS_ON(3))💩{ -💩💩💩💩max_to_show💩=💩20; -💩💩}💩else💩if💩(!VLOG_IS_ON(11))💩{ -💩💩💩💩max_to_show💩=💩1000; -💩💩} -💩💩for💩(size_t💩i💩=💩0;💩i💩<💩elements.size();💩++i)💩{ -💩💩💩💩if💩(i💩==💩max_to_show)💩{ -💩💩💩💩💩💩str💩+=💩",💩..."; -💩💩💩💩💩💩break; -💩💩💩💩} -💩💩💩💩port::StrAppend(&str,💩separator,💩ToVlogString(elements[i])); -💩💩💩💩separator💩=💩",💩"; -💩💩} -💩💩str💩+=💩"}"; -💩💩return💩str; +template +string ToVlogString(port::ArraySlice elements) { + string str = port::StrCat( + ToVlogString(reinterpret_cast(elements.data())), "[", + elements.size(), "]{"); + const char *separator = ""; + size_t max_to_show = std::numeric_limits::max(); + if (!VLOG_IS_ON(2)) { + max_to_show = 5; + } else if (!VLOG_IS_ON(3)) { + max_to_show = 20; + } else if (!VLOG_IS_ON(11)) { + max_to_show = 1000; + } + for (size_t i = 0; i < elements.size(); ++i) { + if (i == max_to_show) { + str += ", ..."; + break; + } + port::StrAppend(&str, separator, ToVlogString(elements[i])); + separator = ", "; + } + str += "}"; + return str; } -template💩 -string💩ToVlogString(port::MutableArraySlice💩elements)💩{ -💩💩return💩ToVlogString(port::ArraySlice(elements)); +template +string ToVlogString(port::MutableArraySlice elements) { + return ToVlogString(port::ArraySlice(elements)); } -//💩Used💩together💩with💩PARAM💩to💩VLOG💩calls💩made💩to💩the💩stream.💩Intended -//💩to💩be💩used💩like💩this: +// Used together with PARAM to VLOG calls made to the stream. Intended +// to be used like this: // -//💩💩💩VLOG(1)💩<<💩CallStr("MyFunction",💩this,💩{PARAM(a),💩PARAM(b)}); +// VLOG(1) << CallStr("MyFunction", this, {PARAM(a), PARAM(b)}); // -//💩where💩a💩and💩b💩are💩the💩parameters💩to💩MyFunction. +// where a and b are the parameters to MyFunction. // -//💩See💩VLOG_CALL💩for💩a💩short-hand💩for💩this.💩This💩way💩of💩doing💩it💩saves -//💩a💩tremendous💩amount💩of💩boilerplate💩code💩given💩how💩many💩functions -//💩there💩are💩on💩Stream💩and💩how💩many💩parameters💩they💩each💩have. -string💩CallStr(const💩char💩*function_name,💩Stream💩*stream, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::vector>💩params)💩{ -💩💩//💩Do💩not💩call💩this💩function💩unless💩VLOG💩is💩on💩since💩just -💩💩//💩constructing💩all💩the💩strings💩in💩params💩is💩expensive. -💩💩CHECK(VLOG_IS_ON(1)); +// See VLOG_CALL for a short-hand for this. This way of doing it saves +// a tremendous amount of boilerplate code given how many functions +// there are on Stream and how many parameters they each have. +string CallStr(const char *function_name, Stream *stream, + std::vector> params) { + // Do not call this function unless VLOG is on since just + // constructing all the strings in params is expensive. + CHECK(VLOG_IS_ON(1)); -💩💩string💩str💩=💩port::StrCat("Called💩Stream::",💩function_name,💩"("); -💩💩const💩char💩*separator💩=💩""; -💩💩for💩(const💩auto💩¶m💩:💩params)💩{ -💩💩💩💩port::StrAppend(&str,💩separator,💩param.first,💩"=",💩param.second); -💩💩💩💩separator💩=💩",💩"; -💩💩} -💩💩port::StrAppend(&str,💩")💩stream=",💩ToVlogString(stream)); -💩💩if💩(VLOG_IS_ON(10))💩{ -💩💩💩💩port::StrAppend(&str,💩"💩",💩port::CurrentStackTrace(),💩"\n"); -💩💩} -💩💩return💩str; + string str = port::StrCat("Called Stream::", function_name, "("); + const char *separator = ""; + for (const auto ¶m : params) { + port::StrAppend(&str, separator, param.first, "=", param.second); + separator = ", "; + } + port::StrAppend(&str, ") stream=", ToVlogString(stream)); + if (VLOG_IS_ON(10)) { + port::StrAppend(&str, " ", port::CurrentStackTrace(), "\n"); + } + return str; } -//💩Use💩this💩macro💩to💩avoid💩having💩to💩type💩every💩parameter💩twice💩to💩log -//💩it💩with💩VLOG💩and💩CallStr. -#define💩PARAM(parameter)💩\ -💩💩{💩#parameter,💩ToVlogString(parameter)💩} +// Use this macro to avoid having to type every parameter twice to log +// it with VLOG and CallStr. +#define PARAM(parameter) \ + { #parameter, ToVlogString(parameter) } -//💩Use💩this💩macro💩to💩avoid💩having💩to💩type💩out💩the💩name💩of💩each -//💩function💩and💩to💩save💩some💩boilerplate.💩Intended💩to💩be💩used💩like💩this: +// Use this macro to avoid having to type out the name of each +// function and to save some boilerplate. Intended to be used like this: // -//💩💩💩VLOG_CALL(PARAM(a),💩PARAM(b)) +// VLOG_CALL(PARAM(a), PARAM(b)) // -//💩This💩saves💩a💩tremendous💩amount💩of💩boilerplate💩compared💩to💩the💩alternative: +// This saves a tremendous amount of boilerplate compared to the alternative: // -//💩💩💩VLOG(1)💩<<💩"Calling💩MyFunction(a="💩<<💩ToVlogString(a) -//💩💩💩💩💩💩💩💩💩💩💩<<💩",💩b="💩<<💩ToVlogString(b); +// VLOG(1) << "Calling MyFunction(a=" << ToVlogString(a) +// << ", b=" << ToVlogString(b); // -//💩Note💩here💩that💩most💩of💩the💩parameter💩names💩are💩not💩short💩and💩that -//💩most💩of💩the💩functions💩take💩many💩more💩than💩2💩parameters. -#define💩VLOG_CALL(...)💩VLOG(1)💩<<💩CallStr(__func__,💩this,💩{__VA_ARGS__}) +// Note here that most of the parameter names are not short and that +// most of the functions take many more than 2 parameters. +#define VLOG_CALL(...) VLOG(1) << CallStr(__func__, this, {__VA_ARGS__}) -}💩💩//💩namespace +} // namespace -Stream::Stream(StreamExecutor💩*parent) -💩💩💩💩:💩parent_(parent), -💩💩💩💩💩💩implementation_(parent->implementation()->GetStreamImplementation()), -💩💩💩💩💩💩allocated_(false), -💩💩💩💩💩💩ok_(false), -💩💩💩💩💩💩temporary_memory_manager_(this)💩{ -💩💩VLOG_CALL(PARAM(parent)); +Stream::Stream(StreamExecutor *parent) + : parent_(parent), + implementation_(parent->implementation()->GetStreamImplementation()), + allocated_(false), + ok_(false), + temporary_memory_manager_(this) { + VLOG_CALL(PARAM(parent)); } -Stream::Stream(StreamExecutor💩*parent, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩internal::StreamInterface💩*implementation) -💩💩💩💩:💩parent_(parent), -💩💩💩💩💩💩implementation_(implementation), -💩💩💩💩💩💩allocated_(false), -💩💩💩💩💩💩ok_(false), -💩💩💩💩💩💩temporary_memory_manager_(this)💩{ -💩💩VLOG_CALL(PARAM(parent),💩PARAM(implementation)); +Stream::Stream(StreamExecutor *parent, + internal::StreamInterface *implementation) + : parent_(parent), + implementation_(implementation), + allocated_(false), + ok_(false), + temporary_memory_manager_(this) { + VLOG_CALL(PARAM(parent), PARAM(implementation)); } -Stream::~Stream()💩{ -💩💩VLOG_CALL(); +Stream::~Stream() { + VLOG_CALL(); -💩💩temporary_memory_manager_.ForceDeallocateAll(); + temporary_memory_manager_.ForceDeallocateAll(); -💩💩if💩(allocated_)💩{ -💩💩💩💩parent_->DeallocateStream(this); -💩💩} + if (allocated_) { + parent_->DeallocateStream(this); + } } -Stream💩&Stream::Init()💩{ -💩💩VLOG_CALL(); +Stream &Stream::Init() { + VLOG_CALL(); -💩💩mutex_lock💩lock{mu_}; -💩💩CHECK_EQ(false,💩allocated_) -💩💩💩💩💩💩<<💩"stream💩appears💩to💩already💩have💩been💩initialized"; -💩💩CHECK(!ok_)💩<<💩"stream💩should💩be💩in💩!ok()💩state💩pre-initialization"; + mutex_lock lock{mu_}; + CHECK_EQ(false, allocated_) + << "stream appears to already have been initialized"; + CHECK(!ok_) << "stream should be in !ok() state pre-initialization"; -💩💩if💩(parent_->AllocateStream(this))💩{ -💩💩💩💩//💩Successful💩initialization! -💩💩💩💩allocated_💩=💩true; -💩💩💩💩ok_💩=💩true; -💩💩}💩else💩{ -💩💩💩💩LOG(ERROR)💩<<💩"failed💩to💩allocate💩stream💩during💩initialization"; -💩💩} + if (parent_->AllocateStream(this)) { + // Successful initialization! + allocated_ = true; + ok_ = true; + } else { + LOG(ERROR) << "failed to allocate stream during initialization"; + } -💩💩return💩*this; + return *this; } -Stream💩&Stream::InitTimer(Timer💩*timer)💩{ -💩💩VLOG_CALL(PARAM(timer)); +Stream &Stream::InitTimer(Timer *timer) { + VLOG_CALL(PARAM(timer)); -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->AllocateTimer(timer)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"did💩not💩allocate💩timer:💩"💩<<💩timer; -💩💩} -💩💩return💩*this; + if (ok()) { + CheckError(parent_->AllocateTimer(timer)); + } else { + LOG(INFO) << "did not allocate timer: " << timer; + } + return *this; } -Stream💩&Stream::InitWithTimer(Timer💩*timer)💩{ -💩💩VLOG_CALL(PARAM(timer)); +Stream &Stream::InitWithTimer(Timer *timer) { + VLOG_CALL(PARAM(timer)); -💩💩return💩Init().InitTimer(timer); + return Init().InitTimer(timer); } -Stream💩&Stream::ThenRecordEvent(Event💩*event)💩{ -💩💩VLOG_CALL(PARAM(event)); +Stream &Stream::ThenRecordEvent(Event *event) { + VLOG_CALL(PARAM(event)); -💩💩port::Status💩status💩=💩parent_->RecordEvent(this,💩event); -💩💩if💩(!status.ok())💩{ -💩💩💩💩LOG(ERROR)💩<<💩"Error💩recording💩event💩in💩stream:💩"💩<<💩status.error_message() -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩";💩not💩marking💩stream💩as💩bad,💩as💩the💩Event💩object💩may💩be💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"at💩fault.💩Monitor💩for💩further💩errors."; -💩💩} + port::Status status = parent_->RecordEvent(this, event); + if (!status.ok()) { + LOG(ERROR) << "Error recording event in stream: " << status.error_message() + << "; not marking stream as bad, as the Event object may be " + << "at fault. Monitor for further errors."; + } -💩💩return💩*this; + return *this; } -Stream💩&Stream::ThenConvolveWithScratch( -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩const💩DeviceMemory💩&filter_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor,💩DeviceMemory💩*output, -💩💩💩💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(input_descriptor),💩PARAM(input_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(filter_descriptor),💩PARAM(filter_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(convolution_descriptor),💩PARAM(output_descriptor), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output)); +Stream &Stream::ThenConvolveWithScratch( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory *output, + ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(filter_descriptor), PARAM(filter_data), + PARAM(convolution_descriptor), PARAM(output_descriptor), + PARAM(output)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoConvolve( -💩💩💩💩💩💩💩💩💩💩this,💩input_descriptor,💩input_data,💩filter_descriptor,💩filter_data, -💩💩💩💩💩💩💩💩💩💩convolution_descriptor,💩output_descriptor,💩output, -💩💩💩💩💩💩💩💩💩💩/*scratch_allocator=*/scratch_allocator)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, + /*scratch_allocator=*/scratch_allocator)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenConvolve( -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩const💩DeviceMemory💩&filter_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩*output)💩{ -💩💩return💩ThenConvolveWithScratch(input_descriptor,💩input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩filter_descriptor,💩filter_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩convolution_descriptor,💩output_descriptor, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output,💩/*scratch_allocator=*/nullptr); +Stream &Stream::ThenConvolve( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory *output) { + return ThenConvolveWithScratch(input_descriptor, input_data, + filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, + output, /*scratch_allocator=*/nullptr); } -Stream💩&Stream::ThenSeparableConvolve( -💩💩💩💩const💩dnn::BatchDescriptor💩&batch_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor,💩int💩depth_multiplier, -💩💩💩💩const💩DeviceMemory💩&first_weights, -💩💩💩💩const💩DeviceMemory💩&second_weights, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩*output)💩{ -💩💩VLOG_CALL( -💩💩💩💩💩💩PARAM(batch_descriptor),💩PARAM(input_data),💩PARAM(filter_descriptor), -💩💩💩💩💩💩PARAM(depth_multiplier),💩PARAM(first_weights),💩PARAM(second_weights), -💩💩💩💩💩💩PARAM(convolution_descriptor),💩PARAM(output_descriptor),💩PARAM(output)); +Stream &Stream::ThenSeparableConvolve( + const dnn::BatchDescriptor &batch_descriptor, + const DeviceMemory &input_data, + const dnn::FilterDescriptor &filter_descriptor, int depth_multiplier, + const DeviceMemory &first_weights, + const DeviceMemory &second_weights, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory *output) { + VLOG_CALL( + PARAM(batch_descriptor), PARAM(input_data), PARAM(filter_descriptor), + PARAM(depth_multiplier), PARAM(first_weights), PARAM(second_weights), + PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoSeparableConvolve( -💩💩💩💩💩💩💩💩💩💩this,💩batch_descriptor,💩input_data,💩filter_descriptor, -💩💩💩💩💩💩💩💩💩💩depth_multiplier,💩first_weights,💩second_weights, -💩💩💩💩💩💩💩💩💩💩convolution_descriptor,💩output_descriptor,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSeparableConvolve( + this, batch_descriptor, input_data, filter_descriptor, + depth_multiplier, first_weights, second_weights, + convolution_descriptor, output_descriptor, output)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenConvolveBackwardDataWithScratch( -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩const💩DeviceMemory💩&filter_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩backward_output_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩DeviceMemory💩*backward_input_data, -💩💩💩💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(filter_descriptor),💩PARAM(filter_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_descriptor),💩PARAM(backward_output_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(convolution_descriptor),💩PARAM(input_descriptor), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(backward_input_data)); +Stream &Stream::ThenConvolveBackwardDataWithScratch( + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &input_descriptor, + DeviceMemory *backward_input_data, + ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(filter_descriptor), PARAM(filter_data), + PARAM(output_descriptor), PARAM(backward_output_data), + PARAM(convolution_descriptor), PARAM(input_descriptor), + PARAM(backward_input_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoConvolveBackwardData( -💩💩💩💩💩💩💩💩💩💩this,💩filter_descriptor,💩filter_data,💩output_descriptor, -💩💩💩💩💩💩💩💩💩💩backward_output_data,💩convolution_descriptor,💩input_descriptor, -💩💩💩💩💩💩💩💩💩💩backward_input_data,💩scratch_allocator)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveBackwardData( + this, filter_descriptor, filter_data, output_descriptor, + backward_output_data, convolution_descriptor, input_descriptor, + backward_input_data, scratch_allocator)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenConvolveBackwardData( -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩const💩DeviceMemory💩&filter_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩backward_output_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩DeviceMemory💩*backward_input_data)💩{ -💩💩return💩ThenConvolveBackwardDataWithScratch( -💩💩💩💩💩💩filter_descriptor,💩filter_data,💩output_descriptor,💩backward_output_data, -💩💩💩💩💩💩convolution_descriptor,💩input_descriptor,💩backward_input_data, -💩💩💩💩💩💩/*scratch_allocator=*/nullptr); +Stream &Stream::ThenConvolveBackwardData( + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &input_descriptor, + DeviceMemory *backward_input_data) { + return ThenConvolveBackwardDataWithScratch( + filter_descriptor, filter_data, output_descriptor, backward_output_data, + convolution_descriptor, input_descriptor, backward_input_data, + /*scratch_allocator=*/nullptr); } -Stream💩&Stream::ThenConvolveBackwardFilterWithScratch( -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩backward_output_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩DeviceMemory💩*backward_filter_data, -💩💩💩💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(input_descriptor),💩PARAM(input_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_descriptor),💩PARAM(backward_output_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(convolution_descriptor),💩PARAM(filter_descriptor), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(backward_filter_data)); +Stream &Stream::ThenConvolveBackwardFilterWithScratch( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::FilterDescriptor &filter_descriptor, + DeviceMemory *backward_filter_data, + ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(output_descriptor), PARAM(backward_output_data), + PARAM(convolution_descriptor), PARAM(filter_descriptor), + PARAM(backward_filter_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoConvolveBackwardFilter( -💩💩💩💩💩💩💩💩💩💩this,💩input_descriptor,💩input_data,💩output_descriptor, -💩💩💩💩💩💩💩💩💩💩backward_output_data,💩convolution_descriptor,💩filter_descriptor, -💩💩💩💩💩💩💩💩💩💩backward_filter_data,💩scratch_allocator)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveBackwardFilter( + this, input_descriptor, input_data, output_descriptor, + backward_output_data, convolution_descriptor, filter_descriptor, + backward_filter_data, scratch_allocator)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenConvolveBackwardFilter( -💩💩💩💩const💩dnn::BatchDescriptor💩&input_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_descriptor, -💩💩💩💩DeviceMemory💩backward_output_data, -💩💩💩💩const💩dnn::ConvolutionDescriptor💩&convolution_descriptor, -💩💩💩💩const💩dnn::FilterDescriptor💩&filter_descriptor, -💩💩💩💩DeviceMemory💩*backward_filter_data)💩{ -💩💩return💩ThenConvolveBackwardFilterWithScratch( -💩💩💩💩💩💩input_descriptor,💩input_data,💩output_descriptor,💩backward_output_data, -💩💩💩💩💩💩convolution_descriptor,💩filter_descriptor,💩backward_filter_data, -💩💩💩💩💩💩/*scratch_allocator=*/nullptr); +Stream &Stream::ThenConvolveBackwardFilter( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::FilterDescriptor &filter_descriptor, + DeviceMemory *backward_filter_data) { + return ThenConvolveBackwardFilterWithScratch( + input_descriptor, input_data, output_descriptor, backward_output_data, + convolution_descriptor, filter_descriptor, backward_filter_data, + /*scratch_allocator=*/nullptr); } -Stream💩&Stream::ThenMatMul(const💩DeviceMemory💩&input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&weights, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩dnn::BatchDescriptor💩&input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(input_data),💩PARAM(weights),💩PARAM(input_dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_dimensions),💩PARAM(output_data)); +Stream &Stream::ThenMatMul(const DeviceMemory &input_data, + const DeviceMemory &weights, + const dnn::BatchDescriptor &input_dimensions, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(input_data), PARAM(weights), PARAM(input_dimensions), + PARAM(output_dimensions), PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoMatMul(this,💩input_data,💩weights,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_dimensions,💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions, + output_dimensions, output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenMatMulQuantized( -💩💩💩💩const💩DeviceMemory💩&input_data,💩const💩DeviceMemory💩&weights, -💩💩💩💩const💩DeviceMemory💩&weight_scales, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_dimensions, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(input_data),💩PARAM(weights),💩PARAM(weight_scales), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(input_dimensions),💩PARAM(output_dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenMatMulQuantized( + const DeviceMemory &input_data, const DeviceMemory &weights, + const DeviceMemory &weight_scales, + const dnn::BatchDescriptor &input_dimensions, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(input_data), PARAM(weights), PARAM(weight_scales), + PARAM(input_dimensions), PARAM(output_dimensions), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoMatMulQuantized(this,💩input_data,💩weights, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩weight_scales,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_dimensions,💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMulQuantized(this, input_data, weights, + weight_scales, input_dimensions, + output_dimensions, output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenMatMulQuantized( -💩💩💩💩const💩DeviceMemory💩&input_data,💩const💩DeviceMemory💩&weights, -💩💩💩💩const💩DeviceMemory💩&weight_scales, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_dimensions, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(input_data),💩PARAM(weights),💩PARAM(weight_scales), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(input_dimensions),💩PARAM(output_dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenMatMulQuantized( + const DeviceMemory &input_data, const DeviceMemory &weights, + const DeviceMemory &weight_scales, + const dnn::BatchDescriptor &input_dimensions, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(input_data), PARAM(weights), PARAM(weight_scales), + PARAM(input_dimensions), PARAM(output_dimensions), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoMatMulQuantized(this,💩input_data,💩weights, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩weight_scales,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_dimensions,💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMulQuantized(this, input_data, weights, + weight_scales, input_dimensions, + output_dimensions, output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenBiasAdd(const💩DeviceMemory💩&input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&biases, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩dnn::BatchDescriptor💩&dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(input_data),💩PARAM(biases),💩PARAM(dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenBiasAdd(const DeviceMemory &input_data, + const DeviceMemory &biases, + const dnn::BatchDescriptor &dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(input_data), PARAM(biases), PARAM(dimensions), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError( -💩💩💩💩💩💩💩💩💩💩dnn->DoBiasAdd(this,💩input_data,💩biases,💩dimensions,💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError( + dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenPoolForward( -💩💩💩💩const💩dnn::PoolingDescriptor💩&pooling_dimensions, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_dimensions, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(pooling_dimensions),💩PARAM(input_dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(input_data),💩PARAM(output_dimensions),💩PARAM(output_data)); +Stream &Stream::ThenPoolForward( + const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoPoolForward(this,💩pooling_dimensions,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩input_data,💩output_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenPoolBackward( -💩💩💩💩const💩dnn::PoolingDescriptor💩&pooling_dimensions, -💩💩💩💩const💩dnn::BatchDescriptor💩&input_dimensions, -💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩const💩DeviceMemory💩&output_data, -💩💩💩💩const💩DeviceMemory💩&input_diff_data, -💩💩💩💩DeviceMemory💩*output_diff_data)💩{ -💩💩VLOG_CALL(PARAM(pooling_dimensions),💩PARAM(input_dimensions), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(input_data),💩PARAM(output_dimensions),💩PARAM(output_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(input_diff_data),💩PARAM(output_diff_data)); +Stream &Stream::ThenPoolBackward( + const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_dimensions, + const DeviceMemory &output_data, + const DeviceMemory &input_diff_data, + DeviceMemory *output_diff_data) { + VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), + PARAM(input_diff_data), PARAM(output_diff_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoPoolBackward(this,💩pooling_dimensions,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩input_data,💩output_dimensions,💩output_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩input_diff_data,💩output_diff_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + input_diff_data, output_diff_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenNormalize( -💩💩💩💩const💩dnn::NormalizeDescriptor💩&normalize_descriptor, -💩💩💩💩const💩DeviceMemory💩&input_data,💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(normalize_descriptor),💩PARAM(input_data),💩PARAM(output_data)); +Stream &Stream::ThenNormalize( + const dnn::NormalizeDescriptor &normalize_descriptor, + const DeviceMemory &input_data, DeviceMemory *output_data) { + VLOG_CALL(PARAM(normalize_descriptor), PARAM(input_data), PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoNormalize(this,💩normalize_descriptor,💩input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoNormalize(this, normalize_descriptor, input_data, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenActivate(dnn::ActivationMode💩activation_mode, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩dnn::BatchDescriptor💩&dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(activation_mode),💩PARAM(dimensions),💩PARAM(input_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenActivate(dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &dimensions, + const DeviceMemory &input_data, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(activation_mode), PARAM(dimensions), PARAM(input_data), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoActivate(this,💩activation_mode,💩dimensions,💩input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenDepthConcatenate( -💩💩💩💩port::ArraySlice💩input_dimensions, -💩💩💩💩port::ArraySlice💩*>💩input_data, -💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(input_dimensions),💩PARAM(input_data),💩PARAM(output_data)); +Stream &Stream::ThenDepthConcatenate( + port::ArraySlice input_dimensions, + port::ArraySlice *> input_data, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), PARAM(output_data)); -💩💩for💩(size_t💩i💩=💩1;💩i💩<💩input_dimensions.size();💩++i)💩{ -💩💩💩💩if💩(input_dimensions[i].count()💩!=💩input_dimensions[0].count()💩|| -💩💩💩💩💩💩💩💩input_dimensions[i].height()💩!=💩input_dimensions[0].height()💩|| -💩💩💩💩💩💩💩💩input_dimensions[i].width()💩!=💩input_dimensions[0].width())💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(ERROR)💩<<💩"Incompatible💩dimensions💩for💩depth💩concatenation.\n" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"input_dimensions[0]:💩"💩<<💩input_dimensions[0].ToString() -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"input_dimensions["💩<<💩i -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"]:💩"💩<<💩input_dimensions[i].ToString(); -💩💩💩💩💩💩return💩*this; -💩💩💩💩} -💩💩} + for (size_t i = 1; i < input_dimensions.size(); ++i) { + if (input_dimensions[i].count() != input_dimensions[0].count() || + input_dimensions[i].height() != input_dimensions[0].height() || + input_dimensions[i].width() != input_dimensions[0].width()) { + SetError(); + LOG(ERROR) << "Incompatible dimensions for depth concatenation.\n" + << "input_dimensions[0]: " << input_dimensions[0].ToString() + << "input_dimensions[" << i + << "]: " << input_dimensions[i].ToString(); + return *this; + } + } -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoDepthConcatenate(this,💩input_dimensions,💩input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenElementwiseOperate( -💩💩💩💩dnn::ElementwiseOperation💩operation, -💩💩💩💩port::ArraySlice💩input_dimensions, -💩💩💩💩port::ArraySlice💩*>💩input_data, -💩💩💩💩const💩dnn::BatchDescriptor💩&output_dimensions, -💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(operation),💩PARAM(input_dimensions),💩PARAM(input_data), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_dimensions),💩PARAM(output_data)); +Stream &Stream::ThenElementwiseOperate( + dnn::ElementwiseOperation operation, + port::ArraySlice input_dimensions, + port::ArraySlice *> input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(operation), PARAM(input_dimensions), PARAM(input_data), + PARAM(output_dimensions), PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoElementwiseOperate(this,💩operation,💩input_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩input_data,💩output_dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoElementwiseOperate(this, operation, input_dimensions, + input_data, output_dimensions, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenXYPad(const💩dnn::BatchDescriptor💩&dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&input_data,💩int64💩left_pad, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int64💩right_pad,💩int64💩top_pad,💩int64💩bottom_pad, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(dimensions),💩PARAM(input_data),💩PARAM(left_pad), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(right_pad),💩PARAM(top_pad),💩PARAM(bottom_pad), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenXYPad(const dnn::BatchDescriptor &dimensions, + const DeviceMemory &input_data, int64 left_pad, + int64 right_pad, int64 top_pad, int64 bottom_pad, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(left_pad), + PARAM(right_pad), PARAM(top_pad), PARAM(bottom_pad), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoXYPad(this,💩dimensions,💩input_data,💩left_pad,💩right_pad, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩top_pad,💩bottom_pad,💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad, + top_pad, bottom_pad, output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenXYSlice(const💩dnn::BatchDescriptor💩&dimensions, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&input_data, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int64💩left_trim,💩int64💩right_trim,💩int64💩top_trim, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int64💩bottom_trim, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output_data)💩{ -💩💩VLOG_CALL(PARAM(dimensions),💩PARAM(input_data),💩PARAM(left_trim), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(right_trim),💩PARAM(top_trim),💩PARAM(bottom_trim), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(output_data)); +Stream &Stream::ThenXYSlice(const dnn::BatchDescriptor &dimensions, + const DeviceMemory &input_data, + int64 left_trim, int64 right_trim, int64 top_trim, + int64 bottom_trim, + DeviceMemory *output_data) { + VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(left_trim), + PARAM(right_trim), PARAM(top_trim), PARAM(bottom_trim), + PARAM(output_data)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoXYSlice(this,💩dimensions,💩input_data,💩left_trim, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩right_trim,💩top_trim,💩bottom_trim, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩output_data)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoXYSlice(this, dimensions, input_data, left_trim, + right_trim, top_trim, bottom_trim, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenMemcpyD2HQuantized( -💩💩💩💩const💩DeviceMemory💩&gpu_unquantized_src, -💩💩💩💩dnn::QuantizedActivationMode💩mode,💩void💩*host_dst,💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(gpu_unquantized_src),💩PARAM(mode),💩PARAM(host_dst), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(size)); +Stream &Stream::ThenMemcpyD2HQuantized( + const DeviceMemory &gpu_unquantized_src, + dnn::QuantizedActivationMode mode, void *host_dst, uint64 size) { + VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(mode), PARAM(host_dst), + PARAM(size)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoMemcpyD2HQuantized(this,💩gpu_unquantized_src,💩mode, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩host_dst,💩size)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode, + host_dst, size)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩&Stream::ThenMemcpyH2DQuantized( -💩💩💩💩const💩void💩*host_src,💩uint64💩size,💩dnn::QuantizedActivationMode💩mode, -💩💩💩💩DeviceMemory💩*gpu_unquantized_dst)💩{ -💩💩VLOG_CALL(PARAM(host_src),💩PARAM(size),💩PARAM(mode), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(gpu_unquantized_dst)); +Stream &Stream::ThenMemcpyH2DQuantized( + const void *host_src, uint64 size, dnn::QuantizedActivationMode mode, + DeviceMemory *gpu_unquantized_dst) { + VLOG_CALL(PARAM(host_src), PARAM(size), PARAM(mode), + PARAM(gpu_unquantized_dst)); -💩💩if💩(ok())💩{ -💩💩💩💩if💩(dnn::DnnSupport💩*dnn💩=💩parent_->AsDnn())💩{ -💩💩💩💩💩💩CheckError(dnn->DoMemcpyH2DQuantized(this,💩host_src,💩size,💩mode, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩gpu_unquantized_dst)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩DNN💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩DNN💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode, + gpu_unquantized_dst)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; } -Stream💩*Stream::GetOrCreateSubStream()💩{ -💩💩mutex_lock💩lock{mu_}; -💩💩for💩(auto💩&stream💩:💩sub_streams_)💩{ -💩💩💩💩if💩(stream.second)💩{ -💩💩💩💩💩💩stream.second💩=💩false; -💩💩💩💩💩💩return💩stream.first.get(); -💩💩💩💩} -💩💩} -💩💩sub_streams_.emplace_back(std::unique_ptr{new💩Stream{parent_}}, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩false); -💩💩Stream💩*sub_stream💩=💩sub_streams_.back().first.get(); -💩💩sub_stream->Init(); -💩💩CHECK(ok_)💩<<💩"sub-stream💩failed💩to💩be💩initialized"; +Stream *Stream::GetOrCreateSubStream() { + mutex_lock lock{mu_}; + for (auto &stream : sub_streams_) { + if (stream.second) { + stream.second = false; + return stream.first.get(); + } + } + sub_streams_.emplace_back(std::unique_ptr{new Stream{parent_}}, + false); + Stream *sub_stream = sub_streams_.back().first.get(); + sub_stream->Init(); + CHECK(ok_) << "sub-stream failed to be initialized"; -💩💩return💩sub_stream; + return sub_stream; } -void💩Stream::ReturnSubStream(Stream💩*sub_stream)💩{ -💩💩mutex_lock💩lock{mu_}; -💩💩for💩(auto💩&stream💩:💩sub_streams_)💩{ -💩💩💩💩if💩(stream.first.get()💩==💩sub_stream)💩{ -💩💩💩💩💩💩stream.second💩=💩true; -💩💩💩💩💩💩return; -💩💩💩💩} -💩💩} -💩💩LOG(FATAL)💩<<💩"the💩sub-stream💩to💩be💩returned💩is💩not💩created💩by💩this💩stream"; +void Stream::ReturnSubStream(Stream *sub_stream) { + mutex_lock lock{mu_}; + for (auto &stream : sub_streams_) { + if (stream.first.get() == sub_stream) { + stream.second = true; + return; + } + } + LOG(FATAL) << "the sub-stream to be returned is not created by this stream"; } -Stream💩&Stream::ThenStartTimer(Timer💩*t)💩{ -💩💩VLOG_CALL(PARAM(t)); +Stream &Stream::ThenStartTimer(Timer *t) { + VLOG_CALL(PARAM(t)); -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->StartTimer(this,💩t)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this💩<<💩"💩did💩not💩enqueue💩'start💩timer':💩"💩<<💩t; -💩💩} -💩💩return💩*this; + if (ok()) { + CheckError(parent_->StartTimer(this, t)); + } else { + LOG(INFO) << "stream " << this << " did not enqueue 'start timer': " << t; + } + return *this; } -Stream💩&Stream::ThenStopTimer(Timer💩*t)💩{ -💩💩VLOG_CALL(PARAM(t)); +Stream &Stream::ThenStopTimer(Timer *t) { + VLOG_CALL(PARAM(t)); -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->StopTimer(this,💩t)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this💩<<💩"💩did💩not💩enqueue💩'stop💩timer':💩"💩<<💩t; -💩💩} -💩💩return💩*this; + if (ok()) { + CheckError(parent_->StopTimer(this, t)); + } else { + LOG(INFO) << "stream " << this << " did not enqueue 'stop timer': " << t; + } + return *this; } -Stream💩&Stream::ThenWaitFor(Stream💩*other)💩{ -💩💩VLOG_CALL(PARAM(other)); +Stream &Stream::ThenWaitFor(Stream *other) { + VLOG_CALL(PARAM(other)); -💩💩CHECK(this💩!=💩other)💩<<💩"stream💩cannot💩wait💩for💩itself"; -💩💩if💩(ok()💩&&💩other->ok())💩{ -💩💩💩💩CheckError(parent_->CreateStreamDependency(this,💩other)); -💩💩}💩else💩{ -💩💩💩💩SetError(); -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this💩<<💩"💩did💩not💩wait💩for💩stream:💩"💩<<💩other; -💩💩} -💩💩return💩*this; + CHECK(this != other) << "stream cannot wait for itself"; + if (ok() && other->ok()) { + CheckError(parent_->CreateStreamDependency(this, other)); + } else { + SetError(); + LOG(INFO) << "stream " << this << " did not wait for stream: " << other; + } + return *this; } -Stream💩&Stream::ThenWaitFor(std::vector>💩*others)💩{ -💩💩VLOG_CALL(PARAM(others)); +Stream &Stream::ThenWaitFor(std::vector> *others) { + VLOG_CALL(PARAM(others)); -💩💩for💩(auto💩&stream💩:💩*others)💩{ -💩💩💩💩CHECK_NE(stream.get(),💩this); -💩💩💩💩ThenWaitFor(stream.get()); -💩💩} -💩💩return💩*this; + for (auto &stream : *others) { + CHECK_NE(stream.get(), this); + ThenWaitFor(stream.get()); + } + return *this; } -Stream💩&Stream::ThenWaitFor(Event💩*event)💩{ -💩💩VLOG_CALL(PARAM(event)); +Stream &Stream::ThenWaitFor(Event *event) { + VLOG_CALL(PARAM(event)); -💩💩if💩(ok())💩{ -💩💩💩💩port::Status💩status💩=💩parent_->WaitForEvent(this,💩event); -💩💩💩💩if💩(!status.ok())💩{ -💩💩💩💩💩💩LOG(ERROR)💩<<💩"Error💩waiting💩for💩event💩in💩stream:💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩status.error_message() -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩";💩not💩marking💩stream💩as💩bad,💩as💩the💩Event💩object💩may💩be💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"at💩fault.💩Monitor💩for💩further💩errors."; -💩💩💩💩} -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this💩<<💩"💩did💩not💩wait💩for💩an💩event."; -💩💩} -💩💩return💩*this; + if (ok()) { + port::Status status = parent_->WaitForEvent(this, event); + if (!status.ok()) { + LOG(ERROR) << "Error waiting for event in stream: " + << status.error_message() + << "; not marking stream as bad, as the Event object may be " + << "at fault. Monitor for further errors."; + } + } else { + LOG(INFO) << "stream " << this << " did not wait for an event."; + } + return *this; } -//💩A💩functor💩that💩implements💩ThenBlasXXX💩interfaces,💩which💩calls💩DoBlasXXX -//💩functions💩and💩logs💩for💩errors. -template💩 -struct💩ThenBlasImpl💩{ -💩💩//💩blas_func💩is💩the💩DoBlasXXX💩member💩function💩pointer,💩and💩args💩are💩its -💩💩//💩arguments💩except💩the💩first💩one💩of💩Stream*💩type. -💩💩Stream💩&operator()(Stream💩*stream, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩bool💩(blas::BlasSupport::*blas_func)(Stream💩*,💩Args...), -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩Args...💩args); +// A functor that implements ThenBlasXXX interfaces, which calls DoBlasXXX +// functions and logs for errors. +template +struct ThenBlasImpl { + // blas_func is the DoBlasXXX member function pointer, and args are its + // arguments except the first one of Stream* type. + Stream &operator()(Stream *stream, + bool (blas::BlasSupport::*blas_func)(Stream *, Args...), + Args... args); }; -template💩 -Stream💩&ThenBlasImpl::operator()( -💩💩💩💩Stream💩*stream,💩bool💩(blas::BlasSupport::*blas_func)(Stream💩*,💩Args...), -💩💩💩💩Args...💩args)💩{ -💩💩if💩(stream->ok())💩{ -💩💩💩💩if💩(blas::BlasSupport💩*blas💩=💩stream->parent_->AsBlas())💩{ -💩💩💩💩💩💩stream->CheckError((blas->*blas_func)(stream,💩args...)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩stream->CheckError(false); -💩💩💩💩💩💩LOG(WARNING) -💩💩💩💩💩💩💩💩💩💩<<💩"attempting💩to💩perform💩BLAS💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩BLAS💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*stream; -} - -Stream💩&Stream::ThenBlasAsum(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAsum,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasAsum(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +template +Stream &ThenBlasImpl::operator()( + Stream *stream, bool (blas::BlasSupport::*blas_func)(Stream *, Args...), + Args... args) { + if (stream->ok()) { + if (blas::BlasSupport *blas = stream->parent_->AsBlas()) { + stream->CheckError((blas->*blas_func)(stream, args...)); + } else { + stream->CheckError(false); + LOG(WARNING) + << "attempting to perform BLAS operation using StreamExecutor " + "without BLAS support"; + } + } + return *stream; +} + +Stream &Stream::ThenBlasAsum(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasAsum(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAsum,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasAsum(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAsum,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasAsum(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAsum,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasAxpy(uint64💩elem_count,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAxpy,💩elem_count,💩alpha,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩y,💩incy); -} - -Stream💩&Stream::ThenBlasAxpy(uint64💩elem_count,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAxpy,💩elem_count,💩alpha,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩y,💩incy); -} - -Stream💩&Stream::ThenBlasAxpy(uint64💩elem_count,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAxpy,💩elem_count,💩alpha,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩y,💩incy); -} - -Stream💩&Stream::ThenBlasAxpy(uint64💩elem_count,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasAxpy,💩elem_count,💩alpha,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩y,💩incy); -} + ThenBlasImpl &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasAsum(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasAsum(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasAxpy(uint64 elem_count, float alpha, + const DeviceMemory &x, int incx, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, + y, incy); +} + +Stream &Stream::ThenBlasAxpy(uint64 elem_count, double alpha, + const DeviceMemory &x, int incx, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, + y, incy); +} + +Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *y, + int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, + y, incy); +} + +Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *y, + int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, + y, incy); +} -Stream💩&Stream::ThenBlasCopy(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); +Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasCopy,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); -} - -Stream💩&Stream::ThenBlasCopy(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasCopy,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); -} - -Stream💩&Stream::ThenBlasCopy(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasCopy,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); -} - -Stream💩&Stream::ThenBlasCopy(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasCopy,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); -} - -Stream💩&Stream::ThenBlasDot(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasDot(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasDotc(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDotc,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩result); -} - -Stream💩&Stream::ThenBlasDotc(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDotc,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩result); -} - -Stream💩&Stream::ThenBlasDotu(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDotu,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩result); -} - -Stream💩&Stream::ThenBlasDotu(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasDotu,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩result); -} - -Stream💩&Stream::ThenBlasNrm2(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasNrm2,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, + incy); +} + +Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, + incy); +} + +Stream &Stream::ThenBlasCopy(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory> *y, + int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, + incy); +} + +Stream &Stream::ThenBlasCopy(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory> *y, + int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, + incy); +} + +Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory &x, + int incx, const DeviceMemory &y, int incy, + DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl &, int, + const DeviceMemory &, int, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy, + result); +} + +Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory &x, + int incx, const DeviceMemory &y, int incy, + DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl &, int, + const DeviceMemory &, int, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy, + result); +} + +Stream &Stream::ThenBlasDotc(uint64 elem_count, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, + DeviceMemory> *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y, + incy, result); +} + +Stream &Stream::ThenBlasDotc(uint64 elem_count, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, + DeviceMemory> *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y, + incy, result); +} + +Stream &Stream::ThenBlasDotu(uint64 elem_count, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, + DeviceMemory> *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y, + incy, result); +} + +Stream &Stream::ThenBlasDotu(uint64 elem_count, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, + DeviceMemory> *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(result)); + + ThenBlasImpl> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y, + incy, result); +} + +Stream &Stream::ThenBlasNrm2(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, + result); } -Stream💩&Stream::ThenBlasNrm2(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasNrm2,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasNrm2(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasNrm2,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasNrm2(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasNrm2,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasRot(uint64💩elem_count,💩DeviceMemory💩*x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy,💩float💩c, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩float💩s)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasNrm2(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasNrm2(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasNrm2(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory *x, int incx, + DeviceMemory *y, int incy, float c, + float s) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(c), PARAM(s)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩float,💩float>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩c,💩s); -} - -Stream💩&Stream::ThenBlasRot(uint64💩elem_count,💩DeviceMemory💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩c,💩double💩s)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(c),💩PARAM(s)); + ThenBlasImpl *, int, DeviceMemory *, int, + float, float> impl; + return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, + c, s); +} + +Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory *x, + int incx, DeviceMemory *y, int incy, + double c, double s) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(c), PARAM(s)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double,💩double>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩c,💩s); + ThenBlasImpl *, int, DeviceMemory *, int, + double, double> impl; + return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, + c, s); } -Stream💩&Stream::ThenBlasRot(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩float💩c,💩float💩s)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRot(uint64 elem_count, + DeviceMemory> *x, int incx, + DeviceMemory> *y, int incy, + float c, float s) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(c), PARAM(s)); -💩💩ThenBlasImpl>💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int,💩float,💩float>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩c,💩s); + ThenBlasImpl> *, int, + DeviceMemory> *, int, float, float> impl; + return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, + c, s); } -Stream💩&Stream::ThenBlasRot(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩c,💩double💩s)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRot(uint64 elem_count, + DeviceMemory> *x, int incx, + DeviceMemory> *y, int incy, + double c, double s) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(c), PARAM(s)); -💩💩ThenBlasImpl>💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int,💩double,💩double>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRot,💩elem_count,💩x,💩incx,💩y,💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩c,💩s); + ThenBlasImpl> *, int, + DeviceMemory> *, int, double, double> impl; + return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, + c, s); } -Stream💩&Stream::ThenBlasRotg(DeviceMemory💩*a,💩DeviceMemory💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩DeviceMemory💩*s)💩{ -💩💩VLOG_CALL(PARAM(a),💩PARAM(b),💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRotg(DeviceMemory *a, DeviceMemory *b, + DeviceMemory *c, DeviceMemory *s) { + VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); -💩💩ThenBlasImpl💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotg,💩a,💩b,💩c,💩s); + ThenBlasImpl *, DeviceMemory *, + DeviceMemory *, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } -Stream💩&Stream::ThenBlasRotg(DeviceMemory💩*a,💩DeviceMemory💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩DeviceMemory💩*s)💩{ -💩💩VLOG_CALL(PARAM(a),💩PARAM(b),💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRotg(DeviceMemory *a, DeviceMemory *b, + DeviceMemory *c, DeviceMemory *s) { + VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); -💩💩ThenBlasImpl💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotg,💩a,💩b,💩c,💩s); + ThenBlasImpl *, DeviceMemory *, + DeviceMemory *, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } -Stream💩&Stream::ThenBlasRotg(DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*s)💩{ -💩💩VLOG_CALL(PARAM(a),💩PARAM(b),💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRotg(DeviceMemory> *a, + DeviceMemory> *b, + DeviceMemory *c, + DeviceMemory> *s) { + VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); -💩💩ThenBlasImpl>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotg,💩a,💩b,💩c,💩s); + ThenBlasImpl> *, + DeviceMemory> *, DeviceMemory *, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } -Stream💩&Stream::ThenBlasRotg(DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*s)💩{ -💩💩VLOG_CALL(PARAM(a),💩PARAM(b),💩PARAM(c),💩PARAM(s)); +Stream &Stream::ThenBlasRotg(DeviceMemory> *a, + DeviceMemory> *b, + DeviceMemory *c, + DeviceMemory> *s) { + VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); -💩💩ThenBlasImpl>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotg,💩a,💩b,💩c,💩s); + ThenBlasImpl> *, + DeviceMemory> *, DeviceMemory *, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } -Stream💩&Stream::ThenBlasRotm(uint64💩elem_count,💩DeviceMemory💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩¶m)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(param)); +Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory *x, + int incx, DeviceMemory *y, int incy, + const DeviceMemory ¶m) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(param)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotm,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩param); + ThenBlasImpl *, int, DeviceMemory *, int, + const DeviceMemory &> impl; + return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y, + incy, param); } -Stream💩&Stream::ThenBlasRotm(uint64💩elem_count,💩DeviceMemory💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩¶m)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(param)); +Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory *x, + int incx, DeviceMemory *y, int incy, + const DeviceMemory ¶m) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy), + PARAM(param)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotm,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩param); + ThenBlasImpl *, int, DeviceMemory *, int, + const DeviceMemory &> impl; + return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y, + incy, param); } -Stream💩&Stream::ThenBlasRotmg(DeviceMemory💩*d1,💩DeviceMemory💩*d2, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x1, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y1, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*param)💩{ -💩💩VLOG_CALL(PARAM(d1),💩PARAM(d2),💩PARAM(x1),💩PARAM(y1),💩PARAM(param)); +Stream &Stream::ThenBlasRotmg(DeviceMemory *d1, DeviceMemory *d2, + DeviceMemory *x1, + const DeviceMemory &y1, + DeviceMemory *param) { + VLOG_CALL(PARAM(d1), PARAM(d2), PARAM(x1), PARAM(y1), PARAM(param)); -💩💩ThenBlasImpl💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotmg,💩d1,💩d2,💩x1,💩y1,💩param); + ThenBlasImpl *, DeviceMemory *, + DeviceMemory *, const DeviceMemory &, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param); } -Stream💩&Stream::ThenBlasRotmg(DeviceMemory💩*d1, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*d2, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x1, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y1, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*param)💩{ -💩💩VLOG_CALL(PARAM(d1),💩PARAM(d2),💩PARAM(x1),💩PARAM(y1),💩PARAM(param)); +Stream &Stream::ThenBlasRotmg(DeviceMemory *d1, + DeviceMemory *d2, + DeviceMemory *x1, + const DeviceMemory &y1, + DeviceMemory *param) { + VLOG_CALL(PARAM(d1), PARAM(d2), PARAM(x1), PARAM(y1), PARAM(param)); -💩💩ThenBlasImpl💩*,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasRotmg,💩d1,💩d2,💩x1,💩y1,💩param); + ThenBlasImpl *, DeviceMemory *, + DeviceMemory *, const DeviceMemory &, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, float alpha, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, double alpha, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, float alpha, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, double alpha, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex alpha, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasScal(uint64💩elem_count,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx)); +Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex alpha, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasScal,💩elem_count,💩alpha,💩x,💩incx); + ThenBlasImpl, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } -Stream💩&Stream::ThenBlasSwap(uint64💩elem_count,💩DeviceMemory💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); +Stream &Stream::ThenBlasSwap(uint64 elem_count, DeviceMemory *x, + int incx, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSwap,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); + ThenBlasImpl *, int, DeviceMemory *, int> + impl; + return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, + incy); } -Stream💩&Stream::ThenBlasSwap(uint64💩elem_count,💩DeviceMemory💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); +Stream &Stream::ThenBlasSwap(uint64 elem_count, DeviceMemory *x, + int incx, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); -💩💩ThenBlasImpl💩*,💩int,💩DeviceMemory💩*,💩int> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSwap,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); + ThenBlasImpl *, int, DeviceMemory *, int> + impl; + return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, + incy); } -Stream💩&Stream::ThenBlasSwap(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); +Stream &Stream::ThenBlasSwap(uint64 elem_count, + DeviceMemory> *x, int incx, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); -💩💩ThenBlasImpl>💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSwap,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); + ThenBlasImpl> *, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, + incy); } -Stream💩&Stream::ThenBlasSwap(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(y),💩PARAM(incy)); +Stream &Stream::ThenBlasSwap(uint64 elem_count, + DeviceMemory> *x, int incx, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); -💩💩ThenBlasImpl>💩*,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSwap,💩elem_count,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy); + ThenBlasImpl> *, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, + incy); } -Stream💩&Stream::ThenBlasIamax(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +Stream &Stream::ThenBlasIamax(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamax,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, + result); } -Stream💩&Stream::ThenBlasIamax(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +Stream &Stream::ThenBlasIamax(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamax,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, + result); } -Stream💩&Stream::ThenBlasIamax(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +Stream &Stream::ThenBlasIamax(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamax,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, + result); } -Stream💩&Stream::ThenBlasIamax(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +Stream &Stream::ThenBlasIamax(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamax,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, + result); } -Stream💩&Stream::ThenBlasIamin(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); +Stream &Stream::ThenBlasIamin(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamin,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasIamin(uint64💩elem_count,💩const💩DeviceMemory💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasIamin(uint64 elem_count, const DeviceMemory &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamin,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasIamin(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamin,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasIamin(uint64💩elem_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory💩*result)💩{ -💩💩VLOG_CALL(PARAM(elem_count),💩PARAM(x),💩PARAM(incx),💩PARAM(result)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasIamin,💩elem_count,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩result); -} - -Stream💩&Stream::ThenBlasGbmv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩kl,💩uint64💩ku,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(kl),💩PARAM(ku), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGbmv,💩trans,💩m,💩n,💩kl,💩ku,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGbmv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩kl,💩uint64💩ku,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(kl),💩PARAM(ku), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGbmv,💩trans,💩m,💩n,💩kl,💩ku,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGbmv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩kl,💩uint64💩ku,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(kl),💩PARAM(ku), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGbmv,💩trans,💩m,💩n,💩kl,💩ku,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGbmv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩kl,💩uint64💩ku,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(kl),💩PARAM(ku), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGbmv,💩trans,💩m,💩n,💩kl,💩ku,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGemv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩float💩alpha,💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemv,💩trans,💩m,💩n,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGemv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩alpha,💩const💩DeviceMemory💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemv,💩trans,💩m,💩n,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGemv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemv,💩trans,💩m,💩n,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGemv(blas::Transpose💩trans,💩uint64💩m,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(trans),💩PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemv,💩trans,💩m,💩n,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasGer(uint64💩m,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGer,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasGer(uint64💩m,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGer,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasGerc(uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGerc,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasGerc(uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGerc,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasGeru(uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGeru,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasGeru(uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(m),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx),💩PARAM(y), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGeru,💩m,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasHbmv(blas::UpperLower💩uplo,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(k),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHbmv,💩uplo,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHbmv(blas::UpperLower💩uplo,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(k),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHbmv,💩uplo,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHemv(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHemv,💩uplo,💩n,💩alpha,💩a,💩lda,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHemv(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHemv,💩uplo,💩n,💩alpha,💩a,💩lda,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHer(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer,💩uplo,💩n,💩alpha,💩x,💩incx,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda); -} - -Stream💩&Stream::ThenBlasHer(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer,💩uplo,💩n,💩alpha,💩x,💩incx,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda); -} - -Stream💩&Stream::ThenBlasHer2(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasHer2(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasHpmv(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(ap),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpmv,💩uplo,💩n,💩alpha,💩ap,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHpmv(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(ap),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpmv,💩uplo,💩n,💩alpha,💩ap,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasHpr(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ap)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpr,💩uplo,💩n,💩alpha,💩x,💩incx,💩ap); -} - -Stream💩&Stream::ThenBlasHpr(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx,💩DeviceMemory>💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ap)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpr,💩uplo,💩n,💩alpha,💩x,💩incx,💩ap); -} - -Stream💩&Stream::ThenBlasHpr2(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(ap)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩ap); -} - -Stream💩&Stream::ThenBlasHpr2(blas::UpperLower💩uplo,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incy,💩DeviceMemory>💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(ap)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHpr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩ap); -} - -Stream💩&Stream::ThenBlasSbmv(blas::UpperLower💩uplo,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩float💩alpha,💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(k),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSbmv,💩uplo,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSbmv(blas::UpperLower💩uplo,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩alpha,💩const💩DeviceMemory💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(k),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSbmv,💩uplo,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩x,💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSpmv(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(ap),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩float,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpmv,💩uplo,💩n,💩alpha,💩ap,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSpmv(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(ap),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpmv,💩uplo,💩n,💩alpha,💩ap,💩x,💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSpr(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ap)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpr,💩uplo,💩n,💩alpha,💩x,💩incx,💩ap); -} - -Stream💩&Stream::ThenBlasSpr(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ap)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpr,💩uplo,💩n,💩alpha,💩x,💩incx,💩ap); -} - -Stream💩&Stream::ThenBlasSpr2(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(ap)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩ap); -} - -Stream💩&Stream::ThenBlasSpr2(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*ap)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(ap)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSpr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩ap); -} - -Stream💩&Stream::ThenBlasSymv(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int,💩float, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymv,💩uplo,💩n,💩alpha,💩a,💩lda,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSymv(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*y,💩int💩incy)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(x), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(incx),💩PARAM(beta),💩PARAM(y),💩PARAM(incy)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymv,💩uplo,💩n,💩alpha,💩a,💩lda,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx,💩beta,💩y,💩incy); -} - -Stream💩&Stream::ThenBlasSyr(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr,💩uplo,💩n,💩alpha,💩x,💩incx,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda); -} - -Stream💩&Stream::ThenBlasSyr(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr,💩uplo,💩n,💩alpha,💩x,💩incx,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda); -} - -Stream💩&Stream::ThenBlasSyr2(blas::UpperLower💩uplo,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasSyr2(blas::UpperLower💩uplo,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&x,💩int💩incx, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&y,💩int💩incy, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*a,💩int💩lda)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(n),💩PARAM(alpha),💩PARAM(x),💩PARAM(incx), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(y),💩PARAM(incy),💩PARAM(a),💩PARAM(lda)); - -💩💩ThenBlasImpl💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2,💩uplo,💩n,💩alpha,💩x,💩incx,💩y, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incy,💩a,💩lda); -} - -Stream💩&Stream::ThenBlasTbmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbmv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbmv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbmv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbmv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbsv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbsv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbsv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTbsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTbsv,💩uplo,💩trans,💩diag,💩n,💩k,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTpmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpmv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpmv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpmv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpmv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpsv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpsv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpsv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTpsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&ap, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(ap), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTpsv,💩uplo,💩trans,💩diag,💩n,💩ap,💩x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩incx); -} - -Stream💩&Stream::ThenBlasTrmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrmv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*x,💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl💩&,💩int,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasTrsv(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Diagonal💩diag,💩uint64💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*x, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩incx)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(diag),💩PARAM(n),💩PARAM(a), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(lda),💩PARAM(x),💩PARAM(incx)); - -💩💩ThenBlasImpl>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsv,💩uplo,💩trans,💩diag,💩n,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩x,💩incx); -} - -Stream💩&Stream::ThenBlasGemm(blas::Transpose💩transa,💩blas::Transpose💩transb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩uint64💩k,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemm,💩transa,💩transb,💩m,💩n,💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasGemm(blas::Transpose💩transa,💩blas::Transpose💩transb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩uint64💩k,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemm,💩transa,💩transb,💩m,💩n,💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasGemm(blas::Transpose💩transa,💩blas::Transpose💩transb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemm,💩transa,💩transb,💩m,💩n,💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasGemm(blas::Transpose💩transa,💩blas::Transpose💩transb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩uint64💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemm,💩transa,💩transb,💩m,💩n,💩k, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHemm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHemm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHemm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHemm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHerk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl>💩&,💩int,💩float, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHerk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHerk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩double💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl>💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHerk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHer2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int,💩float, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasHer2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩double💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasHer2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSymm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSymm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSymm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSymm(blas::Side💩side,💩blas::UpperLower💩uplo,💩uint64💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(m),💩PARAM(n),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSymm,💩side,💩uplo,💩m,💩n,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyrk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩float,💩DeviceMemory💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyrk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyrk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyrk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyrk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyrk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyrk(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(beta),💩PARAM(c),💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyrk,💩uplo,💩trans,💩n,💩k,💩alpha,💩a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩lda,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyr2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb,💩float💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩float,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyr2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&b,💩int💩ldb, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩double💩beta,💩DeviceMemory💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl💩&,💩int,💩const💩DeviceMemory💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩double,💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyr2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasSyr2k(blas::UpperLower💩uplo,💩blas::Transpose💩trans, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩n,💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb,💩std::complex💩beta, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*c,💩int💩ldc)💩{ -💩💩VLOG_CALL(PARAM(uplo),💩PARAM(trans),💩PARAM(n),💩PARAM(k),💩PARAM(alpha), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb),💩PARAM(beta),💩PARAM(c), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(ldc)); - -💩💩ThenBlasImpl,💩const💩DeviceMemory>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩std::complex,💩DeviceMemory>💩*, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasSyr2k,💩uplo,💩trans,💩n,💩k,💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc); -} - -Stream💩&Stream::ThenBlasTrmm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*b,💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrmm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*b,💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrmm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrmm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrmm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrsm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩float💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*b,💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrsm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩double💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory💩&a,💩int💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*b,💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrsm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasTrsm(blas::Side💩side,💩blas::UpperLower💩uplo, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩blas::Transpose💩transa,💩blas::Diagonal💩diag, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩m,💩uint64💩n,💩std::complex💩alpha, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&a, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩lda,💩DeviceMemory>💩*b, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int💩ldb)💩{ -💩💩VLOG_CALL(PARAM(side),💩PARAM(uplo),💩PARAM(transa),💩PARAM(diag),💩PARAM(m), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(n),💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*,💩int>💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasTrsm,💩side,💩uplo,💩transa,💩diag,💩m, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩n,💩alpha,💩a,💩lda,💩b,💩ldb); -} - -Stream💩&Stream::ThenBlasGemmBatched( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩float💩alpha,💩const💩port::ArraySlice💩*>💩&a, -💩💩💩💩int💩lda,💩const💩port::ArraySlice💩*>💩&b,💩int💩ldb, -💩💩💩💩float💩beta,💩const💩port::ArraySlice💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count)💩{ -💩💩return💩ThenBlasGemmBatchedWithScratch(transa,💩transb,💩m,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩nullptr); -} - -Stream💩&Stream::ThenBlasGemmBatchedWithScratch( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩float💩alpha,💩const💩port::ArraySlice💩*>💩&a, -💩💩💩💩int💩lda,💩const💩port::ArraySlice💩*>💩&b,💩int💩ldb, -💩💩💩💩float💩beta,💩const💩port::ArraySlice💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count,💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc),💩PARAM(batch_count)); - -💩💩ThenBlasImpl💩*>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice💩*>💩&,💩int,💩float, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice💩*>💩&,💩int,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩ScratchAllocator💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemmBatched,💩transa,💩transb,💩m,💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩k,💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩scratch_allocator); -} - -Stream💩&Stream::ThenBlasGemmBatched( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩double💩alpha,💩const💩port::ArraySlice💩*>💩&a, -💩💩💩💩int💩lda,💩const💩port::ArraySlice💩*>💩&b,💩int💩ldb, -💩💩💩💩double💩beta,💩const💩port::ArraySlice💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count)💩{ -💩💩return💩ThenBlasGemmBatchedWithScratch(transa,💩transb,💩m,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩nullptr); -} - -Stream💩&Stream::ThenBlasGemmBatchedWithScratch( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩double💩alpha,💩const💩port::ArraySlice💩*>💩&a, -💩💩💩💩int💩lda,💩const💩port::ArraySlice💩*>💩&b,💩int💩ldb, -💩💩💩💩double💩beta,💩const💩port::ArraySlice💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count,💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc),💩PARAM(batch_count)); - -💩💩ThenBlasImpl💩*>💩&,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice💩*>💩&,💩int,💩double, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice💩*>💩&,💩int,💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩ScratchAllocator💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemmBatched,💩transa,💩transb,💩m,💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩k,💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩scratch_allocator); -} - -Stream💩&Stream::ThenBlasGemmBatched( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩const💩port::ArraySlice>💩*>💩&a,💩int💩lda, -💩💩💩💩const💩port::ArraySlice>💩*>💩&b,💩int💩ldb, -💩💩💩💩std::complex💩beta, -💩💩💩💩const💩port::ArraySlice>💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count)💩{ -💩💩return💩ThenBlasGemmBatchedWithScratch(transa,💩transb,💩m,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩nullptr); -} - -Stream💩&Stream::ThenBlasGemmBatchedWithScratch( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩const💩port::ArraySlice>💩*>💩&a,💩int💩lda, -💩💩💩💩const💩port::ArraySlice>💩*>💩&b,💩int💩ldb, -💩💩💩💩std::complex💩beta, -💩💩💩💩const💩port::ArraySlice>💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count,💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc),💩PARAM(batch_count)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩std::complex, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩int,💩ScratchAllocator💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemmBatched,💩transa,💩transb,💩m,💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩k,💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩scratch_allocator); -} - -Stream💩&Stream::ThenBlasGemmBatched( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩const💩port::ArraySlice>💩*>💩&a,💩int💩lda, -💩💩💩💩const💩port::ArraySlice>💩*>💩&b,💩int💩ldb, -💩💩💩💩std::complex💩beta, -💩💩💩💩const💩port::ArraySlice>💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count)💩{ -💩💩return💩ThenBlasGemmBatchedWithScratch(transa,💩transb,💩m,💩n,💩k,💩alpha,💩a,💩lda, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩nullptr); -} - -Stream💩&Stream::ThenBlasGemmBatchedWithScratch( -💩💩💩💩blas::Transpose💩transa,💩blas::Transpose💩transb,💩uint64💩m,💩uint64💩n, -💩💩💩💩uint64💩k,💩std::complex💩alpha, -💩💩💩💩const💩port::ArraySlice>💩*>💩&a,💩int💩lda, -💩💩💩💩const💩port::ArraySlice>💩*>💩&b,💩int💩ldb, -💩💩💩💩std::complex💩beta, -💩💩💩💩const💩port::ArraySlice>💩*>💩&c,💩int💩ldc, -💩💩💩💩int💩batch_count,💩ScratchAllocator💩*scratch_allocator)💩{ -💩💩VLOG_CALL(PARAM(transa),💩PARAM(transb),💩PARAM(m),💩PARAM(n),💩PARAM(k), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(alpha),💩PARAM(a),💩PARAM(lda),💩PARAM(b),💩PARAM(ldb), -💩💩💩💩💩💩💩💩💩💩💩💩PARAM(beta),💩PARAM(c),💩PARAM(ldc),💩PARAM(batch_count)); - -💩💩ThenBlasImpl, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩std::complex, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩port::ArraySlice>💩*>💩&, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩int,💩int,💩ScratchAllocator💩*> -💩💩💩💩💩💩impl; -💩💩return💩impl(this,💩&blas::BlasSupport::DoBlasGemmBatched,💩transa,💩transb,💩m,💩n, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩k,💩alpha,💩a,💩lda,💩b,💩ldb,💩beta,💩c,💩ldc,💩batch_count, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩scratch_allocator); -} - -Stream💩&Stream::ThenSetRngSeed(const💩uint8💩*seed,💩uint64💩seed_bytes)💩{ -💩💩VLOG_CALL(PARAM(seed),💩PARAM(seed_bytes)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->SetSeed(this,💩seed,💩seed_bytes)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this💩<<💩"💩unable💩to💩initialize💩RNG"; -💩💩💩💩} -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩set💩RNG💩seed:💩"💩<<💩static_cast(seed) -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩";💩bytes:💩"💩<<💩seed_bytes; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandUniform(DeviceMemory💩*values)💩{ -💩💩VLOG_CALL(PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandUniform(this,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandGaussian(float💩mean,💩float💩sd, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*values)💩{ -💩💩VLOG_CALL(PARAM(mean),💩PARAM(sd),💩PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandGaussian(this,💩mean,💩sd,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandGaussian(double💩mean,💩double💩sd, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*values)💩{ -💩💩VLOG_CALL(PARAM(mean),💩PARAM(sd),💩PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandGaussian(this,💩mean,💩sd,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandUniform(DeviceMemory💩*values)💩{ -💩💩VLOG_CALL(PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandUniform(this,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandUniform( -💩💩💩💩DeviceMemory>💩*values)💩{ -💩💩VLOG_CALL(PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandUniform(this,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenPopulateRandUniform( -💩💩💩💩DeviceMemory>💩*values)💩{ -💩💩VLOG_CALL(PARAM(values)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(rng::RngSupport💩*rng💩=💩parent_->AsRng())💩{ -💩💩💩💩💩💩CheckError(rng->DoPopulateRandUniform(this,💩values)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩attempting💩to💩perform💩RNG💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩RNG💩support."; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenMemcpy(void💩*host_dst,💩const💩DeviceMemoryBase💩&gpu_src, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(host_dst),💩PARAM(gpu_src),💩PARAM(size)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->Memcpy(this,💩host_dst,💩gpu_src,💩size)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩memcpy💩device-to-host;💩source:💩"💩<<💩gpu_src.opaque(); -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenMemcpy(DeviceMemoryBase💩*gpu_dst,💩const💩void💩*host_src, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(gpu_dst),💩PARAM(host_src),💩PARAM(size)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->Memcpy(this,💩gpu_dst,💩host_src,💩size)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩memcpy💩host-to-device;💩source:💩"💩<<💩host_src; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenMemcpy(DeviceMemoryBase💩*gpu_dst, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemoryBase💩&gpu_src,💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(gpu_dst),💩PARAM(gpu_src),💩PARAM(size)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->MemcpyDeviceToDevice(this,💩gpu_dst,💩gpu_src,💩size)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩memcpy💩gpu-to-gpu;💩source:💩"💩<<💩&gpu_src; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenMemZero(DeviceMemoryBase💩*location,💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(location),💩PARAM(size)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->MemZero(this,💩location,💩size)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩memzero💩GPU💩location;💩source:💩"💩<<💩location; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenMemset32(DeviceMemoryBase💩*location,💩const💩uint32💩&pattern, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩uint64💩size)💩{ -💩💩VLOG_CALL(PARAM(location),💩PARAM(pattern),💩PARAM(size)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->Memset32(this,💩location,💩pattern,💩size)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩did💩not💩memset💩GPU💩location;💩source:💩"💩<<💩location -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩";💩size:💩"💩<<💩size💩<<💩";💩pattern:💩"💩<<💩std::hex💩<<💩pattern; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenDoHostCallbackForTest(std::function💩callback)💩{ -💩💩VLOG_CALL(PARAM(callback)); - -💩💩return💩ThenDoHostCallback(callback); -} - -Stream💩&Stream::ThenDoHostCallback(std::function💩callback)💩{ -💩💩VLOG_CALL(PARAM(callback)); - -💩💩if💩(ok())💩{ -💩💩💩💩CheckError(parent_->HostCallback(this,💩callback)); -💩💩}💩else💩{ -💩💩💩💩LOG(INFO)💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩💩💩💩💩💩💩<<💩"💩was💩in💩error💩state💩before💩adding💩host💩callback"; -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan,💩const💩DeviceMemory💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan,💩const💩DeviceMemory💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory>💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -Stream💩&Stream::ThenFft(fft::Plan💩*plan, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩const💩DeviceMemory>💩&input, -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩DeviceMemory💩*output)💩{ -💩💩VLOG_CALL(PARAM(plan),💩PARAM(input),💩PARAM(output)); - -💩💩if💩(ok())💩{ -💩💩💩💩if💩(fft::FftSupport💩*fft💩=💩parent_->AsFft())💩{ -💩💩💩💩💩💩CheckError(fft->DoFft(this,💩plan,💩input,💩output)); -💩💩💩💩}💩else💩{ -💩💩💩💩💩💩SetError(); -💩💩💩💩💩💩LOG(INFO)💩<<💩"attempting💩to💩perform💩FFT💩operation💩using💩StreamExecutor💩" -💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"without💩FFT💩support"; -💩💩💩💩} -💩💩} -💩💩return💩*this; -} - -//💩It💩looks💩confusing,💩but💩all💩this💩is💩doing💩is💩inserting💩a💩callback💩at💩the -//💩present💩point💩in💩the💩stream💩to💩then💩enqueue💩a💩task💩on💩the💩host💩executor. -Stream💩&Stream::ThenEnqueueOnBackgroundThread( -💩💩💩💩std::function💩task)💩{ -💩💩VLOG_CALL(PARAM(task)); - -💩💩StreamExecutor💩*stream_executor💩=💩this->parent_; -💩💩std::function💩bound_task💩=💩std::bind(task,💩stream_executor); - -💩💩return💩ThenDoHostCallback([stream_executor,💩bound_task]()💩{ -💩💩💩💩stream_executor->EnqueueOnBackgroundThread(bound_task); -💩💩}); -} - -bool💩Stream::BlockHostUntilDone()💩{ -💩💩VLOG_CALL(); - -💩💩if💩(!ok())💩{ -💩💩💩💩LOG(INFO) -💩💩💩💩💩💩💩💩<<💩"stream💩"💩<<💩this -💩💩💩💩💩💩💩💩<<💩"💩did💩not💩block💩host💩until💩done;💩was💩already💩in💩an💩error💩state"; -💩💩💩💩return💩false; -💩💩} - -💩💩{ -💩💩💩💩//💩Wait💩until💩all💩active💩sub-streams💩have💩done💩their💩tasks. -💩💩💩💩mutex_lock💩lock{mu_}; -💩💩💩💩for💩(auto💩&stream💩:💩sub_streams_)💩{ -💩💩💩💩💩💩if💩(!stream.second)💩{ -💩💩💩💩💩💩💩💩CheckError(stream.first->BlockHostUntilDone()); -💩💩💩💩💩💩💩💩//💩Set💩this💩sub-stream💩as💩available. -💩💩💩💩💩💩💩💩stream.second💩=💩true; -💩💩💩💩💩💩} -💩💩💩💩} -💩💩} - -💩💩temporary_memory_manager_.DeallocateFinalizedTemporaries(); - -💩💩CheckError(parent_->BlockHostUntilDone(this)); -💩💩return💩ok(); + ThenBlasImpl &, int, DeviceMemory *> + impl; + return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasIamin(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasIamin(uint64 elem_count, + const DeviceMemory> &x, + int incx, DeviceMemory *result) { + VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); + + ThenBlasImpl> &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, + result); +} + +Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, + uint64 kl, uint64 ku, float alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, float beta, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(kl), PARAM(ku), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), PARAM(incx), + PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, + uint64 kl, uint64 ku, double alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, + double beta, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(kl), PARAM(ku), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), PARAM(incx), + PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, + uint64 kl, uint64 ku, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(kl), PARAM(ku), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), PARAM(incx), + PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, + uint64 kl, uint64 ku, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(kl), PARAM(ku), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), PARAM(incx), + PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, + float alpha, const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, float beta, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(alpha), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), + PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, + double alpha, const DeviceMemory &a, + int lda, const DeviceMemory &x, int incx, + double beta, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(alpha), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), + PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(alpha), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), + PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(trans), PARAM(m), PARAM(n), PARAM(alpha), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), + PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasGer(uint64 m, uint64 n, float alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl &, int, + const DeviceMemory &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasGer(uint64 m, uint64 n, double alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl &, int, + const DeviceMemory &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasGerc(uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasGerc(uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(m), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), PARAM(y), + PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), + PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), + PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x, + incx, beta, y, incy); +} + +Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x, + incx, beta, y, incy); +} + +Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(a), PARAM(lda)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a, + lda); +} + +Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(a), PARAM(lda)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a, + lda); +} + +Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *a, + int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &ap, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(ap), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx, + beta, y, incy); +} + +Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &ap, + const DeviceMemory> &x, + int incx, std::complex beta, + DeviceMemory> *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(ap), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl, + const DeviceMemory> &, + const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx, + beta, y, incy); +} + +Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(ap)); + + ThenBlasImpl> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap); +} + +Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory> &x, + int incx, DeviceMemory> *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(ap)); + + ThenBlasImpl> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap); +} + +Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(ap)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y, + incy, ap); +} + +Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n, + std::complex alpha, + const DeviceMemory> &x, + int incx, + const DeviceMemory> &y, + int incy, DeviceMemory> *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(ap)); + + ThenBlasImpl, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + DeviceMemory> *> impl; + return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y, + incy, ap); +} + +Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k, + float alpha, const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, float beta, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), + PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k, + double alpha, const DeviceMemory &a, + int lda, const DeviceMemory &x, int incx, + double beta, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), + PARAM(x), PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda, + x, incx, beta, y, incy); +} + +Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &ap, + const DeviceMemory &x, int incx, float beta, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(ap), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, + const DeviceMemory &, int, float, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx, + beta, y, incy); +} + +Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &ap, + const DeviceMemory &x, int incx, + double beta, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(ap), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, + const DeviceMemory &, int, double, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx, + beta, y, incy); +} + +Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &x, int incx, + DeviceMemory *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(ap)); + + ThenBlasImpl &, + int, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap); +} + +Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &x, int incx, + DeviceMemory *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(ap)); + + ThenBlasImpl &, + int, DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap); +} + +Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(ap)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y, + incy, ap); +} + +Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *ap) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(ap)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, + DeviceMemory *> impl; + return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y, + incy, ap); +} + +Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, float beta, + DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, float, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x, + incx, beta, y, incy); +} + +Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &x, int incx, + double beta, DeviceMemory *y, int incy) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(x), + PARAM(incx), PARAM(beta), PARAM(y), PARAM(incy)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, double, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x, + incx, beta, y, incy); +} + +Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &x, int incx, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(a), PARAM(lda)); + + ThenBlasImpl &, + int, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a, + lda); +} + +Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &x, int incx, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(a), PARAM(lda)); + + ThenBlasImpl &, + int, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a, + lda); +} + +Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, float alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, double alpha, + const DeviceMemory &x, int incx, + const DeviceMemory &y, int incy, + DeviceMemory *a, int lda) { + VLOG_CALL(PARAM(uplo), PARAM(n), PARAM(alpha), PARAM(x), PARAM(incx), + PARAM(y), PARAM(incy), PARAM(a), PARAM(lda)); + + ThenBlasImpl &, + int, const DeviceMemory &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y, + incy, a, lda); +} + +Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, uint64 k, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(k), + PARAM(a), PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &ap, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl &, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &ap, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl &, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &ap, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &ap, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &ap, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl &, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &ap, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl &, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &ap, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &ap, + DeviceMemory> *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(ap), + PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, + incx); +} + +Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory &a, int lda, + DeviceMemory *x, int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl &, int, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, + blas::Diagonal diag, uint64 n, + const DeviceMemory> &a, + int lda, DeviceMemory> *x, + int incx) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(diag), PARAM(n), PARAM(a), + PARAM(lda), PARAM(x), PARAM(incx)); + + ThenBlasImpl> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, + lda, x, incx); +} + +Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, float alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, float beta, + DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, double alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, + double beta, DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, + std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, float alpha, + const DeviceMemory> &a, + int lda, float beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl> &, int, float, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, double alpha, + const DeviceMemory> &a, + int lda, double beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl> &, int, double, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, float beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, float, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, double beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, double, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, float alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, float beta, + DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, double alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, + double beta, DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, + uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(m), PARAM(n), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, + lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, float alpha, + const DeviceMemory &a, int lda, float beta, + DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl &, int, float, DeviceMemory *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, double alpha, + const DeviceMemory &a, int lda, + double beta, DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl &, int, double, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(beta), PARAM(c), PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, + lda, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, float alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, float beta, + DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, float, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, double alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, + double beta, DeviceMemory *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl &, int, const DeviceMemory &, + int, double, DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, + uint64 n, uint64 k, std::complex alpha, + const DeviceMemory> &a, + int lda, + const DeviceMemory> &b, + int ldb, std::complex beta, + DeviceMemory> *c, int ldc) { + VLOG_CALL(PARAM(uplo), PARAM(trans), PARAM(n), PARAM(k), PARAM(alpha), + PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), + PARAM(ldc)); + + ThenBlasImpl, const DeviceMemory> &, + int, const DeviceMemory> &, int, + std::complex, DeviceMemory> *, + int> impl; + return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); +} + +Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, float alpha, + const DeviceMemory &a, int lda, + DeviceMemory *b, int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, double alpha, + const DeviceMemory &a, int lda, + DeviceMemory *b, int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, DeviceMemory> *b, + int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, DeviceMemory> *b, + int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, float alpha, + const DeviceMemory &a, int lda, + DeviceMemory *b, int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, double alpha, + const DeviceMemory &a, int lda, + DeviceMemory *b, int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl &, int, + DeviceMemory *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, DeviceMemory> *b, + int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, + blas::Transpose transa, blas::Diagonal diag, + uint64 m, uint64 n, std::complex alpha, + const DeviceMemory> &a, + int lda, DeviceMemory> *b, + int ldb) { + VLOG_CALL(PARAM(side), PARAM(uplo), PARAM(transa), PARAM(diag), PARAM(m), + PARAM(n), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb)); + + ThenBlasImpl, + const DeviceMemory> &, int, + DeviceMemory> *, int> impl; + return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, + n, alpha, a, lda, b, ldb); +} + +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, const port::ArraySlice *> &a, + int lda, const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, const port::ArraySlice *> &a, + int lda, const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl *> &, int, + const port::ArraySlice *> &, int, float, + const port::ArraySlice *> &, int, int, + ScratchAllocator *> + impl; + return impl(this, &blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, double alpha, const port::ArraySlice *> &a, + int lda, const port::ArraySlice *> &b, int ldb, + double beta, const port::ArraySlice *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, double alpha, const port::ArraySlice *> &a, + int lda, const port::ArraySlice *> &b, int ldb, + double beta, const port::ArraySlice *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl *> &, int, + const port::ArraySlice *> &, int, double, + const port::ArraySlice *> &, int, int, + ScratchAllocator *> + impl; + return impl(this, &blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, std::complex alpha, + const port::ArraySlice> *> &a, int lda, + const port::ArraySlice> *> &b, int ldb, + std::complex beta, + const port::ArraySlice> *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, std::complex alpha, + const port::ArraySlice> *> &a, int lda, + const port::ArraySlice> *> &b, int ldb, + std::complex beta, + const port::ArraySlice> *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl, + const port::ArraySlice> *> &, + int, + const port::ArraySlice> *> &, + int, std::complex, + const port::ArraySlice> *> &, + int, int, ScratchAllocator *> + impl; + return impl(this, &blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, std::complex alpha, + const port::ArraySlice> *> &a, int lda, + const port::ArraySlice> *> &b, int ldb, + std::complex beta, + const port::ArraySlice> *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, std::complex alpha, + const port::ArraySlice> *> &a, int lda, + const port::ArraySlice> *> &b, int ldb, + std::complex beta, + const port::ArraySlice> *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl, + const port::ArraySlice> *> &, + int, + const port::ArraySlice> *> &, + int, std::complex, + const port::ArraySlice> *> &, + int, int, ScratchAllocator *> + impl; + return impl(this, &blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + +Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) { + VLOG_CALL(PARAM(seed), PARAM(seed_bytes)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->SetSeed(this, seed, seed_bytes)); + } else { + SetError(); + LOG(INFO) << "stream " << this << " unable to initialize RNG"; + } + } else { + LOG(INFO) << "stream " << this + << " did not set RNG seed: " << static_cast(seed) + << "; bytes: " << seed_bytes; + } + return *this; +} + +Stream &Stream::ThenPopulateRandUniform(DeviceMemory *values) { + VLOG_CALL(PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << "attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenPopulateRandGaussian(float mean, float sd, + DeviceMemory *values) { + VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); + } else { + SetError(); + LOG(INFO) << "attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenPopulateRandGaussian(double mean, double sd, + DeviceMemory *values) { + VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); + } else { + SetError(); + LOG(INFO) << "attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenPopulateRandUniform(DeviceMemory *values) { + VLOG_CALL(PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << "attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenPopulateRandUniform( + DeviceMemory> *values) { + VLOG_CALL(PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << "attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenPopulateRandUniform( + DeviceMemory> *values) { + VLOG_CALL(PARAM(values)); + + if (ok()) { + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << "stream " << this + << " attempting to perform RNG operation using StreamExecutor " + "without RNG support."; + } + } + return *this; +} + +Stream &Stream::ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, + uint64 size) { + VLOG_CALL(PARAM(host_dst), PARAM(gpu_src), PARAM(size)); + + if (ok()) { + CheckError(parent_->Memcpy(this, host_dst, gpu_src, size)); + } else { + LOG(INFO) << "stream " << this + << " did not memcpy device-to-host; source: " << gpu_src.opaque(); + } + return *this; +} + +Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src, + uint64 size) { + VLOG_CALL(PARAM(gpu_dst), PARAM(host_src), PARAM(size)); + + if (ok()) { + CheckError(parent_->Memcpy(this, gpu_dst, host_src, size)); + } else { + LOG(INFO) << "stream " << this + << " did not memcpy host-to-device; source: " << host_src; + } + return *this; +} + +Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst, + const DeviceMemoryBase &gpu_src, uint64 size) { + VLOG_CALL(PARAM(gpu_dst), PARAM(gpu_src), PARAM(size)); + + if (ok()) { + CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size)); + } else { + LOG(INFO) << "stream " << this + << " did not memcpy gpu-to-gpu; source: " << &gpu_src; + } + return *this; +} + +Stream &Stream::ThenMemZero(DeviceMemoryBase *location, uint64 size) { + VLOG_CALL(PARAM(location), PARAM(size)); + + if (ok()) { + CheckError(parent_->MemZero(this, location, size)); + } else { + LOG(INFO) << "stream " << this + << " did not memzero GPU location; source: " << location; + } + return *this; +} + +Stream &Stream::ThenMemset32(DeviceMemoryBase *location, const uint32 &pattern, + uint64 size) { + VLOG_CALL(PARAM(location), PARAM(pattern), PARAM(size)); + + if (ok()) { + CheckError(parent_->Memset32(this, location, pattern, size)); + } else { + LOG(INFO) << "stream " << this + << " did not memset GPU location; source: " << location + << "; size: " << size << "; pattern: " << std::hex << pattern; + } + return *this; +} + +Stream &Stream::ThenDoHostCallbackForTest(std::function callback) { + VLOG_CALL(PARAM(callback)); + + return ThenDoHostCallback(callback); +} + +Stream &Stream::ThenDoHostCallback(std::function callback) { + VLOG_CALL(PARAM(callback)); + + if (ok()) { + CheckError(parent_->HostCallback(this, callback)); + } else { + LOG(INFO) << "stream " << this + << " was in error state before adding host callback"; + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, + const DeviceMemory> &input, + DeviceMemory> *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, + const DeviceMemory> &input, + DeviceMemory> *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory &input, + DeviceMemory> *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory &input, + DeviceMemory> *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, + const DeviceMemory> &input, + DeviceMemory *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +Stream &Stream::ThenFft(fft::Plan *plan, + const DeviceMemory> &input, + DeviceMemory *output) { + VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output)); + + if (ok()) { + if (fft::FftSupport *fft = parent_->AsFft()) { + CheckError(fft->DoFft(this, plan, input, output)); + } else { + SetError(); + LOG(INFO) << "attempting to perform FFT operation using StreamExecutor " + "without FFT support"; + } + } + return *this; +} + +// It looks confusing, but all this is doing is inserting a callback at the +// present point in the stream to then enqueue a task on the host executor. +Stream &Stream::ThenEnqueueOnBackgroundThread( + std::function task) { + VLOG_CALL(PARAM(task)); + + StreamExecutor *stream_executor = this->parent_; + std::function bound_task = std::bind(task, stream_executor); + + return ThenDoHostCallback([stream_executor, bound_task]() { + stream_executor->EnqueueOnBackgroundThread(bound_task); + }); +} + +bool Stream::BlockHostUntilDone() { + VLOG_CALL(); + + if (!ok()) { + LOG(INFO) + << "stream " << this + << " did not block host until done; was already in an error state"; + return false; + } + + { + // Wait until all active sub-streams have done their tasks. + mutex_lock lock{mu_}; + for (auto &stream : sub_streams_) { + if (!stream.second) { + CheckError(stream.first->BlockHostUntilDone()); + // Set this sub-stream as available. + stream.second = true; + } + } + } + + temporary_memory_manager_.DeallocateFinalizedTemporaries(); + + CheckError(parent_->BlockHostUntilDone(this)); + return ok(); }