1. createSession
Creates a session from a ScheduleConfig and a RuntimeInfo. The single-config overload simply forwards to createMultiPathSession. A usage sketch follows the listing below.
// source/core/Interpreter.cpp
Session* Interpreter::createSession(const ScheduleConfig& config, const RuntimeInfo& runtime) {
    return createMultiPathSession({config}, runtime);
}
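For context, a minimal sketch of how application code typically reaches this path through MNN's public Interpreter API (createFromFile / createSession / runSession); the model path and thread count are placeholders:

#include <MNN/Interpreter.hpp>

// A minimal sketch of the typical application-side call path (public API);
// "model.mnn" and the thread count are placeholders.
void runOnce() {
    auto net = MNN::Interpreter::createFromFile("model.mnn");
    if (nullptr == net) {
        return;
    }
    MNN::ScheduleConfig config;
    config.type      = MNN_FORWARD_CPU;   // e.g. MNN_FORWARD_VULKAN for the GPU path
    config.numThread = 4;
    // The public single-argument overload builds a default RuntimeInfo and then
    // goes through createSession/createMultiPathSession shown above.
    auto session = net->createSession(config);
    if (nullptr != session) {
        // ... fill input tensors via getSessionInput(), then:
        net->runSession(session);
        net->releaseSession(session);
    }
    delete net;   // or Interpreter::destroy(net) on newer releases
}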
1.1 createMultiPathSession
// source/core/Interpreter.cpp
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime) {
    // ...
    auto newSession =
        std::unique_ptr<Session>(new Session(std::move(info), mNet->modes, std::move(rt)));
    if (!newSession->valid()) {
        MNN_PRINT("Invalide Session!!\n");
        return nullptr;
    }
    auto result = newSession.get();
    auto validForResize = info.validForResize;
    if (validForResize && mNet->modes.inputMode == Session_Input_Inside && mNet->modes.resizeMode == Session_Resize_Direct) {
        result->resize();
    }

    if ((!mNet->cacheFile.empty()) && (!valid) && mNet->modes.backendMode == Session_Backend_Fix) {
        // Try to save extra cache
        auto buffer = result->getCache();
        if (buffer.first != nullptr && buffer.second > 0) {
            MNN_PRINT("Write cache to %s, size = %zu\n", mNet->cacheFile.c_str(), buffer.second);
            writeCacheFile(mNet, buffer);
            mNet->lastCacheSize = buffer.second;
            // Write Cache
            cacheMode = cacheMode | 2;
        }
    }
    // Reset cache
    result->loadCache(nullptr, 0);

    mNet->sessions.emplace_back(std::move(newSession));

#ifdef MNN_INTERNAL_ENABLED
    int precision = BackendConfig::Precision_Normal;
    if (nullptr != configs[0].backendConfig) {
        precision = configs[0].backendConfig->precision;
    }
    int mode = configs[0].mode;
    mNet->sessionInfo.insert(std::make_pair(result, std::make_tuple(precision, mode)));
    if (shouldLog(FREQ_HIGH)) {
        std::map<std::string, std::string> metrics = mNet->basicLogginData;
        metrics.emplace("UUID", mNet->uuid);
        metrics.emplace("Time", std::to_string((float)_timer.durationInUs() / 1024.0f));
        metrics.emplace("Backend", std::to_string(configs[0].type));
        metrics.emplace("Precision", std::to_string(precision));
        metrics.emplace("Mode", std::to_string(mode));
        metrics.emplace("Cache", std::to_string(cacheMode));
        metrics.emplace("CacheSize", std::to_string((float)(mNet->lastCacheSize / 1024.0f)));
        metrics.emplace("ModelSize", std::to_string((float)mNet->buffer.size() / 1024.0f / 1024.0f));
        metrics.emplace("Usage", std::to_string((int)mNet->net->usage()));
        metrics.emplace("API", "Interpreter::createMultiPathSession");
        logAsync(metrics);
    }
#endif // MNN_INTERNAL_ENABLED

    return result;
}
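Note that the cache-writing branch above only fires when a cache file has been registered on the Interpreter before the session is created. A minimal sketch, assuming the public setCacheFile API (file paths are placeholders):

#include <MNN/Interpreter.hpp>

// Sketch: register a cache file so createSession above can persist the backend
// cache (e.g. GPU tuning results / compiled kernels); paths are placeholders.
MNN::Session* createCachedSession(MNN::Interpreter* net) {
    net->setCacheFile("model.mnn.cache");
    MNN::ScheduleConfig config;
    config.type = MNN_FORWARD_VULKAN;
    // If the backend returns a non-empty cache buffer, createMultiPathSession
    // writes it to the registered file (writeCacheFile above).
    return net->createSession(config);
}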
1.1.1 The Session class and ModeGroup
// source/core/Session.hpp
class MNN_PUBLIC Session {
public:
    struct ModeGroup {
        Interpreter::SessionMode callBackMode    = Interpreter::Session_Debug;
        Interpreter::SessionMode inputMode       = Interpreter::Session_Input_Inside;
        Interpreter::SessionMode outputMode      = Interpreter::Session_Output_Inside;
        Interpreter::SessionMode backendMode     = Interpreter::Session_Backend_Fix;
        Interpreter::SessionMode resizeMode      = Interpreter::Session_Resize_Direct;
        Interpreter::SessionMode memoryUsageMode = Interpreter::Session_Memory_Collect;
        Interpreter::SessionMode codegenMode     = Interpreter::Session_Codegen_Disable;
        int memoryAllocatorType = 0;
        int maxTuningNumber = MNN_DEFAULT_TUNING_NUMBER;
    };
    Session(Schedule::ScheduleInfo&& info, const ModeGroup& mode, RuntimeInfo&& runtime);
    ~Session();
    Session* clone(RuntimeInfo&& runtime, std::shared_ptr<Schedule::ScheduleInfo> sharedConst);

public:
    /**
     * @brief infer.
     * @return result code.
     */
    ErrorCode run() const;
    /**
     * @brief infer with callbacks and sync option.
     * @param enterCallback callback before each op.
     * @param exitCallback  callback after each op.
     * @param sync          wait until all ops done before return or not.
     * @return result code.
     */
    ErrorCode runWithCallBack(const TensorCallBackWithInfo& enterCallback,
                              const TensorCallBackWithInfo& exitCallback, bool sync = false) const;

    bool getInfo(Interpreter::SessionInfoCode code, void* ptr) const;

public:
    /**
     * @brief resize tensors and buffers responding to input changes.
     * @return result code.
     */
    ErrorCode resize();

    /**
     * @brief set if needs resize.
     * @param flag needs resize or not.
     */
    void setNeedResize(bool flag = true) {
        mNeedResize = flag;
    }

    void setNeedMalloc(bool flag = true) {
        mNeedMalloc = flag;
    }

    Runtime* getCPURuntime() {
        return mRuntime.second.get();
    }

public:
    /**
     * @brief get backend that create the tensor.
     * @param tensor given tensor.
     * @return backend that create the tensor, NULL if the tensor is created by default backend (CPU backend).
     */
    const Backend* getBackEnd(const Tensor* tensor) const;

    /**
     * @brief get input tensor for given op name.
     * @param name given op name. if NULL, return first input tensor.
     * @return input tensor if found, NULL otherwise.
     */
    Tensor* getInput(const char* name) const;

    /**
     * @brief get output tensor for given op name.
     * @param name given op name. if NULL, return first output tensor.
     * @return output tensor if found, NULL otherwise.
     */
    Tensor* getOutput(const char* name) const;

    /**
     * @brief get output tensors map.
     * @return get output tensors map.
     */
    const std::map<std::string, Tensor*>& getOutputAll() const;
    const std::map<std::string, Tensor*>& getInputAll() const;

    /**
     * @brief check session is valid or not.
     * @return session is valid or not.
     */
    inline bool valid() const {
        return mValid;
    }

    /**
     * @brief update the session's const value to origin model's const blob.
     * @return errorcode
     */
    ErrorCode updateToModel(Net* net) const;

    void waitAsyncResize();
    bool hasAsyncWork();
    bool loadCache(const void* buffer, size_t size);
    std::pair<const void*, size_t> getCache();

    Tensor* getTensor(int index) const;
    Schedule::PipelineInfo& getPipelineInfo(int index) const;

protected:
    const std::vector<std::shared_ptr<Pipeline>>& getPipelines() const {
        return this->mPipelines;
    }

private:
    void _clearCache();
    void _setUpTensorInfo(const Schedule::ScheduleInfo& info);

private:
    RuntimeInfo mRuntime;
    std::vector<std::shared_ptr<Pipeline>> mPipelines;
    bool mNeedResize = true;
    bool mValid = true;
    bool mNeedMalloc = true;
    Interpreter::SessionMode mCallBackMode;
    Interpreter::SessionMode mMemoryUsageMode;
    Interpreter::SessionMode mCodegenMode;
    Schedule::ScheduleInfo mInfo;
    ModeGroup mMode;
};
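Session::run and Session::runWithCallBack are what Interpreter::runSession and Interpreter::runSessionWithCallBackInfo ultimately forward to. A minimal per-op tracing sketch using the public callback API (assuming the usual TensorCallBackWithInfo signature from Interpreter.hpp):

#include <MNN/Interpreter.hpp>
#include <cstdio>
#include <vector>

// Sketch: per-op callbacks around execution; runSessionWithCallBackInfo
// eventually calls Session::runWithCallBack shown above.
void runWithOpTrace(MNN::Interpreter* net, MNN::Session* session) {
    MNN::TensorCallBackWithInfo before = [](const std::vector<MNN::Tensor*>& inputs,
                                            const MNN::OperatorInfo* info) {
        printf("enter op: %s (%s)\n", info->name().c_str(), info->type().c_str());
        return true;   // returning false skips further processing for this op
    };
    MNN::TensorCallBackWithInfo after = [](const std::vector<MNN::Tensor*>& outputs,
                                           const MNN::OperatorInfo* info) {
        return true;
    };
    net->runSessionWithCallBackInfo(session, before, after, /*sync*/ true);
}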
1.1.2 Session::Session
// source/core/Session.cpp
Session::Session(Schedule::ScheduleInfo&& info, const ModeGroup& mode, RuntimeInfo&& runtime) {
    mMode = mode;
    mRuntime = std::move(runtime);
    if (info.pipelineInfo.empty()) {
        mValid = false;
        return;
    }
    mInfo = std::move(info);
    for (auto& iter : mInfo.pipelineInfo) {
        _createPipelineBackend(iter, mRuntime);
        Pipeline::TuningAttr attr;
        attr.maxTuningNumber = mode.maxTuningNumber;
        attr.autoSetOpType = mode.backendMode == Interpreter::Session_Backend_Auto;
        auto rt = mRuntime.first.find(iter.first.info.type)->second.get();
        auto cpuRuntime = mRuntime.second;
        std::shared_ptr<Pipeline> newPipeline(new Pipeline(std::move(iter),
                                                           mode.inputMode == Interpreter::Session_Input_Inside,
                                                           mode.outputMode == Interpreter::Session_Output_User,
                                                           attr, rt, cpuRuntime.get()));
        mPipelines.emplace_back(std::move(newPipeline));
    }
    mCallBackMode = mode.callBackMode;
    mMemoryUsageMode = mode.memoryUsageMode;
    mCodegenMode = mode.codegenMode;
}
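The ModeGroup values consumed here (backendMode, maxTuningNumber) are set through the public Interpreter API before the session is created. A minimal sketch, assuming the SessionMode/HintMode names in the current Interpreter.hpp:

#include <MNN/Interpreter.hpp>

// Sketch: configure the mode group consumed by the Session constructor above.
void configureAutoBackend(MNN::Interpreter* net) {
    // Session_Backend_Auto -> mode.backendMode, which turns on attr.autoSetOpType above.
    net->setSessionMode(MNN::Interpreter::Session_Backend_Auto);
    // MAX_TUNING_NUMBER -> ModeGroup::maxTuningNumber, the per-backend tuning budget.
    net->setSessionHint(MNN::Interpreter::MAX_TUNING_NUMBER, 20);
}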
1.1.2.1 _createPipelineBackend
Creates the backend(s) for each pipeline. The BackendCache, PipelineInfo, and RuntimeInfo types involved are quoted in the comments below, and a sketch of how the RuntimeInfo is normally produced follows the listing.
// source/core/Session.cpp
// typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo;
//
// struct BackendCache {
//     Backend::Info info;
//     BackendConfig config;
//     std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> cache;
//     bool needComputeShape = true;
//     bool needComputeGeometry = true;
//     bool reportError = true;
//     std::map<Tensor*, TENSORCACHE> inputTensorCopyCache;
// };
//
// typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, \
//                   std::shared_ptr<Runtime>> RuntimeInfo;
//
static void _createPipelineBackend(Schedule::PipelineInfo& iter, RuntimeInfo& runtime) {
    // iter.first is a BackendCache
    if (iter.first.cache.first != nullptr) {
        return;
    }
    // runtime.first is a std::map<MNNForwardType, std::shared_ptr<Runtime>>;
    // look up the Runtime for the requested MNNForwardType
    // (e.g. MNN_FORWARD_VULKAN -> VulkanRuntime)
    auto rt = runtime.first.find(iter.first.info.type)->second.get();
    // runtime.second is the default Runtime (CPURuntime)
    auto cpuRuntime = runtime.second;
    bool specialUsage = false;
    if (iter.first.info.user != nullptr) {
        specialUsage = iter.first.info.user->flags > 0;
    }
    // Runs e.g. VulkanRuntime::onCreate to create the corresponding Backend (VulkanBackend).
    // iter.first.cache is a std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>>
    iter.first.cache.first.reset(rt->onCreate(iter.first.info.user));
    std::shared_ptr<Backend> second;
    if (iter.first.cache.first->type() == MNN_FORWARD_CPU && (!specialUsage)) {
        iter.first.cache.second = iter.first.cache.first;
    } else {
        // Const Backend shouldn't be used as default backend
        // The session may be schedule multi-thread but const backend is the same
        // We need create a new backend to do size compute / not support op compute
        // Create the default/fallback Backend (CPUBackend)
        BackendConfig defaultConfig;
        defaultConfig.flags = 4;
        iter.first.cache.second.reset(cpuRuntime->onCreate(&defaultConfig));
    }
}
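The RuntimeInfo pair consumed here is normally produced by the public Interpreter::createRuntime (or built implicitly by the single-argument createSession): runtime.first holds the requested runtimes (e.g. the VulkanRuntime looked up above) and runtime.second the default CPURuntime. A minimal sketch of creating it explicitly so two sessions share the same runtimes:

#include <MNN/Interpreter.hpp>

// Sketch: build the RuntimeInfo once and pass it to createSession(config, runtime),
// the overload shown at the top of this section.
void createSessionsWithSharedRuntime(MNN::Interpreter* netA, MNN::Interpreter* netB) {
    MNN::ScheduleConfig config;
    config.type = MNN_FORWARD_VULKAN;
    auto runtime  = MNN::Interpreter::createRuntime({config});
    auto sessionA = netA->createSession(config, runtime);
    auto sessionB = netB->createSession(config, runtime);
    (void)sessionA;
    (void)sessionB;
}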
1.1.2.1.1 VulkanRuntime::onCreate
// source/backend/vulkan/runtime/VulkanRuntime.cpp
Backend* VulkanRuntime::onCreate(const BackendConfig* config) const {
    // FIXME: Use config
    return new VulkanBackend(this, mInfo);
}
1.1.2.1.1.1 VulkanBackend::VulkanBackend
// source/backend/vulkan/image/backend/VulkanBackend.cpp
VulkanBackend::VulkanBackend(const VulkanRuntime* runtime, const Backend::Info& info) : Backend(MNN_FORWARD_VULKAN) {
    mRuntime = runtime;
    mDirect = Backend::Info::INDIRECT != info.mode;
    mDynamicMemoryPool.reset(new VulkanMemoryPool(runtime->mMemoryPool.get()));

    auto& dev = device();
    mFence = std::make_shared<VulkanFence>(dev);
    if (!mDirect) {
        mCmdBuffer.reset(runtime->mCmdPool->allocBuffer());
    }
    mInitBuffer.reset(runtime->mCmdPool->allocBuffer());
}
1.1.2.1.2 CPURuntime::onCreate
// source/backend/cpu/CPUBackend.cpp
Backend* CPURuntime::onCreate(const BackendConfig* config) const {
    auto precision = mPrecision;
    auto memory = mMemory;
    size_t flags = mFlags;
    if (nullptr != config) {
        precision = config->precision;
        flags = config->flags;
        memory = config->memory;
    }
#ifdef LOG_VERBOSE
    MNN_PRINT("cpu backend was created by runtime:%p\n", this);
#endif

#ifdef MNN_USE_ARMV82
    auto core = MNNGetCoreFunctions();
    if (core->supportFp16arith && precision == BackendConfig::Precision_Low) {
        return new Arm82Backend(this, memory);
    }
#endif
#ifdef MNN_SUPPORT_BF16
    if (precision == BackendConfig::Precision_Low_BF16 && BF16Functions::get()) {
        return new BF16Backend(this);
    }
#endif
    if (flags == MNN_CPU_USE_DEFAULT_BACKEND) {
        return new CPUBackend(this, precision, memory, MNN_FORWARD_CPU, 0);
    }
#ifdef MNN_USE_SSE
    if (AVX2Backend::isValid()) {
        return new AVX2Backend(this, memory, flags);
    }
#endif
    return new CPUBackend(this, precision, memory, MNN_FORWARD_CPU, flags);
}
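The precision/memory/flags read here come from the BackendConfig the caller attaches to the ScheduleConfig. A minimal sketch of requesting the low-precision path (which, on an ARMv8.2 build, selects the fp16 Arm82Backend above):

#include <MNN/Interpreter.hpp>
#include <MNN/MNNForwardType.h>

// Sketch: a BackendConfig passed through ScheduleConfig reaches CPURuntime::onCreate;
// Precision_Low may select the Arm82/BF16/AVX2 variants depending on build flags and hardware.
MNN::Session* createLowPrecisionSession(MNN::Interpreter* net) {
    MNN::BackendConfig backendConfig;
    backendConfig.precision = MNN::BackendConfig::Precision_Low;
    backendConfig.memory    = MNN::BackendConfig::Memory_Normal;

    MNN::ScheduleConfig config;
    config.type          = MNN_FORWARD_CPU;
    config.numThread     = 4;
    config.backendConfig = &backendConfig;
    return net->createSession(config);
}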
1.1.2.1.2.1 CPUBackend::CPUBackend
// source/backend/cpu/CPUBackend.cpp
CPUBackend::CPUBackend(const CPURuntime* runtime, BackendConfig::PrecisionMode precision,
                       BackendConfig::MemoryMode memory, MNNForwardType type, size_t flags) : Backend(type) {
#ifdef LOG_VERBOSE
    MNN_PRINT("cpu backend create\n");
#endif
    mMemory = memory;
    mRuntime = const_cast<CPURuntime*>(runtime);
    std::shared_ptr<BufferAllocator::Allocator> defaultAlloc(BufferAllocator::Allocator::createRecurse(runtime->mStaticAllocator.get()));
    if (mRuntime->getAllocatorType() == Runtime::Allocator_Defer) {
        mDynamicAllocator.reset(new DeferBufferAllocator(defaultAlloc));
    } else {
        mDynamicAllocator.reset(new EagerBufferAllocator(defaultAlloc));
    }
    mStaticAllocator = runtime->mStaticAllocator;
    mPrecisionMode = precision;
    mCoreFunctions = MNNGetCoreFunctions();
    mInt8CoreFunctions = MNNGetInt8CoreFunctions();
    mCache = new CPUResizeCache;
}
1.1.2.2 The Pipeline class, TuningAttr and UnitInfo
// source/core/Pipeline.hpp
/** pipeline. one session may contains multiple pipeline, and one pipeline may contains more than one unit. */
class Pipeline : public NonCopyable {
public:
    struct TuningAttr {
        bool autoSetOpType;
        int maxTuningNumber;
    };
    Pipeline(Schedule::PipelineInfo&& info, bool allocInput, bool outputStatic, const TuningAttr& tune,
             const Runtime* rt, const Runtime* cpuRt);
    ~Pipeline();
    class UnitInfo : public OperatorInfo {
    public:
        UnitInfo() = default;
        virtual ~UnitInfo() = default;
        void setUp(const Command& cmd, int index, const Op* originOp, int totalIndex);
    };

public:
    /** encode :
        1. compute shape for every op's inputs and outputs;
        2. geometry transform;
        3. copy op, inputs and outputs tensor info to mBuffer
        static_model: 3; dynamic_model: 1,2,3
    */
    ErrorCode encode(bool supportDebug = false, bool permitCodegen = false);
    /** allocMemory: create Execution and alloc memory for every op */
    ErrorCode allocMemory(bool firstMalloc, bool permitCodegen);
    /** execute this pipline */
    ErrorCode execute();
    ErrorCode executeCallBack(const TensorCallBackWithInfo& before, const TensorCallBackWithInfo& after);
    Schedule::PipelineInfo& getPipelineInfo() {
        return mInfo;
    }
    float flops() const {
        return mFlops;
    }
    friend class Session;
    MNNForwardType getMainForwardType() const {
        return mInfo.first.cache.first->type();
    }

private:
    void _copyInputs();
    void _pushTuningTask(std::vector<Schedule::OpCacheInfo>&& initInfos);
    void _recycleDynamicMemory(Command* command);
    Schedule::PipelineInfo mInfo;
    bool mAllocInput;
    bool mOutputStatic;
    TuningAttr mTuneAttr;
    float mFlops = 0.0f;
    bool mIsQuantModel = false;

    // For gpu or other backend
    std::map<Tensor*, std::shared_ptr<Tensor>> mCacheConstTensors;
    std::map<Tensor*, std::shared_ptr<Tensor>> mShapeFixConstCache;
#ifndef MNN_BUILD_MINI
    GeometryComputer::Context mContext;
    Runtime::CompilerType mUseGeometry;
#endif
    const Runtime* mRuntime;
    const Runtime* mCpuRuntime;
};
1.1.2.3 Pipeline::Pipeline
// source/core/Pipeline.cpp
// typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo;
//
// /** pipeline info */
// struct OpCacheInfo {
//     /** op */
//     const Op* op;
//     /** input tensors */
//     std::vector<Tensor*> inputs;
//     /** output tensors */
//     std::vector<Tensor*> outputs;
//     /** schedule type*/
//     Schedule::Type type = Schedule::Type::SEPARATE;
//
//     /**Command buffer for cache*/
//     CommandBuffer cacheBuffer;
//
//     /**Command buffer for execute*/
//     CommandBuffer executeBuffer;
//
//     std::map<const Op*, std::shared_ptr<Execution>> executionCache;
// };
//
Pipeline::Pipeline(Schedule::PipelineInfo&& info, bool allocInput, bool outputStatic, const TuningAttr& tune,
                   const Runtime* rt, const Runtime* cpuRt)
#ifndef MNN_BUILD_MINI
    // mContext is a GeometryComputer::Context
    : mContext(info.first.cache.second, info.first.cache.first->type(),
               info.first.info.user ? info.first.info.user->precision : BackendConfig::Precision_Normal),
      mUseGeometry(rt->onGetCompilerType()) {
#else
{
#endif
    rt->onCheckInfo(info.first.info);
    mRuntime = rt;
    mCpuRuntime = cpuRt;
    mTuneAttr = tune;
    mAllocInput   = allocInput;
    mOutputStatic = outputStatic;
    mInfo         = std::move(info);
    mIsQuantModel = false;
    // mInfo.second is a std::vector<OpCacheInfo>
    for (auto& iter : mInfo.second) {
        for (auto t : iter.outputs) {
            if (TensorUtils::getDescribe(t)->quantAttr.get() != nullptr) {
                // is this a quantized model?
                mIsQuantModel = true;
                break;
            }
        }
        for (auto t : iter.inputs) {
            if (TensorUtils::getDescribe(t)->quantAttr.get() != nullptr) {
                mIsQuantModel = true;
                break;
            }
        }
        if (mIsQuantModel) {
            break;
        }
    }
}
1.1.2.3.1 GeometryComputer::Context
// source/geometry/GeometryComputer.hpp
class GeometryComputer {
public:
    virtual ~GeometryComputer() {
        // Do nothing
    }
    class MNN_PUBLIC Context {
    public:
        Context(std::shared_ptr<Backend> allocBackend, MNNForwardType type = MNN_FORWARD_CPU,
                BackendConfig::PrecisionMode precision = BackendConfig::Precision_Normal);
        ~Context();

        void clear();
        void setBackend(Backend* backend);
        void getRasterCacheCreateRecursive(Tensor* src, CommandBuffer& cmd);

        // If has cache, return. Otherwise create cache
        const std::vector<std::shared_ptr<Tensor>>& searchConst(const Op* op);
        std::shared_ptr<Tensor> allocConst(const Op* key, const std::vector<int>& shape, halide_type_t type,
                                           Tensor::DimensionType dimType = Tensor::TENSORFLOW);
        bool allocTensor(Tensor* tenosr);
        inline MNNForwardType forwardType() const {
            return mForwardType;
        }
        inline BackendConfig::PrecisionMode precisionType() const {
            return mPrecision;
        }
        void pushCache(const CommandBuffer& buffer);
        std::shared_ptr<BufferStorage> mRasterOp;

    private:
        void getRasterCacheCreate(Tensor* src, CommandBuffer& cmd);
        std::map<const Op*, std::vector<std::shared_ptr<Tensor>>> mConstTensors;
        std::vector<std::shared_ptr<Tensor>> mEmpty;
        std::vector<std::shared_ptr<Tensor>> mTempConstTensors;
        std::shared_ptr<Backend> mBackend;
        MNNForwardType mForwardType;
        BackendConfig::PrecisionMode mPrecision;
        std::vector<SharedPtr<Command>> mRasterCmdCache;
    };
    static void init();
    MNN_PUBLIC static const GeometryComputer* search(int opType, Runtime::CompilerType compType);
    static void registerGeometryComputer(std::shared_ptr<GeometryComputer> comp, std::vector<int> type,
                                         Runtime::CompilerType compType = Runtime::Compiler_Geometry);
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& cmd) const = 0;
    virtual bool onRecompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                             Context& context, CommandBuffer& cmd) const {
        return false;
    }
};