1、createSession
依据 ScheduleConfig 和 RuntimeInfo 创建会话。
// source/core/Interpreter.cpp Session* Interpreter::createSession(const ScheduleConfig& config, const RuntimeInfo& runtime) { return createMultiPathSession({config}, runtime); }
1.1 createMultiPathSession
// source/core/Interpreter.cpp Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime) { // ... auto result = newSession.get(); auto validForResize = info.validForResize; if (validForResize && mNet->modes.inputMode == Session_Input_Inside && mNet->modes.resizeMode == Session_Resize_Direct) { result->resize(); } // ... return result; }
1.1.1 Session::resize
// source/core/Session.cpp ErrorCode Session::resize() { // ... if (mNeedResize) { bool debug = mCallBackMode == Interpreter::Session_Debug; // mPipelines 类型为 std::vector<std::shared_ptr<Pipeline>> for (auto& iter : mPipelines) { auto error = iter->encode(debug, permitCodegen); if (NO_ERROR != error) { return error; } } mNeedResize = false; mNeedMalloc = true; firstMalloc = true; } // ... }
1.1.1.1 Pipeline::encode
// source/core/Pipeline.cpp // typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo; // // struct BackendCache { // Backend::Info info; // BackendConfig config; // std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> cache; // bool needComputeShape = true; // bool needComputeGeometry = true; // bool reportError = true; // std::map<Tensor*, TENSORCACHE> inputTensorCopyCache; // }; // // /** pipeline info */ // struct OpCacheInfo { // /** op */ // const Op* op; // /** input tensors */ // std::vector<Tensor*> inputs; // /** output tensors */ // std::vector<Tensor*> outputs; // /** schedule type*/ // Schedule::Type type = Schedule::Type::SEPARATE; // // /**Command buffer for cache*/ // CommandBuffer cacheBuffer; // // /**Command buffer for execute*/ // CommandBuffer executeBuffer; // // std::map<const Op*, std::shared_ptr<Execution>> executionCache; // }; // ErrorCode Pipeline::encode(bool supportDebug, bool permitCodegen) { // mInfo.first.cache 类型为 std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> // mBackend 创建的后端如(VulkanBackend) auto& mBackend = mInfo.first.cache.first; // mBackupBackend 创建的后备(默认)后端如(CPUBackend) auto& mBackupBackend = mInfo.first.cache.second; // Static Model just copy info to command buffer // mInfo.first 类型为 BackendCache if (!mInfo.first.needComputeGeometry) { // ... } else { #ifndef MNN_BUILD_MINI // mContext 类型为 GeometryComputer::Context mContext.clear(); /** Size Compute and compute Const Begin */ auto res = GeometryComputerUtils::shapeComputeAndGeometryTransform(mInfo.second, mContext, mInfo.first.cache.second, mUseGeometry, false, permitCodegen); if (res != NO_ERROR) { return res; } #endif } // ... return NO_ERROR; }
1.1.1.1.1 GeometryComputerUtils::shapeComputeAndGeometryTransform
GeometryComputerUtils::shapeComputeAndGeometryTransform 完整代码
// source/geometry/GeometryComputerUtils.cpp // /** pipeline info */ // struct OpCacheInfo { // /** op */ // const Op* op; // /** input tensors */ // std::vector<Tensor*> inputs; // /** output tensors */ // std::vector<Tensor*> outputs; // /** schedule type*/ // Schedule::Type type = Schedule::Type::SEPARATE; // // /**Command buffer for cache*/ // CommandBuffer cacheBuffer; // // /**Command buffer for execute*/ // CommandBuffer executeBuffer; // // std::map<const Op*, std::shared_ptr<Execution>> executionCache; // }; // ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform( std::vector<Schedule::OpCacheInfo>& infos, GeometryComputer::Context& geoContext, std::shared_ptr<Backend> backupBackend, Runtime::CompilerType compileType, bool skipShapeCompute, bool permitCodegen) { /** Size Compute and compute Const Begin */ GeometryComputer::Context ctx(backupBackend); // Size Compute and compute Const // infos 为算子缓存,大小为 171 for (int i=0; i<infos.size(); ++i) { // info 类型为 OpCacheInfo auto& info = infos[i]; auto& cmdBufferVir = info.executeBuffer; auto& tempBuffer = info.cacheBuffer; // ... if (info.type == Schedule::CONSTANT) { if (_hasZeroShapeOutput(info)) { continue; } ctx.clear(); auto geo = GeometryComputer::search(info.op->type(), Runtime::Compiler_Loop); { auto res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); if (!res) { tempBuffer.command.clear(); tempBuffer.extras.clear(); res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } if (!res) { MNN_ERROR("Const Folder Error in geometry for %s\n", info.op->name()->c_str()); return NOT_SUPPORT; } } GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferVir, ctx); for (auto t : info.outputs) { ctx.getRasterCacheCreateRecursive(t, cmdBufferVir); } // ... 
} } /** Size Compute and compute Const End */ /** Geometry Transform */ for (int i=0; i<infos.size(); ++i) { auto& info = infos[i]; auto& cmdBufferReal = info.executeBuffer; auto& tempBuffer = info.cacheBuffer; // TODO: Optimize if (info.type == Schedule::CONSTANT) { continue; } if (_hasZeroShapeOutput(info)) { continue; } auto geo = GeometryComputer::search(info.op->type(), compileType); { bool res = false; if (!tempBuffer.hasWrap) { res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } if (!res) { tempBuffer.command.clear(); tempBuffer.extras.clear(); res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } if (!res) { return NOT_SUPPORT; } tempBuffer.hasWrap = false; GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferReal, geoContext); for (auto t : info.outputs) { auto des = TensorUtils::getDescribe(t); if (des->usage == Tensor::InsideDescribe::OUTPUT || des->usage == Tensor::InsideDescribe::TRAINABLE) { // For output and trainable value, must directly compute the tensor geoContext.getRasterCacheCreateRecursive(t, cmdBufferReal); } } } } // ... return NO_ERROR; }
1.1.1.1.1.1 GeometryComputer::search
// source/geometry/GeometryComputer.cpp const GeometryComputer* GeometryComputer::search(int type, Runtime::CompilerType compType) { return GeometryComputerManager::get()->search(type, compType); }
1.1.1.1.1.1.1 GeometryComputerManager::search
// source/geometry/GeometryComputer.cpp GeometryComputer* search(int type, Runtime::CompilerType compType) { if (Runtime::Compiler_Origin == compType) { return &mDefault; } if (Runtime::Compiler_Loop == compType) { auto iter = mLoopTable[type].get(); if (iter != nullptr) { return iter; } } // Geometry auto iter = mTable[type].get(); if (iter != nullptr) { // FUNC_PRINT(type); return iter; } return &mDefault; }
1.1.1.1.1.1.1.1 几何计算初始化
几何计算初始化与维度计算初始化类似,是通过在 registerBackend 函数中调用 GeometryComputer::init() 来实现的。
static std::once_flag s_flag; void registerBackend() { std::call_once(s_flag, [&]() { // ... SizeComputerSuite::init(); GeometryComputer::init(); // ... }); }
GeometryComputer::init() 实现如下:
// source/geometry/GeometryComputer.cpp void GeometryComputer::init() { if (nullptr == GeometryComputerManager::get()) { GeometryComputerManager::init(); registerGeometryOps(); } } class GeometryComputerManager { static void init() { gInstance = new GeometryComputerManager; gInstance->mTable.resize(OpType_MAX + 1); gInstance->mLoopTable.resize(OpType_MAX + 1); } }
registerGeometryOps 实现如下:
// source/geometry/GeometryOPRegister.cpp void registerGeometryOps() { ___GeometryShape___create__(); ___GeometryPermute___create__(); // ... }
函数 ___GeometryShape___create__ 是通过 REGISTER_GEOMETRY 宏定义的:
// source/geometry/GeometryComputer.hpp #define REGISTER_GEOMETRY(f, c) \ extern void ___##f##__##c##__() { \ c(); \ }
其实现代码如下:
// source/geometry/GeometryShape.cpp class GeometryShape : public GeometryComputer { public: virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override { if (nullptr == TensorUtils::getDescribe(outputs[0])->mem.get()) { auto originSize = outputs[0]->length(0); outputs[0]->setLength(0, MNN_MAX_TENSOR_DIM); if(!context.allocTensor(outputs[0])) { return false; } outputs[0]->setLength(0, originSize); } auto& ib = inputs[0]->buffer(); auto outputData = outputs[0]->host<int>(); auto inputFormat = TensorUtils::getDescribe(inputs[0])->dimensionFormat; if ((inputFormat == MNN_DATA_FORMAT_NC4HW4) && TensorUtils::getDescribe(outputs[0])->dimensionFormat == MNN_DATA_FORMAT_NHWC) { outputData[0] = ib.dim[0].extent; outputData[1] = ib.dim[2].extent; outputData[2] = ib.dim[3].extent; outputData[3] = ib.dim[1].extent; } else { for (int i = 0; i < ib.dimensions; i++) { outputData[i] = ib.dim[i].extent; } } return true; } }; class GeometryRank : public GeometryComputer { // ... } class GeometryPriorBox : public GeometryComputer { // ... } class GeometrySize : public GeometryComputer { // ... } class GeometryRaster : public GeometryComputer { // ... } static void _create() { // ... } REGISTER_GEOMETRY(GeometryShape, _create);
REGISTER_GEOMETRY(GeometryShape, _create) 宏扩展如下:
// REGISTER_GEOMETRY(GeometryShape, _create) extern void ___GeometryShape___create__() { _create(); }
_create 函数的实现代码如下:
// source/geometry/GeometryShape.cpp static void _create() { std::shared_ptr<GeometryComputer> comp(new GeometryShape); GeometryComputer::registerGeometryComputer(comp, {OpType_Shape}); std::shared_ptr<GeometryComputer> comp1(new GeometryRank); GeometryComputer::registerGeometryComputer(comp1, {OpType_Rank}); std::shared_ptr<GeometryComputer> comp2(new GeometryPriorBox); GeometryComputer::registerGeometryComputer(comp2, {OpType_PriorBox}); std::shared_ptr<GeometryComputer> comp3(new GeometrySize); GeometryComputer::registerGeometryComputer(comp3, {OpType_Size}); std::shared_ptr<GeometryComputer> comp4(new GeometryRaster); GeometryComputer::registerGeometryComputer(comp4, {OpType_Raster}); }
_create 函数依次新建并注册了 GeometryShape、GeometryRank、GeometryPriorBox、GeometrySize、GeometryRaster 。注册是通过函数
GeometryComputer::registerGeometryComputer 实现的,其实现如下:
// source/geometry/GeometryComputer.cpp void GeometryComputer::registerGeometryComputer(std::shared_ptr<GeometryComputer> comp, std::vector<int> type, Runtime::CompilerType compType) { auto ins = GeometryComputerManager::get(); for (auto t : type) { ins->insert(comp, t, compType); } } class GeometryComputerManager { void insert(std::shared_ptr<GeometryComputer> c, int type, Runtime::CompilerType compType) { if (Runtime::Compiler_Geometry == compType) { mTable[type] = c; } else if (Runtime::Compiler_Loop == compType) { mLoopTable[type] = c; } } }
由代码可知,当类型为 Runtime::Compiler_Geometry 时注册到 mTable,当类型为 Runtime::Compiler_Loop 时注册到 mLoopTable;其他类型(如 Runtime::Compiler_Origin)则不做任何注册。
综上可见,扩展后的代码正是一个函数,其通过内部的 _create 函数注册到 mTable 或 mLoopTable 中,函数名 ___GeometryShape___create__ 呼应了 registerGeometryOps 函数中的调用。mTable 和 mLoopTable 呼应了 GeometryComputerManager::search 函数的实现。
1.1.1.1.1.2 GeometryComputer::onRecompute
在函数 GeometryComputerUtils::shapeComputeAndGeometryTransform 中调用GeometryComputer::onRecompute (geo->onRecompute) 函数的代码如下:
// source/geometry/GeometryComputerUtils.cpp ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(...) { // ... // Size Compute and compute Const for (int i=0; i<infos.size(); ++i) { auto& info = infos[i]; // ... if (info.type == Schedule::CONSTANT) { // ... auto geo = GeometryComputer::search(info.op->type(), Runtime::Compiler_Loop); { auto res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); if (!res) { // ... res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } // ... } GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferVir, ctx); for (auto t : info.outputs) { ctx.getRasterCacheCreateRecursive(t, cmdBufferVir); } // ... } } /** Geometry Transform */ for (int i=0; i<infos.size(); ++i) { // ... auto geo = GeometryComputer::search(info.op->type(), compileType); { bool res = false; if (!tempBuffer.hasWrap) { res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } if (!res) { // ... res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer); } // ... GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferReal, geoContext); for (auto t : info.outputs) { auto des = TensorUtils::getDescribe(t); if (des->usage == Tensor::InsideDescribe::OUTPUT || des->usage == Tensor::InsideDescribe::TRAINABLE) { // For output and trainable value, must directly compute the tensor geoContext.getRasterCacheCreateRecursive(t, cmdBufferReal); } } } } // ... }
GeometryComputer::search 函数找到对应的几何计算实现(如:GeometryShape),然后调用其方法 onRecompute。
备注:GeometryComputer::onRecompute 是多态调用,实际运行时会根据 info.op->type() 查找到的几何计算子类,调用相应的实现。
GeometryShape 没有重写 onRecompute,因此实际调用的是其基类 GeometryComputer 中的实现,代码如下:
// Excerpt of the GeometryComputer base class; only onRecompute is shown,
// other members are omitted.
class GeometryComputer {
public:
    /**
     * Default recompute hook of the base class.
     *
     * The base implementation does nothing and reports failure. The callers
     * shown in GeometryComputerUtils::shapeComputeAndGeometryTransform react
     * to a false result by clearing the command buffer and calling onCompute
     * instead.
     *
     * Declared `virtual` without `override`: this is the base-class
     * declaration, so there is nothing to override.
     *
     * @param op      op being processed.
     * @param inputs  input tensors of the op.
     * @param outputs output tensors of the op.
     * @param context geometry context.
     * @param res     command buffer that a successful recompute would reuse.
     * @return always false in the base class.
     */
    virtual bool onRecompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                             Context& context, CommandBuffer& res) const {
        return false;
    }
};
1.1.1.1.1.3 GeometryComputer::onCompute
在函数 GeometryComputerUtils::shapeComputeAndGeometryTransform 中调用 GeometryComputer::onCompute (geo->onCompute) 函数的代码,详见 1.1.1.1.1.2 节中引用的代码。
GeometryComputer::search 函数找到对应的几何计算实现(如:GeometryShape),然后调用其方法 onCompute。
备注:GeometryComputer::onCompute 是多态调用,实际运行时会根据 info.op->type() 查找到的几何计算子类,调用相应的实现。
GeometryShape::onCompute 的实现代码详见 1.1.1.1.1.1.1.1 节(几何计算初始化)中的 GeometryShape 类。
1.1.1.1.1.4 GeometryComputerUtils::makeRaster
// source/geometry/GeometryComputerUtils.cpp void GeometryComputerUtils::makeRaster(const CommandBuffer& srcBuffer, CommandBuffer& dstBuffer, GeometryComputer::Context& ctx) { dstBuffer.extras = srcBuffer.extras; for (int index = 0; index < srcBuffer.command.size(); ++index) { auto& iter = *srcBuffer.command[index]; const Op* op = iter.op; auto& cmd = iter; auto type = op->type(); MNN_ASSERT(OpType_Raster != type); for (int i = 0; i < iter.inputs.size(); ++i) { if (!OpCommonUtils::opNeedContent(op, i)) { continue; } auto des = TensorUtils::getDescribe(cmd.inputs[i]); //MNN_ASSERT(des->tensorArrayAttr == nullptr); if (des->memoryType == Tensor::InsideDescribe::MEMORY_VIRTUAL) { ctx.getRasterCacheCreateRecursive(cmd.inputs[i], dstBuffer); } } dstBuffer.command.emplace_back(srcBuffer.command[index]); } }
1.1.1.1.1.5 GeometryComputer::Context::getRasterCacheCreateRecursive
在函数 GeometryComputerUtils::shapeComputeAndGeometryTransform 中调用
GeometryComputer::Context::getRasterCacheCreateRecursive 函数的代码如下:
ctx.getRasterCacheCreateRecursive(t, cmdBufferVir);
其具体实现代码如下:
// source/geometry/GeometryComputer.cpp void GeometryComputer::Context::getRasterCacheCreateRecursive(Tensor* src, CommandBuffer& cmd) { auto srcDes = TensorUtils::getDescribe(src); if (srcDes->memoryType != Tensor::InsideDescribe::MEMORY_VIRTUAL) { return; } if (_hasZeroDim(src)) { return; } for (auto& input : srcDes->regions) { MNN_ASSERT(input.origin != src); auto inputDes = TensorUtils::getDescribe(input.origin); while (inputDes->memoryType == Tensor::InsideDescribe::MEMORY_VIRTUAL) { if (1 != inputDes->regions.size()) { break; } bool merge = TensorUtils::fuseRegion(inputDes->regions[0], input); if (!merge) { break; } inputDes = TensorUtils::getDescribe(input.origin); } getRasterCacheCreateRecursive(input.origin, cmd); } getRasterCacheCreate(src, cmd); }