MNN createSession 之 Schedule(三)

简介: MNN createSession 之 Schedule(三)

1、createSession

    依据 ScheduleConfig 创建会话;RuntimeInfo 可由调用方传入,未传入时由 createMultiPathSession 根据 ScheduleConfig 创建。

// source/core/Interpreter.cpp
/**
 * @brief create a session for a single schedule configuration.
 *        No runtime is passed in; the single-argument createMultiPathSession overload
 *        builds the RuntimeInfo from the configuration.
 * @param config    schedule configuration.
 * @return whatever createMultiPathSession returns (nullptr on failure).
 */
Session* Interpreter::createSession(const ScheduleConfig& config) {
    const std::vector<ScheduleConfig> singlePath(1, config);
    return createMultiPathSession(singlePath);
}

/**
 * @brief create a session for a single schedule configuration on a caller-provided runtime.
 * @param config    schedule configuration.
 * @param runtime   runtime info forwarded unchanged to createMultiPathSession.
 * @return whatever createMultiPathSession returns (nullptr on failure).
 */
Session* Interpreter::createSession(const ScheduleConfig& config, const RuntimeInfo& runtime) {
    const std::vector<ScheduleConfig> singlePath(1, config);
    return createMultiPathSession(singlePath, runtime);
}

1.1 createMultiPathSession

// source/core/Interpreter.cpp
/**
 * @brief create a multi-path session, building the runtime from the configurations first.
 * @param configs   schedule configurations, one per path.
 * @return created session, or nullptr when no valid runtime could be created or the
 *         two-argument overload fails.
 */
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs) {
    RuntimeInfo runtime = createRuntime(configs);
    // Validate BEFORE touching runtime.second: the original configured runtime.second first,
    // which dereferences a null pointer if createRuntime left it unset on failure.
    if (runtime.first.empty() || nullptr == runtime.second) {
        MNN_ERROR("Runtime not valid for create session\n");
        return nullptr;
    }
    // Propagate the model-level settings to the runtime stored in runtime.second.
    runtime.second->setExternalFile(mNet->externalFile);
    runtime.second->setAllocatorType(mNet->modes.memoryAllocatorType);
    return createMultiPathSession(configs, std::move(runtime));
}

/**
 * @brief create a multi-path session on a caller-provided runtime.
 *
 * Steps, in order: validate the model buffer and runtime, schedule the net, try to feed an
 * existing cache buffer to the runtimes, construct the Session, optionally resize it, optionally
 * write a new cache file, and finally register the session in mNet->sessions.
 *
 * @param configs   schedule configurations, one per path.
 * @param runtime   runtime info; it is copied, the caller's object is not modified.
 * @return the created session (owned by mNet->sessions), or nullptr on failure.
 */
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime) {
    if (nullptr == mNet->buffer.get()) {
        MNN_ERROR("The model buffer has been released. Can't create session\n");
        return nullptr;
    }
    // Schedule::schedule dereferences runtime.second for a fresh ScheduleInfo, so reject a
    // runtime whose second member is unset as well as one without any entry in first.
    if (runtime.first.empty() || nullptr == runtime.second) {
        MNN_ERROR("Runtime not valid for create session\n");
        return nullptr;
    }
    std::unique_lock<std::mutex> _l(mNet->lock);
#ifdef MNN_INTERNAL_ENABLED
    Timer _timer;
#endif
    int cacheMode = 0; // No cache
    // Build the schedule info (pipelines and tensors) for this net.
    Schedule::ScheduleInfo info;
    auto success = Schedule::schedule(info, mNet->net, configs, runtime);
    if (!success) {
        return nullptr;
    }
    // Read the flag now: `info` is moved into the Session below and must not be read afterwards.
    const bool validForResize = info.validForResize;
    RuntimeInfo rt = runtime;
    bool valid  = false;
    if (mNet->cacheBuffer.get() != nullptr) {
        // Offer the cache to each runtime until one accepts it; iterate by reference to avoid
        // copying the map entry on every step.
        for (auto& iter : rt.first) {
            valid = iter.second->onSetCache(mNet->cacheBuffer.get(),
                                            mNet->cacheBuffer.size());
            if (valid) {
                break;
            }
            // Rejected: reset this runtime's cache.
            iter.second->onSetCache(nullptr, 0);
        }
        if (valid) {
            mNet->lastCacheSize = mNet->cacheBuffer.size();
            cacheMode = cacheMode | 1; // READ cache
        }
    }

    auto newSession =
        std::unique_ptr<Session>(new Session(std::move(info), mNet->modes, std::move(rt)));
    if (!newSession->valid()) {
        MNN_PRINT("Invalide Session!!\n");
        return nullptr;
    }
    auto result = newSession.get();
    if (validForResize && mNet->modes.inputMode == Session_Input_Inside && mNet->modes.resizeMode == Session_Resize_Direct) {
        result->resize();
    }

    if ((!mNet->cacheFile.empty()) && (!valid) && mNet->modes.backendMode == Session_Backend_Fix) {
        // Try to save extra cache
        auto buffer = result->getCache();
        if (buffer.first != nullptr && buffer.second > 0) {
            MNN_PRINT("Write cache to %s, size = %zu\n", mNet->cacheFile.c_str(), buffer.second);
            writeCacheFile(mNet, buffer);
            mNet->lastCacheSize = buffer.second;
            // Write Cache
            cacheMode = cacheMode | 2;
        }
    }
    // Reset cache
    result->loadCache(nullptr, 0);

    // Ownership moves to mNet->sessions; `result` stays valid as a non-owning pointer.
    mNet->sessions.emplace_back(std::move(newSession));

#ifdef MNN_INTERNAL_ENABLED
    int precision = BackendConfig::Precision_Normal;
    if (nullptr != configs[0].backendConfig) {
        precision = configs[0].backendConfig->precision;
    }
    int mode = configs[0].mode;
    mNet->sessionInfo.insert(std::make_pair(result, std::make_tuple(precision, mode)));
    if (shouldLog(FREQ_HIGH)) {
        std::map<std::string, std::string> metrics = mNet->basicLogginData;
        metrics.emplace("UUID", mNet->uuid);
        metrics.emplace("Time", std::to_string((float)_timer.durationInUs() / 1024.0f));
        metrics.emplace("Backend", std::to_string(configs[0].type));
        metrics.emplace("Precision", std::to_string(precision));
        metrics.emplace("Mode", std::to_string(mode));
        metrics.emplace("Cache", std::to_string(cacheMode));
        metrics.emplace("CacheSize", std::to_string((float)(mNet->lastCacheSize / 1024.0f)));
        metrics.emplace("ModelSize", std::to_string ((float)mNet->buffer.size() / 1024.0f / 1024.0f));
        metrics.emplace("Usage", std::to_string((int) mNet->net->usage()));
        metrics.emplace("API", "Interpreter::createMultiPathSession");
        logAsync(metrics);
    }
#endif // MNN_INTERNAL_ENABLED

    return result;
}

1.1.1 Schedule 类 OpCacheInfo、BackendCache、PipelineInfo、ScheduleInfo

    调度器

/** net scheduler */
class MNN_PUBLIC Schedule {
public:
    enum Type {
        // Size can be compute separately
        SEPARATE = 0,
        // When size is fixed, the content is fixed
        CONSTANT = 1,
        // Size can't be compute separately
        NOT_SEPERATE
    };
    /** pipeline info */
    struct OpCacheInfo {
        /** op */
        const Op* op;
        /** input tensors */
        std::vector<Tensor*> inputs;
        /** output tensors */
        std::vector<Tensor*> outputs;
        /** schedule type*/
        Schedule::Type type = Schedule::Type::SEPARATE;

        /**Command buffer for cache*/
        CommandBuffer cacheBuffer;

        /**Command buffer for execute*/
        CommandBuffer executeBuffer;
        
        std::map<const Op*, std::shared_ptr<Execution>> executionCache;
    };

    // Backend, Tensor, shape-dirty, content-dirty
    typedef std::tuple<Tensor*, std::shared_ptr<Tensor>, bool, bool> TENSORCACHE;
    struct BackendCache {
        Backend::Info info;
        BackendConfig config;
        std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> cache;
        bool needComputeShape = true;
        bool needComputeGeometry = true;
        bool reportError = true;
        std::map<Tensor*, TENSORCACHE> inputTensorCopyCache;
    };
    typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo;

    /** schedule info */
    struct ScheduleInfo {
        /** pipelines with backend info */
        std::vector<PipelineInfo> pipelineInfo;
        /** input tensors map */
        std::map<std::string, Tensor*> inputTensors;
        /** output tensors map */
        std::map<std::string, Tensor*> outputTensor;
        /** all tensors */
        std::vector<std::shared_ptr<Tensor>> allTensors;
        /** input valid for resize*/
        bool validForResize;
        /** Default Backend for alloc const*/
        std::shared_ptr<Backend> defaultBackend;
        /** Replace Backend for alloc const*/
        std::shared_ptr<Backend> constReplaceBackend;
        /** size need input's content*/
        bool needInputContentForShape = false;
    };

    /**
     * @breif schedule net ops to pipeline with configuration.
     * @param net       given net.
     * @param config    given configuration.
     * @return schedule info.
     */
    static bool schedule(ScheduleInfo& result, const Net* net, const std::vector<ScheduleConfig>& config, const RuntimeInfo& runtimeInfo);
    static MNNForwardType getApprociateType(const ScheduleConfig& config);
};

1.1.1.1 Backend 类 Backend::Info

// source/core/Backend.hpp
/** abstract backend: creates executions and manages tensor buffers for one forward type */
class Backend : public NonCopyable {

public:
    /** info used to create backend */
    struct Info {
        /** forward type. */
        MNNForwardType type = MNN_FORWARD_CPU;
        /**
         * numThread: number of threads, for CPU.
         * gpuMode: tuning/memory mode setting, for GPU only.
         * Both names share the same storage.
         */
        union {
            int numThread = 4;
            int gpuMode;
        };
        /** user data. */
        BackendConfig* user = nullptr;
        enum Mode {
            // The Op will be run in execution->onExecute
            DIRECT = 0,

            // The Op will be recorded. Run in onExecuteBegin and Wait in onExecuteEnd
            INDIRECT = 1
        };
        Mode mode = DIRECT;
        enum Allocator {
            DEFER = 0,
            EAGER = 1
        };
        Allocator allocator = DEFER;
    };

    /** backend buffer storage type */
    enum StorageType {
        /**
         use NOT reusable memory.
         - allocates memory when `onAcquireBuffer` is called.
         - releases memory when `onReleaseBuffer` is called or when the backend is deleted.
         - do NOTHING when `onClearBuffer` is called.
         */
        STATIC,
        /**
         use reusable memory.
         - allocates or reuses memory when `onAcquireBuffer` is called. prefers reusing.
         - collects memory for reuse when `onReleaseBuffer` is called.
         - releases memory when `onClearBuffer` is called or when the backend is deleted.
         */
        DYNAMIC,
        /**
         use NOT reusable memory.
         - allocates memory when `onAcquireBuffer` is called.
         - do NOTHING when `onReleaseBuffer` is called.
         - releases memory when `onClearBuffer` is called or when the backend is deleted.
         */
        DYNAMIC_SEPERATE
    };

public:
    /**
     * @brief initializer.
     * @param type  forward type.
     */
    Backend(MNNForwardType type) : mType(type) {
        // nothing to do
    }

    /**
     * @brief deinitializer.
     */
    virtual ~Backend() = default;

public:

    /**
     * @brief create execution for op with input and output tensors.
     * @param inputs    input tensors.
     * @param outputs   output tensors.
     * @param op        given op.
     * @return created execution if op is supported, nullptr otherwise.
     */
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op) = 0;

    /**
     * @brief callback before resize ops.
     */
    virtual void onResizeBegin() {
        // nothing to do
    }
    /**
     * @brief callback after resize ops.
     * @return error code of the resize.
     */
    virtual ErrorCode onResizeEnd() = 0;

    /**
     * @brief callback before executing ops.
     */
    virtual void onExecuteBegin() const = 0;
    /**
     * @brief callback after executing ops.
     */
    virtual void onExecuteEnd() const = 0;

    /**
     * @brief get the runtime behind this backend.
     * @return nullptr in this default implementation.
     */
    virtual const Runtime* getRuntime() {
        return nullptr;
    }
    /** @brief external file name (declared here, defined elsewhere). */
    const std::string externalFile();
public:
    /**
     * @brief allocate buffer of tensor for given storage type.
     * @param tensor        buffer provider.
     * @param storageType   buffer storage type.
     * @return success or not.
     */
    MNN_PUBLIC bool onAcquireBuffer(const Tensor* tensor, StorageType storageType);

    /**
     * @brief release buffer of tensor for given storage type.
     * @param tensor        buffer provider.
     * @param storageType   buffer storage type.
     * @return success or not.
     */
    MNN_PUBLIC bool onReleaseBuffer(const Tensor* tensor, StorageType storageType);

    /** handle returned by onAcquire; the base chunk() returns a default-constructed MemChunk */
    class MemObj {
    public:
        MemObj() {}
        virtual ~ MemObj() {}
        virtual MemChunk chunk() { return MemChunk(); }
    };
    /**
     * @brief allocate buffer of tensor for given storage type.
     * @param tensor        buffer provider.
     * @param storageType   buffer storage type.
     * @return MemObj for release, if failed, return nullptr.
     */
    virtual MemObj* onAcquire(const Tensor* tensor, StorageType storageType) = 0;

    /**
     * @brief get buffer from tensor directly
     * @param tensor        buffer provider.
     * @param dstInfo       destination for the info; its layout is backend-specific
     *                      (not visible in this header -- confirm with the overriding backend).
     * @return support or not; this default implementation returns false.
     */
    virtual bool onGetTensorInfo(const Tensor* tensor, void* dstInfo) {
        return false;
    }

    /**
     * @brief clear all dynamic buffers.
     * @return success or not.
     */
    virtual bool onClearBuffer() = 0;

    /**
     * @brief copy buffer from tensor to tensor.
     * @param srcTensor source buffer provider.
     * @param dstTensor dest buffer provider.
     */
    virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const = 0;

public:
    /**
     * @brief get forward type.
     * @return forward type.
     */
    inline MNNForwardType type() const {
        return mType;
    }

public:
    /**
     * @brief get Gpu Tensor map host ptr/ unmap
     * @return mapped pointer; this default implementation returns nullptr.
     */
    virtual void* onMapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* srcTensor) {
        return nullptr;
    }

    /**
     * @brief unmap a pointer obtained from onMapTensor.
     * @return success or not; this default implementation returns false.
     */
    virtual bool onUnmapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* dstTensor, void* mapPtr) {
        return false;
    }

    /**
     * @brief sync hook for a tensor; this default implementation does nothing and returns 0.
     */
    virtual int onSync(Tensor::MapType mtype, bool toCpu, const Tensor* dstTensor) {
        return 0;
    }

private:
    /** forward type fixed at construction. */
    const MNNForwardType mType;
};

1.1.2 Schedule::schedule

/**
 * @brief schedule net ops into pipelines, one pipeline per configuration.
 *
 * Fills scheduleInfo with: the default backend and constant tensors (first call only), all
 * tensors, one pipeline per config, and the input / output tensor maps.
 *
 * @param scheduleInfo  schedule info to fill.
 * @param net           given net.
 * @param configs       given configurations.
 * @param runtimeInfo   runtime info; runtimeInfo.second creates the default backend.
 * @return true on success, false when the net is empty, the default backend can't be created
 *         or constant initialization fails.
 */
bool Schedule::schedule(ScheduleInfo& scheduleInfo, const Net* net, const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtimeInfo) {
    if (nullptr == net->oplists()) {
        MNN_PRINT("Empty net for schedule\n");
        return false;
    }
    if (scheduleInfo.defaultBackend.get() == nullptr && scheduleInfo.allTensors.empty()) {
        // Const not init, init it
        if (nullptr == runtimeInfo.second) {
            MNN_ERROR("Schedule: runtime for default backend is null\n");
            return false;
        }
        BackendConfig defaultConfig;
        defaultConfig.flags = 4;
        // Create the default backend (the original article notes this is the CPU backend,
        // source/backend/cpu/CPUBackend.cpp -- not verifiable from this function alone).
        scheduleInfo.defaultBackend.reset(runtimeInfo.second->onCreate(&defaultConfig));
        if (nullptr == scheduleInfo.defaultBackend.get()) {
            // initConstTensors dereferences the backend, so fail here instead of crashing.
            MNN_ERROR("Schedule: can't create default backend\n");
            return false;
        }
        ErrorCode code = NO_ERROR;
        initConstTensors(scheduleInfo.allTensors, net, scheduleInfo.defaultBackend.get(), code);
        if (NO_ERROR != code) {
            MNN_ERROR("Schedule Const init errorcode = %d\n", code);
            return false;
        }
    }
    bool valid = initTensors(scheduleInfo.allTensors, net);
    scheduleInfo.validForResize = valid;
    std::vector<std::shared_ptr<Tensor>>& allTensors = scheduleInfo.allTensors;
    std::vector<std::pair<Schedule::BackendCache, std::vector<Schedule::OpCacheInfo>>> result;
    result.reserve(configs.size());

    for (auto& config : configs) {
        Backend::Info compute;
        compute.type      = getApprociateType(config);
        compute.numThread = config.numThread;
        if(config.type == MNN_FORWARD_AUTO) {
            if(compute.type == MNN_FORWARD_OPENCL || compute.type == MNN_FORWARD_METAL) {
                // AUTO set default gpu-mode MNN_GPU_TUNING_FAST
                compute.numThread = 16;
            }
        }
        compute.user      = config.backendConfig;
        // Select the ops for this config and bind their input / output tensors.
        auto oplists      = _scheduleUnit(net, config, allTensors);
        Schedule::BackendCache cache;
        cache.info = std::move(compute);
        // Move instead of copying the cache into the pair.
        result.emplace_back(std::move(cache), std::move(oplists));
    }

    scheduleInfo.pipelineInfo = std::move(result);

    // get all used op's output, drop unused op, won't change op order. always insert all Input Ops
    std::vector<const Op*> oplists;
    {
        for (auto& pipeline : scheduleInfo.pipelineInfo) {
            for (auto& info : pipeline.second) {
                oplists.push_back(info.op);
            }
        }
    }
    // set tensors' input/output usage by oplists info
    setInputOutputForOps(allTensors, oplists, net->usage() == Usage_INFERENCE_STATIC);

    // add output index by config info and outputName
    std::unordered_map<std::string, int> tensorNameIndexMap;
    for (int i = 0; i < net->tensorName()->size(); ++i) {
        tensorNameIndexMap[net->tensorName()->Get(i)->str()] = i;
    }
    bool userSetOutput = false;
    // Outputs requested through ScheduleConfig::saveTensors.
    for (auto& config : configs) {
        userSetOutput = userSetOutput || (!config.saveTensors.empty());
        for (const auto& name : config.saveTensors) {
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                }
                scheduleInfo.outputTensor.insert(
                           std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
            } else {
                MNN_PRINT("Bad outputname: %s\n", name.c_str());
            }
        }
    }
    // Outputs declared by the net itself (outputName).
    if (net->outputName()) {
        userSetOutput = userSetOutput || net->outputName()->size() >= 1;
        for (int i = 0; i < net->outputName()->size(); ++i) {
            std::string name = net->outputName()->Get(i)->str();
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                }
                scheduleInfo.outputTensor.insert(
                               std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
            }
        }
    }
    // No requested name matched: fall back to the tensors whose usage is OUTPUT.
    if (scheduleInfo.outputTensor.empty()) {
        userSetOutput = false;
    }
    // add input/output tensor to schedule's input/output
    for (int index = 0; index < allTensors.size(); index++) {
        auto t = allTensors[index].get();
        auto usage = TensorUtils::getDescribe(t)->usage;
        if (usage == Tensor::InsideDescribe::INPUT) {
            // (the article's sample model ends up with 1 input tensor)
            scheduleInfo.inputTensors.insert(std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
        if (usage == Tensor::InsideDescribe::OUTPUT && (!userSetOutput)) {
            // (the article's sample model ends up with 3 output tensors)
            scheduleInfo.outputTensor.insert(
                       std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
    }
    if (net->usage() == Usage_INFERENCE_STATIC) {
        for (auto& pipInfo : scheduleInfo.pipelineInfo) {
            pipInfo.first.needComputeGeometry = false;
            pipInfo.first.needComputeShape = false;
        }
    }

#ifndef MNN_BUILD_MINI
    for (auto iter = scheduleInfo.pipelineInfo.begin(); iter != scheduleInfo.pipelineInfo.end();) {
        if (!iter->first.needComputeGeometry) {
            // For static model don't need check const
            iter++;
            continue;
        }
        auto breakIndex = GeometryComputerUtils::buildConstantTensors(iter->second);
        if (breakIndex >= 0) {
            scheduleInfo.needInputContentForShape = true;
        }
#ifdef MNN_SEPERTE_SIZE
        if (breakIndex >= 0 && (breakIndex + 1) < iter->second.size()) {
            // Split oplist at breakIndex into two pipelines that share the same backend cache.
            // The halves hold ops (OpCacheInfo); the original declared them as
            // vector<PipelineInfo>, which cannot be filled from / assigned to iter->second.
            std::vector<Schedule::OpCacheInfo> fuse;
            std::vector<Schedule::OpCacheInfo> separate;
            fuse.insert(fuse.begin(), iter->second.begin(), iter->second.begin() + breakIndex + 1);
            separate.insert(separate.begin(), iter->second.begin() + breakIndex + 1, iter->second.end());
            oplists.clear();
            iter->second = std::move(separate);
            iter = scheduleInfo.pipelineInfo.insert(iter, std::make_pair(iter->first, fuse));
            // Skip both the inserted pipeline and the remaining half.
            iter++;
            iter++;
        } else {
            iter++;
        }
#else
        iter++;
#endif
    }
#endif
    return true;
}

1.1.2.1 initConstTensors

// source/utils/InitNet.cpp
/**
 * @brief create and fill the tensors produced by Const / TrainableParam ops.
 *
 * `tensors` is resized to the number of tensor names of the net; only the slots written by
 * Const / TrainableParam ops are created here, the others stay null.
 *
 * @param tensors         all tensors of the net (output).
 * @param net             given net.
 * @param defaultBackend  backend that owns the constant buffers.
 * @param code            set to OUT_OF_MEMORY or INVALID_VALUE on failure, untouched otherwise.
 * @return true on success, false on failure.
 */
bool initConstTensors(std::vector<std::shared_ptr<Tensor>>& tensors, const Net* net, Backend* defaultBackend, ErrorCode& code) {
    // One slot per tensor name (not per op).
    tensors.resize(net->tensorName()->size());
    // Set up const
    for (int opIndex = 0; opIndex < net->oplists()->size(); ++opIndex) {
        auto op = net->oplists()->GetAs<Op>(opIndex);
        if (OpType_Const == op->type() || OpType_TrainableParam == op->type()) {
            MNN_ASSERT(nullptr != op->outputIndexes());
            auto index = op->outputIndexes()->data()[0];
            tensors[index].reset(new Tensor);
            TensorUtils::getDescribe(tensors[index].get())->index = index;
            auto parameter = op->main_as_Blob();
            if (nullptr == parameter) {
                // A constant op without Blob data can't be initialized; report it instead of
                // dereferencing a null pointer below.
                code = INVALID_VALUE;
                return false;
            }
            auto output    = tensors[index].get();
            bool zeroShape = false;
            if (parameter->dims() != nullptr) {
                output->buffer().dimensions = parameter->dims()->size();
                for (int i = 0; i < output->buffer().dimensions; i++) {
                    output->buffer().dim[i].extent = parameter->dims()->Get(i);
                    if (output->length(i) <= 0) {
                        zeroShape = true;
                    }
                }
            } else {
                output->buffer().dimensions = 0;
            }
            // Half constants are stored as float tensors (converted while loading below).
            if (parameter->dataType() == DataType_DT_HALF) {
                output->setType(DataType_DT_FLOAT);
            } else {
                output->setType(parameter->dataType());
            }
            TensorUtils::getDescribe(output)->dimensionFormat = parameter->dataFormat();
            TensorUtils::getDescribe(output)->usage = Tensor::InsideDescribe::CONSTANT;
            TensorUtils::getDescribe(output)->isMutable = false;
            if (op->type() == OpType_TrainableParam) {
                TensorUtils::getDescribe(output)->usage = Tensor::InsideDescribe::TRAINABLE;
            }
            TensorUtils::setLinearLayout(output);
            TensorUtils::getDescribe(output)->setBackend(defaultBackend);
            //MNN_PRINT("Const tensor %p is %p bn\n", output, defaultBackend);
            if (zeroShape) {
                // Nothing to allocate or load for an empty shape.
                continue;
            }
            auto res = defaultBackend->onAcquireBuffer(output, Backend::STATIC);
            if (!res) {
                code = OUT_OF_MEMORY;
                return false;
            }
            if (parameter->dataType() == DataType_DT_HALF) {
                if (nullptr == parameter->uint8s()) {
                    // Error half const
                    code = INVALID_VALUE;
                    return false;
                }
                auto outputPtr = output->host<float>();
                auto size = output->elementSize();
                half_float::half* src = nullptr;
                std::unique_ptr<half_float::half[]> tmp;
                if (USE_EXTERNAL_DATA(parameter)) {
                    // Load the half data into a temporary buffer first.
                    tmp.reset((new half_float::half[size]));
                    src = tmp.get();
                    OpCommonUtils::loadExternalDatas(defaultBackend, {reinterpret_cast<char*>(src)}, parameter->external()->data());
                } else {
                    src = (half_float::half*)parameter->uint8s()->data();
                }
                // Element-wise half -> float conversion.
                for (int i=0; i<size; ++i) {
                    outputPtr[i] = src[i];
                }
            } else {
                OpCommonUtils::loadBlobData(defaultBackend, op, output->host<char>(), output->size());
            }
        } else {
            // A tensor already created as CONSTANT that is also written by a non-constant op
            // is re-tagged as TRAINABLE.
            if (nullptr != op->outputIndexes()) {
                for (int i=0; i<op->outputIndexes()->size(); ++i) {
                    auto index = op->outputIndexes()->data()[i];
                    if (nullptr == tensors[index].get()) {
                        continue;
                    }
                    auto des = TensorUtils::getDescribe(tensors[index].get());
                    if (des->usage == Tensor::InsideDescribe::CONSTANT) {
                        des->usage = Tensor::InsideDescribe::TRAINABLE;
                    }
                }
            }
        }
    }
    return true;
}

1.1.2.2 initTensors

    初始化张量

// source/utils/InitNet.cpp
bool initTensors(std::vector<std::shared_ptr<Tensor>>& tensors, const Net* net) {
    bool valid    = true;
    auto describes = net->extraTensorDescribe();
    std::vector<const TensorDescribe*> des(tensors.size());
    for (int i=0; i<tensors.size(); ++i) {
        // Init all tensor except for const
        if (tensors[i].get() == nullptr) {
            tensors[i].reset(new Tensor);
            TensorUtils::getDescribe(tensors[i].get())->index = i;
            // MNN_PRINT("initTensors create tensor:%p, index:%d, backend:%d\n", tensors[i].get(), i, TensorUtils::getDescribe(tensors[i].get())->backend);
        }
    }
    if (describes) {
        for (int i = 0; i < describes->size(); i++) {
            int index  = describes->GetAs<TensorDescribe>(i)->index();
            des[index] = describes->GetAs<TensorDescribe>(i);
        }
    }
    for (int i = 0; i < tensors.size(); ++i) {
        if (des[i] != nullptr && des[i]->quantInfo()) {
            TensorUtils::getDescribe(tensors[i].get())->quantAttr.reset(new QuantAttr);
            auto quant   = TensorUtils::getDescribe(tensors[i].get())->quantAttr.get();
            quant->scale =  des[i]->quantInfo()->scale();
            quant->zero  =  des[i]->quantInfo()->zero();
            quant->min   =  des[i]->quantInfo()->min();
            quant->max   =  des[i]->quantInfo()->max();
            // Don't copy datatype, it can be set by backend
        }
    }
    // Set Input Tensor, if the type of input is not the same with ExtraTensorDescribe, use input parameter
    for (int opIndex = 0; opIndex < net->oplists()->size(); ++opIndex) {
        auto op = net->oplists()->GetAs<Op>(opIndex);
        if (OpType_Input == op->type()) {
            MNN_ASSERT(nullptr != op->outputIndexes());
            MNN_ASSERT(op->outputIndexes()->size() == 1);
            auto index      = op->outputIndexes()->data()[0];
            auto tensor     = tensors[index].get();
            auto& tb        = tensor->buffer();
            auto inputParam = op->main_as_Input();
            if (auto idims = inputParam->dims()) {
                for (int i = 0; i < idims->size(); ++i) {
                    int extent = idims->data()[i];
                    // dim-0 is batch(when input batch is -1, set it to be 1, ignore other dim)
                    if (i == 0 && extent == -1) {
                        extent = 1;
                    }
                    if (extent < 0) {
                        valid = false;
                    }
                    tb.dim[i].extent = extent;
                }
                tb.dimensions = idims->size();
            } else {
                tb.dimensions = 0;
            }
            tensor->setType(inputParam->dtype());
            TensorUtils::getDescribe(tensor)->dimensionFormat = inputParam->dformat();
            TensorUtils::setLinearLayout(tensor);
        }
    }
    if (net->usage() != Usage_INFERENCE_STATIC) {
        return valid;
    }
    // static model will set all tensors' shape
    for (int i = 0; i < describes->size(); i++) {
        int index  = describes->GetAs<TensorDescribe>(i)->index();
        des[index] = describes->GetAs<TensorDescribe>(i);
    }
    for (int i = 0; i < tensors.size(); ++i) {
        if (TensorUtils::getDescribe(tensors[i].get())->usage != Tensor::InsideDescribe::NORMAL) {
            // Const / Trainable Shape has been inited
            continue;
        }
        auto blob = des[i]->blob();
        auto& tb = tensors[i]->buffer();
        if (auto idims = blob->dims()) {
            for (int d = 0; d < idims->size(); d++) {
                tb.dim[d].extent = idims->Get(d);
            }
            tb.dimensions = idims->size();
        } else {
            tb.dimensions = 0;
        }
        tensors[i]->setType(blob->dataType());
    }
    for (int i = 0; i < tensors.size(); ++i) {
        auto blob                                                   = des[i]->blob();
        TensorUtils::getDescribe(tensors[i].get())->dimensionFormat = blob->dataFormat();
        if (auto regions = des[i]->regions()) {
            auto& regs = TensorUtils::getDescribe(tensors[i].get())->regions;
            TensorUtils::getDescribe(tensors[i].get())->memoryType = Tensor::InsideDescribe::MEMORY_BACKEND;
            regs.reserve(regions->size());
            for (int r = 0; r < regions->size(); r++) {
                auto region = regions->GetAs<Region>(r);
                Tensor::InsideDescribe::Region reg;
                reg.origin     = tensors[region->origin()].get();
                reg.src.offset = region->src()->offset();
                reg.dst.offset = region->dst()->offset();
                for (int d = 0; d < 3; d++) {
                    reg.size[d]       = region->size()->data()[d];
                    reg.src.stride[d] = region->src()->stride()->data()[d];
                    reg.dst.stride[d] = region->dst()->stride()->data()[d];
                }
                regs.emplace_back(std::move(reg));
            }
        }
    }
    return valid;
}

1.1.2.3 _scheduleUnit

// source/core/Schedule.cpp
/**
 * Build the op list of one pipeline: select the ops to run for the given configuration,
 * then wrap them (with their input / output tensors) into OpCacheInfo entries.
 */
static vector<Schedule::OpCacheInfo> _scheduleUnit(const Net* net, const ScheduleConfig& configs,
                                                    const vector<shared_ptr<Tensor>>& allTensors) {
    // Step 1: which ops take part in this path.
    vector<const Op*> selectedOps;
    generateScheduleGraph(selectedOps, net, configs, allTensors);

    // Step 2: attach tensors to every selected op.
    vector<Schedule::OpCacheInfo> pipelineOps;
    initPipelineInfosFromOps(pipelineOps, selectedOps, allTensors);
    return pipelineOps;
}

1.1.2.3.1 generateScheduleGraph

    生成调度图:根据 ScheduleConfig 的 path 筛选出本次会话需要执行的算子。

// source/core/Schedule.cpp
/**
 * Select the ops to run for one configuration and append them to `ops` in net order.
 *
 * - Empty path (no inputs and no outputs): `ops` is cleared and every op of the net is taken.
 * - Otherwise: starting from the path's output names (tensor names in Tensor mode, op names
 *   in Op mode), ops are pulled in backwards through their input tensors until a tensor
 *   marked as path input is reached.
 *
 * `allTensors` is not used by this function.
 */
static void generateScheduleGraph(vector<const Op*>& ops, const Net* net, const ScheduleConfig& configs,
                                  const vector<shared_ptr<Tensor>>& allTensors) {

        // for (int i = 0; i < net->oplists()->size(); ++i) {
        //     auto op       = net->oplists()->Get(i);
        //     MNN_PRINT("generateScheduleGraph, op type:%s, op name:%s\n", EnumNameOpType(op->type()), op->name()->c_str());
        // }

    const bool useWholeNet = configs.path.inputs.empty() && configs.path.outputs.empty();
    if (useWholeNet) {
        // Use Default Linear schedule: every op, in net order
        // (the article's sample model has 208 ops).
        ops.clear();
        ops.reserve(net->oplists()->size());
        for (int opIdx = 0; opIdx < net->oplists()->size(); ++opIdx) {
            ops.emplace_back(net->oplists()->GetAs<Op>(opIdx));
        }
        return;
    }

    // Per tensor -- 0: not set, 1: needed (output side), 2: path input (stops the search).
    std::vector<int> tensorMask(net->tensorName()->size(), 0);
    // Per op -- 0: not selected, 1: selected for this path.
    std::vector<int> opMask(net->oplists()->size(), 0);

    // Mark a tensor as needed unless it is a path input.
    auto requireTensor = [&tensorMask](int tensorIndex) {
        if (tensorMask[tensorIndex] != 2) {
            tensorMask[tensorIndex] = 1;
        }
    };

    // Set Initial Status
    const std::set<std::string> inputNames(configs.path.inputs.begin(), configs.path.inputs.end());
    const std::set<std::string> outputNames(configs.path.outputs.begin(), configs.path.outputs.end());

    if (configs.path.mode == ScheduleConfig::Path::Mode::Tensor) {
        // Names refer to tensors. If both input/output, set as input.
        for (int t = 0; t < tensorMask.size(); ++t) {
            const std::string tensorName = net->tensorName()->GetAsString(t)->c_str();
            if (inputNames.count(tensorName) > 0) {
                tensorMask[t] = 2;
            } else if (outputNames.count(tensorName) > 0) {
                tensorMask[t] = 1;
            }
        }
    } else {
        // Op Mode: names refer to ops.
        for (int opIdx = 0; opIdx < opMask.size(); ++opIdx) {
            auto op = net->oplists()->GetAs<Op>(opIdx);
            if (nullptr == op->name()) {
                continue;
            }
            const std::string opName = op->name()->c_str();
            if (outputNames.count(opName) > 0) {
                // An output op is selected and all tensors it touches become needed.
                opMask[opIdx] = 1;
                if (auto outs = op->outputIndexes()) {
                    for (int j = 0; j < outs->size(); ++j) {
                        requireTensor(outs->data()[j]);
                    }
                }
                if (auto ins = op->inputIndexes()) {
                    for (int j = 0; j < ins->size(); ++j) {
                        requireTensor(ins->data()[j]);
                    }
                }
            }
            if (inputNames.count(opName) > 0) {
                // An input op is selected and its outputs become path inputs.
                opMask[opIdx] = 1;
                if (auto outs = op->outputIndexes()) {
                    for (int j = 0; j < outs->size(); ++j) {
                        tensorMask[outs->data()[j]] = 2;
                    }
                }
            }
        }
    }

    // Propagate backwards until no further op gets selected.
    bool changed = true;
    while (changed) {
        changed = false;
        for (int opIdx = 0; opIdx < opMask.size(); ++opIdx) {
            if (opMask[opIdx] > 0) {
                continue;
            }
            auto op = net->oplists()->GetAs<Op>(opIdx);
            // Select the op when any of its outputs is needed.
            if (auto outs = op->outputIndexes()) {
                for (int j = 0; j < outs->size(); ++j) {
                    if (tensorMask[outs->data()[j]] == 1) {
                        opMask[opIdx] = 1;
                        changed       = true;
                    }
                }
            }
            if (0 == opMask[opIdx]) {
                continue;
            }
            // A newly selected op makes its inputs needed in turn.
            if (auto ins = op->inputIndexes()) {
                for (int j = 0; j < ins->size(); ++j) {
                    requireTensor(ins->data()[j]);
                }
            }
        }
    }

    // Emit the selected ops in their original order.
    for (int opIdx = 0; opIdx < opMask.size(); ++opIdx) {
        if (opMask[opIdx] > 0) {
            ops.emplace_back(net->oplists()->GetAs<Op>(opIdx));
        }
    }
}

1.1.2.3.2 initPipelineInfosFromOps


// source/utils/InitNet.cpp
/**
 * Build one Schedule::OpCacheInfo per op and append it to `infos`.
 *
 * For every op in `ops`, the record stores the op pointer plus raw (non-owning)
 * pointers to the tensors named by the op's output and input index lists,
 * looked up in `allTensors`. The record is appended only when
 * needComputeOp(op) returns true.
 *
 * @param infos       destination list; records are appended in the order of `ops`.
 * @param ops         ops to describe.
 * @param allTensors  tensor table indexed by the op's input/output indexes.
 */
void initPipelineInfosFromOps(std::vector<Schedule::OpCacheInfo>& infos, std::vector<const Op*>& ops, const std::vector<std::shared_ptr<Tensor>>& allTensors) {
    for (const Op* current : ops) {
        // Cache record for this op.
        Schedule::OpCacheInfo cache;
        cache.op = current;

        // Output tensors, in the order listed by outputIndexes().
        if (const auto* outIdx = current->outputIndexes()) {
            const auto* slot       = outIdx->data();
            const auto* const stop = slot + outIdx->size();
            for (; slot != stop; ++slot) {
                cache.outputs.push_back(allTensors[*slot].get());
            }
        }

        // Input tensors, in the order listed by inputIndexes().
        if (const auto* inIdx = current->inputIndexes()) {
            const auto* slot       = inIdx->data();
            const auto* const stop = slot + inIdx->size();
            for (; slot != stop; ++slot) {
                cache.inputs.push_back(allTensors[*slot].get());
            }
        }

        // Only ops accepted by needComputeOp() are kept in the pipeline list.
        if (!needComputeOp(current)) {
            continue;
        }
        infos.push_back(std::move(cache));
    }
}

1.1.2.3.3 Op 算子

// schema/current/MNN_generated.h
// Generated read-only accessor table for a single operator ("Op") of the model.
//
// Every accessor reads one field of the underlying flatbuffers::Table through
// GetPointer<>/GetField<>; the numeric first argument (4, 6, ... 16) identifies
// the field inside the table, and the second argument of GetField<> is the value
// returned when the field is absent. Fields exposed here:
//   4  inputIndexes            int32 vector
//   6  main_type               tag of the `main` union (OpParameter)
//   8  main                    untyped pointer to the union payload
//   10 name                    string
//   12 outputIndexes           int32 vector
//   14 type                    OpType
//   16 defaultDimentionFormat  MNN_DATA_FORMAT
// Pointer-returning accessors may return nullptr; the callers visible in this
// file check inputIndexes()/outputIndexes() against nullptr before use.
// NOTE(review): this code appears to be emitted by the FlatBuffers compiler
// (see MNN_generated.h) and should not be edited by hand - confirm.
struct Op FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  typedef OpT NativeTableType;
  static const flatbuffers::TypeTable *MiniReflectTypeTable() {
    return OpTypeTable();
  }
  // Indexes of the tensors consumed by this op.
  const flatbuffers::Vector<int32_t> *inputIndexes() const {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(4);
  }
  // Tag telling which parameter type `main()` points to (0 when absent).
  OpParameter main_type() const {
    return static_cast<OpParameter>(GetField<uint8_t>(6, 0));
  }
  // Untyped pointer to the op's parameter payload; interpret via main_type().
  const void *main() const {
    return GetPointer<const void *>(8);
  }
  // Typed views of `main()`. Each main_as_X() returns the payload cast to X when
  // main_type() equals OpParameter_X, and nullptr otherwise.
  template<typename T> const T *main_as() const;
  const QuantizedAdd *main_as_QuantizedAdd() const {
    return main_type() == OpParameter_QuantizedAdd ? static_cast<const QuantizedAdd *>(main()) : nullptr;
  }
  const ArgMax *main_as_ArgMax() const {
    return main_type() == OpParameter_ArgMax ? static_cast<const ArgMax *>(main()) : nullptr;
  }
  const AsString *main_as_AsString() const {
    return main_type() == OpParameter_AsString ? static_cast<const AsString *>(main()) : nullptr;
  }
  const Axis *main_as_Axis() const {
    return main_type() == OpParameter_Axis ? static_cast<const Axis *>(main()) : nullptr;
  }
  const BatchNorm *main_as_BatchNorm() const {
    return main_type() == OpParameter_BatchNorm ? static_cast<const BatchNorm *>(main()) : nullptr;
  }
  const BinaryOp *main_as_BinaryOp() const {
    return main_type() == OpParameter_BinaryOp ? static_cast<const BinaryOp *>(main()) : nullptr;
  }
  const Blob *main_as_Blob() const {
    return main_type() == OpParameter_Blob ? static_cast<const Blob *>(main()) : nullptr;
  }
  const CastParam *main_as_CastParam() const {
    return main_type() == OpParameter_CastParam ? static_cast<const CastParam *>(main()) : nullptr;
  }
  const Convolution2D *main_as_Convolution2D() const {
    return main_type() == OpParameter_Convolution2D ? static_cast<const Convolution2D *>(main()) : nullptr;
  }
  const Crop *main_as_Crop() const {
    return main_type() == OpParameter_Crop ? static_cast<const Crop *>(main()) : nullptr;
  }
  const CropAndResize *main_as_CropAndResize() const {
    return main_type() == OpParameter_CropAndResize ? static_cast<const CropAndResize *>(main()) : nullptr;
  }
  const Dequantize *main_as_Dequantize() const {
    return main_type() == OpParameter_Dequantize ? static_cast<const Dequantize *>(main()) : nullptr;
  }
  const DetectionOutput *main_as_DetectionOutput() const {
    return main_type() == OpParameter_DetectionOutput ? static_cast<const DetectionOutput *>(main()) : nullptr;
  }
  const Eltwise *main_as_Eltwise() const {
    return main_type() == OpParameter_Eltwise ? static_cast<const Eltwise *>(main()) : nullptr;
  }
  const ExpandDims *main_as_ExpandDims() const {
    return main_type() == OpParameter_ExpandDims ? static_cast<const ExpandDims *>(main()) : nullptr;
  }
  const Fill *main_as_Fill() const {
    return main_type() == OpParameter_Fill ? static_cast<const Fill *>(main()) : nullptr;
  }
  const Flatten *main_as_Flatten() const {
    return main_type() == OpParameter_Flatten ? static_cast<const Flatten *>(main()) : nullptr;
  }
  const Gather *main_as_Gather() const {
    return main_type() == OpParameter_Gather ? static_cast<const Gather *>(main()) : nullptr;
  }
  const GatherV2 *main_as_GatherV2() const {
    return main_type() == OpParameter_GatherV2 ? static_cast<const GatherV2 *>(main()) : nullptr;
  }
  const InnerProduct *main_as_InnerProduct() const {
    return main_type() == OpParameter_InnerProduct ? static_cast<const InnerProduct *>(main()) : nullptr;
  }
  const Input *main_as_Input() const {
    return main_type() == OpParameter_Input ? static_cast<const Input *>(main()) : nullptr;
  }
  const Interp *main_as_Interp() const {
    return main_type() == OpParameter_Interp ? static_cast<const Interp *>(main()) : nullptr;
  }
  const LRN *main_as_LRN() const {
    return main_type() == OpParameter_LRN ? static_cast<const LRN *>(main()) : nullptr;
  }
  const LSTM *main_as_LSTM() const {
    return main_type() == OpParameter_LSTM ? static_cast<const LSTM *>(main()) : nullptr;
  }
  const MatMul *main_as_MatMul() const {
    return main_type() == OpParameter_MatMul ? static_cast<const MatMul *>(main()) : nullptr;
  }
  const NonMaxSuppressionV2 *main_as_NonMaxSuppressionV2() const {
    return main_type() == OpParameter_NonMaxSuppressionV2 ? static_cast<const NonMaxSuppressionV2 *>(main()) : nullptr;
  }
  const Normalize *main_as_Normalize() const {
    return main_type() == OpParameter_Normalize ? static_cast<const Normalize *>(main()) : nullptr;
  }
  const PackParam *main_as_PackParam() const {
    return main_type() == OpParameter_PackParam ? static_cast<const PackParam *>(main()) : nullptr;
  }
  const Permute *main_as_Permute() const {
    return main_type() == OpParameter_Permute ? static_cast<const Permute *>(main()) : nullptr;
  }
  const Plugin *main_as_Plugin() const {
    return main_type() == OpParameter_Plugin ? static_cast<const Plugin *>(main()) : nullptr;
  }
  const Pool *main_as_Pool() const {
    return main_type() == OpParameter_Pool ? static_cast<const Pool *>(main()) : nullptr;
  }
  const PRelu *main_as_PRelu() const {
    return main_type() == OpParameter_PRelu ? static_cast<const PRelu *>(main()) : nullptr;
  }
  const PriorBox *main_as_PriorBox() const {
    return main_type() == OpParameter_PriorBox ? static_cast<const PriorBox *>(main()) : nullptr;
  }
  const Proposal *main_as_Proposal() const {
    return main_type() == OpParameter_Proposal ? static_cast<const Proposal *>(main()) : nullptr;
  }
  const QuantizedAvgPool *main_as_QuantizedAvgPool() const {
    return main_type() == OpParameter_QuantizedAvgPool ? static_cast<const QuantizedAvgPool *>(main()) : nullptr;
  }
  const QuantizedBiasAdd *main_as_QuantizedBiasAdd() const {
    return main_type() == OpParameter_QuantizedBiasAdd ? static_cast<const QuantizedBiasAdd *>(main()) : nullptr;
  }
  const QuantizedConcat *main_as_QuantizedConcat() const {
    return main_type() == OpParameter_QuantizedConcat ? static_cast<const QuantizedConcat *>(main()) : nullptr;
  }
  const QuantizedLogistic *main_as_QuantizedLogistic() const {
    return main_type() == OpParameter_QuantizedLogistic ? static_cast<const QuantizedLogistic *>(main()) : nullptr;
  }
  const QuantizedMatMul *main_as_QuantizedMatMul() const {
    return main_type() == OpParameter_QuantizedMatMul ? static_cast<const QuantizedMatMul *>(main()) : nullptr;
  }
  const QuantizedMaxPool *main_as_QuantizedMaxPool() const {
    return main_type() == OpParameter_QuantizedMaxPool ? static_cast<const QuantizedMaxPool *>(main()) : nullptr;
  }
  const QuantizedRelu *main_as_QuantizedRelu() const {
    return main_type() == OpParameter_QuantizedRelu ? static_cast<const QuantizedRelu *>(main()) : nullptr;
  }
  const QuantizedRelu6 *main_as_QuantizedRelu6() const {
    return main_type() == OpParameter_QuantizedRelu6 ? static_cast<const QuantizedRelu6 *>(main()) : nullptr;
  }
  const QuantizedReshape *main_as_QuantizedReshape() const {
    return main_type() == OpParameter_QuantizedReshape ? static_cast<const QuantizedReshape *>(main()) : nullptr;
  }
  const QuantizedSoftmax *main_as_QuantizedSoftmax() const {
    return main_type() == OpParameter_QuantizedSoftmax ? static_cast<const QuantizedSoftmax *>(main()) : nullptr;
  }
  const QuantizeMaxMin *main_as_QuantizeMaxMin() const {
    return main_type() == OpParameter_QuantizeMaxMin ? static_cast<const QuantizeMaxMin *>(main()) : nullptr;
  }
  const QuantizeV2 *main_as_QuantizeV2() const {
    return main_type() == OpParameter_QuantizeV2 ? static_cast<const QuantizeV2 *>(main()) : nullptr;
  }
  const Range *main_as_Range() const {
    return main_type() == OpParameter_Range ? static_cast<const Range *>(main()) : nullptr;
  }
  const Rank *main_as_Rank() const {
    return main_type() == OpParameter_Rank ? static_cast<const Rank *>(main()) : nullptr;
  }
  const ReduceJoin *main_as_ReduceJoin() const {
    return main_type() == OpParameter_ReduceJoin ? static_cast<const ReduceJoin *>(main()) : nullptr;
  }
  const ReductionParam *main_as_ReductionParam() const {
    return main_type() == OpParameter_ReductionParam ? static_cast<const ReductionParam *>(main()) : nullptr;
  }
  const Relu *main_as_Relu() const {
    return main_type() == OpParameter_Relu ? static_cast<const Relu *>(main()) : nullptr;
  }
  const Relu6 *main_as_Relu6() const {
    return main_type() == OpParameter_Relu6 ? static_cast<const Relu6 *>(main()) : nullptr;
  }
  const RequantizationRange *main_as_RequantizationRange() const {
    return main_type() == OpParameter_RequantizationRange ? static_cast<const RequantizationRange *>(main()) : nullptr;
  }
  const Requantize *main_as_Requantize() const {
    return main_type() == OpParameter_Requantize ? static_cast<const Requantize *>(main()) : nullptr;
  }
  const Reshape *main_as_Reshape() const {
    return main_type() == OpParameter_Reshape ? static_cast<const Reshape *>(main()) : nullptr;
  }
  const Resize *main_as_Resize() const {
    return main_type() == OpParameter_Resize ? static_cast<const Resize *>(main()) : nullptr;
  }
  const RoiParameters *main_as_RoiParameters() const {
    return main_type() == OpParameter_RoiParameters ? static_cast<const RoiParameters *>(main()) : nullptr;
  }
  const Scale *main_as_Scale() const {
    return main_type() == OpParameter_Scale ? static_cast<const Scale *>(main()) : nullptr;
  }
  const Selu *main_as_Selu() const {
    return main_type() == OpParameter_Selu ? static_cast<const Selu *>(main()) : nullptr;
  }
  const Size *main_as_Size() const {
    return main_type() == OpParameter_Size ? static_cast<const Size *>(main()) : nullptr;
  }
  const Slice *main_as_Slice() const {
    return main_type() == OpParameter_Slice ? static_cast<const Slice *>(main()) : nullptr;
  }
  const SliceTf *main_as_SliceTf() const {
    return main_type() == OpParameter_SliceTf ? static_cast<const SliceTf *>(main()) : nullptr;
  }
  const SpaceBatch *main_as_SpaceBatch() const {
    return main_type() == OpParameter_SpaceBatch ? static_cast<const SpaceBatch *>(main()) : nullptr;
  }
  const SqueezeParam *main_as_SqueezeParam() const {
    return main_type() == OpParameter_SqueezeParam ? static_cast<const SqueezeParam *>(main()) : nullptr;
  }
  const StridedSliceParam *main_as_StridedSliceParam() const {
    return main_type() == OpParameter_StridedSliceParam ? static_cast<const StridedSliceParam *>(main()) : nullptr;
  }
  const TensorConvertInfo *main_as_TensorConvertInfo() const {
    return main_type() == OpParameter_TensorConvertInfo ? static_cast<const TensorConvertInfo *>(main()) : nullptr;
  }
  const TfQuantizedConv2D *main_as_TfQuantizedConv2D() const {
    return main_type() == OpParameter_TfQuantizedConv2D ? static_cast<const TfQuantizedConv2D *>(main()) : nullptr;
  }
  const TopKV2 *main_as_TopKV2() const {
    return main_type() == OpParameter_TopKV2 ? static_cast<const TopKV2 *>(main()) : nullptr;
  }
  const Transpose *main_as_Transpose() const {
    return main_type() == OpParameter_Transpose ? static_cast<const Transpose *>(main()) : nullptr;
  }
  const UnaryOp *main_as_UnaryOp() const {
    return main_type() == OpParameter_UnaryOp ? static_cast<const UnaryOp *>(main()) : nullptr;
  }
  const MomentsParam *main_as_MomentsParam() const {
    return main_type() == OpParameter_MomentsParam ? static_cast<const MomentsParam *>(main()) : nullptr;
  }
  const RNNParam *main_as_RNNParam() const {
    return main_type() == OpParameter_RNNParam ? static_cast<const RNNParam *>(main()) : nullptr;
  }
  const BatchMatMulParam *main_as_BatchMatMulParam() const {
    return main_type() == OpParameter_BatchMatMulParam ? static_cast<const BatchMatMulParam *>(main()) : nullptr;
  }
  const QuantizedFloatParam *main_as_QuantizedFloatParam() const {
    return main_type() == OpParameter_QuantizedFloatParam ? static_cast<const QuantizedFloatParam *>(main()) : nullptr;
  }
  const DepthSpaceParam *main_as_DepthSpaceParam() const {
    return main_type() == OpParameter_DepthSpaceParam ? static_cast<const DepthSpaceParam *>(main()) : nullptr;
  }
  const EltwiseInt8 *main_as_EltwiseInt8() const {
    return main_type() == OpParameter_EltwiseInt8 ? static_cast<const EltwiseInt8 *>(main()) : nullptr;
  }
  const ReverseSequenceParam *main_as_ReverseSequenceParam() const {
    return main_type() == OpParameter_ReverseSequenceParam ? static_cast<const ReverseSequenceParam *>(main()) : nullptr;
  }
  const Extra *main_as_Extra() const {
    return main_type() == OpParameter_Extra ? static_cast<const Extra *>(main()) : nullptr;
  }
  const Pool3D *main_as_Pool3D() const {
    return main_type() == OpParameter_Pool3D ? static_cast<const Pool3D *>(main()) : nullptr;
  }
  const Convolution3D *main_as_Convolution3D() const {
    return main_type() == OpParameter_Convolution3D ? static_cast<const Convolution3D *>(main()) : nullptr;
  }
  const ELU *main_as_ELU() const {
    return main_type() == OpParameter_ELU ? static_cast<const ELU *>(main()) : nullptr;
  }
  const DetectionPostProcessParam *main_as_DetectionPostProcessParam() const {
    return main_type() == OpParameter_DetectionPostProcessParam ? static_cast<const DetectionPostProcessParam *>(main()) : nullptr;
  }
  const OneHotParam *main_as_OneHotParam() const {
    return main_type() == OpParameter_OneHotParam ? static_cast<const OneHotParam *>(main()) : nullptr;
  }
  const PadParam *main_as_PadParam() const {
    return main_type() == OpParameter_PadParam ? static_cast<const PadParam *>(main()) : nullptr;
  }
  const WhileParam *main_as_WhileParam() const {
    return main_type() == OpParameter_WhileParam ? static_cast<const WhileParam *>(main()) : nullptr;
  }
  const IfParam *main_as_IfParam() const {
    return main_type() == OpParameter_IfParam ? static_cast<const IfParam *>(main()) : nullptr;
  }
  const RandomUniform *main_as_RandomUniform() const {
    return main_type() == OpParameter_RandomUniform ? static_cast<const RandomUniform *>(main()) : nullptr;
  }
  const LayerNorm *main_as_LayerNorm() const {
    return main_type() == OpParameter_LayerNorm ? static_cast<const LayerNorm *>(main()) : nullptr;
  }
  const TensorArray *main_as_TensorArray() const {
    return main_type() == OpParameter_TensorArray ? static_cast<const TensorArray *>(main()) : nullptr;
  }
  const LSTMBlockCell *main_as_LSTMBlockCell() const {
    return main_type() == OpParameter_LSTMBlockCell ? static_cast<const LSTMBlockCell *>(main()) : nullptr;
  }
  const GridSample *main_as_GridSample() const {
    return main_type() == OpParameter_GridSample ? static_cast<const GridSample *>(main()) : nullptr;
  }
  const LoopParam *main_as_LoopParam() const {
    return main_type() == OpParameter_LoopParam ? static_cast<const LoopParam *>(main()) : nullptr;
  }
  const ImageProcessParam *main_as_ImageProcessParam() const {
    return main_type() == OpParameter_ImageProcessParam ? static_cast<const ImageProcessParam *>(main()) : nullptr;
  }
  const CumSum *main_as_CumSum() const {
    return main_type() == OpParameter_CumSum ? static_cast<const CumSum *>(main()) : nullptr;
  }
  // Name of the op.
  const flatbuffers::String *name() const {
    return GetPointer<const flatbuffers::String *>(10);
  }
  // Indexes of the tensors produced by this op.
  const flatbuffers::Vector<int32_t> *outputIndexes() const {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(12);
  }
  // Operator kind (0 when the field is absent).
  OpType type() const {
    return static_cast<OpType>(GetField<int32_t>(14, 0));
  }
  // Default data layout for this op; the stored value falls back to 1 when absent.
  MNN_DATA_FORMAT defaultDimentionFormat() const {
    return static_cast<MNN_DATA_FORMAT>(GetField<int8_t>(16, 1));
  }
  // Structural validation of the table: checks every field listed above, in
  // field order, and returns false as soon as one check fails (&& chain).
  bool Verify(flatbuffers::Verifier &verifier) const {
    return VerifyTableStart(verifier) &&
           VerifyOffset(verifier, 4) &&
           verifier.VerifyVector(inputIndexes()) &&
           VerifyField<uint8_t>(verifier, 6) &&
           VerifyOffset(verifier, 8) &&
           VerifyOpParameter(verifier, main(), main_type()) &&
           VerifyOffset(verifier, 10) &&
           verifier.VerifyString(name()) &&
           VerifyOffset(verifier, 12) &&
           verifier.VerifyVector(outputIndexes()) &&
           VerifyField<int32_t>(verifier, 14) &&
           VerifyField<int8_t>(verifier, 16) &&
           verifier.EndTable();
  }
  // Conversion to/from the mutable object form OpT; defined elsewhere.
  OpT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
  void UnPackTo(OpT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
  static flatbuffers::Offset<Op> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OpT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
};

1.1.2.4 setInputOutputForOps

// source/utils/InitNet.cpp
/**
 * Classify tensors as graph INPUT / OUTPUT based on how `ops` use them.
 *
 * A tensor index that is consumed by some op but produced by none is marked
 * INPUT (unless it is already CONSTANT or TRAINABLE); an index that is produced
 * but never consumed is marked OUTPUT (only if its usage is still NORMAL).
 *
 * @param allTensors  tensor table indexed by the ops' input/output indexes.
 * @param ops         ops whose index lists are scanned; must not contain Input ops
 *                    (checked with MNN_ASSERT).
 * @param isStatic    when true, region origins of every tensor are also taken
 *                    into account (see step 0 below).
 */
void setInputOutputForOps(std::vector<std::shared_ptr<Tensor>>& allTensors, const std::vector<const Op*>& ops, bool isStatic) {
    std::set<int> inputIndexes;
    std::set<int> outputIndexes;
    // 0. deal with virtual tensors for a static model:
    // when : A (Any_Op) -----> B (Raster_Op)
    // the tensors will look like this:
    //      A_outputs : a_tensor
    //      B_inputs  : b_tensor (virtual)
    //      b_tensor.describe.origin = a_tensor_ptr
    // b_tensor is not an input tensor, a_tensor is not an output tensor,
    // so add b_tensor to outputIndexes and a_tensor to inputIndexes.
    if (isStatic) {
        std::unordered_map<Tensor*, int> tensorMap;
        for (int index = 0; index < allTensors.size(); index++) {
            tensorMap.insert(std::make_pair(allTensors[index].get(), index));
        }
        for (int index = 0; index < allTensors.size(); index++) {
            auto des = TensorUtils::getDescribe(allTensors[index].get());
            if (des->regions.empty()) {
                continue;
            }
            // A tensor with at least one region is produced from other tensors.
            outputIndexes.insert(index);
            for (int i = 0; i < des->regions.size(); i++) {
                // Single lookup. The previous code asserted with find() and then
                // read through operator[], which default-inserts a 0 for an
                // unknown origin whenever the assertion is compiled out and
                // would wrongly record tensor 0 as consumed.
                auto found = tensorMap.find(des->regions[i].origin);
                MNN_ASSERT(found != tensorMap.end());
                if (found == tensorMap.end()) {
                    continue;
                }
                inputIndexes.insert(found->second);
            }
        }
    }
    // 1. insert every output/input index of every op into outputIndexes/inputIndexes
    for (auto op : ops) {
        if (nullptr != op->outputIndexes()) {
            auto data = op->outputIndexes()->data();
            for (int j = 0; j < op->outputIndexes()->size(); ++j) {
                outputIndexes.insert(data[j]);
            }
        }
        if (nullptr != op->inputIndexes()) {
            auto data = op->inputIndexes()->data();
            for (int j = 0; j < op->inputIndexes()->size(); ++j) {
                inputIndexes.insert(data[j]);
            }
        }
        MNN_ASSERT(OpType_Input != op->type());
    }
    // 2. an index that is in outputIndexes but not in inputIndexes is a graph output;
    //    an index that is in inputIndexes but not in outputIndexes is a graph input.
    //    std::set iterates in sorted order, as std::set_difference requires.
    std::set<int> input;
    std::set<int> output;
    std::set_difference(outputIndexes.begin(), outputIndexes.end(), inputIndexes.begin(), inputIndexes.end(),
                        std::inserter(output, output.begin()));
    std::set_difference(inputIndexes.begin(), inputIndexes.end(), outputIndexes.begin(), outputIndexes.end(),
                        std::inserter(input, input.begin()));
    // 3. set usage for each tensor by index
    for (auto index : input) {
        auto des = TensorUtils::getDescribe(allTensors[index].get());
        // Constants and trainable parameters keep their usage.
        if (des->usage == Tensor::InsideDescribe::CONSTANT || des->usage == Tensor::InsideDescribe::TRAINABLE) {
            continue;
        }
        des->usage = Tensor::InsideDescribe::INPUT;
    }
    for (auto index : output) {
        auto des = TensorUtils::getDescribe(allTensors[index].get());
        // Only tensors that have no special usage yet become outputs.
        if (des->usage == Tensor::InsideDescribe::NORMAL) {
            des->usage = TensorUsage::OUTPUT;
        }
    }
}

1.1.2.5 GeometryComputerUtils::buildConstantTensors

// source/geometry/GeometryComputerUtils.cpp
/**
 * Mark the ops (and tensors) that can be treated as constants.
 *
 * Pass 1: a non-Const op becomes Schedule::CONSTANT when every input is either
 *         already CONSTANT or its content is not needed (opNeedContent false);
 *         its outputs are marked CONSTANT.
 * Pass 2: inputs whose content is required for size computation
 *         (SizeComputer::needInputContent) get the GEOMETRY_STAGE bit and are
 *         forced to CONSTANT.
 * Pass 3: if pass 2 forced anything, ops producing a CONSTANT tensor are turned
 *         CONSTANT together with all their inputs/outputs, repeated until stable.
 * Pass 4: for every CONSTANT op, inputs get the GEOMETRY_STAGE bit and outputs
 *         are marked CONSTANT.
 *
 * @param infos  op cache records, updated in place.
 * @return index of the last record for which pass 2 forced an input to
 *         CONSTANT, or -1 if none was forced.
 */
int GeometryComputerUtils::buildConstantTensors(std::vector<Schedule::OpCacheInfo>& infos) {
    // Pass 1: detect ops that only depend on constants ("middle" constants).
    for (auto& entry : infos) {
        if (OpType_Const == entry.op->type()) {
            continue;
        }
        bool foldable = true;
        for (int slot = 0; slot < entry.inputs.size(); ++slot) {
            const bool inputIsConst =
                TensorUtils::getDescribe(entry.inputs[slot])->usage == Tensor::InsideDescribe::CONSTANT;
            // A non-constant input whose content is needed prevents folding.
            if (!inputIsConst && OpCommonUtils::opNeedContent(entry.op, slot)) {
                foldable = false;
                break;
            }
        }
        if (!foldable) {
            continue;
        }
        for (auto tensor : entry.outputs) {
            TensorUtils::getDescribe(tensor)->usage = Tensor::InsideDescribe::CONSTANT;
        }
        entry.type = Schedule::CONSTANT;
    }

    // Pass 2: inputs required by size computation must be available as constants.
    int breakIndex = -1;
    for (int infoIndex = 0; infoIndex < infos.size(); ++infoIndex) {
        auto& entry = infos[infoIndex];
        const auto opType = entry.op->type();
        if (opType == OpType_Const) {
            continue;
        }
        // For compatibility with old models
        if (opType == OpType_Where && entry.op->main_type() != OpParameter_Extra) {
            continue;
        }
        auto contentInputs = SizeComputer::needInputContent(entry.op, entry.inputs.size());
        for (auto index : contentInputs) {
            if (!(index < entry.inputs.size())) {
                continue;
            }
            auto des = TensorUtils::getDescribe(entry.inputs[index]);
            des->stageMask |= MNN::Tensor::InsideDescribe::StageInfo::GEOMETRY_STAGE;
            if (des->usage != Tensor::InsideDescribe::CONSTANT) {
                breakIndex = infoIndex;
                des->usage = Tensor::InsideDescribe::CONSTANT;
            }
        }
    }

    // Pass 3: propagate the forced constants until nothing changes any more.
    if (breakIndex >= 0) {
        bool changed;
        do {
            changed = false;
            for (auto& entry : infos) {
                if (entry.type == Schedule::CONSTANT) {
                    continue;
                }
                bool producesConst = false;
                for (auto tensor : entry.outputs) {
                    if (TensorUtils::getDescribe(tensor)->usage == Tensor::InsideDescribe::CONSTANT) {
                        producesConst = true;
                        break;
                    }
                }
                if (!producesConst) {
                    continue;
                }
                for (auto tensor : entry.outputs) {
                    TensorUtils::getDescribe(tensor)->usage = Tensor::InsideDescribe::CONSTANT;
                }
                for (auto tensor : entry.inputs) {
                    TensorUtils::getDescribe(tensor)->usage = Tensor::InsideDescribe::CONSTANT;
                }
                entry.type = Schedule::CONSTANT;
                changed    = true;
            }
        } while (changed);
    }

    // Pass 4: finalize stage mask / usage for everything attached to constant ops.
    for (auto& entry : infos) {
        if (entry.type != Schedule::CONSTANT) {
            continue;
        }
        for (auto tensor : entry.inputs) {
            TensorUtils::getDescribe(tensor)->stageMask |= MNN::Tensor::InsideDescribe::StageInfo::GEOMETRY_STAGE;
        }
        for (auto tensor : entry.outputs) {
            TensorUtils::getDescribe(tensor)->usage = Tensor::InsideDescribe::CONSTANT;
        }
    }
    return breakIndex;
}
/**
 * Tell whether `op` needs the content (data) of its input number `index`.
 *
 * @param op     operator to inspect.
 * @param index  position of the input in the op's input list.
 * @return false for the op/input combinations listed below, true otherwise.
 */
bool OpCommonUtils::opNeedContent(const MNN::Op* op, int index) {
    const int opType = op->type();
    switch (opType) {
        // These ops never need the content of any input.
        case OpType_ZerosLike:
        case OpType_ZeroGrad:
        case OpType_Shape:
        case OpType_Rank:
        case OpType_Const:
        case OpType_Size:
        case OpType_PriorBox:
            return false;
        // Content of input 1 is not needed for these ops.
        case OpType_Interp:
        case OpType_Crop:
        case OpType_Reshape:
        case OpType_Reduction:
        case OpType_Resize:
            return 1 != index;
        // Content of input 2 is not needed for GridSample.
        case OpType_GridSample:
            return 2 != index;
#ifdef MNN_SUPPORT_RENDER
        case OpType_RasterAndInterpolate:
        {
            if (0 != index) {
                return true;
            }
            // Input 0: the answer depends on the "primitiveType" attribute,
            // which counts as 4 when the attribute is not present.
            int primitiveType = 4;
            if (op->main_type() == OpParameter_Extra) {
                auto extra = op->main_as_Extra();
                if (nullptr != extra->attr()) {
                    for (int i=0; i<extra->attr()->size(); ++i) {
                        auto attr = extra->attr()->GetAs<Attribute>(i);
                        if (attr->key()->str() == "primitiveType") {
                            primitiveType = attr->i();
                            break;
                        }
                    }
                }
            }
            return primitiveType > 4;
        }
#endif
        default:
            return true;
    }
}

1.1.3 Tensor 张量

// project/android/demo/app/includes/MNN/Tensor.hpp
/**
 * Tensor: a halide_buffer_t (shape, strides, element type, host/device handles)
 * plus a pointer to an InsideDescribe holding extra information.
 * Instances cannot be copied or moved; they are created through the
 * constructors or the static create*/createDevice factories below.
 */
class MNN_PUBLIC Tensor {
public:
    struct InsideDescribe;

    /** dimension type used to create tensor */
    enum DimensionType {
        /** for tensorflow net type. uses NHWC as data format. */
        TENSORFLOW,
        /** for caffe net type. uses NCHW as data format. */
        CAFFE,
        /** for caffe net type. uses NC4HW4 as data format. */
        CAFFE_C4
    };

    /** handle type */
    enum HandleDataType {
        /** default handle type */
        HANDLE_NONE = 0,
        /** string handle type */
        HANDLE_STRING = 1
    };

    /** dimension reorder flag */
    enum DataReorderType {
        /** default reorder type, do not reorder */
        NO_REORDER = 0,
        /** reorder dimension 4 by 4. usually used with NC4HW4 or NHWC4 while data type is float. */
        REORDER_4 = 1,
        /** reorder dimension 8 by 8. usually used with NC4HW4 or NHWC4 while data type is uint8 or int8. */
        REORDER_8
    };

public:
    /**
     * @brief create a tensor with dimension size and type without acquire memory for data.
     * @param dimSize   dimension size.
     * @param type      dimension type.
     */
    Tensor(int dimSize = 4, DimensionType type = CAFFE);

    /**
     * @brief create a tensor with same shape as given tensor.
     * @param tensor        shape provider.
     * @param type          dimension type.
     * @param allocMemory   acquire memory for data or not.
     * @warning tensor data won't be copied.
     */
    Tensor(const Tensor* tensor, DimensionType type = CAFFE, bool allocMemory = true);

    /** deinitializer */
    ~Tensor();

private:
    // copy/move construction and copy/move assignment are all disabled
    Tensor(const Tensor& tensor)  = delete;
    Tensor(const Tensor&& tensor) = delete;
    Tensor& operator=(const Tensor&) = delete;
    Tensor& operator=(const Tensor&&) = delete;

public:
    /**
     * @brief create tensor with shape, data type and dimension type.
     * @param shape     tensor shape.
     * @param type      data type.
     * @param dimType   dimension type.
     * @return created tensor.
     * @warning memory for data won't be acquired. call backend's onAcquireBuffer to get memory ready.
     */
    static Tensor* createDevice(const std::vector<int>& shape, halide_type_t type, DimensionType dimType = TENSORFLOW);

    /**
     * @brief create tensor with shape and dimension type. data type is represented by `T`.
     * @param shape     tensor shape.
     * @param dimType   dimension type.
     * @return created tensor.
     * @warning memory for data won't be acquired. call backend's onAcquireBuffer to get memory ready.
     */
    template <typename T>
    static Tensor* createDevice(const std::vector<int>& shape, DimensionType dimType = TENSORFLOW) {
        return createDevice(shape, halide_type_of<T>(), dimType);
    }

    /**
     * @brief create tensor with shape, data type, data and dimension type.
     * @param shape     tensor shape.
     * @param type      data type.
     * @param data      data to save.
     * @param dimType   dimension type.
     * @return created tensor.
     */
    static Tensor* create(const std::vector<int>& shape, halide_type_t type, void* data = NULL,
                          DimensionType dimType = TENSORFLOW);

    /**
     * @brief create tensor with shape, data and dimension type. data type is represented by `T`.
     * @param shape     tensor shape.
     * @param data      data to save.
     * @param dimType   dimension type.
     * @return created tensor.
     */
    template <typename T>
    static Tensor* create(const std::vector<int>& shape, void* data = NULL, DimensionType dimType = TENSORFLOW) {
        return create(shape, halide_type_of<T>(), data, dimType);
    }

public:
    /**
     * @brief for DEVICE tensor, copy data from given host tensor.
     * @param hostTensor    host tensor, the data provider.
     * @return true for DEVICE tensor, and false for HOST tensor.
     */
    bool copyFromHostTensor(const Tensor* hostTensor);

    /**
     * @brief for DEVICE tensor, copy data to given host tensor.
     * @param hostTensor    host tensor, the data consumer.
     * @return true for DEVICE tensor, and false for HOST tensor.
     */
    bool copyToHostTensor(Tensor* hostTensor) const;

    /**
     * @brief create HOST tensor from DEVICE tensor, with or without data copying.
     * @param deviceTensor  given device tensor.
     * @param copyData      copy data or not.
     * @return created host tensor.
     */
    static Tensor* createHostTensorFromDevice(const Tensor* deviceTensor, bool copyData = true);

public:
    /**
     * @brief read-only access to the underlying halide buffer.
     * @return reference to the buffer owned by this tensor.
     */
    const halide_buffer_t& buffer() const {
        return mBuffer;
    }
    /**
     * @brief mutable access to the underlying halide buffer.
     * @return reference to the buffer owned by this tensor.
     */
    halide_buffer_t& buffer() {
        return mBuffer;
    }

    /**
     * @brief get dimension type.
     * @return dimension type.
     */
    DimensionType getDimensionType() const;

    /**
     * @brief handle data type. used when data type code is halide_type_handle.
     * @return handle data type.
     */
    HandleDataType getHandleDataType() const;

    /**
     * @brief set data type.
     * @param type data type defined in 'Type_generated.h'.
     */
    void setType(int type);

    /**
     * @brief get data type.
     * @return data type.
     */
    inline halide_type_t getType() const {
        return mBuffer.type;
    }

    /**
     * @brief visit host memory, data type is represented by `T`.
     * @return data point in `T` type.
     */
    template <typename T>
    T* host() const {
        return (T*)mBuffer.host;
    }

    /**
     * @brief visit device memory.
     * @return device data ID. what the ID means varies between backends.
     */
    uint64_t deviceId() const {
        return mBuffer.device;
    }

public:
    /**
     * @brief get number of dimensions.
     * @return the `dimensions` field of the underlying buffer.
     */
    int dimensions() const {
        return mBuffer.dimensions;
    }

    /**
     * @brief get all dimensions' extent.
     * @return dimensions' extent.
     */
    std::vector<int> shape() const;

    /**
     * @brief calculate number of bytes needed to store data taking reordering flag into account.
     * @return bytes needed to store data
     */
    int size() const;

    /**
     * @brief calculate number of elements needed to store data taking reordering flag into account.
     * @return elements needed to store data
     */
    inline int elementSize() const {
        return size() / mBuffer.type.bytes();
    }

public:
    // Shape shortcuts. The axis that is read depends on the dimension type:
    // TENSORFLOW reads (batch, height, width, channel) from dims 0..3, every
    // other dimension type reads (batch, channel, height, width) from dims 0..3.
    // No check is made here that the tensor actually has that many dimensions.
    /** @return extent of dim 2 for TENSORFLOW, extent of dim 3 otherwise. */
    inline int width() const {
        if (getDimensionType() == TENSORFLOW) {
            return mBuffer.dim[2].extent;
        }
        
        return mBuffer.dim[3].extent;
    }
    /** @return extent of dim 1 for TENSORFLOW, extent of dim 2 otherwise. */
    inline int height() const {
        if (getDimensionType() == TENSORFLOW) {
            return mBuffer.dim[1].extent;
        }
        return mBuffer.dim[2].extent;
    }
    /** @return extent of dim 3 for TENSORFLOW, extent of dim 1 otherwise. */
    inline int channel() const {
        if (getDimensionType() == TENSORFLOW) {
            return mBuffer.dim[3].extent;
        }
        return mBuffer.dim[1].extent;
    }
    /** @return extent of dim 0. */
    inline int batch() const {
        return mBuffer.dim[0].extent;
    }

    // visit dimension's extent & stride; `index` is used as-is, without range check
    /** @return stride of dimension `index`. */
    inline int stride(int index) const {
        return mBuffer.dim[index].stride;
    }
    /** @return extent of dimension `index`. */
    inline int length(int index) const {
        return mBuffer.dim[index].extent;
    }
    /** set stride of dimension `index`. */
    inline void setStride(int index, int stride) {
        mBuffer.dim[index].stride = stride;
    }
    /** set extent of dimension `index`. */
    inline void setLength(int index, int length) {
        mBuffer.dim[index].extent = length;
    }

public:
    /**
     * @brief print tensor data. for DEBUG use only.
     */
    void print() const;

private:
    // shape, strides, element type and host/device handles
    halide_buffer_t mBuffer;
    // extra tensor information; the struct is only forward-declared in this header
    struct InsideDescribe* mDescribe;

private:
    // TensorUtils is granted access to the private members above
    friend class TensorUtils;
};

1.1.3.1 Tensor::InsideDescribe

// source/core/TensorUtils.hpp
/**
 * Extra description attached to a Tensor.
 * The struct itself only stores a shared pointer (mContent) to a NativeInsideDescribe;
 * the nested types below are the building blocks of that content.
 */
struct Tensor::InsideDescribe {
    /** An offset plus three strides. Defaults: offset 0, every stride 1. */
    struct View {
        int32_t offset = 0;
        int32_t stride[3] = {1, 1, 1};
    };
    /** A source view, a destination view, a three-element size and the tensor the region refers to. */
    struct Region {
        View src;
        View dst;
        int32_t size[3] = {1, 1, 1};
        // NOTE(review): origin has no default initializer, so it is indeterminate until assigned.
        Tensor* origin;
    };
    /** Four padding amounts, all defaulting to 0. */
    struct pad {
        int32_t left = 0;
        int32_t right = 0;
        int32_t bottom = 0;
        int32_t top = 0;
    };
    enum MemoryType {
        /** The tensor's memory comes from a Backend. */
        MEMORY_BACKEND = 0,

        /** Host memory (upstream note: "host memory is owned by tensor or not"). */
        MEMORY_HOST,

        /** The tensor has no memory. */
        MEMORY_VIRTUAL,

        /**
         * NOTE(review): upstream repeats the MEMORY_HOST comment here.
         * Presumably the memory is supplied from outside the tensor - confirm.
         */
        MEMORY_OUTSIDE,
    };
    enum Usage {
        NORMAL,
        INPUT,
        OUTPUT,
        CONSTANT,
        /** The tensor is a trainable parameter. Trainable parameters should be stored in a different area. */
        TRAINABLE,
    };
    // Mask values; presumably combined into NativeInsideDescribe::stageMask - confirm.
    enum StageInfo {
        GEOMETRY_STAGE = 1,
        CONVERTED_STAGE = 1 << 4
    };
    /** Extra tensor info container; reference counted through the RefCount base class. */
    struct NativeInsideDescribe : public RefCount {
    public:
        /** Dimension format; defaults to MNN_DATA_FORMAT_NC4HW4. */
        MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
        // The two members share storage; the union has no default initializer.
        union {
            /** Separate memory offset. */
            int offset;

            /** Function used to free a handle. */
            void (*handleFreeFunction)(void*);
        } extra;
        MemoryType memoryType = MEMORY_BACKEND;
        /** For DEVICE tensor only. */
        int useCount = 0;
        Usage usage = NORMAL;
        std::vector<Region> regions;
        // Fixed-capacity dimension array; entries are not default-initialized here.
        halide_dimension_t dims[MNN_MAX_TENSOR_DIM];
        // TensorArray attribute
        std::shared_ptr<TensorArrayAttr> tensorArrayAttr;
        // Tensor quantization attribute
        std::shared_ptr<QuantAttr> quantAttr;
        // Only valid when quantAttr is not nullptr
        DataType type = DataType_DT_FLOAT;
        AutoRelease<Backend::MemObj> mem;
        bool isMutable = true;
        int index = -1;
        // Presumably the channel packing factor - confirm.
    int channel_pack_num = 4;
        bool support_pack16 = true;
        pad mPads;
        // For tensors with isMutable = false: determines whether the content can be converted to the main backend
        uint32_t stageMask = 0;
        /** Returns the stored backend pointer (nullptr by default). */
        inline Backend* getBackend() const {
            return backend;
        }
        /** Stores the given backend pointer; nothing else is done with it here. */
        inline void setBackend(Backend* bn) {
            backend = bn;
        }
    private:
        /** For DEVICE tensor only. Backend used to manage the tensor's device memory. */
        Backend* backend = nullptr;
    };
    // Shared handle to the actual description content.
    SharedPtr<NativeInsideDescribe> mContent;
};

1.1.3.1.1 NativeInsideDescribe

// source/core/TensorUtils.hpp
/** extra tensor info container */
    struct NativeInsideDescribe : public RefCount {
    public:
        /** Dimension format; defaults to MNN_DATA_FORMAT_NC4HW4. */
        MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
        // The two members share storage; the union has no default initializer.
        union {
            /** Separate memory offset. */
            int offset;

            /** Function used to free a handle. */
            void (*handleFreeFunction)(void*);
        } extra;
        MemoryType memoryType = MEMORY_BACKEND;
        /** For DEVICE tensor only. */
        int useCount = 0;
        Usage usage = NORMAL;
        std::vector<Region> regions;
        // Fixed-capacity dimension array; entries are not default-initialized here.
        halide_dimension_t dims[MNN_MAX_TENSOR_DIM];
        // TensorArray attribute
        std::shared_ptr<TensorArrayAttr> tensorArrayAttr;
        // Tensor quantization attribute
        std::shared_ptr<QuantAttr> quantAttr;
        // Only valid when quantAttr is not nullptr
        DataType type = DataType_DT_FLOAT;
        AutoRelease<Backend::MemObj> mem;
        bool isMutable = true;
        int index = -1;
        // Presumably the channel packing factor - confirm.
    int channel_pack_num = 4;
        bool support_pack16 = true;
        pad mPads;
        // For tensors with isMutable = false: determines whether the content can be converted to the main backend
        uint32_t stageMask = 0;
        /** Returns the stored backend pointer (nullptr by default). */
        inline Backend* getBackend() const {
            return backend;
        }
        /** Stores the given backend pointer; nothing else is done with it here. */
        inline void setBackend(Backend* bn) {
            backend = bn;
        }
    private:
        /** For DEVICE tensor only. Backend used to manage the tensor's device memory. */
        Backend* backend = nullptr;
    };  

1.1.3.1.1.1 RefCount

// source/core/AutoStorage.h
/**
 * Intrusive reference-count base class.
 *
 * The counter is a plain (non-atomic) mutable int that starts at 1 on default
 * construction. decRef() deletes the object once the counter is zero or below.
 * Copy construction and copy assignment copy the counter value of the source.
 */
class RefCount
{
public:
    /** Adds one reference. */
    void addRef() const
    {
        ++mNum;
    }

    /** Drops one reference and destroys the object when none remain. */
    void decRef() const
    {
        mNum -= 1;
        MNN_ASSERT(mNum>=0);
        if (mNum <= 0)
        {
            delete this;
        }
    }

    /** Current value of the counter. */
    inline int count() const
    {
        return mNum;
    }

protected:
    RefCount() : mNum(1)
    {
    }

    RefCount(const RefCount& f) : mNum(f.mNum)
    {
    }

    void operator=(const RefCount& f)
    {
        // Self-assignment leaves the counter untouched.
        if (this == &f)
        {
            return;
        }
        mNum = f.mNum;
    }

    virtual ~RefCount()
    {
    }

private:
    // mutable so that the const addRef()/decRef() can modify it.
    mutable int mNum;
};


相关实践学习
部署Stable Diffusion玩转AI绘画(GPU云服务器)
本实验通过在ECS上从零开始部署Stable Diffusion来进行AI绘画创作,开启AIGC盲盒。
目录
相关文章
|
3月前
Scheduler 【ChatGPT】
Scheduler 【ChatGPT】
|
3月前
Scheduler pelt c program 【ChatGPT】
Scheduler pelt c program 【ChatGPT】
|
6月前
|
Kubernetes 监控 调度
K8S中Scheduler原理分析
【6月更文挑战第20天】K8S Scheduler是集群的关键组件,它监听API Server,为新Pod选择合适的Node。
|
7月前
|
资源调度
在SchedulerX中,你可以使用`schedulerx.output()`函数来向Worker报告运行结果
【1月更文挑战第7天】【1月更文挑战第35篇】在SchedulerX中,你可以使用`schedulerx.output()`函数来向Worker报告运行结果
51 1
|
7月前
|
资源调度 分布式计算 算法
Gang Scheduling
Gang Scheduling(Coscheduling)、FIFO Scheduling、Capacity Scheduling、Fair sharing、Binpack/Spread等是云计算和分布式系统中的任务调度算法,用于在资源有限的情况下,公平、高效地分配任务和资源。下面是这些调度算法的基本介绍和如何在实际应用中使用它们的一些建议:
308 2
|
Kubernetes 算法 调度
基于kube-scheduler-simulator编写自己的调度程序
基于kube-scheduler-simulator编写自己的调度程序
157 0
|
API iOS开发 MacOS
【TVM 学习资料】使用 Auto-scheduling 优化算子
【TVM 学习资料】使用 Auto-scheduling 优化算子
133 0
|
Java 调度
Leetcode-Medium 621. Task Scheduler
Leetcode-Medium 621. Task Scheduler
118 0
Leetcode-Medium 621. Task Scheduler