Where PostgreSQL Processes SQL Queries, Part 17


Continuing, we look at estimate_rel_size:

/*
 * estimate_rel_size - estimate # pages and # tuples in a table or index
 *
 * We also estimate the fraction of the pages that are marked all-visible in
 * the visibility map, for use in estimation of index-only scans.
 *
 * If attr_widths isn't NULL, it points to the zero-index entry of the
 * relation's attr_widths[] cache; we fill this in if we have need to compute
 * the attribute widths for estimation purposes.
 */
void
estimate_rel_size(Relation rel, int32 *attr_widths,
                  BlockNumber *pages, double *tuples, double *allvisfrac)
{
    ...
    switch (rel->rd_rel->relkind)
    {
        case RELKIND_RELATION:
        case RELKIND_INDEX:
        case RELKIND_TOASTVALUE:
            /* it has storage, ok to call the smgr */
            curpages = RelationGetNumberOfBlocks(rel);
            ...
            break;
        case RELKIND_SEQUENCE:
            ...
            break;
        case RELKIND_FOREIGN_TABLE:
            ...
            break;
        default:
            ...
            break;
    }
}
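
For context, this is roughly what the elided part of the RELKIND_RELATION branch does with the block count once it has it. The helper below is a minimal sketch with an illustrative name (sketch_scale_reltuples is mine, not a function in the PostgreSQL source): take the tuple density recorded by the last VACUUM/ANALYZE (pg_class.reltuples / pg_class.relpages) and scale it up to the table's current number of blocks.

#include <math.h>               /* rint */

/*
 * Hypothetical helper sketching the elided logic of the RELKIND_RELATION
 * branch above (not the verbatim source): estimate the current tuple count
 * by scaling the last-known tuple density to the current block count.
 */
static double
sketch_scale_reltuples(double reltuples, BlockNumber relpages,
                       BlockNumber curpages)
{
    double      density;

    if (curpages == 0)
        return 0.0;             /* empty table: no tuples */
    if (relpages > 0)
        density = reltuples / (double) relpages;
    else
        density = 0.0;          /* the real code falls back to a tuple-width-based guess */
    return rint(density * (double) curpages);
}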

First of all, estimate_rel_size has to determine how many blocks the table currently has, which is the job of RelationGetNumberOfBlocks:

/*
 * The physical storage of a relation consists of one or more forks. The
 * main fork is always created, but in addition to that there can be
 * additional forks for storing various metadata. ForkNumber is used when
 * we need to refer to a specific fork in a relation.
 */
typedef enum ForkNumber
{
    InvalidForkNumber = -1,
    MAIN_FORKNUM = 0,
    FSM_FORKNUM,
    VISIBILITYMAP_FORKNUM,
    INIT_FORKNUM

    /*
     * NOTE: if you add a new fork, change MAX_FORKNUM below and update the
     * forkNames array in catalog.c
     */
} ForkNumber;

Next:

#define RelationGetNumberOfBlocks(reln) \
    RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
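
So only the main fork is counted here. For orientation, this is how the fork numbers map to files on disk; the layout below is a sketch using a hypothetical database OID 16384 and relfilenode 16385, and the array is named forkNames_sketch to distinguish it from the real forkNames[] in catalog.c that the NOTE above refers to.

/*
 * Sketch of the on-disk layout of the forks of one relation (hypothetical
 * OIDs; the suffixes are appended when the path is built from the fork name):
 *
 *     MAIN_FORKNUM            base/16384/16385        table data
 *     FSM_FORKNUM             base/16384/16385_fsm    free space map
 *     VISIBILITYMAP_FORKNUM   base/16384/16385_vm     visibility map
 *     INIT_FORKNUM            base/16384/16385_init   init fork of an unlogged table
 */
static const char *const forkNames_sketch[] = {
    "main",                     /* MAIN_FORKNUM */
    "fsm",                      /* FSM_FORKNUM */
    "vm",                       /* VISIBILITYMAP_FORKNUM */
    "init"                      /* INIT_FORKNUM */
};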

Then, RelationGetNumberOfBlocksInFork:

/*
 * RelationGetNumberOfBlocks
 *        Determines the current number of pages in the relation.
 */
BlockNumber
RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
{
    /* Open it at the smgr level if not already done */
    RelationOpenSmgr(relation);

    return smgrnblocks(relation->rd_smgr, forkNum);
}

Next, smgrnblocks. When the file that backs a table is damaged or missing, the error surfaces through this function:

/*
 *    smgrnblocks() -- Calculate the number of blocks in the
 *                     supplied relation.
 */
BlockNumber
smgrnblocks(SMgrRelation reln, ForkNumber forknum)
{
    return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
}
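
smgrnblocks does no work itself: it dispatches through a table of function pointers indexed by reln->smgr_which. Below is a deliberately trimmed-down sketch of that pattern; the real table is the f_smgr array smgrsw[] in smgr.c, and the struct here keeps only the one callback we care about.

/*
 * Trimmed-down sketch of the storage manager dispatch table. The real
 * f_smgr struct has one member per smgr operation (read, write, extend,
 * ...); for ordinary relations smgr_which selects entry 0, "magnetic disk"
 * (md.c), so smgr_nblocks ends up pointing at mdnblocks().
 */
typedef struct f_smgr_sketch
{
    BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
} f_smgr_sketch;

static const f_smgr_sketch smgrsw_sketch[] = {
    {mdnblocks}                 /* 0: magnetic disk, md.c */
};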

After some tracing of that function pointer, I found that the first time I execute a command such as select * from tab01, execution reaches:

/*
 *    mdnblocks() -- Get the number of blocks stored in a relation.
 *
 *        Important side effect: all active segments of the relation are opened
 *        and added to the mdfd_chain list.  If this routine has not been
 *        called, then only segments up to the last one actually touched
 *        are present in the chain.
 */
BlockNumber
mdnblocks(SMgrRelation reln, ForkNumber forknum)
{
    MdfdVec    *v = mdopen(reln, forknum, EXTENSION_FAIL);
    BlockNumber nblocks;
    BlockNumber segno = 0;

    /*
     * Skip through any segments that aren't the last one, to avoid redundant
     * seeks on them.  We have previously verified that these segments are
     * exactly RELSEG_SIZE long, and it's useless to recheck that each time.
     *
     * NOTE: this assumption could only be wrong if another backend has
     * truncated the relation.    We rely on higher code levels to handle that
     * scenario by closing and re-opening the md fd, which is handled via
     * relcache flush.    (Since the checkpointer doesn't participate in
     * relcache flush, it could have segment chain entries for inactive
     * segments; that's OK because the checkpointer never needs to compute
     * relation size.)
     */
    while (v->mdfd_chain != NULL)
    {
        segno++;
        v = v->mdfd_chain;
    }

    for (;;)
    {
        nblocks = _mdnblocks(reln, forknum, v);
        if (nblocks > ((BlockNumber) RELSEG_SIZE))
            elog(FATAL, "segment too big");
        if (nblocks < ((BlockNumber) RELSEG_SIZE))
            return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;

        /*
         * If segment is exactly RELSEG_SIZE, advance to next one.
         */
        segno++;

        if (v->mdfd_chain == NULL)
        {
            /*
             * Because we pass O_CREAT, we will create the next segment (with
             * zero length) immediately, if the last segment is of length
             * RELSEG_SIZE.  While perhaps not strictly necessary, this keeps
             * the logic simple.
             */
            v->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT);
            if (v->mdfd_chain == NULL)
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not open file \"%s\": %m",
                                _mdfd_segpath(reln, forknum, segno))));
        }

        v = v->mdfd_chain;
    }
}
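
The segment arithmetic is easier to follow with a concrete example. The numbers below assume the default build settings (BLCKSZ = 8192 and RELSEG_SIZE = 131072, i.e. 1 GB per segment file) and hypothetical OIDs.

/*
 * Worked example (default BLCKSZ = 8192, RELSEG_SIZE = 131072, so one
 * segment file holds 1 GB; the OIDs are hypothetical). A 2.5 GB table with
 * relfilenode 16385 in database 16384 is stored as three segment files:
 *
 *     base/16384/16385        131072 blocks   (full)
 *     base/16384/16385.1      131072 blocks   (full)
 *     base/16384/16385.2       65536 blocks   (last, partly filled)
 *
 * mdnblocks() skips the two full segments via the mdfd_chain and only has
 * to measure the last one, returning
 *
 *     segno * RELSEG_SIZE + nblocks = 2 * 131072 + 65536 = 327680 blocks
 */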

Next, look at the mdopen function:

/*
 *    mdopen() -- Open the specified relation.
 *
 * Note we only open the first segment, when there are multiple segments.
 *
 * If first segment is not present, either ereport or return NULL according
 * to "behavior".  We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
 * EXTENSION_CREATE means it's OK to extend an existing relation, not to
 * invent one out of whole cloth.
 */
static MdfdVec *
mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
{
    ...

    path = relpath(reln->smgr_rnode, forknum);

    fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);

    if (fd < 0)
    {
        fprintf(stderr, "In %s----%d\n", __FUNCTION__, __LINE__);

        /*
         * During bootstrap, there are cases where a system relation will be
         * accessed (by internal backend processes) before the bootstrap
         * script nominally creates it.  Therefore, accept mdopen() as a
         * substitute for mdcreate() in bootstrap mode only. (See mdcreate)
         */
        if (IsBootstrapProcessingMode())
            fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);

        fprintf(stderr, "In %s----%d\n", __FUNCTION__, __LINE__);

        if (fd < 0)
        {
            if (behavior == EXTENSION_RETURN_NULL &&
                FILE_POSSIBLY_DELETED(errno))
            {
                pfree(path);
                return NULL;
            }
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not open file \"%s\": %m", path)));
        }
    }

    ...

    return mdfd;
}

Now PathNameOpenFile: if it fails to open the file, it returns -1.

/*
 * open a file in an arbitrary directory
 *
 * NB: if the passed pathname is relative (which it usually is),
 * it will be interpreted relative to the process' working directory
 * (which should always be $PGDATA when this code is running).
 */
File
PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
{
    char       *fnamecopy;
    File        file;
    Vfd           *vfdP;

    DO_DB(elog(LOG, "PathNameOpenFile: %s %x %o",
               fileName, fileFlags, fileMode));

    /*
     * We need a malloc'd copy of the file name; fail cleanly if no room.
     */
    fnamecopy = strdup(fileName);
    if (fnamecopy == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    file = AllocateVfd();
    vfdP = &VfdCache[file];

    while (nfile + numAllocatedDescs >= max_safe_fds)
    {
        if (!ReleaseLruFile())
            break;
    }

    vfdP->fd = BasicOpenFile(fileName, fileFlags, fileMode);

    if (vfdP->fd < 0)
    {
        FreeVfd(file);
        free(fnamecopy);
        return -1;
    }
    ++nfile;
    DO_DB(elog(LOG, "PathNameOpenFile: success %d",
               vfdP->fd));

    Insert(file);

    vfdP->fileName = fnamecopy;
    /* Saved flags are adjusted to be OK for re-opening file */
    vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
    vfdP->fileMode = fileMode;
    vfdP->seekPos = 0;
    vfdP->fileSize = 0;
    vfdP->fdstate = 0x0;
    vfdP->resowner = NULL;

    return file;
}
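
To tie this back to the theme of the series: for this query the call chain is estimate_rel_size → RelationGetNumberOfBlocks → RelationGetNumberOfBlocksInFork → smgrnblocks → mdnblocks → mdopen → PathNameOpenFile → BasicOpenFile. If the main-fork file of the table has been removed or cannot be opened, BasicOpenFile fails, PathNameOpenFile returns -1, and the ereport in mdopen reports an error of the form could not open file "base/<dboid>/<relfilenode>": ..., where the %m part is typically "No such file or directory" when the file is missing.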