create tablespace 与 heap_insert 函数

简介:

先说 heap_insert 函数:

复制代码
/*
 *    heap_insert        - insert tuple into a heap
 *
 * The new tuple is stamped with current transaction ID and the specified
 * command ID.
 *
 * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
 * logged in WAL, even for a non-temp relation.  Safe usage of this behavior
 * requires that we arrange that all new tuples go into new pages not
 * containing any tuples from other transactions, and that the relation gets
 * fsync'd before commit.  (See also heap_sync() comments)
 *
 * The HEAP_INSERT_SKIP_FSM option is passed directly to
 * RelationGetBufferForTuple, which see for more info.
 *
 * Note that these options will be applied when inserting into the heap's
 * TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple.
 *
 * The return value is the OID assigned to the tuple (either here or by the
 * caller), or InvalidOid if no OID.  The header fields of *tup are updated
 * to match the stored tuple; in particular tup->t_self receives the actual
 * TID where the tuple was stored.    But note that any toasting of fields
 * within the tuple data is NOT reflected into *tup.
 */
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
            int options, BulkInsertState bistate)
{
    /*
     * Optional debug trace, left disabled.  Enable it to print the name and
     * relfilenode of the relation being inserted into.
     *
    Form_pg_class tmprel = relation->rd_rel;
    NameData    tmprelname = tmprel->relname;
    fprintf(stderr,"Insert into: %s\n", tmprelname.data);
    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
     */
    TransactionId xid = GetCurrentTransactionId();
    HeapTuple   heaptup;
    Buffer      buffer;
    bool        all_visible_cleared = false;

    if (relation->rd_rel->relhasoids)
    {
#ifdef NOT_USED
        /* this is redundant with an Assert in HeapTupleSetOid */
        Assert(tup->t_data->t_infomask & HEAP_HASOID);
#endif

        /*
         * If the object id of this tuple has already been assigned, trust the
         * caller.  There are a couple of ways this can happen.  At initial db
         * creation, the backend program sets oids for tuples.  When we define
         * an index, we set the oid.  Finally, in the future, we may allow
         * users to set their own object ids in order to support a persistent
         * object store (objects need to contain pointers to one another).
         */
        if (!OidIsValid(HeapTupleGetOid(tup)))
            HeapTupleSetOid(tup, GetNewOid(relation));
    }
    else
    {
        /* check there is not space for an OID */
        Assert(!(tup->t_data->t_infomask & HEAP_HASOID));
    }

    /*
     * Reset the transaction-status bits of the caller's tuple header and
     * stamp it with the inserting transaction and command id.
     */
    tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
    tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
    tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
    HeapTupleHeaderSetXmin(tup->t_data, xid);
    HeapTupleHeaderSetCmin(tup->t_data, cid);
    HeapTupleHeaderSetXmax(tup->t_data, 0);     /* for cleanliness */
    tup->t_tableOid = RelationGetRelid(relation);

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to store
     * into the relation; tup is the caller's original untoasted data.
     */
    if (relation->rd_rel->relkind != RELKIND_RELATION)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        heaptup = toast_insert_or_update(relation, tup, NULL, options);
    else
        heaptup = tup;

    /*
     * We're about to do the actual insert -- but check for conflict first,
     * to avoid possibly having to roll back work we've just done.
     *
     * For a heap insert, we only need to check for table-level SSI locks.
     * Our new tuple can't possibly conflict with existing tuple locks, and
     * heap page locks are only consolidated versions of tuple locks; they do
     * not lock "gaps" as index page locks do.  So we don't need to identify
     * a buffer before making the call.
     */
    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);

    /* Find buffer to insert this tuple into */
    buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
                                       InvalidBuffer, options, bistate);

    /* NO EREPORT(ERROR) from here till changes are logged */
    START_CRIT_SECTION();

    RelationPutHeapTuple(relation, buffer, heaptup);

    /* Remember that we cleared the page flag: WAL and the VM need to know */
    if (PageIsAllVisible(BufferGetPage(buffer)))
    {
        all_visible_cleared = true;
        PageClearAllVisible(BufferGetPage(buffer));
    }

    /*
     * XXX Should we set PageSetPrunable on this page ?
     *
     * The inserting transaction may eventually abort thus making this tuple
     * DEAD and hence available for pruning.  Though we don't want to optimize
     * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
     * aborted tuple will never be pruned until next vacuum is triggered.
     *
     * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
     */

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
    {
        xl_heap_insert xlrec;
        xl_heap_header xlhdr;
        XLogRecPtr  recptr;
        XLogRecData rdata[3];
        Page        page = BufferGetPage(buffer);
        uint8       info = XLOG_HEAP_INSERT;

        /* rdata[0]: the fixed insert record, not tied to any buffer */
        xlrec.all_visible_cleared = all_visible_cleared;
        xlrec.target.node = relation->rd_node;
        xlrec.target.tid = heaptup->t_self;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapInsert;
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
        xlhdr.t_infomask = heaptup->t_data->t_infomask;
        xlhdr.t_hoff = heaptup->t_data->t_hoff;

        /*
         * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
         * to write the whole page to the xlog, we don't need to store
         * xl_heap_header in the xlog.
         */
        rdata[1].data = (char *) &xlhdr;
        rdata[1].len = SizeOfHeapHeader;
        rdata[1].buffer = buffer;
        rdata[1].buffer_std = true;
        rdata[1].next = &(rdata[2]);

        /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
        rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].buffer = buffer;
        rdata[2].buffer_std = true;
        rdata[2].next = NULL;

        /*
         * If this is the single and first tuple on page, we can reinit the
         * page instead of restoring the whole thing.  Set flag, and hide
         * buffer references from XLogInsert.
         */
        if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
            PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
        {
            info |= XLOG_HEAP_INIT_PAGE;
            rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
        }

        recptr = XLogInsert(RM_HEAP_ID, info, rdata);

        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(buffer);

    /* Clear the bit in the visibility map if necessary */
    if (all_visible_cleared)
        visibilitymap_clear(relation,
                            ItemPointerGetBlockNumber(&(heaptup->t_self)));

    /*
     * If tuple is cachable, mark it for invalidation from the caches in case
     * we abort.  Note it is OK to do this after releasing the buffer, because
     * the heaptup data structure is all in local memory, not in the shared
     * buffer.
     */
    CacheInvalidateHeapTuple(relation, heaptup);

    pgstat_count_heap_insert(relation);

    /*
     * If heaptup is a private copy, release it.  Don't forget to copy t_self
     * back to the caller's image, too.
     */
    if (heaptup != tup)
    {
        tup->t_self = heaptup->t_self;
        heap_freetuple(heaptup);
    }

    return HeapTupleGetOid(tup);
}
复制代码

如果我执行的是一条普通的 SQL 语句,可以在 heap_insert 中加入下面这样的调试代码,来确认数据是否确实插入到了我预期的表中:

    /**
    Form_pg_class tmprel = relation->rd_rel;
    NameData    tmprelname = tmprel->relname;
    fprintf(stderr,"Insert into: %s\n", tmprelname.data);
    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
   */

当我执行 create tablespace的时候,我想它是要写入数据字典的。

但是上述代码反映出来的 relNode 是不正确的,而 relname 也是空值。

然后我从更高层的调用层面来观察:

当我执行 create tablespace的时候,调用关系如下:

PostgresMain-->exec_simple_query-->PortalRun-->PortalRunMulti-->PortalRunUtility-->CreateTableSpace

-->simple_heap_insert-->heap_insert

再看 CreateTableSpace 函数:

复制代码
/*
 * Create a table space
 *
 * Only superusers can create a tablespace. This seems a reasonable restriction
 * since we're determining the system layout and, anyway, we probably have
 * root if we're doing this kind of activity
 */
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
#ifndef HAVE_SYMLINK
    /* Without symlink support this command cannot be carried out at all. */
    ereport(ERROR,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
             errmsg("tablespaces are not supported on this platform")));
#else                            /* HAVE_SYMLINK */
    Relation    catalog;
    Datum       colvals[Natts_pg_tablespace];
    bool        colnulls[Natts_pg_tablespace];
    HeapTuple   newtup;
    Oid         spcoid;
    char       *spcpath;
    Oid         owner;

    /* Only a superuser may issue this command ... */
    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("permission denied to create tablespace \"%s\"",
                        stmt->tablespacename),
                 errhint("Must be superuser to create a tablespace.")));

    /* ... but the tablespace may be handed to any named role. */
    owner = stmt->owner ? get_role_oid(stmt->owner, false) : GetUserId();

    /* Work on a private, canonicalized copy of the requested path. */
    spcpath = pstrdup(stmt->location);
    canonicalize_path(spcpath);

    /* Single quotes in the path are rejected (CREATE DATABASE safety). */
    if (strchr(spcpath, '\'') != NULL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_NAME),
                 errmsg("tablespace location cannot contain single quotes")));

    /*
     * Relative paths are refused.  As a side effect this also rejects an
     * empty or whitespace-only location.
     */
    if (!is_absolute_path(spcpath))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location must be an absolute path")));

    /*
     * Leave room for the 'PG_XXX/<dboid>/<relid>.<nnn>' suffix that will be
     * appended to the location later on.
     */
    if (strlen(spcpath) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
        OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS > MAXPGPATH)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location \"%s\" is too long",
                        spcpath)));

    /* Names in the reserved "pg_" namespace are for system use only. */
    if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
        ereport(ERROR,
                (errcode(ERRCODE_RESERVED_NAME),
                 errmsg("unacceptable tablespace name \"%s\"",
                        stmt->tablespacename),
                 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

    /*
     * Report a duplicate name ourselves; the unique index would catch it
     * anyway, but with a less friendly message.
     */
    if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("tablespace \"%s\" already exists",
                        stmt->tablespacename)));

    /*
     * Add the pg_tablespace row before touching the filesystem: doing the
     * insert first locks the proposed tablespace name against concurrent
     * creators, and the row is rolled back if a later step fails.
     */
    catalog = heap_open(TableSpaceRelationId, RowExclusiveLock);

    MemSet(colnulls, false, sizeof(colnulls));

    colvals[Anum_pg_tablespace_spcname - 1] =
        DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
    colvals[Anum_pg_tablespace_spcowner - 1] =
        ObjectIdGetDatum(owner);
    colvals[Anum_pg_tablespace_spclocation - 1] =
        CStringGetTextDatum(spcpath);
    /* ACL and options columns are stored as NULL */
    colnulls[Anum_pg_tablespace_spcacl - 1] = true;
    colnulls[Anum_pg_tablespace_spcoptions - 1] = true;

    newtup = heap_form_tuple(catalog->rd_att, colvals, colnulls);

    /* The value returned by the insert is used as the tablespace's OID. */
    spcoid = simple_heap_insert(catalog, newtup);

    CatalogUpdateIndexes(catalog, newtup);

    heap_freetuple(newtup);

    /* Tie the new tablespace to its owning role. */
    recordDependencyOnOwner(TableSpaceRelationId, spcoid, owner);

    /* Give any object-access hook its post-create notification. */
    InvokeObjectAccessHook(OAT_POST_CREATE,
                           TableSpaceRelationId, spcoid, 0);

    create_tablespace_directories(spcpath, spcoid);

    /* Log the filesystem change: fixed record header, then the path. */
    {
        xl_tblspc_create_rec rec;
        XLogRecData chain[2];

        rec.ts_id = spcoid;

        chain[0].data = (char *) &rec;
        chain[0].len = offsetof(xl_tblspc_create_rec, ts_path);
        chain[0].buffer = InvalidBuffer;
        chain[0].next = &(chain[1]);

        chain[1].data = (char *) spcpath;
        chain[1].len = strlen(spcpath) + 1;
        chain[1].buffer = InvalidBuffer;
        chain[1].next = NULL;

        (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, chain);
    }

    /*
     * Commit synchronously so that the window between the on-disk symlink
     * appearing and the transaction being marked committed is as small as
     * possible.
     */
    ForceSyncCommit();

    pfree(spcpath);

    /* Close the catalog but keep its lock until commit. */
    heap_close(catalog, NoLock);
#endif   /* HAVE_SYMLINK */
}
复制代码
复制代码
/*
 *    simple_heap_insert - insert a tuple
 *
 * Currently, this routine differs from heap_insert only in supplying
 * a default command ID and not allowing access to the speedup options.
 *
 * This should be used rather than using heap_insert directly in most places
 * where we are modifying system catalogs.
 */
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
    /* Use the current command id; no insert options, no bulk-insert state. */
    CommandId   curcid = GetCurrentCommandId(true);
    Oid         tupoid = heap_insert(relation, tup, curcid, 0, NULL);

    return tupoid;
}
复制代码

我把它简化一下,看看它都干了什么:

复制代码
/*
 * Create a table space
 *
 * Only superusers can create a tablespace. This seems a reasonable restriction
 * since we're determining the system layout and, anyway, we probably have
 * root if we're doing this kind of activity
 */
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
    /* (elided: local declarations, superuser check, path and name validation) */
    ......
    /*
     * Insert tuple into pg_tablespace.  The purpose of doing this first is to
     * lock the proposed tablename against other would-be creators. The
     * insertion will roll back if we find problems below.
     */
    rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
    /* (elided: filling values[]/nulls[] and building the tuple) */
    ......
    /* simple_heap_insert() forwards to heap_insert(); its result is kept as the tablespace OID */
    tablespaceoid = simple_heap_insert(rel, tuple);
    /* (elided: index update, owner dependency, directory creation, WAL record, cleanup) */
    ......
}
复制代码

而 heap_open(TableSpaceRelationId, RowExclusiveLock) 这一句,
里面的 TableSpaceRelationId其实是宏:

/* ----------------
 *        pg_tablespace definition.  cpp turns this into
 *        typedef struct FormData_pg_tablespace
 *
 *        TableSpaceRelationId is the relation OID of the pg_tablespace
 *        catalog (1213::regclass resolves to pg_tablespace); it is the
 *        value CreateTableSpace passes to heap_open().
 * ----------------
 */
#define TableSpaceRelationId  1213

而如果想要看到值,可以运行下面的语句,恰好 1213 对应的就是 pg_tablespace 表。

复制代码
[pgsql@localhost bin]$ ./psql
psql (9.1.2)
Type "help" for help.

pgsql=# select 1213::regclass;
   regclass    
---------------
 pg_tablespace
(1 row)

pgsql=# 
复制代码

但是,实际上,pg_tablespace是数据字典,而数据库的目录中,并不存在一个单独的1213文件与之对应。

如果我用上述的:

    /**
    Form_pg_class tmprel = relation->rd_rel;
    NameData    tmprelname = tmprel->relname;
    fprintf(stderr,"Insert into: %s\n", tmprelname.data);
    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
   */

来看,就会知道 relNode是 12587。

我可以在 global目录下,找到这个 12587文件。

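1213 对应着 12587 文件。这乍看起来比较怪异,其实 1213 是 pg_tablespace 的关系 OID,而 12587 是它的 relfilenode(磁盘上的文件名),二者本来就是两个不同的概念。pg_tablespace 属于共享系统表,文件位于 global 目录下;这类系统表在 pg_class 中的 relfilenode 列为 0,实际的文件号由关系映射文件 pg_filenode.map 维护,可以用 select pg_relation_filenode('pg_tablespace'); 查看。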









本文转自健哥的数据花园博客园博客,原文链接:http://www.cnblogs.com/gaojian/p/3167337.html,如需转载请自行联系原作者


目录
相关文章
|
SQL 关系型数据库
[WorkLog] InnoDB Faster truncate/drop table space
这个系列, 介绍upstream 一些有意思的worklog **问题** 在InnoDB 现有的版本里面, 如果一个table space 被truncated 或者 drop 的时候, 比如有一个连接创建了临时表, 连接断开以后, 对应的临时表都需要进行drop 操作. InnoDB 是需要将该tablespace 对应的所有的page 从LRU/FLUSH li
421 0
|
SQL 关系型数据库
ORA-1652: unable to extend temp segment by 128 in tablespace xxx Troubleshootin
当收到告警信息ORA-01652: unable to extend temp segment by 128 in tablespace xxxx 时,如何Troubleshooting ORA-1652这样的问题呢? 当然一般xxx是临时表空间,也有可能是用户表空间。
2045 0