透过pageinspect了解PostgreSQl中的page-阿里云开发者社区

博客 PostgreSQL的Page分析记录有过对page的大体介绍，可以参看之前的blog，下面主要通过插件 pageinspect 向大家进行介绍。

      在此之前需要了解的名词：
      page，物理文件的单位，默认大小为8K。
      tuple，PG中物理行。
      xid，事务号，执行操作时的顺序id。

        pageinspect里边有三个函数是本文用到的，他们分别是：
      a.get_raw_page，根据参数表明、数据文件类型（main、fsm、vm）以及page位置，将当前表文件中的page内容返回。还有一个函数于此同名，只有两个参数，是将第二个参数省略，直接使用'main'。
      b.page_header，参数是函数get_raw_page的返回值，返回值是将本page结构中的PageHeaderData详细信息。
      c.heap_page_items,参数是函数get_raw_page的返回值，返回值是将page内的项指针(ItemIddata)以及HeapTupleHeaderData的详细信息。

下面通过例子来对page的PageHeaderData、ItemIddata和HeapTupleHeaderData进行说明。
a.建表mvcc

    [postgres@localhost bin]$ ./psql 
   

    psql (9.4.5)
   

    Type "help" for help.
   

    postgres=# create table mvcc(id int);
   

    CREATE TABLE
   

    postgres=# insert into mvcc values (1),(2);
   

    INSERT 0 2
   

b.PageHeaderData

    postgres=# select * from page_header(get_raw_page('mvcc', 'main', 0));
   

        lsn    | checksum | flags | lower | upper | special | pagesize | version | p
   

    rune_xid 
   

    -----------+----------+-------+-------+-------+---------+----------+---------+--
   

    ---------
   

     0/300F4D8 |        0 |     0 |    32 |  8128 |    8192 |     8192 |       4 |  
   

0

    (1 row)
   

这是表mvcc的PageHeaderData信息，现在比对源码对各个字段进行解释：

    typedef struct PageHeaderData
   

{

        /* XXX LSN is member of *any* block, not only page-organized ones */
   

        PageXLogRecPtr pd_lsn;      /* LSN: next byte after last byte of xlog
   

                                     * record for last change to this page */
   

        uint16      pd_checksum;    /* checksum */
   

        uint16      pd_flags;       /* flag bits, see below */
   

        LocationIndex pd_lower;     /* offset to start of free space */
   

        LocationIndex pd_upper;     /* offset to end of free space */
   

        LocationIndex pd_special;   /* offset to start of special space */
   

        uint16      pd_pagesize_version;
   

        TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
   

        ItemIdData  pd_linp[1];     /* beginning of line pointer array */
   

    } PageHeaderData;
   

      pg_lsn：记录最后一次对page修改的xlog记录id。
      pg_checksum：页面的校验和，主要是通过函数pg_checksum_block函数生成的，0也是有效地，参数为PageHeaderData和BLCKSZ(page's size)。当校验和验证失败，即认为当前页面无效。
      pg_flags：page的flags，具体值为，可以叠加：

    #define PD_HAS_FREE_LINES   0x0001      /* are there any unused line pointers? */
   

    #define PD_PAGE_FULL        0x0002      /* not enough free space for new
   

                                             * tuple? */
   

    #define PD_ALL_VISIBLE      0x0004      /* all tuples on page are visible to
   

                                             * everyone */
   

    #define PD_VALID_FLAG_BITS  0x0007      /* OR of all valid pd_flags bits */
   

      pg_lower和pg_upper：最后一个项指针的位置和最新的tuple位置。主要进行查找空闲位置，进行插入工作。
      pg_special：page预留的位置，可以存储索引等信息。
      pg_pagesize_version：page大小以及当前版本。page大小可以通过configure进行设置。version的意思是

/*

     * Page layout version number 0 is for pre-7.3 Postgres releases.
   

     * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
   

     * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
   

     * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
   

     * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
   

     *      added the pd_flags field (by stealing some bits from pd_tli),
   

     *      as well as adding the pd_prune_xid field (which enlarges the header).
   

*

     * As of Release 9.3, the checksum version must also be considered when
   

     * handling pages.
   

*/

pg_prune_xid：一般是最后一次删除或者更新的xid。

pg_linp：项指针。
c.ItemIddata

    postgres=# select lp,lp_off,lp_flags,lp_len from heap_page_items(get_raw_page('mvcc', 'main', 0));
   

     lp | lp_off | lp_flags | lp_len 
   

    ----+--------+----------+--------
   

      1 |   8160 |        1 |     28
   

      2 |   8128 |        1 |     28
   

    (2 rows)
   

这是表mvcc的项指针的信息，一样通过结合源码进行介绍：

    typedef struct ItemIdData
   

{

        unsigned    lp_off:15,      /* offset to tuple (from start of page) */
   

                    lp_flags:2,     /* state of item pointer, see below */
   

                    lp_len:15;      /* byte length of tuple */
   

    } ItemIdData;
   

      lp：这是插件自己定义的列，在源码中其实没有，这个是项指针的顺序。
      lp_off：tuple在page中的位置。
      lp_flags：tuple的flags，具体为

    #define LP_UNUSED       0       /* unused (should always have lp_len=0) */
   

    #define LP_NORMAL       1       /* used (should always have lp_len>0) */
   

    #define LP_REDIRECT     2       /* HOT redirect (should have lp_len=0) */
   

    #define LP_DEAD         3       /* dead, may or may not have storage */
   

lp_len： HeapTupleHeaderData 的长度+Oid的长度(8，因为要数据对齐，所以在这里会比原来预计的多4)。

d.HeapTupleHeaderData

    postgres=# select * from heap_page_items(get_raw_page('mvcc', 'main', 0));
   

     lp | lp_off | lp_flags | lp_len | t_xmin | t_xmax | t_field3 | t_ctid | t_infom
   

    ask2 | t_infomask | t_hoff | t_bits | t_oid 
   

    ----+--------+----------+--------+--------+--------+----------+--------+--------
   

    -----+------------+--------+--------+-------
   

      1 |   8160 |        1 |     28 |   1831 |      0 |        0 | (0,1)  |        
   

       1 |       2048 |     24 |        |      
   

      2 |   8128 |        1 |     28 |   1831 |      0 |        0 | (0,2)  |        
   

       1 |       2048 |     24 |        |      
   

    (2 rows)
   

同样结合源码进行介绍：

    typedef struct HeapTupleFields
   

{

        TransactionId t_xmin;       /* inserting xact ID */
   

        TransactionId t_xmax;       /* deleting or locking xact ID */
   

        union
   

{

            CommandId   t_cid;      /* inserting or deleting command ID, or both */
   

            TransactionId t_xvac;   /* old-style VACUUM FULL xact ID */
   

        }           t_field3;
   

    } HeapTupleFields;
   

    typedef struct DatumTupleFields
   

{

     int32      datum_len_;     /* varlena header (do not touch directly!) */
   

     int32      datum_typmod;   /* -1, or identifier of a record type */
   

     Oid            datum_typeid;   /* composite type OID, or RECORDOID */
   

/*

     * Note: field ordering is chosen with thought that Oid might someday
   

     * widen to 64 bits.
   

*/

    } DatumTupleFields;
   

    struct HeapTupleHeaderData
   

{

        union
   

{

            HeapTupleFields t_heap;
   

            DatumTupleFields t_datum;
   

        }           t_choice;
   

        ItemPointerData t_ctid;     /* current TID of this or newer tuple */
   

        /* Fields below here must match MinimalTupleData! */
   

        uint16      t_infomask2;    /* number of attributes + various flags */
   

        uint16      t_infomask;     /* various flag bits, see below */
   

        uint8       t_hoff;         /* sizeof header incl. bitmap, padding */
   

        /* ^ - 23 bytes - ^ */
   

        bits8       t_bits[1];      /* bitmap of NULLs -- VARIABLE LENGTH */
   

        /* MORE DATA FOLLOWS AT END OF STRUCT */
   

};

      xmin和xmax是插入、删除和更新时的事务ID，插入时会在xmin内写入当前事务ID，当删除时就会在xmax写入当前事务ID。更新是进行删除后再插入。
      t_cid：这个是指一个事务内的命令ID，每个事务都是从0开始。
      t_ctid：这个是指物理ID，结构如下：

    typedef struct ItemPointerData
   

{

        BlockIdData ip_blkid;
   

        OffsetNumber ip_posid;
   

}

    typedef struct BlockIdData
   

{

        uint16      bi_hi;
   

        uint16      bi_lo;
   

    } BlockIdData;
   

存储的为bi_hi（文件号） << 16 | bi_lo（page号），来获取磁盘顺序，ip_posid是在page的中序号。以此来准确定位数据。
t_infomask2：表字段的个数以及一些flags，flags如下：

    #define HEAP_NATTS_MASK         0x07FF  /* 11 bits for number of attributes */
   

    /* bits 0x1800 are available */
   

    #define HEAP_KEYS_UPDATED       0x2000  /* tuple was updated and key cols
   

                                             * modified, or tuple deleted */
   

    #define HEAP_HOT_UPDATED        0x4000  /* tuple was HOT-updated */
   

    #define HEAP_ONLY_TUPLE         0x8000  /* this is heap-only tuple */
   

    #define HEAP2_XACT_MASK         0xE000  /* visibility-related bits */
   

t_infomask：tuple的flags，如下：

    #define HEAP_HASNULL            0x0001  /* has null attribute(s) */
   

    #define HEAP_HASVARWIDTH        0x0002  /* has variable-width attribute(s) */
   

    #define HEAP_HASEXTERNAL        0x0004  /* has external stored attribute(s) */
   

    #define HEAP_HASOID             0x0008  /* has an object-id field */
   

    #define HEAP_XMAX_KEYSHR_LOCK   0x0010  /* xmax is a key-shared locker */
   

    #define HEAP_COMBOCID           0x0020  /* t_cid is a combo cid */
   

    #define HEAP_XMAX_EXCL_LOCK     0x0040  /* xmax is exclusive locker */
   

    #define HEAP_XMAX_LOCK_ONLY     0x0080  /* xmax, if valid, is only a locker */
   

     /* xmax is a shared locker */
   

    #define HEAP_XMAX_SHR_LOCK  (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK)
   

    #define HEAP_LOCK_MASK  (HEAP_XMAX_SHR_LOCK | HEAP_XMAX_EXCL_LOCK | \
   

                             HEAP_XMAX_KEYSHR_LOCK)
   

    #define HEAP_XMIN_COMMITTED     0x0100  /* t_xmin committed */
   

    #define HEAP_XMIN_INVALID       0x0200  /* t_xmin invalid/aborted */
   

    #define HEAP_XMIN_FROZEN        (HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID)
   

    #define HEAP_XMAX_COMMITTED     0x0400  /* t_xmax committed */
   

    #define HEAP_XMAX_INVALID       0x0800  /* t_xmax invalid/aborted */
   

    #define HEAP_XMAX_IS_MULTI      0x1000  /* t_xmax is a MultiXactId */
   

    #define HEAP_UPDATED            0x2000  /* this is UPDATEd version of row */
   

    #define HEAP_MOVED_OFF          0x4000  /* moved to another place by pre-9.0
   

t_hoff： HeapTupleHeaderData长度，如果有Oid会增加4，但由于受到对齐的影响，会增加8。
t_bits：具体数据，可以参照 PostgreSQL的基础数据类型分析记录。

透过pageinspect了解PostgreSQl中的page

热门文章

最新文章

相关课程

相关电子书

相关实验场景

推荐镜像

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件