postgresql 定时收集表和索引统计信息

本文涉及的产品
云原生数据库 PolarDB MySQL 版,通用型 2核4GB 50GB
云原生数据库 PolarDB PostgreSQL 版,标准版 2核4GB 50GB
简介: --由于pg中表和索引的信息收集都是基于时间点的,对于以往的信息无法与现在的信息进行对比,故写下此工具进行统计信息收集--创建数据信息的schemacreate schema db_stat;--创建收集信息的基础表create table db_stat.
--由于pg中表和索引的信息收集都是基于时间点的,对于以往的信息无法与现在的信息进行对比,故写下此工具进行统计信息收集

--创建数据信息的schema
create schema db_stat;
--创建收集信息的基础表
create table db_stat.snapshot_pg_stat_all_indexes
(relid int,indexrelid int,schemaname varchar(200),relname varchar(550),indexrelname varchar(550),idx_scan bigint,idx_tup_read bigint,idx_tup_fetch bigint,
snapid int,snap_create_time timestamp,host_ip cidr,host_port int,host_type varchar(20),comment varchar(500));
create index idx_stat_indexe_relname_indexrelname_createtime on db_stat.snapshot_pg_stat_all_indexes(relname varchar_pattern_ops,indexrelname varchar_pattern_ops,snap_create_time);
create index idx_stat_indexe_snapid on db_stat.snapshot_pg_stat_all_indexes(snapid);

create table db_stat.snapshot_pg_stat_all_tables
(relid int,schemaname varchar(200),relname varchar(550),seq_scan bigint,seq_tup_read bigint,idx_scan bigint,idx_tup_fetch bigint,n_tup_ins bigint,n_tup_upd bigint,
n_tup_del bigint,n_tup_hot_upd bigint,n_live_tup bigint,n_dead_tup bigint,last_vacuum timestamp,last_autovacuum timestamp,last_analyze timestamp,last_autoanalyze timestamp,vacuum_count bigint,autovacuum_count bigint,analyze_count bigint,autoanalyze_count bigint,
snapid int,snap_create_time timestamp,host_ip cidr,host_port int,host_type varchar(20),comment varchar(500));
create index idx_stat_table_relname_createtime on db_stat.snapshot_pg_stat_all_tables(relname varchar_pattern_ops,snap_create_time);
create index idx_stat_table_snapid on db_stat.snapshot_pg_stat_all_tables(snapid);

create table db_stat.snapshot_pg_statio_all_indexes
(relid int,indexrelid int,schemaname varchar(200),relname varchar(550),indexrelname varchar(550),idx_blks_read bigint,idx_blks_hit bigint,
snapid int,snap_create_time timestamp,host_ip cidr,host_port int,host_type varchar(20),comment varchar(500));
create index idx_statio_indexe_relname_indexrelname_createtime on db_stat.snapshot_pg_statio_all_indexes(relname varchar_pattern_ops,indexrelname varchar_pattern_ops,snap_create_time);
create index idx_statio_indexe_snapid on db_stat.snapshot_pg_statio_all_indexes(snapid);
create table db_stat.snapshot_pg_statio_all_tables
(relid int,schemaname varchar(200),relname varchar(550),heap_blks_read bigint,heap_blks_hit bigint,idx_blks_read bigint,idx_blks_hit bigint,toast_blks_read bigint,toast_blks_hit bigint,
tidx_blks_read bigint,tidx_blks_hit bigint,
snapid int,snap_create_time timestamp,host_ip cidr,host_port int,host_type varchar(20),comment varchar(500));
create index idx_statio_table_relname_createtime on db_stat.snapshot_pg_statio_all_tables(relname varchar_pattern_ops,snap_create_time);
create index idx_statio_table_snapid on db_stat.snapshot_pg_statio_all_tables(snapid);


--创建快照的序列
create sequence db_stat.seq_snapshot minvalue 1 maxvalue 99999999999999;

--每收集完信息之后,对时间,主机列等进行填充
create or replace function db_stat.process_snapshot_table(in i_host_ip cidr,in i_host_port int,in i_host_type varchar,in i_comment varchar default '') returns int as $$
declare
v_snapid int;
_detail text;
_hint text;
_message text;
begin
select nextval('db_stat.seq_snapshot') into v_snapid;
update db_stat.snapshot_pg_stat_all_indexes set snapid=v_snapid,snap_create_time=now(),host_ip=i_host_ip,host_port=i_host_port,host_type=i_host_type,comment=i_comment where snapid is null;
update db_stat.snapshot_pg_stat_all_tables set snapid=v_snapid,snap_create_time=now(),host_ip=i_host_ip,host_port=i_host_port,host_type=i_host_type,comment=i_comment where snapid is null;
update db_stat.snapshot_pg_statio_all_indexes set snapid=v_snapid,snap_create_time=now(),host_ip=i_host_ip,host_port=i_host_port,host_type=i_host_type,comment=i_comment where snapid is null;
update db_stat.snapshot_pg_statio_all_tables set snapid=v_snapid,snap_create_time=now(),host_ip=i_host_ip,host_port=i_host_port,host_type=i_host_type,comment=i_comment where snapid is null;
-- 返回值 1 代表成功,0 代表失败
return 1;
EXCEPTION WHEN others then
GET STACKED DIAGNOSTICS
_message = message_text,
_detail = pg_exception_detail,
_hint = pg_exception_hint;
raise notice 'message: %, detail: %, hint: %', _message, _detail, _hint;
return 0;
end;  
$$ language plpgsql;


--收动进行信息采集,测试用
INSERT INTO db_stat.snapshot_pg_stat_all_indexes(relid ,indexrelid ,schemaname ,relname ,indexrelname,idx_scan ,idx_tup_read,idx_tup_fetch)
SELECT relid ,
       indexrelid ,
       schemaname ,
       relname ,
       indexrelname,
       idx_scan ,
       idx_tup_read,
       idx_tup_fetch
FROM pg_stat_all_indexes;

INSERT INTO db_stat.snapshot_pg_stat_all_tables(relid ,schemaname ,relname ,seq_scan ,seq_tup_read ,idx_scan ,idx_tup_fetch ,n_tup_ins ,n_tup_upd ,n_tup_del ,n_tup_hot_upd ,n_live_tup ,n_dead_tup ,last_vacuum ,last_autovacuum ,last_analyze ,last_autoanalyze ,vacuum_count ,autovacuum_count ,analyze_count ,autoanalyze_count)
SELECT relid ,
       schemaname ,
       relname ,
       seq_scan ,
       seq_tup_read ,
       idx_scan ,
       idx_tup_fetch ,
       n_tup_ins ,
       n_tup_upd ,
       n_tup_del ,
       n_tup_hot_upd ,
       n_live_tup ,
       n_dead_tup ,
       last_vacuum ,
       last_autovacuum ,
       last_analyze ,
       last_autoanalyze ,
       vacuum_count ,
       autovacuum_count ,
       analyze_count ,
       autoanalyze_count
FROM pg_stat_all_tables;


INSERT INTO db_stat.snapshot_pg_statio_all_indexes(relid ,indexrelid ,schemaname ,relname ,indexrelname ,idx_blks_read ,idx_blks_hit)
SELECT relid ,
       indexrelid ,
       schemaname ,
       relname ,
       indexrelname ,
       idx_blks_read ,
       idx_blks_hit
FROM pg_statio_all_indexes;


INSERT INTO db_stat.snapshot_pg_statio_all_tables(relid ,schemaname ,relname ,heap_blks_read ,heap_blks_hit ,idx_blks_read ,idx_blks_hit ,toast_blks_read ,toast_blks_hit ,tidx_blks_read ,tidx_blks_hit)
SELECT relid ,
       schemaname ,
       relname ,
       heap_blks_read ,
       heap_blks_hit ,
       idx_blks_read ,
       idx_blks_hit ,
       toast_blks_read ,
       toast_blks_hit ,
       tidx_blks_read ,
       tidx_blks_hit
FROM pg_statio_all_tables;

--
select db_stat.process_snapshot_table('192.168.174.10',5432,'MASTER','');

--创建一个shell脚本,每天通过定时任务进行信息采集
cat snap_stat.sh

#!/bin/sh
source ~/.bash_profile
source /etc/profile
PSQL="psql"


help_msg (){
        echo ""
        echo "Usage:"
        echo "  -f              要输出结果的文件,如果为null,则默认为/tmp/snapshot_pg_stat.log"
        echo "  -u              数据库连接用户名,如果为null,则为postgresql默认"
        echo "  -d              连接的数据库名,如果为null,则为postgresql默认"
        echo "  -H              数据库的主机ip,如果为null,则为postgresql默认"
        echo "  -p              数据库的端口,如果为null,则为postgresql默认"
        echo "  -m              数据库的类型,MASTER为主,SLAVE为从"
        echo ""
        exit 0
}

# end functions

while getopts "f:u:d:H:p:m:" flag
do
        case $flag in
                f) FILENAME=$OPTARG
                        ;;
                u) USERNAME=$OPTARG
                        ;;
                d) DATABASE=$OPTARG
                        ;;
                H) HOST=$OPTARG
                        ;;
                p) PORT=$OPTARG
                        ;;
                m) DATABASE_TYPE=$OPTARG
                        ;;        
                \?|h) help_msg
                        ;;
        esac
done


if [ $USERNAME"x" == "x" ]
then
USERNAME=postgres
fi

if [ $DATABASE"x" == "x" ]
then
DATABASE=postgres
fi

if [ $HOST"x" == "x" ]
then
help_msg
fi

if [ $PORT"x" == "x" ]
then
PORT=5432
fi

if [ $DATABASE_TYPE"x" == "x" ]
then
DATABASE_TYPE=MASTER
fi

if [ $FILENAME"x" == "x" ]
then
FILENAME=/tmp/snapshot_pg_stat.log
fi

OUTPUT_FILENAME=/tmp/snapshot_pg_stat.csv

echo "" > $FILENAME


if [ ! -f $FILENAME ]
        then
        touch $FILENAME
else
    printf "" | tee -a $FILENAME
fi


echo "脚本于时间 `date "+%Y-%m-%d %H:%M:%S"` 开始执行" >> $FILENAME

echo "脚本开始于`date "+%Y-%m-%d %H:%M:%S"` 处理pg_stat_all_indexes表" >> $FILENAME
$PSQL -p $PORT -U $USERNAME -d $DATABASE -c "copy (select relid ,indexrelid ,schemaname ,relname ,indexrelname,idx_scan ,idx_tup_read,idx_tup_fetch from pg_stat_all_indexes) to '$OUTPUT_FILENAME' with csv"
$PSQL -p 5432 -U postgres -d postgres -h 192.168.174.11 -c "\copy db_stat.snapshot_pg_stat_all_indexes(relid ,indexrelid ,schemaname ,relname ,indexrelname,idx_scan ,idx_tup_read,idx_tup_fetch) from '$OUTPUT_FILENAME' with csv"
echo "脚本开始于`date "+%Y-%m-%d %H:%M:%S"` 处理pg_stat_all_tables表" >> $FILENAME
$PSQL -p $PORT -U $USERNAME -d $DATABASE -c "copy (select relid ,schemaname ,relname ,seq_scan ,seq_tup_read ,idx_scan ,idx_tup_fetch ,n_tup_ins ,n_tup_upd ,n_tup_del ,n_tup_hot_upd ,n_live_tup ,n_dead_tup ,last_vacuum ,last_autovacuum ,last_analyze ,last_autoanalyze ,vacuum_count ,autovacuum_count ,analyze_count ,autoanalyze_count from pg_stat_all_tables) to '$OUTPUT_FILENAME' with csv"
$PSQL -p 5432 -U postgres -d postgres -h 192.168.174.11 -c "\copy db_stat.snapshot_pg_stat_all_tables(relid ,schemaname ,relname ,seq_scan ,seq_tup_read ,idx_scan ,idx_tup_fetch ,n_tup_ins ,n_tup_upd ,n_tup_del ,n_tup_hot_upd ,n_live_tup ,n_dead_tup ,last_vacuum ,last_autovacuum ,last_analyze ,last_autoanalyze ,vacuum_count ,autovacuum_count ,analyze_count ,autoanalyze_count) from '$OUTPUT_FILENAME' with csv"
echo "脚本开始于`date "+%Y-%m-%d %H:%M:%S"` 处理pg_statio_all_indexes表" >> $FILENAME
$PSQL -p $PORT -U $USERNAME -d $DATABASE -c "copy (select relid ,indexrelid ,schemaname ,relname ,indexrelname ,idx_blks_read ,idx_blks_hit from pg_statio_all_indexes) to '$OUTPUT_FILENAME' with csv"
$PSQL -p 5432 -U postgres -d postgres -h 192.168.174.11 -c "\copy db_stat.snapshot_pg_statio_all_indexes(relid ,indexrelid ,schemaname ,relname ,indexrelname ,idx_blks_read ,idx_blks_hit) from '$OUTPUT_FILENAME' with csv"
echo "脚本开始于`date "+%Y-%m-%d %H:%M:%S"` 处理pg_statio_all_tables表" >> $FILENAME
$PSQL -p $PORT -U $USERNAME -d $DATABASE -c "copy (select relid ,schemaname ,relname ,heap_blks_read ,heap_blks_hit ,idx_blks_read ,idx_blks_hit ,toast_blks_read ,toast_blks_hit ,tidx_blks_read ,tidx_blks_hit from pg_statio_all_tables) to '$OUTPUT_FILENAME' with csv"
$PSQL -p 5432 -U postgres -d postgres -h 192.168.174.11 -c "\copy db_stat.snapshot_pg_statio_all_tables(relid ,schemaname ,relname ,heap_blks_read ,heap_blks_hit ,idx_blks_read ,idx_blks_hit ,toast_blks_read ,toast_blks_hit ,tidx_blks_read ,tidx_blks_hit) from '$OUTPUT_FILENAME' with csv"

$PSQL -p 5432 -U postgres -d postgres -h 192.168.174.11 -c "select db_stat.process_snapshot_table('$HOST',$PORT,'$DATABASE_TYPE','database stat snapshot');"

echo "############################################################################################" >> $FILENAME
echo "脚本于时间 `date "+%Y-%m-%d %H:%M:%S"` 结束执行" >> $FILENAME




--清空数据表
truncate table db_stat.snapshot_pg_stat_all_indexes ;
truncate table db_stat.snapshot_pg_stat_all_tables ;
truncate table db_stat.snapshot_pg_statio_all_indexes ;
truncate table db_stat.snapshot_pg_statio_all_tables ;

--手动执行shell脚本
./snap_stat.sh -d mydb -p 5432 -m SLAVE -u postgres -H 192.168.174.10
--定时任务,每天8点开始执行
8 8 * * * /db/pgsql/snap_stat.sh -d mydb -p 5435 -m SLAVE -u postgres -H 192.168.174.10



--查看使用比较少的索引
select * 
      from (
           SELECT t.relname,
                  t.indexrelname ,
                  max(idx_scan)-min(idx_scan) AS diff_idx_scan,
                  max(idx_tup_read)-min(idx_tup_read) AS diff_idx_tup_read
           FROM db_stat.snapshot_pg_stat_all_indexes t
           --WHERE snap_create_time BETWEEN '2015-12-11' AND '2016-03-11'
           GROUP BY t.relname, t.indexrelname) t1
order by diff_idx_scan,relname,indexrelname ;

--查看索引使用率趋势图
select relname,
       indexrelname,
       snap_day,
       diff_idx_scan,
       case when sum(diff_idx_scan) over w1 >0 then  diff_idx_scan*100/sum(diff_idx_scan) over w1 else 0 end as  diff_idx_scan_percent,
       diff_idx_tup_read,
       case when sum(diff_idx_tup_read) over w1 >0 then  diff_idx_tup_read*100/sum(diff_idx_tup_read) over w1  else 0 end as diff_idx_tup_read_percent 
from (
      SELECT t.relname,
             t.indexrelname,
             date_trunc('hour', snap_create_time) snap_day,
             t.idx_scan-lag(t.idx_scan,1) over w AS diff_idx_scan,
             t.idx_tup_read - lag(t.idx_tup_read,1) over w AS diff_idx_tup_read
      from db_stat.snapshot_pg_stat_all_indexes t 
      --where indexrelname in ('','')
      WINDOW w AS (PARTITION BY t.relname,t.indexrelname ORDER BY date_trunc('hour', t.snap_create_time))
) t1 
where diff_idx_scan is not null
WINDOW w1 as (PARTITION BY t1.relname,t1.indexrelname)
order by relname,indexrelname,snap_day;

相关实践学习
使用PolarDB和ECS搭建门户网站
本场景主要介绍基于PolarDB和ECS实现搭建门户网站。
阿里云数据库产品家族及特性
阿里云智能数据库产品团队一直致力于不断健全产品体系,提升产品性能,打磨产品功能,从而帮助客户实现更加极致的弹性能力、具备更强的扩展能力、并利用云设施进一步降低企业成本。以云原生+分布式为核心技术抓手,打造以自研的在线事务型(OLTP)数据库Polar DB和在线分析型(OLAP)数据库Analytic DB为代表的新一代企业级云原生数据库产品体系, 结合NoSQL数据库、数据库生态工具、云原生智能化数据库管控平台,为阿里巴巴经济体以及各个行业的企业客户和开发者提供从公共云到混合云再到私有云的完整解决方案,提供基于云基础设施进行数据从处理、到存储、再到计算与分析的一体化解决方案。本节课带你了解阿里云数据库产品家族及特性。
目录
相关文章
|
8月前
|
SQL 关系型数据库 PostgreSQL
把PostgreSQL的表导入SQLite
把PostgreSQL的表导入SQLite
119 0
|
5月前
|
监控 关系型数据库 数据库
PostgreSQL的索引优化策略?
【8月更文挑战第26天】PostgreSQL的索引优化策略?
128 1
|
5月前
|
SQL 关系型数据库 PostgreSQL
PostgreSQL 如何通过身份证号码进行年龄段的统计?
【8月更文挑战第20天】PostgreSQL 如何通过身份证号码进行年龄段的统计?
580 2
|
5月前
|
SQL 关系型数据库 MySQL
SQL Server、MySQL、PostgreSQL:主流数据库SQL语法异同比较——深入探讨数据类型、分页查询、表创建与数据插入、函数和索引等关键语法差异,为跨数据库开发提供实用指导
【8月更文挑战第31天】SQL Server、MySQL和PostgreSQL是当今最流行的关系型数据库管理系统,均使用SQL作为查询语言,但在语法和功能实现上存在差异。本文将比较它们在数据类型、分页查询、创建和插入数据以及函数和索引等方面的异同,帮助开发者更好地理解和使用这些数据库。尽管它们共用SQL语言,但每个系统都有独特的语法规则,了解这些差异有助于提升开发效率和项目成功率。
598 0
|
6月前
|
存储 关系型数据库 分布式数据库
PolarDB产品使用问题之如何查看PolarDB for PostgreSQL的备份信息
PolarDB产品使用合集涵盖了从创建与管理、数据管理、性能优化与诊断、安全与合规到生态与集成、运维与支持等全方位的功能和服务,旨在帮助企业轻松构建高可用、高性能且易于管理的数据库环境,满足不同业务场景的需求。用户可以通过阿里云控制台、API、SDK等方式便捷地使用这些功能,实现数据库的高效运维与持续优化。
|
5月前
|
关系型数据库 数据库 PostgreSQL
PostgreSQL索引维护看完这篇就够了
PostgreSQL索引维护看完这篇就够了
390 0
|
6月前
|
SQL 监控 关系型数据库
实时计算 Flink版操作报错合集之在设置监控PostgreSQL数据库时,将wal_level设置为logical,出现一些表更新和删除操作报错,怎么办
在使用实时计算Flink版过程中,可能会遇到各种错误,了解这些错误的原因及解决方法对于高效排错至关重要。针对具体问题,查看Flink的日志是关键,它们通常会提供更详细的错误信息和堆栈跟踪,有助于定位问题。此外,Flink社区文档和官方论坛也是寻求帮助的好去处。以下是一些常见的操作报错及其可能的原因与解决策略。
|
7月前
|
关系型数据库 PostgreSQL
postgresql如何将没有关联关系的两张表的字段合并
【6月更文挑战第2天】postgresql如何将没有关联关系的两张表的字段合并
167 3
|
7月前
|
SQL 关系型数据库 数据库连接
ClickHouse(20)ClickHouse集成PostgreSQL表引擎详细解析
ClickHouse的PostgreSQL引擎允许直接查询和插入远程PostgreSQL服务器的数据。`CREATE TABLE`语句示例展示了如何定义这样的表,包括服务器信息和权限。查询在只读事务中执行,简单筛选在PostgreSQL端处理,复杂操作在ClickHouse端完成。`INSERT`通过`COPY`命令在PostgreSQL事务中进行。注意,数组类型的处理和Nullable列的行为。示例展示了如何从PostgreSQL到ClickHouse同步数据。一系列的文章详细解释了ClickHouse的各种特性和表引擎。
213 0
|
8月前
|
关系型数据库 PostgreSQL
postgresql将没有关联关系的两张表合并成一张
【5月更文挑战第4天】postgresql将没有关联关系的两张表合并成一张
271 5