快速上手并跑通AnalyticDB PostgreSQL版TPC-H测试
1. 生成测试数据
- SSH进入ECS实例,执行如下语句,下载TPC-H dbgen代码到数据盘并编译。
wget https://github.com/electrum/tpch-dbgen/archive/refs/heads/master.zip yum install -y unzip zip unzip master.zip cd tpch-dbgen-master/ echo '#define EOL_HANDLING 1' >> config.h # 消除生成数据末尾的'|' make ./dbgen --help
- 执行如下语句,生成1TB测试数据集。建议使用分片文件(分片数与AnalyticDB PostgreSQL版实例节点数量一致)。
后台运行dbgen程序时间较长,可使用ps -fHU $USER | grep dbgen命令查看进度,确保dbgen程序运行完成。
for((i=1;i<=32;i++)); do ./dbgen -s 1000 -S $i -C 32 -f & done
2. 创建表
- 登录AnalyticDB PostgreSQL版控制台,单击实例名称。
- 左侧菜单选择帐号管理,创建初始帐号并设置密码。
- 左侧菜单选择数据安全性,单击添加白名单分组,将ECS实例添加到白名单。
- SSH进入ECS实例,下载psql客户端(下载链接:ADBPG_client_package_el7)并安装。
wget http://docs-aliyun.cn-hangzhou.oss.aliyun-inc.com/assets/attach/181125/cn_zh/1598426198114/adbpg_client_package.el7.x86_64.tar.gz tar -xzvf adbpg_client_package.el7.x86_64.tar.gz
- 通过psql连接数据库。
cd adbpg_client_package/bin ./psql -h<连接地址> -U<用户名> <数据库>
<连接地址>为下图所示内网地址,<用户名>为刚创建的初始账号,<数据库>与初始账号同名。
此时需输入密码,可编辑~/.pgpass保存后进行免密连接。保存后执行chmod 0600 ~/.pgpass设置文件权限。
<连接地址>:5432:<用户名>:<数据库>:<密码>
- 执行如下语句创建8张表。
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL, N_NAME CHAR(25) NOT NULL, N_REGIONKEY INTEGER NOT NULL, N_COMMENT VARCHAR(152) ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED Replicated ; CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL, R_NAME CHAR(25) NOT NULL, R_COMMENT VARCHAR(152) ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED Replicated ; CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL, P_NAME VARCHAR(55) NOT NULL, P_MFGR CHAR(25) NOT NULL, P_BRAND CHAR(10) NOT NULL, P_TYPE VARCHAR(25) NOT NULL, P_SIZE INTEGER NOT NULL, P_CONTAINER CHAR(10) NOT NULL, P_RETAILPRICE DECIMAL(15,2) NOT NULL, P_COMMENT VARCHAR(23) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (P_PARTKEY) ; CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL, S_NAME CHAR(25) NOT NULL, S_ADDRESS VARCHAR(40) NOT NULL, S_NATIONKEY INTEGER NOT NULL, S_PHONE CHAR(15) NOT NULL, S_ACCTBAL DECIMAL(15,2) NOT NULL, S_COMMENT VARCHAR(101) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (S_SUPPKEY) ; CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL, PS_SUPPKEY INTEGER NOT NULL, PS_AVAILQTY INTEGER NOT NULL, PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, PS_COMMENT VARCHAR(199) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (PS_PARTKEY) ; CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL, C_NAME VARCHAR(25) NOT NULL, C_ADDRESS VARCHAR(40) NOT NULL, C_NATIONKEY INTEGER NOT NULL, C_PHONE CHAR(15) NOT NULL, C_ACCTBAL DECIMAL(15,2) NOT NULL, C_MKTSEGMENT CHAR(10) NOT NULL, C_COMMENT VARCHAR(117) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (C_CUSTKEY) ; CREATE TABLE ORDERS ( O_ORDERKEY BIGINT NOT NULL, O_CUSTKEY INTEGER NOT NULL, O_ORDERSTATUS 'char' NOT NULL, O_TOTALPRICE DECIMAL(15,2) NOT NULL, O_ORDERDATE DATE NOT NULL, O_ORDERPRIORITY CHAR(15) NOT NULL, O_CLERK CHAR(15) NOT NULL, O_SHIPPRIORITY INTEGER NOT NULL, O_COMMENT VARCHAR(79) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (O_ORDERKEY) ORDER BY(O_ORDERDATE) ; CREATE TABLE LINEITEM ( L_ORDERKEY BIGINT NOT NULL, L_PARTKEY INTEGER NOT NULL, L_SUPPKEY INTEGER NOT NULL, L_LINENUMBER INTEGER NOT NULL, L_QUANTITY DECIMAL(15,2) NOT NULL, L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, L_DISCOUNT DECIMAL(15,2) NOT NULL, L_TAX DECIMAL(15,2) NOT NULL, L_RETURNFLAG 'char' NOT NULL, L_LINESTATUS 'char' NOT NULL, L_SHIPDATE DATE NOT NULL, L_COMMITDATE DATE NOT NULL, L_RECEIPTDATE DATE NOT NULL, L_SHIPINSTRUCT CHAR(25) NOT NULL, L_SHIPMODE CHAR(10) NOT NULL, L_COMMENT VARCHAR(44) NOT NULL ) WITH (APPENDONLY=TRUE, ORIENTATION=COLUMN, COMPRESSTYPE=LZ4, COMPRESSLEVEL=9) DISTRIBUTED BY (L_ORDERKEY) ORDER BY(L_SHIPDATE) ;
3. 导入数据
- 执行如下语句,将nation和region两张小表通过COPY本地文件的方式进行导入。
yinc2=> \copy nation from '<tbl文件的实际路径>' DELIMITER '|'; COPY 25 yinc2=> \copy region from '<tbl文件的实际路径>' DELIMITER '|'; COPY 5
- 执行如下语句下载ossutil。
wget http://gosspublic.alicdn.com/ossutil/1.7.3/ossutil64
- 执行如下语句修改文件执行权限。
chmod 755 ossutil64
- 执行如下语句,依次将其余6个表的.tbl文件通过ossutil上传到OSS。
ls <table_name>.tbl* | while read line; do ~/ossutil64 -e <EndPoint> -i <AccessKey ID> -k <Access Key Secret> cp $line oss://<oss bucket>/<目录>/ & done
- 上传完毕后,执行如下语句,将数据从OSS导入表。
COPY customer FROM 'oss://<oss bucket>/<目录>/customer.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' 'null' '' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ; COPY lineitem FROM 'oss://<oss bucket>/<目录>/lineitem.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' 'null' '' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ; -- lineitem表定义了排序列,数据导入完成后可对数据进行聚簇排序 sort lineitem; COPY orders FROM 'oss://<oss bucket>/<目录>/orders.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' 'null' '' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ; -- orders表定义了排序列,数据导入完成后可对数据进行聚簇排序 sort orders; COPY part FROM 'oss://<oss bucket>/<目录>/part.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' 'null' '' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ; COPY supplier FROM 'oss://<oss bucket>/<目录>/supplier.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' 'null' '' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ; COPY partsupp FROM 'oss://<oss bucket>/<目录>/partsupp.tbl' ACCESS_KEY_ID '<AccessKey ID>' SECRET_ACCESS_KEY '<Access Key Secret>' FORMAT AS text 'delimiter' '|' ENDPOINT '<EndPoint>' FDW 'oss_fdw' ;
4. 执行查询
- 方法一:
1)下载22条查询并解压到~/tpch_query目录。下载链接:tpch_query.tar.gz。
2)使用如下query.sh shell脚本测试,执行全部查询,并记录每条耗时和总耗时。
#!/bin/bash total_cost=0 for i in {1..22} do echo 'begin run Q${i}, tpch_query/q$i.sql , `date`' begin_time=`date +%s.%N` ./psql ${实例连接地址} -p ${端口号} -U ${数据库用户} -f ~/tpch_query/q${i}.sql > ~/log/log_q${i}.out rc=$? end_time=`date +%s.%N` cost=`echo '$end_time-$begin_time'|bc` total_cost=`echo '$total_cost+$cost'|bc` if [ $rc -ne 0 ] ; then printf 'run Q%s fail, cost: %.2f, totalCost: %.2f, `date`\n' $i $cost $total_cost else printf 'run Q%s succ, cost: %.2f, totalCost: %.2f, `date`\n' $i $cost $total_cost fi done
3)后台运行query.sh(确保~/.pgpass已设置)。
nohup bash ~/query.sh > /tmp/tpch.log &
4)查看结果。
cat /tmp/tpch.log
- 方法二:可以通过psql等其他客户端逐条执行查询SQL。
--创建向量化计算引擎Laser插件 create extension if not exists laser; -- Q1 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= date '1998-12-01' - interval '93 day' group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus; -- Q2 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment from part, supplier, partsupp, nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and p_size = 23 and p_type like '%STEEL' and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' and ps_supplycost = ( select min(ps_supplycost) from partsupp, supplier, nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' ) order by s_acctbal desc, n_name, s_name, p_partkey limit 100; -- Q3 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, o_shippriority from customer, orders, lineitem where c_mktsegment = 'MACHINERY' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < date '1995-03-24' and l_shipdate > date '1995-03-24' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate limit 10; -- Q4 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select o_orderpriority, count(*) as order_count from orders where o_orderdate >= date '1996-08-01' and o_orderdate < date '1996-08-01' + interval '3' month and exists ( select * from lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ) group by o_orderpriority order by o_orderpriority; -- Q6 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select sum(l_extendedprice * l_discount) as revenue from lineitem where l_shipdate >= date '1994-01-01' and l_shipdate < date '1994-01-01' + interval '1' year and l_discount between 0.06 - 0.01 and 0.06 + 0.01 and l_quantity < 24; -- Q7 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select supp_nation, cust_nation, l_year, sum(volume) as revenue from ( select n1.n_name as supp_nation, n2.n_name as cust_nation, extract(year from l_shipdate) as l_year, l_extendedprice * (1 - l_discount) as volume from supplier, lineitem, orders, customer, nation n1, nation n2 where s_suppkey = l_suppkey and o_orderkey = l_orderkey and c_custkey = o_custkey and s_nationkey = n1.n_nationkey and c_nationkey = n2.n_nationkey and ( (n1.n_name = 'JORDAN' and n2.n_name = 'INDONESIA') or (n1.n_name = 'INDONESIA' and n2.n_name = 'JORDAN') ) and l_shipdate between date '1995-01-01' and date '1996-12-31' ) as shipping group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; -- Q8 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select o_year, sum(case when nation = 'INDONESIA' then volume else 0 end) / sum(volume) as mkt_share from ( select extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from part, supplier, lineitem, orders, customer, nation n1, nation n2, region where p_partkey = l_partkey and s_suppkey = l_suppkey and l_orderkey = o_orderkey and o_custkey = c_custkey and c_nationkey = n1.n_nationkey and n1.n_regionkey = r_regionkey and r_name = 'ASIA' and s_nationkey = n2.n_nationkey and o_orderdate between date '1995-01-01' and date '1996-12-31' and p_type = 'STANDARD BRUSHED BRASS' ) as all_nations group by o_year order by o_year; -- Q9 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select nation, o_year, sum(amount) as sum_profit from ( select n_name as nation, extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from part, supplier, lineitem, partsupp, orders, nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey and p_name like '%chartreuse%' ) as profit group by nation, o_year order by nation, o_year desc; -- Q10 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select c_custkey, c_name, sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer, orders, lineitem, nation where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= date '1994-08-01' and o_orderdate < date '1994-08-01' + interval '3' month and l_returnflag = 'R' and c_nationkey = n_nationkey group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by revenue desc limit 20; -- Q11 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'INDONESIA' group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001000000 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'INDONESIA' ) order by value desc; -- Q12 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select l_shipmode, sum(case when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and l_shipmode in ('REG AIR', 'TRUCK') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= date '1994-01-01' and l_receiptdate < date '1994-01-01' + interval '1' year group by l_shipmode order by l_shipmode; -- Q13 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select c_count, count(*) as custdist from ( select c_custkey, count(o_orderkey) from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%pending%requests%' group by c_custkey ) as c_orders (c_custkey, c_count) group by c_count order by custdist desc, c_count desc; -- Q14 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= date '1994-11-01' and l_shipdate < date '1994-11-01' + interval '1' month; -- Q15 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; create view revenue0 (supplier_no, total_revenue) as select l_suppkey, sum(l_extendedprice * (1 - l_discount)) from lineitem where l_shipdate >= date '1997-10-01' and l_shipdate < date '1997-10-01' + interval '3' month group by l_suppkey; select s_suppkey, s_name, s_address, s_phone, total_revenue from supplier, revenue0 where s_suppkey = supplier_no and total_revenue = ( select max(total_revenue) from revenue0 ) order by s_suppkey; drop view revenue0; -- Q16 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt from partsupp, part where p_partkey = ps_partkey and p_brand <> 'Brand#44' and p_type not like 'SMALL BURNISHED%' and p_size in (36, 27, 34, 45, 11, 6, 25, 16) and ps_suppkey not in ( select s_suppkey from supplier where s_comment like '%Customer%Complaints%' ) group by p_brand, p_type, p_size order by supplier_cnt desc, p_brand, p_type, p_size; -- Q17 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#42' and p_container = 'JUMBO PACK' and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey ); -- Q18 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem where o_orderkey in ( select l_orderkey from lineitem group by l_orderkey having sum(l_quantity) > 312 ) and c_custkey = o_custkey and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100; -- Q19 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, part where ( p_partkey = l_partkey and p_brand = 'Brand#43' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l_quantity >= 5 and l_quantity <= 5 + 10 and p_size between 1 and 5 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#45' and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l_quantity >= 12 and l_quantity <= 12 + 10 and p_size between 1 and 10 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#11' and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l_quantity >= 24 and l_quantity <= 24 + 10 and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ); -- Q20 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select s_name, s_address from supplier, nation where s_suppkey in ( select ps_suppkey from partsupp where ps_partkey in ( select p_partkey from part where p_name like 'magenta%' ) and ps_availqty > ( select 0.5 * sum(l_quantity) from lineitem where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date '1996-01-01' and l_shipdate < date '1996-01-01' + interval '1' year ) ) and s_nationkey = n_nationkey and n_name = 'RUSSIA' order by s_name; -- Q21 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select s_name, count(*) as numwait from supplier, lineitem l1, orders, nation where s_suppkey = l1.l_suppkey and o_orderkey = l1.l_orderkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from lineitem l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from lineitem l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'MOZAMBIQUE' group by s_name order by numwait desc, s_name limit 100; -- Q22 -- 开启向量加速引擎,并设置开关变量为on set laser.enable = on; select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal from ( select substring(c_phone from 1 for 2) as cntrycode, c_acctbal from customer where substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > ( select avg(c_acctbal) from customer where c_acctbal > 0.00 and substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17') ) and not exists ( select * from orders where o_custkey = c_custkey ) ) as custsale group by cntrycode order by cntrycode;
实验链接:https://developer.aliyun.com/adc/scenario/9a4e57e135c9427a9feba3da3268a1cd