标签
PostgreSQL , 临时表 , 全局临时表 , unlogged table , advisory lock
背景
PostgreSQL 暂时不支持类似Oracle风格的临时表。
PostgreSQL 临时表结构是会话级别的,而在Oracle中,临时表的结构是全局有效的,只是数据会话之间独立。
Oracle 全局临时表可以指定SCHEMA,而PostgreSQL的临时表不能指定SCHEMA,自动在temp临时SCHEMA中创建。
细节差异详见:
https://www.codeproject.com/Articles/1176045/Oracle-style-global-temporary-tables-for-PostgreSQL
为了让PostgreSQL临时表的使用与Oracle兼容,除了内核层面兼容之外,目前只能在使用时注意。
CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name ( [
{ column_name data_type [ COLLATE collation ] [ column_constraint [ ... ] ]
| table_constraint
| LIKE source_table [ like_option ... ] }
[, ... ]
] )
...........
[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
-- 提交后,保留记录 PRESERVE ROWS
-- 提交后,清空临时表记录 DELETE ROWS
-- 提交后,删除临时表 DROP
用户可以使用以下方式来使用临时表:
方法1(推荐使用)、使用 trigger + inherit 代替临时表 (用户使用形态与ORACLE一致)
思想来自如下,与之不一样的地方是,直接使用的表触发器+继承功能来实现的。
https://www.codeproject.com/Articles/1176045/Oracle-style-global-temporary-tables-for-PostgreSQL
以上这个链接的方法问题:1、不支持truncate, copy。2、使用函数返回所有记录,会有性能问题。3、无法使用游标。4、索引使用比较麻烦。
本文下面的方法没有以上问题(直接TRUNCATE的支持需要打个PATCH, 社区已于2018.12.27后支持https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=1e504f01da11db0181d7b28bb30cb5eeb0767184),所有操作(insert,update,delete,select)与直接使用临时表一样。INSERT性能会有下降(使用本方法88417/s,直接写表1111111/s),一般的使用性能也是足够够的;
对于PG 10以上版本,使用中间表可以增强性能 《PostgreSQL 10.0 preview 功能增强 - 触发器函数内置中间表》
例子
创建一个临时表 stage.abc。
1、创建一个schema,放临时表
create schema IF NOT EXISTS stage;
2、创建表stage.abc
drop table if exists stage.abc;
create table if not exists stage.abc (id int primary key, info text, crt_time timestamp);
3、创建这个"stage.abc表"的触发器,自动基于它创建临时表,并加入它的继承关系
所有PG版本通用,自定义insert before触发器函数
create or replace function public.abc_tg1() returns trigger as $$
declare
begin
-- 如果临时表的定义修改,修改这个触发器的内容,即表名abc,使用你的实际名字替换
-- 注意这里不要加schema.,否则就写入非临时表了。
insert into abc values (new.*);
return null;
-- 第一次插入会插入普通父表,所以创建父表的AFTER触发器,报错,即回到这里处理。
exception when others then
-- 根据临时表的业务需要使用 on commit PRESERVE|DELETE rows
execute format('create temp table if not exists %I (like %I.%I including all) inherits (%I.%I) on commit PRESERVE ROWS',
TG_TABLE_NAME, TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_TABLE_SCHEMA, TG_TABLE_NAME);
-- 如果临时表的定义修改,修改这个触发器的内容,即表名abc,使用你的实际名字替换
-- 注意这里不要加schema.,否则就写入非临时表了。
insert into abc values (new.*);
return null;
end;
$$ language plpgsql strict set client_min_messages = error;
10以后的版本可用批量写入临时表加速,自定义insert before触发器函数
略...
自定义insert after触发器函数,用于加速insert
(避免每次都要执行perform 1 from pg_class where relpersistence='t' and relname=TG_TABLE_NAME and pg_table_is_visible(oid);
)
create or replace function public.abc_tg2() returns trigger as $$
declare
begin
RAISE EXCEPTION 'Do not insert direct to parent persistence table.';
return null;
end;
$$ language plpgsql strict set client_min_messages = error;
4、新建stage.abc的insert before触发器
create trigger tg1 before insert on stage.abc for each row execute function public.abc_tg1();
create trigger tg2 after insert on stage.abc for each row execute function public.abc_tg2();
5、测试,完全OK
insert into stage.abc values (1,'test',now());
insert into stage.abc values (2,'test',now());
postgres=# select tableoid, * from stage.abc;
tableoid | id | info | crt_time
----------+----+------+----------------------------
32224674 | 1 | test | 2018-12-25 09:38:34.252316
32224674 | 2 | test | 2018-12-25 09:38:34.257408
(2 rows)
postgres=# select tableoid, * from only stage.abc;
tableoid | id | info | crt_time
----------+----+------+----------
(0 rows)
postgres=# insert into stage.abc select generate_series(3,10000000);
INSERT 0 0
Time: 113095.297 ms (01:53.095)
postgres=# select count(*) from only stage.abc;
count
-------
0
(1 row)
Time: 0.464 ms
postgres=# select count(*) from stage.abc;
count
----------
10000000
(1 row)
Time: 2109.900 ms (00:02.110)
postgres=# truncate abc;
TRUNCATE TABLE
Time: 149.441 ms
postgres=# insert into abc select generate_series(1,10000000);
INSERT 0 10000000
Time: 9005.758 ms (00:09.006)
postgres=# select 10000000/9.0;
?column?
----------------------
1111111.111111111111
(1 row)
Time: 0.276 ms
postgres=# select 10000000/113.1;
?column?
--------------------
88417.329796640141
(1 row)
Time: 0.287 ms
postgres=# begin;
BEGIN
postgres=# declare a cursor for select * from stage.abc;
DECLARE CURSOR
postgres=# fetch 1 from a;
id | info | crt_time
----+------+----------
2 | |
(1 row)
postgres=# update stage.abc set info='abc',crt_time=now() where CURRENT OF a;
UPDATE 1
postgres=# end;
COMMIT
postgres=# select * from stage.abc where id=2;
id | info | crt_time
----+------+----------------------------
2 | abc | 2018-12-25 14:35:30.043199
(1 row)
postgres=# select * from only stage.abc where id=2;
id | info | crt_time
----+------+----------
(0 rows)
select, delete, update, copy, 游标 均使用正常。
6、后期对临时表加索引,直接操作stage.abc即可。因为我们在创建临时表时,使用了including all子句。
封装成接口函数使用方便。
create or replace function create_temptable(
v_schemaname name,
v_tablename name,
v_on_commit_action text default 'PRESERVE ROWS',
v_tg_schemaname name default 'public'
) returns void as $$
declare
tg_name1 text := 'v'||md5((clock_timestamp()::text||random()::text));
tg_name2 text := 'tg_for_temptable_after_insert_error'; -- 这个函数只需要一个通用的即可
v_sql1 text;
v_sql2 text;
begin
v_sql1 := format($_$
create or replace function %I.%I() returns trigger as $__$ -- v_tg_schemaname, tg_name1
declare
begin
insert into %I values (new.*); -- v_tablename
return null;
exception when others then
execute format('create temp table if not exists %%I (like %%I.%%I including all) inherits (%%I.%%I) on commit %s', -- v_on_commit_action
TG_TABLE_NAME, TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_TABLE_SCHEMA, TG_TABLE_NAME);
insert into %I values (new.*); -- v_tablename
return null;
end;
$__$ language plpgsql strict set client_min_messages = error;
$_$, v_tg_schemaname, tg_name1, v_tablename, v_on_commit_action, v_tablename);
execute v_sql1;
v_sql2 := format($_$
create or replace function %I.%I() returns trigger as $__$ -- v_tg_schemaname, tg_name2
declare
begin
RAISE EXCEPTION 'Do not insert direct to parent persistence table.';
return null;
end;
$__$ language plpgsql strict set client_min_messages = error;
$_$, v_tg_schemaname, tg_name2 );
execute v_sql2;
execute format($_$create trigger tg1 before insert on %I.%I for each row execute function %I.%I();$_$, v_schemaname, v_tablename, v_tg_schemaname, tg_name1);
execute format($_$create trigger tg2 after insert on %I.%I for each row execute function %I.%I();$_$, v_schemaname, v_tablename, v_tg_schemaname, tg_name2);
end;
$$ language plpgsql strict;
使用举例
postgres=# drop table stage.abc;
DROP TABLE
postgres=# create table stage.abc(id int primary key, info text, crt_time timestamp);
CREATE TABLE
postgres=# select create_temptable('stage','abc');
create_temptable
------------------
(1 row)
postgres=# \d+ stage.abc
Table "stage.abc"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
----------+-----------------------------+-----------+----------+---------+----------+--------------+-------------
id | integer | | not null | | plain | |
info | text | | | | extended | |
crt_time | timestamp without time zone | | | | plain | |
Indexes:
"abc_pkey" PRIMARY KEY, btree (id)
Triggers:
tg1 BEFORE INSERT ON stage.abc FOR EACH ROW EXECUTE PROCEDURE v70c22a86a17342eb6cb571349c85274b()
tg2 AFTER INSERT ON stage.abc FOR EACH ROW EXECUTE PROCEDURE tg_for_temptable_after_insert_error()
使用限制
1、truncate暂不支持,请使用delete代替 (或者打patch支持truncate, 社区已支持,如果你使用的是2018.12.27后发布的版本,不需要打PATCH)
postgres=# truncate stage.abc;
ERROR: 0A000: cannot truncate temporary tables of other sessions
LOCATION: truncate_check_rel, tablecmds.c:1743
Time: 0.626 ms
-- delete正常
delete from stage.abc;
修正这个truncate问题的PATCH如下
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index ad8c176793..9dabfc0d36 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1416,6 +1416,19 @@ ExecuteTruncate(TruncateStmt *stmt)
/* find_all_inheritors already got lock */
rel = heap_open(childrelid, NoLock);
+
+ /*
+ * It is possible that the parent table has children that are temp
+ * tables of other backends. We cannot safely access such tables
+ * (because of buffering issues), and the best thing to do seems
+ * to be to silently ignore them.
+ */
+ if (RELATION_IS_OTHER_TEMP(newrelation))
+ {
+ heap_close(rel);
+ continue;
+ }
+
truncate_check_rel(RelationGetRelid(rel), rel->rd_rel);
truncate_check_activity(rel);
2、copy to 不支持 (copy from 正常)
原因:copy时,不读取INHERIT表的内容。
postgres=# copy stage.abc to '/tmp/abc';
COPY 0
所以,直接使用临时表名可以COPY TO
postgres=# copy abc to '/tmp/abc';
COPY 1000
postgres=# copy stage.abc from '/tmp/abc';
COPY 0
postgres=# select count(*) from stage.abc;
count
-------
1000
(1 row)
3、由于使用了insert触发器,INSERT时,数据都写入了继承的临时表,所以returning语法无法获得返回行数,记录。
postgres=# insert into stage.abc values (-1) returning *;
id | info | crt_time
----+------+----------
(0 rows)
INSERT 0 0
4、如果需要修改临时表的表名,必须同时修改触发器函数的内容。
方法2、用临时表时提前创建
1、创建临时表模板(一次性创建)
-- 临时表模板
create table tmp1_template(xxxx);
2、以后每次使用某临时表之前,使用这个模板表创建临时表。
create temp table if not exists tmp_xxx (like tmp1_template including all) ON COMMIT DELETE ROWS;
3、以后要修改临时表的结果,直接修改模板表
alter table tmp_xxx add column c1 int;
例子
-- 创建临时表模板表
create table tmp1_template (id int8 primary key, info text, crt_time timestamp);
-- 每次使用临时表前,先使用模板创建
create temp table if not exists tbl_tmp (like tmp1_template including all) ON COMMIT DELETE ROWS;
-- 以后要修改临时表的结果,直接修改模板表
alter table tmp1_template add column c1 int;
方法3、plpgsql中,可以使用方法1,也可以使用ARRAY代替临时表
创建普通表(默认会创建对应的复合类型),
使用复合类型数组代替临时表
例子
do language plpgsql $$
declare
res tbl[]; x tbl;
begin
select array_agg(t::tbl) into res from (select id, random()::text, clock_timestamp() from generate_series(1,10) t(id)) t;
raise notice 'res: %', res;
foreach x in array res loop
raise notice 'x: %', x;
end loop;
end;
$$;
NOTICE: res: {"(1,0.0940282950177789,\"2018-07-15 23:14:44.060389\")","(2,0.922331794165075,\"2018-07-15 23:14:44.060404\")","(3,0.857550186105072,\"2018-07-15 23:14:44.060406\")","(4,0.373486907221377,\"2018-07-15 23:14:44.060408\")","(5,0.973780393600464,\"2018-07-15 23:14:44.060409\")","(6,0.502839601133019,\"2018-07-15 23:14:44.060411\")","(7,0.217925263568759,\"2018-07-15 23:14:44.060412\")","(8,0.733274032827467,\"2018-07-15 23:14:44.060413\")","(9,0.62150136847049,\"2018-07-15 23:14:44.060416\")","(10,0.241393140517175,\"2018-07-15 23:14:44.060418\")"}
NOTICE: x: (1,0.0940282950177789,"2018-07-15 23:14:44.060389")
NOTICE: x: (2,0.922331794165075,"2018-07-15 23:14:44.060404")
NOTICE: x: (3,0.857550186105072,"2018-07-15 23:14:44.060406")
NOTICE: x: (4,0.373486907221377,"2018-07-15 23:14:44.060408")
NOTICE: x: (5,0.973780393600464,"2018-07-15 23:14:44.060409")
NOTICE: x: (6,0.502839601133019,"2018-07-15 23:14:44.060411")
NOTICE: x: (7,0.217925263568759,"2018-07-15 23:14:44.060412")
NOTICE: x: (8,0.733274032827467,"2018-07-15 23:14:44.060413")
NOTICE: x: (9,0.62150136847049,"2018-07-15 23:14:44.060416")
NOTICE: x: (10,0.241393140517175,"2018-07-15 23:14:44.060418")
DO
方法4、预创建表结构,使用起来比较复杂,不推荐
创建父表
预创建一些继承表
使用时,使用advisory lock保护,挑选其中一个继承表使用
例子
-- 创建父表
create table tmp1(id int, info text, crt_time timestamp);
-- 创建100个子表
do language plpgsql $$
declare
begin
for i in 1..100 loop
execute format('create unlogged table tmp1_%s (like tmp1 including all) inherits (tmp1)', i);
end loop;
end;
$$;
创建加锁函数,返回值即后缀
create or replace function get_lock() returns int as $$
declare
begin
for i in 1..100 loop
if pg_try_advisory_lock(i) then
return i;
end if;
end loop;
return '-1';
end;
$$ language plpgsql strict;
加锁,返回1则使用后缀为1的临时表
postgres=# select get_lock();
get_lock
----------
1
(1 row)
使用临时表
truncate tmp1_1;
... 使用 tmp1_1
释放锁
postgres=# select pg_advisory_unlock(1);
pg_advisory_unlock
--------------------
t
(1 row)
可以精细化
1、维护1张表,后缀ID为PK,这样的话advisory lock id在全局都不会冲突
create table catalog_tmp (
tmp_tbl name,
prefix name,
suffix int primary key
);
create index idx_catalog_tmp_1 on catalog_tmp(tmp_tbl);
insert into catalog_tmp select 'tmp1','tmp1',generate_series(1,100);
2、申请临时表锁时,使用一个函数,从前面的表中获取前后缀,直接返回表名。
create or replace function get_tmp(name) returns text as $$
declare
i int;
v name;
begin
for i,v in select suffix,prefix from catalog_tmp where tmp_tbl=$1
loop
if pg_try_advisory_lock(i) then
return v||'_'||i;
end if;
end loop;
end;
$$ language plpgsql strict;
3、申请临时表,返回的就是当前会话可以使用的临时表名
postgres=# select get_tmp('tmp1');
get_tmp
---------
tmp1_1
(1 row)
4、释放临时表的函数。
create or replace function release_tmp(name) returns void as $$
declare
begin
loop
if not pg_advisory_unlock(substring($1,'_(\d*)$')::int) then
return;
end if;
end loop;
end;
$$ language plpgsql strict;
释放临时表(注意,不释放的话,其他会话就不可用使用这个临时表)
select release_tmp('tmp1_1');
参考
https://www.codeproject.com/Articles/1176045/Oracle-style-global-temporary-tables-for-PostgreSQL