背景
场景:
某些字段的值经过计算后再过滤的场景, 例如:
json里面的内容包含经纬度, 我们需要对经纬度进行地理信息空间查询过滤.
a,b,c,d分别代表语、数、英、科的分数, 查询总分等于或范围时, 需要计算后再搜索.
挑战:
大多数数据库无法使用表达式索引, 只能全表扫描, 逐条计算. 效率低下.
PG解决方案:
支持表达式索引(也可以叫函数索引), 性能指数级提升.
支持表达式统计信息柱状图, 用于优化器计算
例子:
create table a (id int, info jsonb);
create index idx_a on a using gist (ST_SetSRID(ST_MakePoint((info ->> 'lon')::numeric, (info ->> 'lat')::numeric),4326));
explain select * from a order by ST_SetSRID(ST_MakePoint((info ->> 'lon')::numeric, (info ->> 'lat')::numeric),4326) <->
ST_SetSRID(ST_MakePoint(120,70),4326) limit 10;
Limit (cost=0.14..0.69 rows=10 width=44)
-> Index Scan using idx_a on a (cost=0.14..69.40 rows=1270 width=44)
Order By: (st_setsrid(st_makepoint((((info ->> 'lon'::text))::numeric)::double precision, (((info ->> 'lat'::text))::numeric)::double precision), 4326) <-> '0101000020E61000000000000000005E400000000000805140'::geometry)
create table t (id int, a float4, b float4, c float4, d float4);
create index idx_t on t ((a+b+c+d));
explain select * from t where a+b+c+d=400;
Index Scan using idx_t on t (cost=0.15..7.99 rows=8 width=20)
Index Cond: ((((a + b) + c) + d) = '400'::double precision)
postgres=# select * from pg_stats where tablename='idx_t';
-[ RECORD 1 ]----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname | public
tablename | idx_t
attname | expr
inherited | f
null_frac | 0
avg_width | 4
n_distinct | -0.9994
most_common_vals | {151.73206,180.91998,197.2688,200.11456,204.47366,223.13992}
most_common_freqs | {0.0002,0.0002,0.0002,0.0002,0.0002,0.0002}
histogram_bounds | {25.474722,71.74933,85.48342,93.977295,99.26418,104.8926,110.16269,114.38039,118.075554,121.30721,124.80748,127.81897,130.67479,133.32335,136.02103,138.41626,140.85258,143.05424,145.51877,147.9408,149.95238,151.72961,153.72885,155.82372,157.6345,159.48929,161.0307,162.76514,164.57907,166.19772,167.8121,169.29343,171.28735,173.25894,174.89429,176.23984,177.65022,179.2883,180.66162,182.22772,183.88147,185.28021,186.64587,188.12837,189.66924,191.4691,192.80214,194.05939,195.64655,197.10524,198.36841,199.72656,201.35751,203.02931,204.50558,205.91415,207.49933,209.28078,210.977,212.39197,214.18248,215.5002,217.03229,218.55179,220.12622,221.61935,223.03786,224.73047,226.53156,228.12646,229.62404,231.14334,232.95035,234.51816,236.07428,237.84808,239.52545,241.77795,243.91528,246.18135,248.33812,250.06604,252.14948,254.52863,257.24,260.0845,262.6031,265.53894,268.5458,271.4497,275.00317,278.2635,281.67947,286.42548,290.9062,295.2775,301.70978,307.6002,317.32483,328.39,375.6833}
correlation | -0.026684083
most_common_elems |
most_common_elem_freqs |
elem_count_histogram |