# 建表SQL,并修改向量列的存储格式为PLAN CREATE TABLE IF NOT EXISTS public.articles ( id INTEGER NOT NULL, sentence TEXT, vector REAL[], PRIMARY KEY(id) ) DISTRIBUTED BY(id); ALTER TABLE public.articles ALTER COLUMN vector SET STORAGE PLAIN; \d+ articles # 创建向量索引 CREATE INDEX ON public.articles USING ann (vector) WITH (dim = '384', hnsw_m = '100', pq_enable='0'); # 为了实现混合检索,为原表增加2列 ALTER TABLE public.articles add column username varchar(512); ALTER TABLE public.articles add column time timestamp without time zone; 改文件字符集 iconv -f GBK -t UTF-8 /usr/local/postgresql-11.5/sentences_vectors1.csv -o sentences_vectors.csv # 将sentences_vectors.csv中处理好的数据导入到表中 PGPASSWORD='Alibaba%1688' ./psql -U dbuser -p 5432 -h gp-xxx.com -d poc -c "COPY public.articles (id, sentence, vector,username,time) FROM STDIN WITH (FORMAT CSV, HEADER true, DELIMITER ',');" < /usr/local/postgresql-11.5/sentences_vectors.csv # 对混合查询关联的结构化与板结构化列建立索引 -- 对结构化字段建立BTREE索引 CREATE INDEX ON articles(time); - 添加tsvector 列,为了支持全文检索,通常需要添加一个 tsvector 列,用于存储文本的向量化表示 ALTER TABLE articles ADD COLUMN tsv_content TSVECTOR; - 更新 tsvector 列,使用 to_tsvector 函数将文本字段转换为 tsvector 格式,并更新到新列中 UPDATE articles SET tsv_content = to_tsvector(sentence); - 在 tsvector 列上创建 GIN 索引以加速全文检索 CREATE INDEX idx_articles_tsv_content ON articles USING GIN (tsv_content); - 查询全文索引 SELECT * FROM articles WHERE tsv_content @@ to_tsquery('make'); # 查询语句 SELECT id, sentence, cosine_similarity(vector, array(SELECT generate_series(1, 384))::real[]) AS score FROM articles WHERE time >= '2023-07-18 00:00:00' AND time <= '2023-08-18 00:00:00' AND tsv_content @@ to_tsquery('best') ORDER BY vector <=> array(SELECT generate_series(1, 384))::real[] LIMIT 10;
#创建表+向量索引
# 为了实现混合检索,为原表增加2列
#对结构化字段建立BTREE索引
#在 tsvector 列上创建 GIN 索引以加速全文检索
查询结果