问题来自云栖问答,觉得比较好,所以记录一下:
我的项目用的是MySQL,但也想同时请教下在Oracle、SqlServer中应该如何处理如下问题:
有数据表如下所示:
希望从表中抽取数据,要求两条记录的时间间隔至少2分钟
对下面图片中的数据而言,假如我查询的时间范围是从2014-08-10 23:20:00开始的,
则希望抽取到如下结果集:
'83', '57', '10041', '74.27', '0', '2014-08-10 23:20:04'
'113', '57', '10041', '59.25', '0', '2014-08-10 23:22:06'
'145', '57', '10041', '96.21', '0', '2014-08-10 23:24:07'
'177', '57', '10041', '34.16', '0', '2014-08-10 23:26:08'
'209', '57', '10041', '39.11', '0', '2014-08-10 23:28:09'
真实的场景是:传感器每隔30秒左右会向数据库里写一条记录,我要取N天的数据绘图,如果一次性查询N天的记录再进行抽稀的话,由于结果集太大,循环次数过多,导致时耗严重。我希望能通过sql语句直接在数据库层面进行数据抽稀,程序里要处理的数据就会少很多。
问题就是,应该如何写SQL语句?
对于PostgreSQL数据库来说,这个需求很简单,写个函数就可以搞定。
例子:
digoal=# create table test(id serial, crt_time timestamp);
CREATE TABLE
digoal=# insert into test (crt_time) select generate_series(now(),now()+interval '10 min', interval '30 sec');
INSERT 0 21
digoal=# select * from test;
id | crt_time
----+----------------------------
1 | 2016-04-12 10:25:08.696388
2 | 2016-04-12 10:25:38.696388
3 | 2016-04-12 10:26:08.696388
4 | 2016-04-12 10:26:38.696388
5 | 2016-04-12 10:27:08.696388
6 | 2016-04-12 10:27:38.696388
7 | 2016-04-12 10:28:08.696388
8 | 2016-04-12 10:28:38.696388
9 | 2016-04-12 10:29:08.696388
10 | 2016-04-12 10:29:38.696388
11 | 2016-04-12 10:30:08.696388
12 | 2016-04-12 10:30:38.696388
13 | 2016-04-12 10:31:08.696388
14 | 2016-04-12 10:31:38.696388
15 | 2016-04-12 10:32:08.696388
16 | 2016-04-12 10:32:38.696388
17 | 2016-04-12 10:33:08.696388
18 | 2016-04-12 10:33:38.696388
19 | 2016-04-12 10:34:08.696388
20 | 2016-04-12 10:34:38.696388
21 | 2016-04-12 10:35:08.696388
(21 rows)
create or replace function get_sparse_data(b timestamp, e timestamp, sparse interval, lmt int) returns setof test as
$$
declare
res test;
rec test;
cn int := 0;
begin
for rec in select * from test where crt_time between b and e order by crt_time loop
if res is null or rec.crt_time - res.crt_time >= sparse then
res := rec;
cn := cn+1;
return next res;
end if;
if cn >= lmt then
return;
end if;
end loop;
end;
$$
language plpgsql;
digoal=# select get_sparse_data('2016-04-12 10:26:38.696388', '2016-04-12 10:34:08.696388', '1 min', 5);
get_sparse_data
-----------------------------------
(4,"2016-04-12 10:26:38.696388")
(6,"2016-04-12 10:27:38.696388")
(8,"2016-04-12 10:28:38.696388")
(10,"2016-04-12 10:29:38.696388")
(12,"2016-04-12 10:30:38.696388")
(5 rows)
digoal=# select get_sparse_data('2016-04-12 10:26:38.696388', '2016-04-12 10:34:08.696388', '2 min', 5);
get_sparse_data
-----------------------------------
(4,"2016-04-12 10:26:38.696388")
(8,"2016-04-12 10:28:38.696388")
(12,"2016-04-12 10:30:38.696388")
(16,"2016-04-12 10:32:38.696388")
(4 rows)