# 几道经典sql练习题

1: 留存率统计分析

select
log_day 日期,
count(user_id_day0) 新增数量,
count(user_id_day1)/count(user_id_day0) 次日留存率,
count(user_id_day2)/count(user_id_day0) 3日留存率,
count(user_id_day7)/count(user_id_day0) 7日留存率,
count(user_id_day30)/count(user_id_day0) 30日留存率
from(
select distinct log_day,
a.user_id_day0,
b.user_id as user_id_day1,
c.user_id as user_id_day3,
d.user_id as user_id_day7,
e.user_id as user_id_day30
from(
select distinct
Date(log_time) as log_day,
user_id as user_id_day0
from
group by user_id
order by log_day
)a
left join t_user_login b on datediff(Date(b.log_time),a.log_day)=1
and a.user_id_day0=b.user_id
left join t_user_login c on datediff(Date(c.log_time),a.log_day)=2
and a.user_id_day0=c.user_id
left join t_user_login d on datediff(Date(d.log_time),a.log_day)=6
and a.user_id_day0=d.user_id
left join t_user_login e on datediff(Date(e.log_time),a.log_day)=29
and a.user_id_day0=e.user_id
) temp
group by log_day

2:计算直播同时在线人数最大值

id            stt                 edt
1001 2021-06-14 12:12:12 2021-06-14 18:12:12
1003 2021-06-14 13:12:12 2021-06-14 16:12:12
1004 2021-06-14 13:15:12 2021-06-14 20:12:12
1002 2021-06-14 15:12:12 2021-06-14 16:12:12
1005 2021-06-14 15:18:12 2021-06-14 20:12:12
1001 2021-06-14 20:12:12 2021-06-14 23:12:12
1006 2021-06-14 21:12:12 2021-06-14 23:15:12
1007 2021-06-14 22:12:12 2021-06-14 23:10:12

1.将上线时间作为 +1，下线时间作为 -1
2.然后运用union all 合并两列
3.利用sum()over()对其分区排序，求出不同时间段的人数（每一个时间点都会对应一个总在线人数）
4.对时间进行分组，求出所有时间点中最大的同时在线总人数

-- 1) 对数据分类,在开始数据后添加正1,表示有主播上线,同时在关播数据后添加-1,表示有主播下线
select id , stt as dt , 1 as p from test
union all
select id , edt as dt ,  -1 as p from test;

1001    2021-06-14 12:12:12     1
1001    2021-06-14 18:12:12     -1
1001    2021-06-14 20:12:12     1
1001    2021-06-14 23:12:12     -1
1002    2021-06-14 15:12:12     1
1002    2021-06-14 16:12:12     -1
1003    2021-06-14 13:12:12     1
1003    2021-06-14 16:12:12     -1
1004    2021-06-14 13:15:12     1
1004    2021-06-14 20:12:12     -1
1005    2021-06-14 15:18:12     1
1005    2021-06-14 20:12:12     -1
1006    2021-06-14 21:12:12     1
1006    2021-06-14 23:15:12     -1
1007    2021-06-14 22:12:12     1
1007    2021-06-14 23:10:12     -1

1001 2021-06-14 12:12:12 1
1003 2021-06-14 13:12:12 1
1004 2021-06-14 13:15:12 1
1002 2021-06-14 15:12:12 1
1005 2021-06-14 15:18:12 1
1002 2021-06-14 16:12:12 -1
1003 2021-06-14 16:12:12 -1
1001 2021-06-14 18:12:12 -1
1001 2021-06-14 20:12:12 1
1004 2021-06-14 20:12:12 -1
1005 2021-06-14 20:12:12 -1
1006 2021-06-14 21:12:12 1
1007 2021-06-14 22:12:12 1
1007 2021-06-14 23:10:12 -1
1001 2021-06-14 23:12:12 -1
1006 2021-06-14 23:15:12 -1
-- 2) 按照时间排序,计算累加人数
select
t1.id,
t1.dt,
sum(p) over(partition by date_format(t1.dt, 'yyyy-MM-dd') order by t1.dt) sum_p
from (
select id , stt as dt , 1 as p from test
union all
select id , edt as dt ,  -1 as p from test
) t1

1001 2021-06-14 12:12:12 1
1003 2021-06-14 13:12:12 2
1004 2021-06-14 13:15:12 3
1002 2021-06-14 15:12:12 4
1005 2021-06-14 15:18:12 5
1002 2021-06-14 16:12:12 3
1003 2021-06-14 16:12:12 3
1001 2021-06-14 18:12:12 2
1001 2021-06-14 20:12:12 1
1004 2021-06-14 20:12:12 1
1005 2021-06-14 20:12:12 1
1006 2021-06-14 21:12:12 2
1007 2021-06-14 22:12:12 3
1007 2021-06-14 23:10:12 2
1001 2021-06-14 23:12:12 1
1006 2021-06-14 23:15:12 0
-- 3) 找出同时在线人数最大值
select
date_format(t2.dt,'yyyy-MM-dd') date,
max(sum_p) con
from(
select
t1.id,
t1.dt,
sum(p) over(partition by date_format(t1.dt, 'yyyy-MM-dd') order by t1.dt) sum_p
from (
select id , stt as dt , 1 as p from test
union all
select id , edt as dt ,  -1 as p from test
) t1
) t2
group by date_format(t2.dt,'yyyy-MM-dd');

构造辅助计数变量及累加变换思路进行求解。

3: 区间分段统计建表和数据准备：

球员    比赛场次    得分    地点    时间

create table tmp_tc.tmp_test_20230303 (
name String  comment '姓名',
number string comment '比赛场次',
score int comment '成绩',
dt String comment '日期'
)
stored as orc tblproperties ("orc.compress"="SNAPPY");
with tmp as (
select '张三' as name,'1' as number,30 as score,'北京' as address , '20220202' as dt union all
select '张三' as name,'2' as number,28 as score,'上海' as address , '20220207' as dt union all
select '张三' as name,'3' as number,36 as score,'广州' as address , '20220212' as dt union all
select '张三' as name,'4' as number,57 as score,'深圳' as address , '20220217' as dt union all
select '张三' as name,'5' as number,19 as score,'南京' as address , '20220222' as dt union all
select '张三' as name,'6' as number,22 as score,'武汉' as address , '20220227' as dt union all
select '张三' as name,'7' as number,32 as score,'成都' as address , '20220304' as dt union all
select '张三' as name,'8' as number,23 as score,'厦门' as address , '20220309' as dt
)
set spark.sql.shuffle.partitions=1;
insert overwrite table tmp_tc.tmp_test_20230303
select * from tmp_tc.tmp_test_20230303
distribute by rand()
;

with tmp as (
select *
, sum(score) over(partition by name order by dt) as total_score
, coalesce(sum(score) over(partition by name order by dt rows between unbounded preceding and 1 preceding),0) as total_score2
from tmp_tc.tmp_test_20230303
),
tmp2 as (
-- 区间分数两者之间 上半部分
select
name
, number
, score
, dt
, case when floor(total_score/100) = 0 then '1-100'
when floor(total_score/100) = 1 then '101-200'
when floor(total_score/100) = 2 then '201-300'
else '300+' end as tag
, (ceil(total_score2/100)*100-total_score2) as tag_score
from tmp
where floor(total_score2/100) != floor(total_score/100)
union all
-- 区间分数两者之间 下半部分
select
name
, number
, score
, dt
, case when floor(total_score/100) = 0 then '1-100'
when floor(total_score/100) = 1 then '101-200'
when floor(total_score/100) = 2 then '201-300'
else '300+' end  as tag
, (score - (ceil(total_score2/100)*100-total_score2)) as tag_score
from tmp
where floor(total_score2/100) != floor(total_score/100)
union all
-- 区间内分数
select
name
, number
, score
, dt
, case when floor(total_score/100) = 0 then '1-100'
when floor(total_score/100) = 1 then '101-200'
when floor(total_score/100) = 2 then '201-300'
else '300+' end  as tag
, score as tag_score
from tmp
where !(floor(total_score2/100) != floor(total_score/100))
)
select
name       -- 球员
, number    -- 比赛场次
, score        -- 得分
, dt              -- 比赛时间
, tag            -- 得分区间
, tag_score  -- 区间内分数
from tmp2
order by dt, tag_score
;

4:hive中怎么统计array中非零的个数？

## 方案一：
select length(translate(concat_ws(',',array('0','1','3','6','0')),',0',''));
## 方案二：
select count(a)
from
(
select explode(array('0','1','3','6','0'))  as a
) t
where a!='0'

5：断点重分组问题分析？

## 数据准备
uid      start_time           end_time       num
1  2020-02-18 14:20:30  2020-02-18 14:46:30  20
1  2020-02-18 14:47:20  2020-02-18 15:20:30  30
1  2020-02-18 15:37:23  2020-02-18 16:05:26  40
1  2020-02-18 16:06:27  2020-02-18 17:20:49  50
1  2020-02-18 17:21:50  2020-02-18 18:03:27  60
2  2020-02-18 14:18:24  2020-02-18 15:01:40  20
2  2020-02-18 15:20:49  2020-02-18 15:30:24  30
2  2020-02-18 16:01:23  2020-02-18 16:40:32  40
2  2020-02-18 16:44:56  2020-02-18 17:40:52  50
3  2020-02-18 14:39:58  2020-02-18 15:35:53  20
3  2020-02-18 15:36:39  2020-02-18 15:24:54  30
## 分析：关键在正确分组
1,   2020-02-18 14:20:30,   2020-02-18 14:46:30,20    ,0   ,0
1,   2020-02-18 14:47:20,   2020-02-18 15:20:30,30    ,0   ,0
1,   2020-02-18 15:37:23,   2020-02-18 16:05:26,40    ,1   ,1
1,   2020-02-18 16:06:27,   2020-02-18 17:20:49,50    ,0   ,1
1,   2020-02-18 17:21:50,   2020-02-18 18:03:27,60    ,0   ,1
1：将end_time往下拉取一行，重新起个名字pre_end_time
2：pre_end_time-start_time<10,记为0用作标识，否则记为1，1即为变化的分割点
3：通过sum over() 将分割的段落再次转换，即可得到正确的可用于分组的标记，依此作为分组条件

WITH tmp as (
SELECT
uid,
start_time,
end_time,
lag(end_time,1,null) over(partition by uid order by start_time) as pre_end_time
FROM t
)
SELECT
uid,
min(start_time) as start_time,
max(end_time) as end_time,
sum(num) as amount
FROM
(
SELECT
uid,
start_time,
end_time,
num,
sum(flag) over(partition by uid order by start_time rows between unbounded preceding and current row) as groupid
FROM
(
SELECT
uid,
start_time,
end_time,
num,
if(unix_timestamp(start_time) - nvl(unix_timestamp(pre_end_time),unix_timestamp(start_time))< 10*60,0,1) as flag
FROM tmp
) o1
)o2
GROUP BY uid,groupid

1  2020-02-18 14:20:30  2020-02-18 15:20:30  50
1  2020-02-18 15:37:23  2020-02-18 18:03:27  150
2  2020-02-18 14:18:24  2020-02-18 15:01:40  20
2  2020-02-18 15:20:49  2020-02-18 15:30:24  30
2  2020-02-18 16:01:23  2020-02-18 17:40:52  90
3  2020-02-18 14:39:58  2020-02-18 15:24:54  50

6：SQL重叠交叉区间问题分析？

pName   sst         et
oppo 2021-06-05 2021-06-09
oppo 2021-06-11 2021-06-21
vivo 2021-06-05 2021-06-15
vivo 2021-06-09 2021-06-21
redmi 2021-06-05 2021-06-21
redmi 2021-06-09 2021-06-15
redmi 2021-06-17 2021-06-26
huawei 2021-06-05 2021-06-26
huawei 2021-06-09 2021-06-15
huawei 2021-06-17 2021-06-21

思路：

（1）根据相差的天数生成序列值。（索引值）
根据时间差值生成相应的空格字符串，然后通过split()函数解析，最终根据posexplode()函数生成对应的索引值。当然此处也可以用repeat()函数代替space()函数。由于split()函数解析的时候会生成多余的空串（''），所以具体操作的时候过滤掉为0的索引。具体生成索引的SQL如下：
select
id,
stt,
edt,
t.pos
from(
select id,stt,edt from brand
) tmp lateral view posexplode(
split(space(datediff(edt, stt)+1), '')
) t as pos, val
where t.pos <> 0

oppo 2021-06-05 2021-06-09 1
oppo 2021-06-05 2021-06-09 2
oppo 2021-06-05 2021-06-09 3
oppo 2021-06-05 2021-06-09 4
oppo 2021-06-05 2021-06-09 5
oppo 2021-06-11 2021-06-21 1
oppo 2021-06-11 2021-06-21 2
oppo 2021-06-11 2021-06-21 3
oppo 2021-06-11 2021-06-21 4
oppo 2021-06-11 2021-06-21 5
oppo 2021-06-11 2021-06-21 6
oppo 2021-06-11 2021-06-21 7
oppo 2021-06-11 2021-06-21 8
oppo 2021-06-11 2021-06-21 9
oppo 2021-06-11 2021-06-21 10
oppo 2021-06-11 2021-06-21 11
vivo 2021-06-05 2021-06-15 1
vivo 2021-06-05 2021-06-15 2
vivo 2021-06-05 2021-06-15 3
vivo 2021-06-05 2021-06-15 4
vivo 2021-06-05 2021-06-15 5
vivo 2021-06-05 2021-06-15 6
vivo 2021-06-05 2021-06-15 7
vivo 2021-06-05 2021-06-15 8
vivo 2021-06-05 2021-06-15 9
vivo 2021-06-05 2021-06-15 10
vivo 2021-06-05 2021-06-15 11
vivo 2021-06-09 2021-06-21 1
vivo 2021-06-09 2021-06-21 2
vivo 2021-06-09 2021-06-21 3
vivo 2021-06-09 2021-06-21 4
vivo 2021-06-09 2021-06-21 5
vivo 2021-06-09 2021-06-21 6
vivo 2021-06-09 2021-06-21 7
vivo 2021-06-09 2021-06-21 8
vivo 2021-06-09 2021-06-21 9
vivo 2021-06-09 2021-06-21 10
vivo 2021-06-09 2021-06-21 11
vivo 2021-06-09 2021-06-21 12
vivo 2021-06-09 2021-06-21 13
redmi 2021-06-05 2021-06-21 1
redmi 2021-06-05 2021-06-21 2
redmi 2021-06-05 2021-06-21 3
redmi 2021-06-05 2021-06-21 4
redmi 2021-06-05 2021-06-21 5
redmi 2021-06-05 2021-06-21 6
redmi 2021-06-05 2021-06-21 7
redmi 2021-06-05 2021-06-21 8
redmi 2021-06-05 2021-06-21 9
redmi 2021-06-05 2021-06-21 10
redmi 2021-06-05 2021-06-21 11
redmi 2021-06-05 2021-06-21 12
redmi 2021-06-05 2021-06-21 13
redmi 2021-06-05 2021-06-21 14
redmi 2021-06-05 2021-06-21 15
redmi 2021-06-05 2021-06-21 16
redmi 2021-06-05 2021-06-21 17
redmi 2021-06-09 2021-06-15 1
redmi 2021-06-09 2021-06-15 2
redmi 2021-06-09 2021-06-15 3
redmi 2021-06-09 2021-06-15 4
redmi 2021-06-09 2021-06-15 5
redmi 2021-06-09 2021-06-15 6
redmi 2021-06-09 2021-06-15 7
redmi 2021-06-17 2021-06-26 1
redmi 2021-06-17 2021-06-26 2
redmi 2021-06-17 2021-06-26 3
redmi 2021-06-17 2021-06-26 4
redmi 2021-06-17 2021-06-26 5
redmi 2021-06-17 2021-06-26 6
redmi 2021-06-17 2021-06-26 7
redmi 2021-06-17 2021-06-26 8
redmi 2021-06-17 2021-06-26 9
redmi 2021-06-17 2021-06-26 10
huawei 2021-06-05 2021-06-26 1
huawei 2021-06-05 2021-06-26 2
huawei 2021-06-05 2021-06-26 3
huawei 2021-06-05 2021-06-26 4
huawei 2021-06-05 2021-06-26 5
huawei 2021-06-05 2021-06-26 6
huawei 2021-06-05 2021-06-26 7
huawei 2021-06-05 2021-06-26 8
huawei 2021-06-05 2021-06-26 9
huawei 2021-06-05 2021-06-26 10
huawei 2021-06-05 2021-06-26 11
huawei 2021-06-05 2021-06-26 12
huawei 2021-06-05 2021-06-26 13
huawei 2021-06-05 2021-06-26 14
huawei 2021-06-05 2021-06-26 15
huawei 2021-06-05 2021-06-26 16
huawei 2021-06-05 2021-06-26 17
huawei 2021-06-05 2021-06-26 18
huawei 2021-06-05 2021-06-26 19
huawei 2021-06-05 2021-06-26 20
huawei 2021-06-05 2021-06-26 21
huawei 2021-06-05 2021-06-26 22
huawei 2021-06-09 2021-06-15 1
huawei 2021-06-09 2021-06-15 2
huawei 2021-06-09 2021-06-15 3
huawei 2021-06-09 2021-06-15 4
huawei 2021-06-09 2021-06-15 5
huawei 2021-06-09 2021-06-15 6
huawei 2021-06-09 2021-06-15 7
huawei 2021-06-17 2021-06-21 1
huawei 2021-06-17 2021-06-21 2
huawei 2021-06-17 2021-06-21 3
huawei 2021-06-17 2021-06-21 4
huawei 2021-06-17 2021-06-21 5
（2） 根据索引生成时间补齐所有时间段值
select
id,
stt,
edt,
pos-1,
from(
select id,stt,edt from brand
) tmp lateral view posexplode(
split(space(datediff(edt, stt)+1), '')
) t as pos, val
where pos <> 0

oppo 2021-06-05 2021-06-09 0 2021-06-05
oppo 2021-06-05 2021-06-09 1 2021-06-06
oppo 2021-06-05 2021-06-09 2 2021-06-07
oppo 2021-06-05 2021-06-09 3 2021-06-08
oppo 2021-06-05 2021-06-09 4 2021-06-09
oppo 2021-06-11 2021-06-21 0 2021-06-11
oppo 2021-06-11 2021-06-21 1 2021-06-12
oppo 2021-06-11 2021-06-21 2 2021-06-13
oppo 2021-06-11 2021-06-21 3 2021-06-14
oppo 2021-06-11 2021-06-21 4 2021-06-15
oppo 2021-06-11 2021-06-21 5 2021-06-16
oppo 2021-06-11 2021-06-21 6 2021-06-17
oppo 2021-06-11 2021-06-21 7 2021-06-18
oppo 2021-06-11 2021-06-21 8 2021-06-19
oppo 2021-06-11 2021-06-21 9 2021-06-20
oppo 2021-06-11 2021-06-21 10 2021-06-21
vivo 2021-06-05 2021-06-15 0 2021-06-05
vivo 2021-06-05 2021-06-15 1 2021-06-06
vivo 2021-06-05 2021-06-15 2 2021-06-07
vivo 2021-06-05 2021-06-15 3 2021-06-08
vivo 2021-06-05 2021-06-15 4 2021-06-09
vivo 2021-06-05 2021-06-15 5 2021-06-10
vivo 2021-06-05 2021-06-15 6 2021-06-11
vivo 2021-06-05 2021-06-15 7 2021-06-12
vivo 2021-06-05 2021-06-15 8 2021-06-13
vivo 2021-06-05 2021-06-15 9 2021-06-14
vivo 2021-06-05 2021-06-15 10 2021-06-15
vivo 2021-06-09 2021-06-21 0 2021-06-09
vivo 2021-06-09 2021-06-21 1 2021-06-10
vivo 2021-06-09 2021-06-21 2 2021-06-11
vivo 2021-06-09 2021-06-21 3 2021-06-12
vivo 2021-06-09 2021-06-21 4 2021-06-13
vivo 2021-06-09 2021-06-21 5 2021-06-14
vivo 2021-06-09 2021-06-21 6 2021-06-15
vivo 2021-06-09 2021-06-21 7 2021-06-16
vivo 2021-06-09 2021-06-21 8 2021-06-17
vivo 2021-06-09 2021-06-21 9 2021-06-18
vivo 2021-06-09 2021-06-21 10 2021-06-19
vivo 2021-06-09 2021-06-21 11 2021-06-20
vivo 2021-06-09 2021-06-21 12 2021-06-21
redmi 2021-06-05 2021-06-21 0 2021-06-05
redmi 2021-06-05 2021-06-21 1 2021-06-06
redmi 2021-06-05 2021-06-21 2 2021-06-07
redmi 2021-06-05 2021-06-21 3 2021-06-08
redmi 2021-06-05 2021-06-21 4 2021-06-09
redmi 2021-06-05 2021-06-21 5 2021-06-10
redmi 2021-06-05 2021-06-21 6 2021-06-11
redmi 2021-06-05 2021-06-21 7 2021-06-12
redmi 2021-06-05 2021-06-21 8 2021-06-13
redmi 2021-06-05 2021-06-21 9 2021-06-14
redmi 2021-06-05 2021-06-21 10 2021-06-15
redmi 2021-06-05 2021-06-21 11 2021-06-16
redmi 2021-06-05 2021-06-21 12 2021-06-17
redmi 2021-06-05 2021-06-21 13 2021-06-18
redmi 2021-06-05 2021-06-21 14 2021-06-19
redmi 2021-06-05 2021-06-21 15 2021-06-20
redmi 2021-06-05 2021-06-21 16 2021-06-21
redmi 2021-06-09 2021-06-15 0 2021-06-09
redmi 2021-06-09 2021-06-15 1 2021-06-10
redmi 2021-06-09 2021-06-15 2 2021-06-11
redmi 2021-06-09 2021-06-15 3 2021-06-12
redmi 2021-06-09 2021-06-15 4 2021-06-13
redmi 2021-06-09 2021-06-15 5 2021-06-14
redmi 2021-06-09 2021-06-15 6 2021-06-15
redmi 2021-06-17 2021-06-26 0 2021-06-17
redmi 2021-06-17 2021-06-26 1 2021-06-18
redmi 2021-06-17 2021-06-26 2 2021-06-19
redmi 2021-06-17 2021-06-26 3 2021-06-20
redmi 2021-06-17 2021-06-26 4 2021-06-21
redmi 2021-06-17 2021-06-26 5 2021-06-22
redmi 2021-06-17 2021-06-26 6 2021-06-23
redmi 2021-06-17 2021-06-26 7 2021-06-24
redmi 2021-06-17 2021-06-26 8 2021-06-25
redmi 2021-06-17 2021-06-26 9 2021-06-26
huawei 2021-06-05 2021-06-26 0 2021-06-05
huawei 2021-06-05 2021-06-26 1 2021-06-06
huawei 2021-06-05 2021-06-26 2 2021-06-07
huawei 2021-06-05 2021-06-26 3 2021-06-08
huawei 2021-06-05 2021-06-26 4 2021-06-09
huawei 2021-06-05 2021-06-26 5 2021-06-10
huawei 2021-06-05 2021-06-26 6 2021-06-11
huawei 2021-06-05 2021-06-26 7 2021-06-12
huawei 2021-06-05 2021-06-26 8 2021-06-13
huawei 2021-06-05 2021-06-26 9 2021-06-14
huawei 2021-06-05 2021-06-26 10 2021-06-15
huawei 2021-06-05 2021-06-26 11 2021-06-16
huawei 2021-06-05 2021-06-26 12 2021-06-17
huawei 2021-06-05 2021-06-26 13 2021-06-18
huawei 2021-06-05 2021-06-26 14 2021-06-19
huawei 2021-06-05 2021-06-26 15 2021-06-20
huawei 2021-06-05 2021-06-26 16 2021-06-21
huawei 2021-06-05 2021-06-26 17 2021-06-22
huawei 2021-06-05 2021-06-26 18 2021-06-23
huawei 2021-06-05 2021-06-26 19 2021-06-24
huawei 2021-06-05 2021-06-26 20 2021-06-25
huawei 2021-06-05 2021-06-26 21 2021-06-26
huawei 2021-06-09 2021-06-15 0 2021-06-09
huawei 2021-06-09 2021-06-15 1 2021-06-10
huawei 2021-06-09 2021-06-15 2 2021-06-11
huawei 2021-06-09 2021-06-15 3 2021-06-12
huawei 2021-06-09 2021-06-15 4 2021-06-13
huawei 2021-06-09 2021-06-15 5 2021-06-14
huawei 2021-06-09 2021-06-15 6 2021-06-15
huawei 2021-06-17 2021-06-21 0 2021-06-17
huawei 2021-06-17 2021-06-21 1 2021-06-18
huawei 2021-06-17 2021-06-21 2 2021-06-19
huawei 2021-06-17 2021-06-21 3 2021-06-20
huawei 2021-06-17 2021-06-21 4 2021-06-21
（3） 去掉重复值，计算剩余点个数。
select
id,
from(
select id,stt,edt from brand
) tmp lateral view posexplode(
split(space(datediff(edt, stt)+1), '')
) t as pos, val
where t.pos <> 0
group by id

select
id,
from(
select id,stt,edt from brand
) tmp lateral view posexplode(
split(repeat('a',datediff(edt, stt)+1), 'a(?!a\$)')
) t as pos, val
group by id

7：SQL之存在性问题分析

# 数据如下：
dt       stu_id
2020-01-02 1001
2020-01-02 1002
2020-02-02 1001
2020-02-02 1002
2020-02-02 1003
2020-02-02 1004
2020-03-02 1001
2020-03-02 1002
2020-04-02 1005
2020-05-02 1006

2020-01         0
2020-02         0
2020-03         2
2020-04         4
2020-05         5

dt       stu_id
2020-01 1001
2020-01 1002
2020-02 1001
2020-02 1002
2020-02 1003
2020-02 1004
2020-03 1001
2020-03 1002
2020-04 1005
2020-05 1006
left join
2020-01 [1001 1002]         [1001 1002 1003 1004]
2020-02 [1001 1002 1003 1004]  [1001 1002]
2020-03 [1001 1002]  [1005]
2020-04 [1005] [1006]
2020-04 [1006]  NULL
====>
2020-01 1001 2020-01 [1001,1002,1003,1004]
2020-01 1002 2020-01 [1001,1002,1003,1004]
2020-02 1001 2020-02 [1001,1002]
2020-02 1002 2020-02 [1001,1002]
2020-02 1003 2020-02 [1001,1002]
2020-02 1004 2020-02 [1001,1002]
2020-03 1001 2020-03 [1005]
2020-03 1002 2020-03 [1005]
2020-04 1005 2020-04 [1006]
2020-05 1006 2020-05 NULL
====>
sum(if(!array_contains(col1，col2),1,0)) over(partition by xxx)
2020-01 0
2020-02 2
2020-03 2
2020-04 1
2020-05 1
===> lag over()
2020-01 0    0
2020-02 2    0
2020-03 2    2
2020-04 1    2
2020-05 1    1
===> sum() over()
OK
2020-01 0
2020-02 0
2020-03 2
2020-04 4
2020-05 5

select month
,sum(lag_month_cnt) over(order by month)
from(
select month
,lag(next_month_cnt,1,0) over(order by month) as lag_month_cnt
from(
select distinct t0.month as month
,sum(if(!array_contains(t1.lag_stu_id_arr,t0.stu_id),1,0)) over(partition by t0.month) as next_month_cnt
from
(select substr(day,1,7) as month
,stu_id
from stu) t0
left join
(
select month
,lead(stu_id_arr,1) over(order by month) as lag_stu_id_arr
from(
select substr(day,1,7) as month
,collect_list(stu_id) as stu_id_arr
from stu
group by substr(day,1,7)
) m
) t1
on t0.month = t1.month
) n
) o

8：水位线思想在解决SQL复杂场景问题中的应用与研究？

解决思路一：sum(id) over(order by ts)思想取出值不为0的，即为需要的结果。
select *
from
(select *
,sum(coalesce(id,0)) over(order by ts) as water_mark
from test01
) t
where water_mark !=0

11：SQL之定位连续区间的起始位置和结束位置？

Logs 表：
+------------+
| log_id     |
+------------+
| 1          |
| 2          |
| 3          |
| 7          |
| 8          |
| 10         |
+------------+

+------------+--------------+
| start_id   | end_id       |
+------------+--------------+
| 1          | 3            |
| 7          | 8            |
| 10         | 10           |
+------------+--------------+

log_id  rn1   rn
1       1     0
2       2     0
3       3     0
7       4     3
8       5     3
10      6     4

select
min(a.log_id) start_id,
max(a.log_id) end_id
from
(
select
a.log_id,
a.log_id - row_number() over(order by a.log_id) rn
from Logs a
)a
group by a.rn;

|
SQL 关系型数据库 MySQL

19563 0
|
SQL
sql查询练习题（下）
sql查询练习题
411 0
|
SQL
sql查询练习题（上）
sql查询练习题
494 0
|
SQL 关系型数据库 MySQL
SQL练习题--5.6和5.7版本的Group by 用法以及中间表使用
M-统计每个老师教授课程的学生总数-if(expr1,expr2)
311 0
|
SQL 运维 关系型数据库
AnalyticDB PostgreSQL常用运维SQL与命令练习题
AnalyticDB PostgreSQL常用运维SQL与命令练习题
188 0
|
SQL 存储 数据库
《SQL与关系数据库理论——如何编写健壮的SQL代码》一1.11 练习题

1052 0
|
SQL Python
SQL练习题1
create table student( sno varchar2(10) primary key, sname varchar2(20), sage number(2), ssex varchar2(5) ); create table teacher( tno varchar2(10)...
1010 0
|
6天前
|
SQL 人工智能 算法
【SQL server】玩转SQL server数据库：第二章 关系数据库
【SQL server】玩转SQL server数据库：第二章 关系数据库
42 10
|
1月前
|
SQL 数据库 数据安全/隐私保护
Sql Server数据库Sa密码如何修改
Sql Server数据库Sa密码如何修改
23 0
|
16天前
|
SQL

15 0