查询连续登陆3天以上用户(字节面试题)
create table userinfo2( `id` string, `date` string ) row format delimited fields terminated by '\t';
insert overwrite table userinfo2 values ('1','2024-01-01 00:00:00'), ('1','2024-01-02 00:00:00'), ('1','2024-01-03 00:00:00'), ('2','2024-02-01 00:00:00'), ('2','2024-02-02 00:00:00'), ('3','2024-03-04 00:00:00'), ('3','2024-03-05 00:00:00'), ('3','2024-03-06 00:00:00'), ('3','2024-03-07 00:00:00');
第一步去重
select id,substring(`date`,1,10) from userinfo2 group by id,substring(`date`,1,10) 去重能用group by,别用distinct,效率太低
select distinct id from (select id, date_add(data,2) as nextday, lead(data,2,0) over (partition by id order by data) as nextorder from (select id,substring(`date`,1,10) as data from userinfo2 group by id,substring(`date`,1,10))t1)t2 where nextday=nextorder
select id,date1,count(*) as day_cnt from (select id,data, date_add(data,-row_number() over (partition by id order by data)) as date1 from (select id,substring(`date`,1,10) as data from userinfo2 group by id,substring(`date`,1,10))t1)t2 group by id,date1 having count(*)>3
查询连续登陆最大天数用户(字节面试题)
select max(day_cnt) from ( select id,date1, count(*) as day_cnt from (select id,data,date_add(data,-row_number() over (partition by id order by data)) as date1 from (select id,substring(`date`,1,10) as data from userinfo2 group by id,substring(`date`,1,10))t1)t2 group by id,date1 )t3
coalesce
波峰波谷
create table userinfo3( `id` string, `ds` string, `price` string ) row format delimited fields terminated by '\t'; insert overwrite table userinfo3 values ('1','2024-01-01','10001'), ('1','2024-01-03','1001'), ('1','2024-01-02','10002'), ('1','2024-01-04','999'), ('1','2024-01-05','1002'), ('1','2024-01-06','1003'), ('1','2024-01-07','1004'), ('1','2024-01-08','998'), ('1','2024-01-09','997'), ('1','2024-01-10','996'), ('2','2024-01-01','10001'), ('2','2024-01-02','10002'), ('2','2024-01-03','10003'), ('2','2024-01-04','10002'), ('2','2024-01-05','1002'), ('2','2024-01-06','1003'), ('2','2024-01-07','1004'), ('2','2024-01-08','998'), ('2','2024-01-09','997'), ('2','2024-01-10','996');
偏移量函数:lag
语法:lag(col,n,default_val)
返回值:字段类型
说明:往前第n行数据。
偏移量函数:lead
语法:lead(col,n, default_val)
返回值:字段类型
说明:往后第n行数据。
select id,ds,price,nvl( case when price>lag_price and price >lea_price then '波峰' when price<lag_price and price<lea_price then '波谷' end,0 )as price_type from (select id,price,ds ,lag(price,1,null) over (partition by id order by ds) as lag_price, lead(price,1,null) over (partition by id order by ds) as lea_price from userinfo3)t1