现有一张用户登录表(user_active_log),包含2个字段:userID(用户ID)和createdTime(登录时间戳)。需要统计2021年12月内至少连续登录7天的用户数量。
第1步:筛选12月的记录,并按用户ID和登录日期去重(同一用户一天内有多行记录时,只保留1行)
# Step 1: one row per (user, calendar day) for logins that fall in December 2021.
# Assumes createdTime stores Unix epoch seconds, since it is fed to from_unixtime — TODO confirm.
select
    userID,
    date(from_unixtime(createdTime)) as a_createdTime
from user_active_log
# Key point 1: filter the month with a half-open range on the raw timestamp
# instead of converting every row to a string; the column stays unwrapped, so
# an index on createdTime (if one exists) remains usable. unix_timestamp() and
# from_unixtime() both use the session time zone, so the month boundaries match.
where createdTime >= unix_timestamp('2021-12-01 00:00:00')
  and createdTime <  unix_timestamp('2022-01-01 00:00:00')
# Key point 2: grouping by user and login date collapses several logins on the
# same day into a single row.
group by
    userID,
    date(from_unixtime(createdTime));
第2步:创建辅助列a_rk (每个userID下的日期排序值)
# Step 2: add helper column a_rk, the 1-based position of each login date
# within its user, ordered by date.
# Assumes createdTime stores Unix epoch seconds, since it is fed to from_unixtime — TODO confirm.
with daily_login as (
    # One row per (user, calendar day) in December 2021.
    select
        userID,
        date(from_unixtime(createdTime)) as a_createdTime
    from user_active_log
    # Half-open range on the raw timestamp: no per-row string conversion, and
    # the boundaries are evaluated in the same session time zone as from_unixtime.
    where createdTime >= unix_timestamp('2021-12-01 00:00:00')
      and createdTime <  unix_timestamp('2022-01-01 00:00:00')
    group by
        userID,
        date(from_unixtime(createdTime))
)
select
    userID,
    a_createdTime,
    # Key point 3: row_number() ranks the distinct login dates inside each user.
    row_number() over (partition by userID order by a_createdTime) as a_rk
from daily_login;
第3步:创建辅助列起步日期b_createdTime(用登录日期减去排序值得到)。同一段连续登录中,日期和排序值同步加1,所以相减后得到相同的b_createdTime;例如12-03、12-04、12-05的排序值为1、2、3,相减后都是12-02
# Step 3: add helper column b_createdTime = login date minus its rank.
# Consecutive dates of one user advance in lockstep with a_rk, so every day of
# the same streak yields the same b_createdTime.
# Assumes createdTime stores Unix epoch seconds, since it is fed to from_unixtime — TODO confirm.
with daily_login as (
    # One row per (user, calendar day) in December 2021.
    select
        userID,
        date(from_unixtime(createdTime)) as a_createdTime
    from user_active_log
    # Half-open range on the raw timestamp: no per-row string conversion, and
    # the boundaries are evaluated in the same session time zone as from_unixtime.
    where createdTime >= unix_timestamp('2021-12-01 00:00:00')
      and createdTime <  unix_timestamp('2022-01-01 00:00:00')
    group by
        userID,
        date(from_unixtime(createdTime))
),
ranked_login as (
    # Position of each login date within its user.
    select
        userID,
        a_createdTime,
        row_number() over (partition by userID order by a_createdTime) as a_rk
    from daily_login
)
select
    userID,
    a_createdTime,
    a_rk,
    # Key point 4: date_sub subtracts a number of days from a date
    # (datediff, by contrast, subtracts one date from another).
    date_sub(a_createdTime, interval a_rk day) as b_createdTime
from ranked_login;
第4步:根据起步日期列统计每段连续登录的天数,并只保留天数不少于7天的记录
# Step 4: length of every login streak, keeping only streaks of 7+ days.
# Each (user, b_createdTime) pair identifies one run of consecutive login days.
# Assumes createdTime stores Unix epoch seconds, since it is fed to from_unixtime — TODO confirm.
with daily_login as (
    # One row per (user, calendar day) in December 2021.
    select
        userID,
        date(from_unixtime(createdTime)) as a_createdTime
    from user_active_log
    # Half-open range on the raw timestamp: no per-row string conversion, and
    # the boundaries are evaluated in the same session time zone as from_unixtime.
    where createdTime >= unix_timestamp('2021-12-01 00:00:00')
      and createdTime <  unix_timestamp('2022-01-01 00:00:00')
    group by
        userID,
        date(from_unixtime(createdTime))
),
ranked_login as (
    # Position of each login date within its user.
    select
        userID,
        a_createdTime,
        row_number() over (partition by userID order by a_createdTime) as a_rk
    from daily_login
),
streak_anchor as (
    # Login date minus rank: constant across one run of consecutive days.
    select
        userID,
        a_createdTime,
        a_rk,
        date_sub(a_createdTime, interval a_rk day) as b_createdTime
    from ranked_login
)
select
    userId,
    b_createdTime,
    count(*) as cts
from streak_anchor
group by
    userId,
    b_createdTime
# Key point 5: having filters groups after group by; 7 is the required streak length.
having count(*) >= 7;
第5步:根据第4步的统计结果,对用户去重后计算连续登录的用户数
# Step 5: number of users with at least one streak of 7+ consecutive login days
# inside December 2021.
# Assumes createdTime stores Unix epoch seconds, since it is fed to from_unixtime — TODO confirm.
with daily_login as (
    # One row per (user, calendar day) in December 2021.
    select
        userID,
        date(from_unixtime(createdTime)) as a_createdTime
    from user_active_log
    # Half-open range on the raw timestamp: no per-row string conversion, and
    # the boundaries are evaluated in the same session time zone as from_unixtime.
    where createdTime >= unix_timestamp('2021-12-01 00:00:00')
      and createdTime <  unix_timestamp('2022-01-01 00:00:00')
    group by
        userID,
        date(from_unixtime(createdTime))
),
ranked_login as (
    # Position of each login date within its user.
    select
        userID,
        a_createdTime,
        row_number() over (partition by userID order by a_createdTime) as a_rk
    from daily_login
),
streak_anchor as (
    # Login date minus rank: constant across one run of consecutive days.
    select
        userID,
        a_createdTime,
        a_rk,
        date_sub(a_createdTime, interval a_rk day) as b_createdTime
    from ranked_login
),
qualifying_streak as (
    # One row per streak that lasted 7 days or more.
    select
        userID,
        b_createdTime,
        count(*) as cts
    from streak_anchor
    group by
        userID,
        b_createdTime
    having count(*) >= 7
)
# distinct is required: a user can have more than one qualifying streak in the month.
select count(distinct userID) as num_users
from qualifying_streak;