实例:创建临时表,一步完成计数

简介: 创建临时表,一步完成计数

实例:创建临时表,一步完成计数

select year,count(1) from

(select regexp_extract(title, "(\\d{4})",1)as year from yiqianbin_movie where title regexp '\\d{4}') temp

group by year;

image.png

补充

image.png

image.png

杂七杂八

select * from yiqianbin_movie where title not regexp '\\d{4}';

create table movie_year as select *,substr(title,-5,4) as year from yiqianbin_movie;

create table movies_year as select *,regexp_extract(title, "(\\d{4})",1)as year from yiqianbin_movie where title regexp '\\d{4}';

select substr(title,-5,4)from yiqianbin_movie where title regexp '\\d{4}';

select year,count(1) from movies_year group by year;

查出括号中的四个数字

select regexp_extract(title,"\\((\\d{4})\\)") from yiqianbin_movie;


查出哪一年出的电影最多,即年份出现最多的数字对应的电影名

select year,count(1) as year1 from movies_year group by year order by year1 desc limit 1;

将多个电影类型分开

先分开

select explode(split(genres,'\\|')) as type from yiqianbin_movie;  

之后将分开的导入一个新表

create table movies_type as select explode(split(genres,'\\|')) as type from yiqianbin_movie;

将类型的多少按降序排下来

select type,count(1) as count from movies_type group by type order by count desc;

临时表(一步完成)

select type,count(1) as count from (select explode(split(genres,'\\|')) as type from yiqianbin_movie)temp group by type order by count desc;

查出评分次数最高的电影

select movieId,count(1) as count from yiqianbin_ratings group by movieId order by count desc limit 1;

select * from(select movieId,count(1) as count from yiqianbin_ratings group by movieId order by count desc limit 1)t left join yiqianbin_movie m on t.movieId = m.movieId;


什么类型的电影评分最高      第四行是一个视图查询

1.先连接查询,每部电影的评分+分类

2.分类  行专列

3.按类型分组统计                                  

select type,avg(rating) as avgrating from

(select t1.rating,type.col as type from(select r.rating,m.genres from yiqianbin_ratings r

left join yiqianbin_movie m on r.movieId = m.movieId)t1

lateral view explode(split(t1.genres,'\\|')) type)t2

group by type order by avgrating desc

limit 10;


按年份统计电影数量

create table movies_year_result(year string,num string);

insert into movies_year_result

select year,count(1) from

(select substr(title,-5,4) as year from yiqianbin_movie where title regexp '\\d{4}')t group by year;

什么类型电影最多

create table genres_top10_result(type string,num string);

insert into genres_top10_result

select type,count(1) as total from

(select explode(split(genres,'\\|')) as type from yiqianbin_movie)t group by type

order by total desc limit 10;

评分人气最高电影top10

create table hot_top10_result(title string,rating string);

insert into hot_top10_result

select m.title,t.total from (

select movieId,count(1) as total

from yiqianbin_ratings group by movieId

order by total desc limit 10)t

left join yiqianbin_movie m on t.movieId = m.movieId;

高频词top10

create table tag_top10_result(tag string,num string);

insert into tag_top10_result

select tag,count(1) as total from

(select explode(split(tag,'\\s')) as tag from yiqianbin_tags)t

group by tag

order by total desc limit 10;

评分最高电影top10

create table rating_top10_result(title string,rating string);

insert into rating_top10_result

select m.title,t.avgrating from(

select movieId,avg(rating) as avgrating

from yiqianbin_ratings

group by movieId

order by avgrating desc limit 10)t

left join yiqianbin_movie m on t.movieId = m.movieId;

目录
相关文章
|
小程序 JavaScript
小程序简单循环列表数据渲染实例
小程序简单循环列表数据渲染实例
88 0
|
3月前
|
SQL 监控 关系型数据库
大数量的DML时对索引处理的技巧
【8月更文挑战第15天】在执行大批量DML操作(如INSERT、UPDATE、DELETE)时,可通过禁用索引、分批处理、选用适宜的索引类型与结构以及持续监控调整等策略优化性能。禁用索引可加速数据修改,分批处理减轻系统负担,合理索引类型支持不同查询需求,并定期优化索引结构保持高效。全程监控确保适时调整策略,提升整体效能。
|
3月前
|
SQL 移动开发 Oracle
SQL查询连续六天数据记录的技巧与方法
在数据库查询中,实现针对连续几天(如连续六天)的数据筛选是一个常见且稍具挑战性的任务
|
5月前
|
SQL
sql语句按指定某个字段分组后删除重复数据只保留id最小/最大的一条数据
sql语句按指定某个字段分组后删除重复数据只保留id最小/最大的一条数据
124 0
|
关系型数据库 MySQL 索引