hive查询的相关示例

简介: hive查询的相关示例

hive基本查询

-- Basic Hive queries: full table scan, max aggregate, and row count.
select * from a;
select max(ip) from a;
select count(*) from a;

hive 条件查询

-- Conditional query: filter rows by access_time. The string comparison is
-- valid because 'yyyy-MM-dd HH:mm:ss' timestamps sort lexicographically.
select *
from t_pv_log
where access_time>'2020-11-26 15:31:33';

关联查询

1.内连接

-- 1. Inner join with no ON condition: produces the Cartesian product --
-- each left-table row is paired with every right-table row.
select t_a.*,t_b.*
from a t_a inner join b t_b;
+---------+-----------+---------+-----------+--+
| t_a.id  | t_a.name  | t_b.id  | t_b.name  |
+---------+-----------+---------+-----------+--+
| 1       | a         | 2       | bb        |
| 1       | a         | 3       | cc        |
| 1       | a         | 7       | yy        |
| 1       | a         | 9       | pp        |
| 2       | b         | 2       | bb        |
| 2       | b         | 3       | cc        |
| 2       | b         | 7       | yy        |
| 2       | b         | 9       | pp        |
| 3       | c         | 2       | bb        |
| 3       | c         | 3       | cc        |
| 3       | c         | 7       | yy        |
| 3       | c         | 9       | pp        |
| 4       | d         | 2       | bb        |
| 4       | d         | 3       | cc        |
| 4       | d         | 7       | yy        |
| 4       | d         | 9       | pp        |
| 7       | y         | 2       | bb        |
| 7       | y         | 3       | cc        |
| 7       | y         | 7       | yy        |
| 7       | y         | 9       | pp        |
| 8       | u         | 2       | bb        |
| 8       | u         | 3       | cc        |
| 8       | u         | 7       | yy        |
| 8       | u         | 9       | pp        |
+---------+-----------+---------+-----------+--+
-- Inner join with an explicit join condition: only rows with matching ids survive.
select t_a.*,t_b.*
from a t_a inner join b t_b
on t_a.id=t_b.id;
+---------+-----------+---------+-----------+--+
| t_a.id  | t_a.name  | t_b.id  | t_b.name  |
+---------+-----------+---------+-----------+--+
| 2       | b         | 2       | bb        |
| 3       | c         | 3       | cc        |
| 7       | y         | 7       | yy        |
+---------+-----------+---------+-----------+--+

2.左连接

-- 2. Left (outer) join: keep every row of the left table; when the right
-- table has no match, its columns come back as NULL.
select a.*,b.*
from a left join b
on a.id=b.id;
+-------+---------+-------+---------+--+
| a.id  | a.name  | b.id  | b.name  |
+-------+---------+-------+---------+--+
| 1     | a       | NULL  | NULL    |
| 2     | b       | 2     | bb      |
| 3     | c       | 3     | cc      |
| 4     | d       | NULL  | NULL    |
| 7     | y       | 7     | yy      |
| 8     | u       | NULL  | NULL    |
+-------+---------+-------+---------+--+
--加条件:保留左边所有字段,右边没有符合左边字段的就为null

3.右连接

-- 3. Right (outer) join: keep every row of the right table; when the left
-- table has no match, its columns come back as NULL.
select a.*,b.*
from a right join b
on a.id=b.id;
+-------+---------+-------+---------+--+
| a.id  | a.name  | b.id  | b.name  |
+-------+---------+-------+---------+--+
| 2     | b       | 2     | bb      |
| 3     | c       | 3     | cc      |
| 7     | y       | 7     | yy      |
| NULL  | NULL    | 9     | pp      |
+-------+---------+-------+---------+--+
--加条件:保留右边所有字段,左边没有符合右边字段的就为null

4.全外连接

-- 4. Full outer join: keep all rows from both sides; unmatched columns are NULL.
select a.*,b.*
from a full outer join b
on a.id=b.id;
+-------+---------+-------+---------+--+
| a.id  | a.name  | b.id  | b.name  |
+-------+---------+-------+---------+--+
| 1     | a       | NULL  | NULL    |
| 2     | b       | 2     | bb      |
| 3     | c       | 3     | cc      |
| 4     | d       | NULL  | NULL    |
| 7     | y       | 7     | yy      |
| 8     | u       | NULL  | NULL    |
| NULL  | NULL    | 9     | pp      |
+-------+---------+-------+---------+--+
--加条件:左边右边字段都会保存

5.左半连接(hive独有)

-- 5. Left semi join (Hive-specific): returns left-table rows that have a
-- match in the right table; right-table columns cannot appear in SELECT.
select a.*
from a left semi join b
on a.id=b.id;
+-------+---------+--+
| a.id  | a.name  |
+-------+---------+--+
| 2     | b       |
| 3     | c       |
| 7     | y       |
+-------+---------+--+
--查询得到与右表相同字段数据的左表数据(故select子句中不能有右表字段)

group by 分组聚合查询

select upper("abcde");-- upper(): uppercase function, tested here on a constant
-- Scalar functions are evaluated per row: the chosen column changes on every row.
select ip,upper(url),access_time
from t_pv_log;
--求每条url的访问总次数
+---------------+--------------------+-----------------------+---------------+--+
|  t_pv_log.ip  |    t_pv_log.url    | t_pv_log.access_time  | t_pv_log.day  |
+---------------+--------------------+-----------------------+---------------+--+
| 192.168.22.1  | http://sina.com/a  | 2020-11-26 15:31:23   | 20201126      |
| 192.168.22.4  | http://sina.com/a  | 2020-11-26 16:21:33   | 20201126      |
| 192.168.33.1  | http://sina.com/a  | 2020-11-26 17:11:43   | 20201126      |
| 192.168.22.1  | http://sina.com/a  | 2020-11-27 15:31:23   | 20201127      |
| 192.168.22.4  | http://sina.com/a  | 2020-11-27 16:21:33   | 20201127      |
| 192.168.33.1  | http://sina.com/a  | 2020-11-27 17:11:43   | 20201127      |
| 192.168.12.6  | http://sina.com/v  | 2020-11-26 15:31:33   | 20201126      |
| 192.168.12.6  | http://sina.com/v  | 2020-11-27 15:31:33   | 20201127      |
| 192.168.22.7  | http://sina.com/f  | 2020-11-26 15:31:36   | 20201126      |
| 192.168.22.7  | http://sina.com/f  | 2020-11-27 15:31:36   | 20201127      |
| 192.168.22.1  | http://sina.com/u  | 2020-11-26 15:31:33   | 20201126      |
| 192.168.22.1  | http://sina.com/u  | 2020-11-27 15:31:33   | 20201127      |
+---------------+--------------------+-----------------------+---------------+--+
-- Total visit count per url.
select url,count(1) as cnts
from t_pv_log
group by url;
+--------------------+-------+--+
|        url         | cnts  |
+--------------------+-------+--+
| http://sina.com/a  | 6     |
| http://sina.com/f  | 2     |
| http://sina.com/u  | 2     |
| http://sina.com/v  | 2     |
+--------------------+-------+--+
-- Syntax: GROUP BY col -- rows sharing the same value form one group.
-- Note: SELECT may only reference grouped columns and aggregates; the
-- aggregate is computed once per group.
-- Largest visitor ip per url (a string max, not a numeric one).
select url,max(ip)
from t_pv_log
group by url;
-- max(): aggregate function -- operates on all rows of a group at once.
+--------------------+---------------+--+
|        url         |      _c1      |
+--------------------+---------------+--+
| http://sina.com/a  | 192.168.33.1  |
| http://sina.com/f  | 192.168.22.7  |
| http://sina.com/u  | 192.168.22.1  |
| http://sina.com/v  | 192.168.12.6  |
+--------------------+---------------+--+
--求每个用户访问同一页面的所有记录中时间最晚的一条
--表分组的样子
+---------------+--------------------+-----------------------+---------------+--+
|  t_pv_log.ip  |    t_pv_log.url    | t_pv_log.access_time  | t_pv_log.day  |
+---------------+--------------------+-----------------------+---------------+--+
| 192.168.22.1  | http://sina.com/a  | 2020-11-26 15:31:23   | 20201126      |
| 192.168.22.1  | http://sina.com/a  | 2020-11-27 15:31:23   | 20201127      |
| 192.168.22.4  | http://sina.com/a  | 2020-11-26 16:21:33   | 20201126      |
| 192.168.22.4  | http://sina.com/a  | 2020-11-27 16:21:33   | 20201127      |
| 192.168.33.1  | http://sina.com/a  | 2020-11-26 17:11:43   | 20201126      |
| 192.168.33.1  | http://sina.com/a  | 2020-11-27 17:11:43   | 20201127      |
| 192.168.12.6  | http://sina.com/v  | 2020-11-26 15:31:33   | 20201126      |
| 192.168.12.6  | http://sina.com/v  | 2020-11-27 15:31:33   | 20201127      |
| 192.168.22.7  | http://sina.com/f  | 2020-11-26 15:31:36   | 20201126      |
| 192.168.22.7  | http://sina.com/f  | 2020-11-27 15:31:36   | 20201127      |
| 192.168.22.1  | http://sina.com/u  | 2020-11-26 15:31:33   | 20201126      |
| 192.168.22.1  | http://sina.com/u  | 2020-11-27 15:31:33   | 20201127      |
+---------------+--------------------+-----------------------+---------------+--+
select ip,url,max(access_time) as time -- SELECT list holds only group-by keys and aggregates
from t_pv_log
group by ip,url;
+---------------+--------------------+----------------------+--+
|      ip       |        url         |         time         |
+---------------+--------------------+----------------------+--+
| 192.168.12.6  | http://sina.com/v  | 2020-11-27 15:31:33  |
| 192.168.22.1  | http://sina.com/a  | 2020-11-27 15:31:23  |
| 192.168.22.1  | http://sina.com/u  | 2020-11-27 15:31:33  |
| 192.168.22.4  | http://sina.com/a  | 2020-11-27 16:21:33  |
| 192.168.22.7  | http://sina.com/f  | 2020-11-27 15:31:36  |
| 192.168.33.1  | http://sina.com/a  | 2020-11-27 17:11:43  |
+---------------+--------------------+----------------------+--+
--注:结果有多少行就被分了多少组

分组聚合综合示例

-- Grouping/aggregation end-to-end example.
-- 1. Create a partitioned table (dt is the partition column).
create table t_access(ip string,url string,access_time date)
partitioned by (dt string)
row format delimited 
fields terminated by ',';
-- 2. Prepare one data file per partition.
vi access.log.20201211
vi access.log.20201212
vi access.log.20201213
-- 3. Load each file into its own partition.
load data local inpath '/root/hivetest/access.log.20201211' into table t_access partition(dt='20201211');
load data local inpath '/root/hivetest/access.log.20201212' into table t_access partition(dt='20201212');
load data local inpath '/root/hivetest/access.log.20201213' into table t_access partition(dt='20201213');
-- 4. Queries
-- Per day after Dec 11: total visits to http://www.edu360.cn/job and the
-- largest visitor ip, with the url shown in the output.
-- Method 1: repeat the url as a constant in the SELECT list.
select dt,'http://www.edu360.cn/job',count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'20201211';
-- Method 2: url is constant within each group, so max(url) simply returns it.
select dt,max(url),count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'20201211';
-- Method 3: add url to GROUP BY so it can be selected directly.
select dt,url,count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt,url having dt>'20201211';
+-----------+---------------------------+------+----------------+--+
|    dt     |            url            | _c2  |      _c3       |
+-----------+---------------------------+------+----------------+--+
| 20201212  | http://www.edu360.cn/job  | 2    | 192.168.33.55  |
| 20201213  | http://www.edu360.cn/job  | 3    | 192.168.33.55  |
+-----------+---------------------------+------+----------------+--+
-- Per day after Dec 11: total visits per page and the largest visitor ip.
-- (dt is a string partition column, so the string comparison is correct.)
select dt,url,count(1),max(ip)
from t_access
where dt>'20201211'
group by dt,url;
+-----------+---------------------------------+------+----------------+--+
|    dt     |               url               | _c2  |      _c3       |
+-----------+---------------------------------+------+----------------+--+
| 20201212  | http://www.edu360.cn/job        | 2    | 192.168.33.55  |
| 20201212  | http://www.edu360.cn/stu        | 2    | 192.168.33.44  |
| 20201212  | http://www.edu360.cn/teach      | 1    | 192.168.44.3   |
| 20201213  | http://www.edu360.cn/excersize  | 3    | 192.168.33.46  |
| 20201213  | http://www.edu360.cn/job        | 3    | 192.168.33.55  |
| 20201213  | http://www.edu360.cn/pay        | 1    | 192.168.34.44  |
| 20201213  | http://www.edu360.cn/register   | 2    | 192.168.133.3  |
+-----------+---------------------------------+------+----------------+--+

hive 子查询

-- Hive subqueries
-- Per day after Dec 11: total visits per page plus the largest visitor ip,
-- keeping only pages with more than 2 visits.
-- Method 1: filter the aggregate with HAVING.
-- Note: cnts is a bigint, so compare against the numeric literal 2.
-- Comparing with the string '2' relied on an implicit cast to double.
select dt,url,count(1) as cnts,max(ip)
from t_access
where dt>'20201211'
group by dt,url having cnts>2;
-- Method 2: wrap the aggregation in a subquery and filter it outside.
select dt,url,cnts,ip
from 
(select dt,url,count(1) as cnts,max(ip) as ip
from t_access
where dt>'20201211'
group by dt,url) tmp
where cnts>2;
+-----------+---------------------------------+-------+----------------+--+
|    dt     |               url               | cnts  |       ip       |
+-----------+---------------------------------+-------+----------------+--+
| 20201213  | http://www.edu360.cn/excersize  | 3     | 192.168.33.46  |
| 20201213  | http://www.edu360.cn/job        | 3     | 192.168.33.55  |
+-----------+---------------------------------+-------+----------------+--+

hive 数据类型

数字类型、日期时间类型、字符串类型、混杂类型、复合类型(hive独有)

1.array数组类型

-- 1. array column type
-- Create the table: actors is an array<string>, items separated by ':'.
create table t_movie(movie_name string,actors array<string>,first_show date)
row  format  delimited fields terminated by ','
collection items terminated by ':';
-- vi movie.dat  (sample contents below; filename must match the load path)
无名之辈,章宇:陈建斌:任素汐,2018-11-16
我不是药神,徐峥:王传君:章宇,2018-07-05
一出好戏,黄渤:王宝强:舒淇,2018-08-10
中国机长,欧豪:杜江:袁泉,2018-05-18
囧妈,徐峥:黄梅莹:袁泉,2020-01-25
-- Load the data.
load data local inpath '/root/hivetest/movie.dat' into table t_movie ;
-- Query: index into the array with [] (zero-based).
select movie_name,actors[0],first_show from t_movie;
+-------------+------+-------------+--+
| movie_name  | _c1  | first_show  |
+-------------+------+-------------+--+
| 无名之辈        | 章宇   | 2018-11-16  |
| 我不是药神       | 徐峥   | 2018-07-05  |
| 一出好戏        | 黄渤   | 2018-08-10  |
| 中国机长        | 欧豪   | 2018-05-18  |
| 囧妈          | 徐峥   | 2020-01-25  |
+-------------+------+-------------+--+
-- Movies featuring 徐峥: array_contains(array, value) tests membership.
select movie_name,actors,first_show
from t_movie
where array_contains(actors,'徐峥') ;
+-------------+--------------------+-------------+--+
| movie_name  |       actors       | first_show  |
+-------------+--------------------+-------------+--+
| 我不是药神       | ["徐峥","王传君","章宇"]  | 2018-07-05  |
| 囧妈          | ["徐峥","黄梅莹","袁泉"]  | 2020-01-25  |
+-------------+--------------------+-------------+--+
-- Number of actors per movie: size() returns the array length.
select movie_name
,size(actors) as act_num
,first_show
from t_movie;
+-------------+----------+-------------+--+
| movie_name  | act_num  | first_show  |
+-------------+----------+-------------+--+
| 无名之辈        | 3        | 2018-11-16  |
| 我不是药神       | 3        | 2018-07-05  |
| 一出好戏        | 3        | 2018-08-10  |
| 中国机长        | 3        | 2018-05-18  |
| 囧妈          | 3        | 2020-01-25  |
+-------------+----------+-------------+--+

2.map类型

-- 2. map column type
-- Create the table: familys maps relation name -> person name; entries are
-- separated by '#' and key/value within an entry by ':'.
create table t_family(id int,name string,familys map<string,string>,age int)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';
-- vi family.dat  (sample contents below)
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
-- Load the data.
load data local inpath '/root/hivetest/family.dat' into table t_family;
-- Each person's father: index into the map with ['key'].
select id,name,familys['father'],age
from t_family;
+-----+-----------+--------------+------+--+
| id  |   name    |     _c2      | age  |
+-----+-----------+--------------+------+--+
| 1   | zhangsan  | xiaoming     | 28   |
| 2   | lisi      | mayun        | 22   |
| 3   | wangwu    | wangjianlin  | 29   |
| 4   | mayun     | mayongzhen   | 26   |
+-----+-----------+--------------+------+--+
-- Relation types per person: map_keys() returns the map's keys as an array.
select id,name,map_keys(familys) as relations,age
from t_family;
+-----+-----------+--------------------------------+------+--+
| id  |   name    |           relations            | age  |
+-----+-----------+--------------------------------+------+--+
| 1   | zhangsan  | ["father","mother","brother"]  | 28   |
| 2   | lisi      | ["father","mother","brother"]  | 22   |
| 3   | wangwu    | ["father","mother","sister"]   | 29   |
| 4   | mayun     | ["father","mother"]            | 26   |
+-----+-----------+--------------------------------+------+--+
-- Relatives' names per person: map_values() returns the map's values as an array.
select id,name,map_values(familys) as f_name,age
from t_family;
+-----+-----------+-------------------------------------+------+--+
| id  |   name    |               f_name                | age  |
+-----+-----------+-------------------------------------+------+--+
| 1   | zhangsan  | ["xiaoming","xiaohuang","xiaoxu"]   | 28   |
| 2   | lisi      | ["mayun","huangyi","guanyu"]        | 22   |
| 3   | wangwu    | ["wangjianlin","ruhua","jingtian"]  | 29   |
| 4   | mayun     | ["mayongzhen","angelababy"]         | 26   |
+-----+-----------+-------------------------------------+------+--+
-- Number of relatives per person: size() also works on maps (entry count).
select id,name,size(familys),age
from t_family;
+-----+-----------+------+------+--+
| id  |   name    | _c2  | age  |
+-----+-----------+------+------+--+
| 1   | zhangsan  | 3    | 28   |
| 2   | lisi      | 3    | 22   |
| 3   | wangwu    | 3    | 29   |
| 4   | mayun     | 2    | 26   |
+-----+-----------+------+------+--+
-- Everyone who has a brother, and who the brother is.
-- Method 1: test key membership directly in WHERE.
select id,name,familys['brother'],age
from t_family
where array_contains(map_keys(familys),'brother');
-- Method 2: expose the key array in a subquery, then filter on it.
select id,name,age,familys['brother']
from
(select id,name,age,map_keys(familys) as relations,familys
from t_family ) tmp
where array_contains(relations,'brother');
+-----+-----------+------+--------------------------------+----------------------------------------------------------------+--+
| id  |   name    | age  |           relations            |                            familys                             |
+-----+-----------+------+--------------------------------+----------------------------------------------------------------+--+
| 1   | zhangsan  | 28   | ["father","mother","brother"]  | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"}  |
| 2   | lisi      | 22   | ["father","mother","brother"]  | {"father":"mayun","mother":"huangyi","brother":"guanyu"}       |
| 3   | wangwu    | 29   | ["father","mother","sister"]   | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"}  |
| 4   | mayun     | 26   | ["father","mother"]            | {"father":"mayongzhen","mother":"angelababy"}                  |
+-----+-----------+------+--------------------------------+----------------------------------------------------------------+--+
+-----+-----------+------+---------+--+
| id  |   name    | age  |   _c3   |
+-----+-----------+------+---------+--+
| 1   | zhangsan  | 28   | xiaoxu  |
| 2   | lisi      | 22   | guanyu  |
+-----+-----------+------+---------+--+

3.struct类型

-- 3. struct column type
-- Create the table: info packs age/sex/addr into one struct column,
-- fields separated by ':'.
create table t_user(id int,name string,info struct<age:int,sex:string,addr:string>)
row format delimited fields terminated by ','
collection items terminated by ':';
-- vi user.dat  (sample contents below)
1,zhangsan,18:male:深圳
2,lisi,28:female:北京
3,wangwu,38:male:广州
4,赵六,26:female:上海
5,钱琪,35:male:杭州
6,王八,48:female:南京
-- Load the data.
load data local inpath '/root/hivetest/user.dat' into table t_user;
-- Query: access struct fields with dot notation.
select id,name,info.addr
from t_user;
+-----+-----------+-------+--+
| id  |   name    | addr  |
+-----+-----------+-------+--+
| 1   | zhangsan  | 深圳    |
| 2   | lisi      | 北京    |
| 3   | wangwu    | 广州    |
| 4   | 赵六        | 上海    |
| 5   | 钱琪        | 杭州    |
| 6   | 王八        | 南京    |
+-----+-----------+-------+--+

hive 的函数

字符串函数、类型转换函数、数学运算函数、时间函数、表生成函数

-- String functions
select substr("abcgfik",1,3); -- test a function on a constant literal
select substr("abcgfik",0,3);
+------+--+
| _c0  |
+------+--+
| abc  |
+------+--+
-- Both calls return the same result: in Hive, substr start positions 0 and 1 are equivalent.

表生成函数

-- Table-generating functions (explode() and lateral view)
-- Word-count example.
-- Create the table: one sentence per row.
create table t_wc(sentence string);
-- vi word.dat  (sample contents below)
select name string name
name select from id info
select select id id name info
string from id
id string name
-- Load the data.
load data local inpath '/root/hivetest/word.dat' into table t_wc;
-- Query
select  explode(split(sentence,' ')) as word -- split on spaces into an array, then explode() turns the array into one row per element
from t_wc;
+---------+--+
|  word   |
+---------+--+
| select  |
| name    |
| string  |
| name    |
| name    |
| select  |
| from    |
| id      |
| info    |
| select  |
| select  |
| id      |
| id      |
| name    |
| info    |
| string  |
| from    |
| id      |
| id      |
| string  |
| name    |
+---------+--+
-- Count each word: aggregate over the exploded rows via a subquery.
select word,count(1) as conts
from
(
select  explode(split(sentence,' ')) as word
from t_wc
) tmp
group by word;
+---------+--------+--+
|  word   | conts  |
+---------+--------+--+
| from    | 2      |
| id      | 5      |
| info    | 2      |
| name    | 5      |
| select  | 4      |
| string  | 3      |
+---------+--------+--+

lateral view

-- lateral view
-- Create the table: subjects is an array<string>, items separated by ','.
create table t_subject(id int,name string,subjects array<string>)
row format delimited fields terminated by ' '
collection items terminated by ',';
-- vi subject.txt  (sample contents below; filename must match the load path)
001 zhangsan 物理,化学,数学,英语,生物
002 lisi 生物,历史,数学,语文,化学
003 wangwu 体育,美术,化学,语文,英语,数学
-- Load the data.
load data local inpath '/root/hivetest/subject.txt' into table t_subject;
-- Distinct list of all subjects across students.
select distinct tmp.sub
from(
select explode(subjects) as sub 
from t_subject
) tmp;
+----------+--+
| tmp.sub  |
+----------+--+
| 体育       |
| 化学       |
| 历史       |
| 数学       |
| 物理       |
| 生物       |
| 美术       |
| 英语       |
| 语文       |
+----------+--+
-- lateral view example: joins each original row to the rows explode()
-- generates from its array, so id/name can be selected alongside each subject.
select id,name,tmp.sub
from t_subject
lateral view
explode(subjects) tmp as sub ;
+-----+-----------+----------+--+
| id  |   name    | tmp.sub  |
+-----+-----------+----------+--+
| 1   | zhangsan  | 物理       |
| 1   | zhangsan  | 化学       |
| 1   | zhangsan  | 数学       |
| 1   | zhangsan  | 英语       |
| 1   | zhangsan  | 生物       |
| 2   | lisi      | 生物       |
| 2   | lisi      | 历史       |
| 2   | lisi      | 数学       |
| 2   | lisi      | 语文       |
| 2   | lisi      | 化学       |
| 3   | wangwu    | 体育       |
| 3   | wangwu    | 美术       |
| 3   | wangwu    | 化学       |
| 3   | wangwu    | 语文       |
| 3   | wangwu    | 英语       |
| 3   | wangwu    | 数学       |
+-----+-----------+----------+--+
相关文章
|
SQL 分布式计算 Hadoop
Hive使用Impala组件查询(1)
Hive使用Impala组件查询(1)
417 0
|
SQL 存储 分布式数据库
【通过Hive清洗、处理和计算原始数据,Hive清洗处理后的结果,将存入Hbase,海量数据随机查询场景从HBase查询数据 】
【通过Hive清洗、处理和计算原始数据,Hive清洗处理后的结果,将存入Hbase,海量数据随机查询场景从HBase查询数据 】
247 0
|
SQL HIVE
Hive分区+根据分区查询
Hive分区+根据分区查询
|
1月前
|
SQL 分布式计算 Hadoop
Hadoop-14-Hive HQL学习与测试 表连接查询 HDFS数据导入导出等操作 逻辑运算 函数查询 全表查询 WHERE GROUP BY ORDER BY(一)
Hadoop-14-Hive HQL学习与测试 表连接查询 HDFS数据导入导出等操作 逻辑运算 函数查询 全表查询 WHERE GROUP BY ORDER BY(一)
38 4
|
1月前
|
SQL
Hadoop-14-Hive HQL学习与测试 表连接查询 HDFS数据导入导出等操作 逻辑运算 函数查询 全表查询 WHERE GROUP BY ORDER BY(二)
Hadoop-14-Hive HQL学习与测试 表连接查询 HDFS数据导入导出等操作 逻辑运算 函数查询 全表查询 WHERE GROUP BY ORDER BY(二)
33 2
|
5月前
|
SQL 资源调度 数据库连接
Hive怎么调整优化Tez引擎的查询?在Tez上优化Hive查询的指南
在Tez上优化Hive查询,包括配置参数调整、理解并行化机制以及容器管理。关键步骤包括YARN调度器配置、安全阀设置、识别性能瓶颈(如mapper/reducer任务和连接操作),理解Tez如何动态调整mapper和reducer数量。例如,`tez.grouping.max-size` 影响mapper数量,`hive.exec.reducers.bytes.per.reducer` 控制reducer数量。调整并发和容器复用参数如`hive.server2.tez.sessions.per.default.queue` 和 `tez.am.container.reuse.enabled`
396 0
|
6月前
|
SQL 存储 大数据
Hive的查询、数据加载和交换、聚合、排序、优化
Hive的查询、数据加载和交换、聚合、排序、优化
131 2
|
6月前
|
SQL 存储 关系型数据库
Presto【实践 01】Presto查询性能优化(数据存储+SQL优化+无缝替换Hive表+注意事项)及9个实践问题分享
Presto【实践 01】Presto查询性能优化(数据存储+SQL优化+无缝替换Hive表+注意事项)及9个实践问题分享
725 0
|
6月前
|
SQL 分布式计算 Java
Hive【Hive(三)查询语句】
Hive【Hive(三)查询语句】
|
SQL 存储 大数据
大数据Hive Join连接查询
大数据Hive Join连接查询
124 0

热门文章

最新文章