一.常用 SQL
1.删除表
DROPTABLEifEXISTS ads_sense_rep.tmp_table_${bizdate};
2.清空表
TRUNCATETABLE db.table;
3.删除分区表指定数据
altertable ads_sense_rep.table_name
droppartition(partition_day ='2022-05-18');
### 4.是否重复数据
SELECT period_sdate, product_key, size_code, managing_city_no
FROM ads_sense_rep.ads_day_city_brand_sku_size_rep
GROUPBY period_sdate, product_key, size_code, managing_city_no
HAVINGCOUNT(*)>1;
5.迁移数据
insertinto ads_sense_rep.table_namepartition(partition_day)
SELECT`product_key`,
`period_sdate`,
`total30_sal_qty_store_rate`,
`partition_day`
FROM ads_sense_rep.table_name;
6.修改表名
ALTERTABLE ads_sense_rep.table_nameRENAMETO ads_sense_rep.table_name;
7.创建临时表
DROPTABLEifEXISTS ads_sense_rep.tmp_table_${bizdate};
CREATETABLEifNOTEXISTS ads_sense_rep.tmp_table_${bizdate}AS
SELECTDISTINCT sku.product_code
FROM ads_sense_rep.table sku
WHERE sku.brand_detail_no='01'
AND sku.gender_nameIN('男','女')
AND sku.category_name1='鞋'
AND sku.begin_dateISNOTNULL
;
8.批量插入数据
INSERT INTO ads_sense_rep.ads_dim_natural_year_week_info
( financial_year, financial_year_week, financial_year_start, financial_year_end, week_start_day, week_end_day)
VALUES
(2016, 1, '2016-01-01', '2016-12-31', '2016-01-01', '2016-01-03'),
(2016, 2, '2016-01-01', '2016-12-31', '2016-01-04', '2016-01-10'),
(2016, 3, '2016-01-01', '2016-12-31', '2016-01-11', '2016-01-17'),
(2016, 4, '2016-01-01', '2016-12-31', '2016-01-18', '2016-01-24'),
(2016, 5, '2016-01-01', '2016-12-31', '2016-01-25', '2016-01-31'),
(2025, 53, '2025-01-01', '2025-12-31', '2025-12-29', '2025-12-31');
9.with as 写法
WITH tmp_dim_pro_allinfo AS (
SELECT product_key
FROM bdc_dim.table
WHERE gender_name = '女'
AND category_name1 = '鞋'
AND brand_detail_no = 'SD01'
)
SELECT *
FROM tmp_dim_pro_allinfo;
10.表插入更新
insert into table #在表后直接插入,不去重
insert overwrite table #将原表清空后,再插入数据。
二.不常用
1.创建表
-- 带分区
CREATE TABLE `ads_sense_rep.table_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码',
`total7_sal_qty_store_rate` decimal(18, 4) COMMENT '累计7天转化率',
`total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) PARTITIONED BY ( `partition_day` string);
-- 不带分区
CREATE TABLE `ads_sense_rep.tabel_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码',
`total7_sal_qty_store_rate` decimal(18, 4) COMMENT '累计7天转化率',
`total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) ;
-- 带描述
CREATE TABLE `ads_sense_rep.table_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码'
) COMMENT '对方答复'
PARTITIONED BY (`partition_day` string);
创建分区表:
CREATE TABLE `db_name.table_name`
(
`financial_year` int COMMENT '财年',
`financial_year_week` int COMMENT '财年第n周',
`managing_city_no` string COMMENT '城市编码',
`etl_time` date COMMENT '处理时间'
) comment '城市-品类洞察-品牌概览表'
PARTITIONED BY (
`partition_financial_year` INT
,`partition_financial_year_week` INT);
--插入数据
INSERT INTO TABLE db_name.table_name PARTITION(partition_financial_year,partition_financial_year_week)
SELECT financial_year AS financial_year --财年
, financial_year_week AS financial_year_week --第几周
, date_format(now(), 'yyyy-MM-dd') AS etl_time --跑数时间
, financial_year AS partition_financial_year --分区财年
, financial_year_week AS partition_financial_year_week --分区财年周
FROM ads_sense_rep.xxxxx distribute BY partition_financial_year,partition_financial_year_week,CAST(rand() * 20 AS INT)
;
2.修改表注释
ALTER TABLE ads_sense_rep.table_name SET TBLPROPERTIES ('comment' = '对方答复');
3.更新数据
update ads_sense_rep.table_name
set sex='女' where id =1;
4.前一天
格式:${bizdate}
结果:20220608
date_sub(date_format(now(), 'yyyy-MM-dd'), 1)
date_format(now(), 'yyyy-MM-dd') AS etl_time --跑数时间
5.获取周开始结束
WHERE week_start_day >= date_sub(CURRENT_DATE, 6 + CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的第一天
AND week_end_day <= date_sub(CURRENT_DATE, CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的最后一天
;
6.获取月开始结束
WHERE month_start_day >= trunc(add_months(CURRENT_DATE, -1), 'MM') --上个月的第一天
AND month_end_day <= last_day(date_sub(CURRENT_DATE, DAY(CURRENT_DATE))) --上个月的最后一天
;
三.系统 SQL
1.hive 查询失败
需要先执行以下语句
set hive.compute.query.using.stats=false;
2.hive 导数据到 ck
INSERT
into
default.table_name
select
*
from
jdbc('ch-server','select * from ads_sense_rep.table_name')
3.刷新表
REFRESH TABLE bdc_dws.dws_day_org_pro_size_sal_ds;
4.设置 broadcastTimeout
set spark.sql.broadcastTimeout=30000;
5.耗时统计
select COUNT(*) as querySlowCnt from system.query_log WHERE query_duration_ms > 2000 and `type` = 2;
四.高阶使用
1.分区
INSERT INTO ads_sense_rep.ads_day_city_brand_sku_rep PARTITION(partition_day)
SELECT product_key
FROM bdc_dim.table;
2.分区区别
order by,sort by,distribute by,cluster by
oreder by
: 主要是做全局排序。sort by
:局部排序distribute by
:分区cluster by
:当分区数量小于字段种类时,就有意义。
- 当 distribute by 和 sort by 所指定的字段相同时,即可以使用 cluster by。
- 注意:cluster by 指定的列只能是降序,不能指定 asc 和 desc。