勇气就是做你害怕的事,如果你不害怕,就谈不上勇气
文章目录
1、数仓开发之DIM层
DIM层设计要点:
(1)DIM层的设计依据时维度建模理论,该层储存维度模型的维度表。
(2)DIM层的数据储存格式为orc列示储存+snappy压缩
(3)DIM层表命名的命名规范为dim_表名_全量表或者拉链表标识(full/zip)
1.1 商品维度表
1、建表语句
DROP TABLE IF EXISTS dim_sku_full; CREATE EXTERNAL TABLE dim_sku_full ( `id` STRING COMMENT 'sku_id', `price` DECIMAL(16, 2) COMMENT '商品价格', `sku_name` STRING COMMENT '商品名称', `sku_desc` STRING COMMENT '商品描述', `weight` DECIMAL(16, 2) COMMENT '重量', `is_sale` BOOLEAN COMMENT '是否在售', `spu_id` STRING COMMENT 'spu编号', `spu_name` STRING COMMENT 'spu名称', `category3_id` STRING COMMENT '三级分类id', `category3_name` STRING COMMENT '三级分类名称', `category2_id` STRING COMMENT '二级分类id', `category2_name` STRING COMMENT '二级分类名称', `category1_id` STRING COMMENT '一级分类id', `category1_name` STRING COMMENT '一级分类名称', `tm_id` STRING COMMENT '品牌id', `tm_name` STRING COMMENT '品牌名称', `sku_attr_values` ARRAY<STRUCT<attr_id :STRING,value_id :STRING,attr_name :STRING,value_name:STRING>> COMMENT '平台属性', `sku_sale_attr_values` ARRAY<STRUCT<sale_attr_id :STRING,sale_attr_value_id :STRING,sale_attr_name :STRING,sale_attr_value_name:STRING>> COMMENT '销售属性', `create_time` STRING COMMENT '创建时间' ) COMMENT '商品维度表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_sku_full/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、数据装载
with sku as ( select id, price, sku_name, sku_desc, weight, is_sale, spu_id, category3_id, tm_id, create_time from ods_sku_info_full where dt='2020-06-14' ), spu as ( select id, spu_name from ods_spu_info_full where dt='2020-06-14' ), c3 as ( select id, name, category2_id from ods_base_category3_full where dt='2020-06-14' ), c2 as ( select id, name, category1_id from ods_base_category2_full where dt='2020-06-14' ), c1 as ( select id, name from ods_base_category1_full where dt='2020-06-14' ), tm as ( select id, tm_name from ods_base_trademark_full where dt='2020-06-14' ), attr as ( select sku_id, collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrs from ods_sku_attr_value_full where dt='2020-06-14' group by sku_id ), sale_attr as ( select sku_id, collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrs from ods_sku_sale_attr_value_full where dt='2020-06-14' group by sku_id ) insert overwrite table dim_sku_full partition(dt='2020-06-14') select sku.id, sku.price, sku.sku_name, sku.sku_desc, sku.weight, sku.is_sale, sku.spu_id, spu.spu_name, sku.category3_id, c3.name, c3.category2_id, c2.name, c2.category1_id, c1.name, sku.tm_id, tm.tm_name, attr.attrs, sale_attr.sale_attrs, sku.create_time from sku left join spu on sku.spu_id=spu.id left join c3 on sku.category3_id=c3.id left join c2 on c3.category2_id=c2.id left join c1 on c2.category1_id=c1.id left join tm on sku.tm_id=tm.id left join attr on sku.id=attr.sku_id left join sale_attr on sku.id=sale_attr.sku_id;
1.2 优惠券维度表
1、建表语句
DROP TABLE IF EXISTS dim_coupon_full; CREATE EXTERNAL TABLE dim_coupon_full ( `id` STRING COMMENT '购物券编号', `coupon_name` STRING COMMENT '购物券名称', `coupon_type_code` STRING COMMENT '购物券类型编码', `coupon_type_name` STRING COMMENT '购物券类型名称', `condition_amount` DECIMAL(16, 2) COMMENT '满额数', `condition_num` BIGINT COMMENT '满件数', `activity_id` STRING COMMENT '活动编号', `benefit_amount` DECIMAL(16, 2) COMMENT '减金额', `benefit_discount` DECIMAL(16, 2) COMMENT '折扣', `benefit_rule` STRING COMMENT '优惠规则:满元*减*元,满*件打*折', `create_time` STRING COMMENT '创建时间', `range_type_code` STRING COMMENT '优惠范围类型编码', `range_type_name` STRING COMMENT '优惠范围类型名称', `limit_num` BIGINT COMMENT '最多领取次数', `taken_count` BIGINT COMMENT '已领取次数', `start_time` STRING COMMENT '可以领取的开始日期', `end_time` STRING COMMENT '可以领取的结束日期', `operate_time` STRING COMMENT '修改时间', `expire_time` STRING COMMENT '过期时间' ) COMMENT '优惠券维度表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_coupon_full/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、数据装载
insert overwrite table dim_coupon_full partition(dt='2020-06-14') select id, coupon_name, coupon_type, coupon_dic.dic_name, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, case coupon_type when '3201' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3203' then concat('减',benefit_amount,'元') end benefit_rule, create_time, range_type, range_dic.dic_name, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ( select id, coupon_name, coupon_type, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, create_time, range_type, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ods_coupon_info_full where dt='2020-06-14' )ci left join ( select dic_code, dic_name from ods_base_dic_full where dt='2020-06-14' and parent_code='32' )coupon_dic on ci.coupon_type=coupon_dic.dic_code left join ( select dic_code, dic_name from ods_base_dic_full where dt='2020-06-14' and parent_code='33' )range_dic on ci.range_type=range_dic.dic_code;
1.3 活动维度表
1、建表语句
DROP TABLE IF EXISTS dim_activity_full; CREATE EXTERNAL TABLE dim_activity_full ( `activity_rule_id` STRING COMMENT '活动规则ID', `activity_id` STRING COMMENT '活动ID', `activity_name` STRING COMMENT '活动名称', `activity_type_code` STRING COMMENT '活动类型编码', `activity_type_name` STRING COMMENT '活动类型名称', `activity_desc` STRING COMMENT '活动描述', `start_time` STRING COMMENT '开始时间', `end_time` STRING COMMENT '结束时间', `create_time` STRING COMMENT '创建时间', `condition_amount` DECIMAL(16, 2) COMMENT '满减金额', `condition_num` BIGINT COMMENT '满减件数', `benefit_amount` DECIMAL(16, 2) COMMENT '优惠金额', `benefit_discount` DECIMAL(16, 2) COMMENT '优惠折扣', `benefit_rule` STRING COMMENT '优惠规则', `benefit_level` STRING COMMENT '优惠级别' ) COMMENT '活动信息表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_activity_full/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、数据装载
insert overwrite table dim_activity_full partition(dt='2020-06-14') select rule.id, info.id, activity_name, rule.activity_type, dic.dic_name, activity_desc, start_time, end_time, create_time, condition_amount, condition_num, benefit_amount, benefit_discount, case rule.activity_type when '3101' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3103' then concat('打',10*(1-benefit_discount),'折') end benefit_rule, benefit_level from ( select id, activity_id, activity_type, condition_amount, condition_num, benefit_amount, benefit_discount, benefit_level from ods_activity_rule_full where dt='2020-06-14' )rule left join ( select id, activity_name, activity_type, activity_desc, start_time, end_time, create_time from ods_activity_info_full where dt='2020-06-14' )info on rule.activity_id=info.id left join ( select dic_code, dic_name from ods_base_dic_full where dt='2020-06-14' and parent_code='31' )dic on rule.activity_type=dic.dic_code;
1.4 地区维度表
1、建表语句
DROP TABLE IF EXISTS dim_province_full; CREATE EXTERNAL TABLE dim_province_full ( `id` STRING COMMENT 'id', `province_name` STRING COMMENT '省市名称', `area_code` STRING COMMENT '地区编码', `iso_code` STRING COMMENT '旧版ISO-3166-2编码,供可视化使用', `iso_3166_2` STRING COMMENT '新版IOS-3166-2编码,供可视化使用', `region_id` STRING COMMENT '地区id', `region_name` STRING COMMENT '地区名称' ) COMMENT '地区维度表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_province_full/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、数据装载
insert overwrite table dim_province_full partition(dt='2020-06-14') select province.id, province.name, province.area_code, province.iso_code, province.iso_3166_2, region_id, region_name from ( select id, name, region_id, area_code, iso_code, iso_3166_2 from ods_base_province_full where dt='2020-06-14' )province left join ( select id, region_name from ods_base_region_full where dt='2020-06-14' )region on province.region_id=region.id;
1.5 日期维度表
1、建表语句
DROP TABLE IF EXISTS dim_date; CREATE EXTERNAL TABLE dim_date ( `date_id` STRING COMMENT '日期ID', `week_id` STRING COMMENT '周ID,一年中的第几周', `week_day` STRING COMMENT '周几', `day` STRING COMMENT '每月的第几天', `month` STRING COMMENT '一年中的第几月', `quarter` STRING COMMENT '一年中的第几季度', `year` STRING COMMENT '年份', `is_workday` STRING COMMENT '是否是工作日', `holiday_id` STRING COMMENT '节假日' ) COMMENT '时间维度表' STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_date/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、数据装载
通常情况下,时间维度表的数据并不是来自业务系统,而是手动写入,并且由于时间维度表数据的可预见性,无需每日导入,一般可一次性导入一年的数据。
(1)创建临时表
DROP TABLE IF EXISTS tmp_dim_date_info; CREATE EXTERNAL TABLE tmp_dim_date_info ( `date_id` STRING COMMENT '日', `week_id` STRING COMMENT '周ID', `week_day` STRING COMMENT '周几', `day` STRING COMMENT '每月的第几天', `month` STRING COMMENT '第几月', `quarter` STRING COMMENT '第几季度', `year` STRING COMMENT '年', `is_workday` STRING COMMENT '是否是工作日', `holiday_id` STRING COMMENT '节假日' ) COMMENT '时间维度表' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION '/warehouse/gmall/tmp/tmp_dim_date_info/';
(2)将数据文件上传到HDFS上临时表路径/warehouse/gmall/tmp/tmp_dim_date_info
(3)执行以下语句将其导入时间维度表
insert overwrite table dim_date select * from tmp_dim_date_info;
(4)检查数据是否导入成功
select * from dim_date;
1.6 用户维度表
1、建表语句
DROP TABLE IF EXISTS dim_user_zip; CREATE EXTERNAL TABLE dim_user_zip ( `id` STRING COMMENT '用户id', `login_name` STRING COMMENT '用户名称', `nick_name` STRING COMMENT '用户昵称', `name` STRING COMMENT '用户姓名', `phone_num` STRING COMMENT '手机号码', `email` STRING COMMENT '邮箱', `user_level` STRING COMMENT '用户等级', `birthday` STRING COMMENT '生日', `gender` STRING COMMENT '性别', `create_time` STRING COMMENT '创建时间', `operate_time` STRING COMMENT '操作时间', `start_date` STRING COMMENT '开始日期', `end_date` STRING COMMENT '结束日期' ) COMMENT '用户表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dim/dim_user_zip/' TBLPROPERTIES ('orc.compress' = 'snappy');
2、分区规划
3、数据装载
(1)数据装载过程
(2)数据流向
(3)首日装载
insert overwrite table dim_user_zip partition (dt='9999-12-31') select data.id, data.login_name, data.nick_name, md5(data.name), md5(data.phone_num), md5(data.email), data.user_level, data.birthday, data.gender, data.create_time, data.operate_time, '2020-06-14' start_date, '9999-12-31' end_date from ods_user_info_inc where dt='2020-06-14' and type='bootstrap-insert';
(4)每日装载
(4-1)装载思路
(4-2)装载语句
with tmp as ( select old.id old_id, old.login_name old_login_name, old.nick_name old_nick_name, old.name old_name, old.phone_num old_phone_num, old.email old_email, old.user_level old_user_level, old.birthday old_birthday, old.gender old_gender, old.create_time old_create_time, old.operate_time old_operate_time, old.start_date old_start_date, old.end_date old_end_date, new.id new_id, new.login_name new_login_name, new.nick_name new_nick_name, new.name new_name, new.phone_num new_phone_num, new.email new_email, new.user_level new_user_level, new.birthday new_birthday, new.gender new_gender, new.create_time new_create_time, new.operate_time new_operate_time, new.start_date new_start_date, new.end_date new_end_date from ( select id, login_name, nick_name, name, phone_num, email, user_level, birthday, gender, create_time, operate_time, start_date, end_date from dim_user_zip where dt='9999-12-31' )old full outer join ( select id, login_name, nick_name, md5(name) name, md5(phone_num) phone_num, md5(email) email, user_level, birthday, gender, create_time, operate_time, '2020-06-15' start_date, '9999-12-31' end_date from ( select data.id, data.login_name, data.nick_name, data.name, data.phone_num, data.email, data.user_level, data.birthday, data.gender, data.create_time, data.operate_time, row_number() over (partition by data.id order by ts desc) rn from ods_user_info_inc where dt='2020-06-15' )t1 where rn=1 )new on old.id=new.id ) insert overwrite table dim_user_zip partition(dt) select if(new_id is not null,new_id,old_id), if(new_id is not null,new_login_name,old_login_name), if(new_id is not null,new_nick_name,old_nick_name), if(new_id is not null,new_name,old_name), if(new_id is not null,new_phone_num,old_phone_num), if(new_id is not null,new_email,old_email), if(new_id is not null,new_user_level,old_user_level), if(new_id is not null,new_birthday,old_birthday), if(new_id is not null,new_gender,old_gender), if(new_id is not null,new_create_time,old_create_time), if(new_id is not null,new_operate_time,old_operate_time), if(new_id is not null,new_start_date,old_start_date), if(new_id is not null,new_end_date,old_end_date), if(new_id is not null,new_end_date,old_end_date) dt from tmp union all select old_id, old_login_name, old_nick_name, old_name, old_phone_num, old_email, old_user_level, old_birthday, old_gender, old_create_time, old_operate_time, old_start_date, cast(date_add('2020-06-15',-1) as string) old_end_date, cast(date_add('2020-06-15',-1) as string) dt from tmp where old_id is not null and new_id is not null;
1.7 数据装载脚本
1.7.1 首日装载脚本
1、在hadoop102的/home/zhm/bin目录下创建ods_to_dim_init.sh
vim ods_to_dim_init.sh
2、添加如下内容
#!/bin/bash APP=gmall if [ -n "$2" ] ;then do_date=$2 else echo "请传入日期参数" exit fi dim_user_zip=" insert overwrite table ${APP}.dim_user_zip partition (dt='9999-12-31') select data.id, data.login_name, data.nick_name, md5(data.name), md5(data.phone_num), md5(data.email), data.user_level, data.birthday, data.gender, data.create_time, data.operate_time, '$do_date' start_date, '9999-12-31' end_date from ${APP}.ods_user_info_inc where dt='$do_date' and type='bootstrap-insert'; " dim_sku_full=" with sku as ( select id, price, sku_name, sku_desc, weight, is_sale, spu_id, category3_id, tm_id, create_time from ${APP}.ods_sku_info_full where dt='$do_date' ), spu as ( select id, spu_name from ${APP}.ods_spu_info_full where dt='$do_date' ), c3 as ( select id, name, category2_id from ${APP}.ods_base_category3_full where dt='$do_date' ), c2 as ( select id, name, category1_id from ${APP}.ods_base_category2_full where dt='$do_date' ), c1 as ( select id, name from ${APP}.ods_base_category1_full where dt='$do_date' ), tm as ( select id, tm_name from ${APP}.ods_base_trademark_full where dt='$do_date' ), attr as ( select sku_id, collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrs from ${APP}.ods_sku_attr_value_full where dt='$do_date' group by sku_id ), sale_attr as ( select sku_id, collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrs from ${APP}.ods_sku_sale_attr_value_full where dt='$do_date' group by sku_id ) insert overwrite table ${APP}.dim_sku_full partition(dt='$do_date') select sku.id, sku.price, sku.sku_name, sku.sku_desc, sku.weight, sku.is_sale, sku.spu_id, spu.spu_name, sku.category3_id, c3.name, c3.category2_id, c2.name, c2.category1_id, c1.name, sku.tm_id, tm.tm_name, attr.attrs, sale_attr.sale_attrs, sku.create_time from sku left join spu on sku.spu_id=spu.id left join c3 on sku.category3_id=c3.id left join c2 on c3.category2_id=c2.id left join c1 on c2.category1_id=c1.id left join tm on sku.tm_id=tm.id left join attr on sku.id=attr.sku_id left join sale_attr on sku.id=sale_attr.sku_id; " dim_province_full=" insert overwrite table ${APP}.dim_province_full partition(dt='$do_date') select province.id, province.name, province.area_code, province.iso_code, province.iso_3166_2, region_id, region_name from ( select id, name, region_id, area_code, iso_code, iso_3166_2 from ${APP}.ods_base_province_full where dt='$do_date' )province left join ( select id, region_name from ${APP}.ods_base_region_full where dt='$do_date' )region on province.region_id=region.id; " dim_coupon_full=" insert overwrite table ${APP}.dim_coupon_full partition(dt='$do_date') select id, coupon_name, coupon_type, coupon_dic.dic_name, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, case coupon_type when '3201' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3203' then concat('减',benefit_amount,'元') end benefit_rule, create_time, range_type, range_dic.dic_name, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ( select id, coupon_name, coupon_type, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, create_time, range_type, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ${APP}.ods_coupon_info_full where dt='$do_date' )ci left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='32' )coupon_dic on ci.coupon_type=coupon_dic.dic_code left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='33' )range_dic on ci.range_type=range_dic.dic_code; " dim_activity_full=" insert overwrite table ${APP}.dim_activity_full partition(dt='$do_date') select rule.id, info.id, activity_name, rule.activity_type, dic.dic_name, activity_desc, start_time, end_time, create_time, condition_amount, condition_num, benefit_amount, benefit_discount, case rule.activity_type when '3101' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3103' then concat('打',10*(1-benefit_discount),'折') end benefit_rule, benefit_level from ( select id, activity_id, activity_type, condition_amount, condition_num, benefit_amount, benefit_discount, benefit_level from ${APP}.ods_activity_rule_full where dt='$do_date' )rule left join ( select id, activity_name, activity_type, activity_desc, start_time, end_time, create_time from ${APP}.ods_activity_info_full where dt='$do_date' )info on rule.activity_id=info.id left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='31' )dic on rule.activity_type=dic.dic_code; " case $1 in "dim_user_zip") hive -e "$dim_user_zip" ;; "dim_sku_full") hive -e "$dim_sku_full" ;; "dim_province_full") hive -e "$dim_province_full" ;; "dim_coupon_full") hive -e "$dim_coupon_full" ;; "dim_activity_full") hive -e "$dim_activity_full" ;; "all") hive -e "$dim_user_zip$dim_sku_full$dim_province_full$dim_coupon_full$dim_activity_full" ;; esac
3、增加脚本执行权限
4、脚本用法
ods_to_dim_init.sh all 2020-06-14
1.7.2 每日装载脚本
1、在hadoop102的/home/zhm/bin目录下创建ods_to_dim.sh
vim ods_to_dim.sh
2、添加如下内容
#!/bin/bash APP=gmall # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$2" ] ;then do_date=$2 else do_date=`date -d "-1 day" +%F` fi dim_user_zip=" set hive.exec.dynamic.partition.mode=nonstrict; with tmp as ( select old.id old_id, old.login_name old_login_name, old.nick_name old_nick_name, old.name old_name, old.phone_num old_phone_num, old.email old_email, old.user_level old_user_level, old.birthday old_birthday, old.gender old_gender, old.create_time old_create_time, old.operate_time old_operate_time, old.start_date old_start_date, old.end_date old_end_date, new.id new_id, new.login_name new_login_name, new.nick_name new_nick_name, new.name new_name, new.phone_num new_phone_num, new.email new_email, new.user_level new_user_level, new.birthday new_birthday, new.gender new_gender, new.create_time new_create_time, new.operate_time new_operate_time, new.start_date new_start_date, new.end_date new_end_date from ( select id, login_name, nick_name, name, phone_num, email, user_level, birthday, gender, create_time, operate_time, start_date, end_date from ${APP}.dim_user_zip where dt='9999-12-31' )old full outer join ( select id, login_name, nick_name, md5(name) name, md5(phone_num) phone_num, md5(email) email, user_level, birthday, gender, create_time, operate_time, '$do_date' start_date, '9999-12-31' end_date from ( select data.id, data.login_name, data.nick_name, data.name, data.phone_num, data.email, data.user_level, data.birthday, data.gender, data.create_time, data.operate_time, row_number() over (partition by data.id order by ts desc) rn from ${APP}.ods_user_info_inc where dt='$do_date' )t1 where rn=1 )new on old.id=new.id ) insert overwrite table ${APP}.dim_user_zip partition(dt) select if(new_id is not null,new_id,old_id), if(new_id is not null,new_login_name,old_login_name), if(new_id is not null,new_nick_name,old_nick_name), if(new_id is not null,new_name,old_name), if(new_id is not null,new_phone_num,old_phone_num), if(new_id is not null,new_email,old_email), if(new_id is not null,new_user_level,old_user_level), if(new_id is not null,new_birthday,old_birthday), if(new_id is not null,new_gender,old_gender), if(new_id is not null,new_create_time,old_create_time), if(new_id is not null,new_operate_time,old_operate_time), if(new_id is not null,new_start_date,old_start_date), if(new_id is not null,new_end_date,old_end_date), if(new_id is not null,new_end_date,old_end_date) dt from tmp union all select old_id, old_login_name, old_nick_name, old_name, old_phone_num, old_email, old_user_level, old_birthday, old_gender, old_create_time, old_operate_time, old_start_date, cast(date_add('$do_date',-1) as string) old_end_date, cast(date_add('$do_date',-1) as string) dt from tmp where old_id is not null and new_id is not null; " dim_sku_full=" with sku as ( select id, price, sku_name, sku_desc, weight, is_sale, spu_id, category3_id, tm_id, create_time from ${APP}.ods_sku_info_full where dt='$do_date' ), spu as ( select id, spu_name from ${APP}.ods_spu_info_full where dt='$do_date' ), c3 as ( select id, name, category2_id from ${APP}.ods_base_category3_full where dt='$do_date' ), c2 as ( select id, name, category1_id from ${APP}.ods_base_category2_full where dt='$do_date' ), c1 as ( select id, name from ${APP}.ods_base_category1_full where dt='$do_date' ), tm as ( select id, tm_name from ${APP}.ods_base_trademark_full where dt='$do_date' ), attr as ( select sku_id, collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrs from ${APP}.ods_sku_attr_value_full where dt='$do_date' group by sku_id ), sale_attr as ( select sku_id, collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrs from ${APP}.ods_sku_sale_attr_value_full where dt='$do_date' group by sku_id ) insert overwrite table ${APP}.dim_sku_full partition(dt='$do_date') select sku.id, sku.price, sku.sku_name, sku.sku_desc, sku.weight, sku.is_sale, sku.spu_id, spu.spu_name, sku.category3_id, c3.name, c3.category2_id, c2.name, c2.category1_id, c1.name, sku.tm_id, tm.tm_name, attr.attrs, sale_attr.sale_attrs, sku.create_time from sku left join spu on sku.spu_id=spu.id left join c3 on sku.category3_id=c3.id left join c2 on c3.category2_id=c2.id left join c1 on c2.category1_id=c1.id left join tm on sku.tm_id=tm.id left join attr on sku.id=attr.sku_id left join sale_attr on sku.id=sale_attr.sku_id; " dim_province_full=" insert overwrite table ${APP}.dim_province_full partition(dt='$do_date') select province.id, province.name, province.area_code, province.iso_code, province.iso_3166_2, region_id, region_name from ( select id, name, region_id, area_code, iso_code, iso_3166_2 from ${APP}.ods_base_province_full where dt='$do_date' )province left join ( select id, region_name from ${APP}.ods_base_region_full where dt='$do_date' )region on province.region_id=region.id; " dim_coupon_full=" insert overwrite table ${APP}.dim_coupon_full partition(dt='$do_date') select id, coupon_name, coupon_type, coupon_dic.dic_name, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, case coupon_type when '3201' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3203' then concat('减',benefit_amount,'元') end benefit_rule, create_time, range_type, range_dic.dic_name, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ( select id, coupon_name, coupon_type, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, create_time, range_type, limit_num, taken_count, start_time, end_time, operate_time, expire_time from ${APP}.ods_coupon_info_full where dt='$do_date' )ci left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='32' )coupon_dic on ci.coupon_type=coupon_dic.dic_code left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='33' )range_dic on ci.range_type=range_dic.dic_code; " dim_activity_full=" insert overwrite table ${APP}.dim_activity_full partition(dt='$do_date') select rule.id, info.id, activity_name, rule.activity_type, dic.dic_name, activity_desc, start_time, end_time, create_time, condition_amount, condition_num, benefit_amount, benefit_discount, case rule.activity_type when '3101' then concat('满',condition_amount,'元减',benefit_amount,'元') when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折') when '3103' then concat('打',10*(1-benefit_discount),'折') end benefit_rule, benefit_level from ( select id, activity_id, activity_type, condition_amount, condition_num, benefit_amount, benefit_discount, benefit_level from ${APP}.ods_activity_rule_full where dt='$do_date' )rule left join ( select id, activity_name, activity_type, activity_desc, start_time, end_time, create_time from ${APP}.ods_activity_info_full where dt='$do_date' )info on rule.activity_id=info.id left join ( select dic_code, dic_name from ${APP}.ods_base_dic_full where dt='$do_date' and parent_code='31' )dic on rule.activity_type=dic.dic_code; " case $1 in "dim_user_zip") hive -e "$dim_user_zip" ;; "dim_sku_full") hive -e "$dim_sku_full" ;; "dim_province_full") hive -e "$dim_province_full" ;; "dim_coupon_full") hive -e "$dim_coupon_full" ;; "dim_activity_full") hive -e "$dim_activity_full" ;; "all") hive -e "$dim_user_zip$dim_sku_full$dim_province_full$dim_coupon_full$dim_activity_full" ;; esac
3、增加脚本执行权限
chmod +x ods_to_dim.sh
4、脚本用法
ods_to_dim.sh all 2020-06-14
您的支持是我创作的无限动力
希望我能为您的未来尽绵薄之力
如有错误,谢谢指正若有收获,谢谢赞美