离线数仓(九)【DWS 层开发】(2)https://developer.aliyun.com/article/1532434
1.1.2、交易域用户商品粒度退单最近1日汇总表
建表语句
DROP TABLE IF EXISTS dws_trade_user_sku_order_refund_1d; CREATE EXTERNAL TABLE dws_trade_user_sku_order_refund_1d ( `user_id` STRING COMMENT '用户id', `sku_id` STRING COMMENT 'sku_id', `sku_name` STRING COMMENT 'sku名称', `category1_id` STRING COMMENT '一级分类id', `category1_name` STRING COMMENT '一级分类名称', `category2_id` STRING COMMENT '一级分类id', `category2_name` STRING COMMENT '一级分类名称', `category3_id` STRING COMMENT '一级分类id', `category3_name` STRING COMMENT '一级分类名称', `tm_id` STRING COMMENT '品牌id', `tm_name` STRING COMMENT '品牌名称', `order_refund_count_1d` BIGINT COMMENT '最近1日退单次数', `order_refund_num_1d` BIGINT COMMENT '最近1日退单件数', `order_refund_amount_1d` DECIMAL(16, 2) COMMENT '最近1日退单金额' ) COMMENT '交易域用户商品粒度退单最近1日汇总事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dws/dws_trade_user_sku_order_refund_1d' TBLPROPERTIES ('orc.compress' = 'snappy');
数据装载(区分首日/每日)
set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table dws_trade_user_sku_order_refund_1d partition(dt) select user_id, sku_id, sku_name, category1_id, category1_name, category2_id, category2_name, category3_id, category3_name, tm_id, tm_name, order_refund_count, order_refund_num, order_refund_amount, dt from ( select dt, user_id, sku_id, count(*) order_refund_count, sum(refund_num) order_refund_num, sum(refund_amount) order_refund_amount from dwd_trade_order_refund_inc group by dt,user_id,sku_id )od left join ( select id, sku_name, category1_id, category1_name, category2_id, category2_name, category3_id, category3_name, tm_id, tm_name from dim_sku_full where dt='2020-06-14' )sku on od.sku_id=sku.id;
每日装载:
insert overwrite table dws_trade_user_sku_order_refund_1d partition(dt='2020-06-15') select user_id, sku_id, sku_name, category1_id, category1_name, category2_id, category2_name, category3_id, category3_name, tm_id, tm_name, order_refund_count, order_refund_num, order_refund_amount from ( select user_id, sku_id, count(*) order_refund_count, sum(refund_num) order_refund_num, sum(refund_amount) order_refund_amount from dwd_trade_order_refund_inc where dt='2020-06-15' group by user_id,sku_id )od left join ( select id, sku_name, category1_id, category1_name, category2_id, category2_name, category3_id, category3_name, tm_id, tm_name from dim_sku_full where dt='2020-06-15' )sku on od.sku_id=sku.id;
1.1.3、交易域用户粒度订单最近1日汇总表
1)建表语句
行:最近一天用户的下单次数、件数和总金额。
列:维度只有用户id,并没有别的维度属性,所以之后 ADS 层如果有相关需要用到用户信息的时候再去 join。剩下的都是统计值。
DROP TABLE IF EXISTS dws_trade_user_order_1d; CREATE EXTERNAL TABLE dws_trade_user_order_1d ( `user_id` STRING COMMENT '用户id', `order_count_1d` BIGINT COMMENT '最近1日下单次数', `order_num_1d` BIGINT COMMENT '最近1日下单商品件数', `order_original_amount_1d` DECIMAL(16, 2) COMMENT '最近1日最近1日下单原始金额', `activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额', `coupon_reduce_amount_1d` DECIMAL(16, 2) COMMENT '下单优惠券优惠金额', `order_total_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额' ) COMMENT '交易域用户粒度订单最近1日汇总事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dws/dws_trade_user_order_1d' TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载(首日/每日)
因为有历史数据,所以需根据 user_id 和 dt 进行分组 。
insert overwrite table dws_trade_user_order_1d partition(dt) select user_id, count(distinct(order_id)), sum(sku_num), sum(split_original_amount), sum(nvl(split_activity_amount,0)), sum(nvl(split_coupon_amount,0)), sum(split_total_amount), dt from dwd_trade_order_detail_inc group by user_id,dt;
这里的 order_id 需要进行去重,因为数据来源是 order_detail,所以一个订单有多个商品,下单时一个 order_id 会有多个下单记录。
每日表只需要根据 user_id 进行分组,因为 dwd 层的该天分区没有历史数据。
insert overwrite table dws_trade_user_order_1d partition(dt='2020-06-15') select user_id, count(distinct(order_id)), sum(sku_num), sum(split_original_amount), sum(nvl(split_activity_amount,0)), sum(nvl(split_coupon_amount,0)), sum(split_total_amount) from dwd_trade_order_detail_inc where dt='2020-06-15' group by user_id;
1.1.4、交易域用户粒度加购最近1日汇总表
1)建表语句
行:用户最近 1 天加购的件数和次数
DROP TABLE IF EXISTS dws_trade_user_cart_add_1d; CREATE EXTERNAL TABLE dws_trade_user_cart_add_1d ( `user_id` STRING COMMENT '用户id', `cart_add_count_1d` BIGINT COMMENT '最近1日加购次数', `cart_add_num_1d` BIGINT COMMENT '最近1日加购商品件数' ) COMMENT '交易域用户粒度加购最近1日汇总事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dws/dws_trade_user_cart_add_1d' TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载(首日/每日)
装载很简单,首日只需要对 dt 进行额外分组,每日不需要。
insert overwrite table dws_trade_user_cart_add_1d partition(dt) select user_id, count(*), sum(sku_num), dt from dwd_trade_cart_add_inc group by user_id,dt;
insert overwrite table dws_trade_user_cart_add_1d partition(dt='2020-06-15') select user_id, count(*), sum(sku_num) from dwd_trade_cart_add_inc where dt='2020-06-15' group by user_id;
1.1.5、交易域用户粒度支付最近1日汇总表
1)建表语句
行:每个用户最近 1 日的支付次数、支付的商品件数、金额。
列:一个维度和三个统计指标。
DROP TABLE IF EXISTS dws_trade_user_payment_1d; CREATE EXTERNAL TABLE dws_trade_user_payment_1d ( `user_id` STRING COMMENT '用户id', `payment_count_1d` BIGINT COMMENT '最近1日支付次数', `payment_num_1d` BIGINT COMMENT '最近1日支付商品件数', `payment_amount_1d` DECIMAL(16, 2) COMMENT '最近1日支付金额' ) COMMENT '交易域用户粒度支付最近1日汇总事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dws/dws_trade_user_payment_1d' TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载(首日/每日)
insert overwrite table dws_trade_user_payment_1d partition(dt) select user_id, count(distinct(order_id)), sum(sku_num), sum(split_payment_amount), dt from dwd_trade_pay_detail_suc_inc group by user_id,dt;
insert overwrite table dws_trade_user_payment_1d partition(dt='2020-06-15') select user_id, count(distinct(order_id)), sum(sku_num), sum(split_payment_amount) from dwd_trade_pay_detail_suc_inc where dt='2020-06-15' group by user_id;
1.1.6、交易域省份粒度订单最近1日汇总表
1)建表语句
这里需要对省份信息进行一个维度退化,因为后面 Superset 做地图的时候需要用到。
DROP TABLE IF EXISTS dws_trade_province_order_1d; CREATE EXTERNAL TABLE dws_trade_province_order_1d ( `province_id` STRING COMMENT '用户id', `province_name` STRING COMMENT '省份名称', `area_code` STRING COMMENT '地区编码', `iso_code` STRING COMMENT '旧版ISO-3166-2编码', `iso_3166_2` STRING COMMENT '新版版ISO-3166-2编码', `order_count_1d` BIGINT COMMENT '最近1日下单次数', `order_original_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单原始金额', `activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额', `coupon_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额', `order_total_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额' ) COMMENT '交易域省份粒度订单最近1日汇总事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dws/dws_trade_province_order_1d' TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载(首日/全日)
因为有维度退化,所以需要进行 join
set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table dws_trade_province_order_1d partition(dt) select province_id, province_name, area_code, iso_code, iso_3166_2, order_count_1d, order_original_amount_1d, activity_reduce_amount_1d, coupon_reduce_amount_1d, order_total_amount_1d, dt from ( select province_id, count(distinct(order_id)) order_count_1d, sum(split_original_amount) order_original_amount_1d, sum(nvl(split_activity_amount,0)) activity_reduce_amount_1d, sum(nvl(split_coupon_amount,0)) coupon_reduce_amount_1d, sum(split_total_amount) order_total_amount_1d, dt from dwd_trade_order_detail_inc group by province_id,dt )o left join ( select id, province_name, area_code, iso_code, iso_3166_2 from dim_province_full where dt='2020-06-14' )p on o.province_id=p.id;
insert overwrite table dws_trade_province_order_1d partition(dt='2020-06-15') select province_id, province_name, area_code, iso_code, iso_3166_2, order_count_1d, order_original_amount_1d, activity_reduce_amount_1d, coupon_reduce_amount_1d, order_total_amount_1d from ( select province_id, count(distinct(order_id)) order_count_1d, sum(split_original_amount) order_original_amount_1d, sum(nvl(split_activity_amount,0)) activity_reduce_amount_1d, sum(nvl(split_coupon_amount,0)) coupon_reduce_amount_1d, sum(split_total_amount) order_total_amount_1d from dwd_trade_order_detail_inc where dt='2020-06-15' group by province_id )o left join ( select id, province_name, area_code, iso_code, iso_3166_2 from dim_province_full where dt='2020-06-15' )p on o.province_id=p.id;
离线数仓(九)【DWS 层开发】(4)https://developer.aliyun.com/article/1532439