数仓开发之DWD层
DWD层设计要点:
(1)DWD层的设计依据是维度建模理论,该层存储维度模型的事实表。
(2)DWD层的数据存储格式为orc列式存储+snappy压缩。
(3)DWD层表名的命名规范为dwd_数据域_表名_单分区增量全量标识(inc/full)
累计开照事实表
累计快照事实表是基于一个业务流程中的多个关键业务过程联合处理而构建的事实表,如交易流程中的下单、支付、发货、确认收货业务过程。
累积型快照事实表通常具有多个日期字段,每个日期对应业务流程中的一个关键业务过程(里程碑)。
订单id |
用户id |
下单日期 |
支付日期 |
发货日期 |
确认收货日期 |
订单金额 |
支付金额 |
1001 |
1234 |
2022-06-08 |
2022-06-09 |
2022-06-16 |
2022-06-17 |
1000 |
1000 |
累积型快照事实表主要用于分析业务过程(里程碑)之间的时间间隔等需求。例如前文提到的用户下单到支付的平均时间间隔,使用累积型快照事实表进行统计,就能避免两个事务事实表的关联操作,从而变得十分简单高效。
审批域金融租赁全流程累积型快照事实表
1)建表语句 DROP TABLE IF EXISTS dwd_financial_lease_flow_acc; CREATE EXTERNAL TABLE IF NOT EXISTS dwd_financial_lease_flow_acc ( `id` STRING COMMENT '授信表记录编号', `lease_organization` STRING COMMENT '业务方向', `business_partner_id` STRING COMMENT '客户ID(申请人)', `business_partner_name` STRING COMMENT '客户姓名', `industry3_id` STRING COMMENT '三级行业ID', `salesman_id` STRING COMMENT '业务经办ID', `credit_audit_id` STRING COMMENT '信审经办ID', `create_time` STRING COMMENT '申请发起时间', `undistributed_time` STRING COMMENT '风控审核通过时间', `risk_manage_refused_time` STRING COMMENT '风控审核拒绝时间', `credit_audit_distributed_time` STRING COMMENT '信审经办分配时间', `credit_audit_approving_time` STRING COMMENT '信审经办审核通过时间', `feed_back_time` STRING COMMENT '业务反馈提交时间', `first_level_review_approving_time` STRING COMMENT '一级评审人/加签人审核通过时间', `second_level_review_approving_time` STRING COMMENT '二级评审人审核通过时间', `project_review_meeting_approving_time` STRING COMMENT '项目评审会审核通过时间', `general_manager_review_approving_time` STRING COMMENT '总经理/分管总审核通过时间', `reply_review_approving_time` STRING COMMENT '批复通过时间', `credit_create_time` STRING COMMENT '新增授信时间', `credit_occupy_time` STRING COMMENT '完成授信占用时间', `contract_produce_time` STRING COMMENT '完成合同制作时间', `signed_time` STRING COMMENT '完成签约时间', `execution_time` STRING COMMENT '起租时间', `rejected_time` STRING COMMENT '拒绝时间', `cancel_time` STRING COMMENT '客户取消申请时间', `credit_amount` decimal(16,2) COMMENT '申请授信金额', `credit_reply_amount` decimal(16,2) COMMENT '批复授信金额', `credit_real_amount` decimal(16,2) COMMENT '实际授信金额' ) COMMENT '审批域授信审批流程累积型快照事实表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/financial_lease/dwd/dwd_financial_lease_flow_acc/' TBLPROPERTIES('orc.compress' = 'snappy');
--累积快照事实表分区字段的选择 --事件开始时间-》 优点:便于数据寻找对应的分区 缺点:后续需要多次补充时间字段(修改数据的操作) hdfs修改数据非常麻烦 需要把整个分区的数据读取处理 统一处理在覆盖写回去,每日修改大量分区的数据 --事件结束时间-> --优点:事件结束 不会再发生数据的变化->当该条数据写入对应的分区 就不需要修改数据 --缺点:不便于寻找数据对应的分区 -> 专门设计一个分区存放不结束的数据 9999-12-31
数据流向
数据装载
首日装载
--hive默认的严格模式不能使用一级动态分区
--主要处理历史数据->找历史数据中有无完成的审批 根据完成的时间写入到对应的分区中 没有完成的数据写入到9999-12-31分区中 --金融审批表 set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table dwd_financial_lease_flow_acc partition (dt) select id, max(lease_organization), max(business_partner_id), max(business_partner_name), max(industry3_id), max(salesman_id), max(credit_audit_id), max(create_time), max(undistributed_time), max(risk_manage_refused_time), max(credit_audit_distributed_time), max(credit_audit_approving_time), max(feed_back_time), max(first_level_review_approving_time), max(second_level_review_approving_time), max(project_review_meeting_approving_time), max(general_manager_review_approving_time), max(reply_review_approving_time), max(credit_create_time), max(credit_occupy_time), max(contract_produce_time), max(signed_time), max(execution_time), max(rejected_time), max(cancel_time), max(credit_amount), max(credit_reply_amount), max(credit_real_amount), date_format(if(max(execution_time) is not null , max(execution_time), if(max(rejected_time) is not null, max(rejected_time) , if(max(cancel_time) is not null, max(cancel_time),'9999-12-31'))),'yyyy-MM-dd') dt from ( select cf.id, lease_organization, cf.business_partner_id business_partner_id, bp.business_partner_name business_partner_name, cf.industry_id industry3_id, salesman_id, if(cfs.status='5' and cfs.action_taken='1',cfs.employee_id,null) credit_audit_id, cf.create_time create_time, if(cfs.status='3' and cfs.action_taken='1',cfs.create_time,null) undistributed_time, if(cfs.status='2' and cfs.action_taken='2',cfs.create_time,null) risk_manage_refused_time, if(cfs.status='4',cfs.create_time,null) credit_audit_distributed_time, if(cfs.status='5' and cfs.action_taken='1',cfs.create_time,null) credit_audit_approving_time, if(cfs.status='7' ,cfs.create_time,null) feed_back_time, if(cfs.status='8' and cfs.action_taken='1',cfs.create_time,null) first_level_review_approving_time, if(cfs.status='10' and cfs.action_taken='1',cfs.create_time,null) second_level_review_approving_time, if(cfs.status='12' and cfs.action_taken='1',cfs.create_time,null) project_review_meeting_approving_time, if(cfs.status='14' and cfs.action_taken='1',cfs.create_time,null) general_manager_review_approving_time, if(cfs.status='16' and cfs.action_taken='1',cfs.create_time,null) reply_review_approving_time, cre.create_time credit_create_time, credit_occupy_time, contract_produce_time, signed_time, execution_time, if(cfs.status='20' ,cfs.create_time,null) rejected_time, if(cfs.status='21' ,cfs.create_time,if(cre.cancel_time is not null,cre.cancel_time,con.cancel_time)) cancel_time, cf.credit_amount, rep.credit_amount credit_reply_amount, cre.credit_amount credit_real_amount from ( -- 金融审批表 select data.id, data.create_time, data.update_time, data.credit_amount, data.lease_organization, data.status, data.business_partner_id, data.credit_id, data.industry_id, data.reply_id, data.salesman_id from ods_credit_facility_inc where dt='2023-05-09' and type='bootstrap-insert' )cf left join ( -- 金融合伙人 select id business_partner_id, name business_partner_name from ods_business_partner_full where dt='2023-05-09' )bp on cf.business_partner_id=bp.business_partner_id left join ( -- 审批状态表 select data.create_time, data.action_taken, data.status, data.credit_facility_id, data.employee_id, data.signatory_id from ods_credit_facility_status_inc where dt='2023-05-09' and type='bootstrap-insert' )cfs on cf.id=cfs.credit_facility_id left join ( -- 客户回复表 -> 添加回复金额 不为null 说明客户同意 select data.create_time, data.update_time, data.credit_amount, data.irr, data.period, data.credit_facility_id from ods_reply_inc where dt='2023-05-09' and type='bootstrap-insert' )rep on cf.id=rep.credit_facility_id left join ( -- 开启授信 select data.id, data.create_time, data.update_time, data.cancel_time, data.contract_produce_time, data.credit_amount, data.credit_occupy_time, data.status, data.contract_id, data.credit_facility_id from ods_credit_inc where dt='2023-05-09' and type='bootstrap-insert' )cre on cf.credit_id=cre.id left join ( -- 合同表 select data.id, data.execution_time, data.signed_time, if(data.status='4',data.update_time,null) cancel_time, data.credit_id from ods_contract_inc where dt='2023-05-09' and type='bootstrap-insert' )con on cre.contract_id=con.id )t1 group by id;
每日装载
et hive.exec.dynamic.partition.mode=nonstrict; #关闭hive优化解决兼容性问题 set hive.cbo.enable=false; set hive.vectorized.execution.enabled=false; select id, max(lease_organization), max(business_partner_id), max(business_partner_name), max(industry3_id), max(salesman_id), max(credit_audit_id), max(create_time), max(undistributed_time), max(risk_manage_refused_time), max(credit_audit_distributed_time), max(credit_audit_approving_time), max(feed_back_time), max(first_level_review_approving_time), max(second_level_review_approving_time), max(project_review_meeting_approving_time), max(general_manager_review_approving_time), max(reply_review_approving_time), max(credit_create_time), max(credit_occupy_time), max(contract_produce_time), max(signed_time), max(execution_time), max(rejected_time), max(cancel_time), max(credit_amount), max(credit_reply_amount), max(credit_real_amount), date_format(if(max(execution_time) is not null , max(execution_time), if(max(rejected_time) is not null, max(rejected_time) , if(max(cancel_time) is not null, max(cancel_time),'9999-12-31'))),'yyyy-MM-dd') dt from ( select cf.id, lease_organization, cf.business_partner_id business_partner_id, bp.business_partner_name business_partner_name, cf.industry3_id industry3_id, salesman_id, if(cfs.status='5' and cfs.action_taken='1',cfs.employee_id,credit_audit_id) credit_audit_id, cf.create_time create_time, if(cfs.status='3' and cfs.action_taken='1',cfs.create_time,undistributed_time) undistributed_time, if(cfs.status='2' and cfs.action_taken='2',cfs.create_time,risk_manage_refused_time) risk_manage_refused_time, if(cfs.status='4',cfs.create_time,credit_audit_distributed_time) credit_audit_distributed_time, if(cfs.status='5' and cfs.action_taken='1',cfs.create_time,credit_audit_approving_time) credit_audit_approving_time, if(cfs.status='7' ,cfs.create_time,feed_back_time) feed_back_time, if(cfs.status='8' and cfs.action_taken='1',cfs.create_time,first_level_review_approving_time) first_level_review_approving_time, if(cfs.status='10' and cfs.action_taken='1',cfs.create_time,second_level_review_approving_time) second_level_review_approving_time, if(cfs.status='12' and cfs.action_taken='1',cfs.create_time,project_review_meeting_approving_time) project_review_meeting_approving_time, if(cfs.status='14' and cfs.action_taken='1',cfs.create_time,general_manager_review_approving_time) general_manager_review_approving_time, if(cfs.status='16' and cfs.action_taken='1',cfs.create_time,reply_review_approving_time) reply_review_approving_time, nvl(credit_create_time,cre.create_time ) credit_create_time, nvl(cf.credit_occupy_time,cre.credit_occupy_time) credit_occupy_time, nvl(cf.contract_produce_time,cre.contract_produce_time) contract_produce_time, nvl(cf.signed_time,con.signed_time) signed_time, con.execution_time execution_time, if(cfs.status='20' ,cfs.create_time,null) rejected_time, if(cfs.status='21' ,cfs.create_time,if(cre.cancel_time is not null,cre.cancel_time,con.cancel_time)) cancel_time, cf.credit_amount, rep.credit_amount credit_reply_amount, cre.credit_amount credit_real_amount from ( -- 合并截止到前一天的9999-12-31数据和当天的新增审批数据 select id, lease_organization, business_partner_id, business_partner_name, industry3_id, salesman_id, credit_audit_id, create_time, undistributed_time, risk_manage_refused_time, credit_audit_distributed_time, credit_audit_approving_time, feed_back_time, first_level_review_approving_time, second_level_review_approving_time, project_review_meeting_approving_time, general_manager_review_approving_time, reply_review_approving_time, credit_create_time, credit_occupy_time, contract_produce_time, signed_time, execution_time, rejected_time, cancel_time, credit_amount, credit_reply_amount, credit_real_amount, null reply_id, null credit_id from dwd_financial_lease_flow_acc where dt='9999-12-31' union --当天的新增审批数据 select data.id id, data.lease_organization, data.business_partner_id, null business_partner_name, data.industry_id industry3_id, data.salesman_id, null credit_audit_id, null create_time, null undistributed_time, null risk_manage_refused_time, null credit_audit_distributed_time, null credit_audit_approving_time, null feed_back_time, null first_level_review_approving_time, null second_level_review_approving_time, null project_review_meeting_approving_time, null general_manager_review_approving_time, null reply_review_approving_time, null credit_create_time, null credit_occupy_time, null contract_produce_time, null signed_time, null execution_time, null rejected_time, null cancel_time, data.credit_amount, null credit_reply_amount, null credit_real_amount, data.reply_id reply_id, data.credit_id credit_id from ods_credit_facility_inc where dt='2023-05-10' and type='insert' )cf -- 之后再join后续可能出现的审批状态 最后根据是否完成写入到最终的分区中 left join ( -- 金融合伙人 select id business_partner_id, name business_partner_name from ods_business_partner_full where dt='2023-05-10' )bp on cf.business_partner_id=bp.business_partner_id left join ( -- 审批状态表 select data.create_time, data.action_taken, data.status, data.credit_facility_id, data.employee_id, data.signatory_id from ods_credit_facility_status_inc where dt='2023-05-10' and type='insert' )cfs on cf.id=cfs.credit_facility_id left join ( -- 客户回复表 -> 添加回复金额 不为null 说明客户同意 select data.create_time, data.update_time, data.credit_amount, data.irr, data.period, data.credit_facility_id from ods_reply_inc where dt='2023-05-10' and type='insert' )rep on cf.id=rep.credit_facility_id left join ( -- 开启授信 select data.id, data.create_time, data.update_time, data.cancel_time, data.contract_produce_time, data.credit_amount, data.credit_occupy_time, data.status, data.contract_id, data.credit_facility_id from ods_credit_inc where dt='2023-05-10' )cre on cf.credit_id=cre.id left join ( -- 合同表 select data.id, data.execution_time, data.signed_time, if(data.status='4',data.update_time,null) cancel_time, data.credit_id from ods_contract_inc where dt='2023-05-10' )con on cre.contract_id=con.id )t1 group by id;
数据装载脚本
9.2.1 首日装载脚本
1)在hadoop102的/home/bin目录下创建financial_ods_to_dwd_init.sh
vim financial_ods_to_dwd_init.sh
#!/bin/bash APP=financial_lease if [[ -n "$2" ]]; then do_date=$2 else do_date=`date -d '-1 day' +%F` fi dwd_financial_lease_flow_acc=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dwd_financial_lease_flow_acc partition (dt) select id, max(lease_organization), max(business_partner_id), max(business_partner_name), max(industry3_id), max(salesman_id), max(credit_audit_id), max(create_time), max(undistributed_time), max(risk_manage_refused_time), max(credit_audit_distributed_time), max(credit_audit_approving_time), max(feed_back_time), max(first_level_review_approving_time), max(second_level_review_approving_time), max(project_review_meeting_approving_time), max(general_manager_review_approving_time), max(reply_review_approving_time), max(credit_create_time), max(credit_occupy_time), max(contract_produce_time), max(signed_time), max(execution_time), max(rejected_time), max(cancel_time), max(credit_amount), max(credit_reply_amount), max(credit_real_amount), date_format(if(max(execution_time) is not null , max(execution_time), if(max(rejected_time) is not null, max(rejected_time) , if(max(cancel_time) is not null, max(cancel_time),'9999-12-31'))),'yyyy-MM-dd') dt from ( select cf.id, lease_organization, cf.business_partner_id business_partner_id, bp.business_partner_name business_partner_name, cf.industry_id industry3_id, salesman_id, if(cfs.status='5' and cfs.action_taken='1',cfs.employee_id,null) credit_audit_id, cf.create_time create_time, if(cfs.status='3' and cfs.action_taken='1',cfs.create_time,null) undistributed_time, if(cfs.status='2' and cfs.action_taken='2',cfs.create_time,null) risk_manage_refused_time, if(cfs.status='4',cfs.create_time,null) credit_audit_distributed_time, if(cfs.status='5' and cfs.action_taken='1',cfs.create_time,null) credit_audit_approving_time, if(cfs.status='7' ,cfs.create_time,null) feed_back_time, if(cfs.status='8' and cfs.action_taken='1',cfs.create_time,null) first_level_review_approving_time, if(cfs.status='10' and cfs.action_taken='1',cfs.create_time,null) second_level_review_approving_time, if(cfs.status='12' and cfs.action_taken='1',cfs.create_time,null) project_review_meeting_approving_time, if(cfs.status='14' and cfs.action_taken='1',cfs.create_time,null) general_manager_review_approving_time, if(cfs.status='16' and cfs.action_taken='1',cfs.create_time,null) reply_review_approving_time, cre.create_time credit_create_time, credit_occupy_time, contract_produce_time, signed_time, execution_time, if(cfs.status='20' ,cfs.create_time,null) rejected_time, if(cfs.status='21' ,cfs.create_time,if(cre.cancel_time is not null,cre.cancel_time,con.cancel_time)) cancel_time, cf.credit_amount, rep.credit_amount credit_reply_amount, cre.credit_amount credit_real_amount from ( select data.id, data.create_time, data.update_time, data.credit_amount, data.lease_organization, data.status, data.business_partner_id, data.credit_id, data.industry_id, data.reply_id, data.salesman_id from ${APP}.ods_credit_facility_inc where dt='${do_date}' and type='bootstrap-insert' )cf left join ( select id business_partner_id, name business_partner_name from ${APP}.ods_business_partner_full where dt='${do_date}' )bp on cf.business_partner_id=bp.business_partner_id left join ( select data.create_time, data.action_taken, data.status, data.credit_facility_id, data.employee_id, data.signatory_id from ${APP}.ods_credit_facility_status_inc where dt='${do_date}' and type='bootstrap-insert' )cfs on cf.id=cfs.credit_facility_id left join ( select data.create_time, data.update_time, data.credit_amount, data.irr, data.period, data.credit_facility_id from ${APP}.ods_reply_inc where dt='${do_date}' and type='bootstrap-insert' )rep on cf.id=rep.credit_facility_id left join ( select data.id, data.create_time, data.update_time, data.cancel_time, data.contract_produce_time, data.credit_amount, data.credit_occupy_time, data.status, data.contract_id, data.credit_facility_id from ${APP}.ods_credit_inc where dt='${do_date}' and type='bootstrap-insert' )cre on cf.credit_id=cre.id left join ( select data.id, data.execution_time, data.signed_time, if(data.status='4',data.update_time,null) cancel_time, data.credit_id from ${APP}.ods_contract_inc where dt='${do_date}' and type='bootstrap-insert' )con on cre.contract_id=con.id )t1 group by id; " case $1 in "dwd_financial_lease_flow_acc" ) hive -e "$dwd_financial_lease_flow_acc" ;; "all" ) hive -e "$dwd_financial_lease_flow_acc" ;; esac
数仓开发之ADS层
10.1 待审/在审项目主题
综合统计
需求说明如下
统计粒度 |
指标 |
说明 |
-- |
截至当日处于新建状态项目数 |
申请已发起,正交由风控员审核,未得出风控审核结果的项目数 |
-- |
截至当日处于新建状态项目申请金额 |
略 |
-- |
截至当日处于未达风控状态项目数 |
未通过风控员审核,正交由风控经理审核,未得出最终风控结论的项目数 |
-- |
截至当日处于未达风控状态项目申请金额 |
略 |
-- |
截至当日处于信审经办审核通过状态项目数 |
信审经办审核通过,业务经办尚未提交业务反馈的项目数 |
-- |
截至当日处于信审经办审核通过状态项目申请金额 |
略 |
-- |
截至当日处于已提交业务反馈状态项目数 |
业务经办已提交业务反馈,正交由一级评审员及一级评审加签人审核,尚未得出一级评审结论的项目数 |
-- |
截至当日处于已提交业务反馈状态项目申请金额 |
略 |
-- |
截至当日处于一级评审通过状态项目数 |
一级评审已通过,正交由二级评审员审核,尚未得出二级评审结论的项目数 |
-- |
截至当日处于一级评审通过状态项目申请金额 |
略。 |
-- |
截至当日处于二级评审通过状态项目数 |
二级评审已通过,正由项目评审会审核,尚未得出相应评审结论的项目数 |
-- |
截至当日处于二级评审通过状态项目申请金额 |
略 |
-- |
截至当日处于项目评审会审核通过状态项目数 |
项目评审会审核通过,正交由总经理/分管总审核,尚未得出相应评审结论的项目数 |
-- |
截至当日处于项目评审会审核通过状态项目申请金额 |
略 |
-- |
截至当日处于总经理/分管总审核通过状态项目数 |
总经理/分管总审核通过,批复文件尚未生成的项目数 |
-- |
截至当日处于总经理/分管总审核通过状态项目申请金额 |
略 |
-- |
截至当日处于已出具批复状态项目数 |
批复阶段审核通过,生成批复文件,包含了具体的批复金额、条款等,尚未新增授信的项目数 |
-- |
截至当日处于已出具批复状态项目申请金额 |
略 |
-- |
截至当日处于已出具批复状态项目批复金额 |
略 |
1)建表语句
DROP TABLE IF EXISTS ads_unfinished_audit_stats; CREATE EXTERNAL TABLE IF NOT EXISTS ads_unfinished_audit_stats( `dt` STRING COMMENT '统计日期', `created_project_count` BIGINT COMMENT '新建状态项目数', `created_project_amount` DECIMAL(16,2) COMMENT '新建状态项目申请金额', `risk_control_not_approved_count` BIGINT COMMENT '未达风控状态项目数', `risk_control_not_approved_amount` DECIMAL(16,2) COMMENT '未达风控状态项目申请金额', `credit_audit_approved_count` BIGINT COMMENT '信审经办审核通过状态项目数', `credit_audit_approved_amount` DECIMAL(16,2) COMMENT '信审经办审核通过状态项目申请金额', `feedback_submitted_count` BIGINT COMMENT '已提交业务反馈状态项目数', `feedback_submitted_amount` DECIMAL(16,2) COMMENT '已提交业务反馈状态项目申请金额', `level1_review_approved_count` BIGINT COMMENT '一级评审通过状态项目数', `level1_review_approved_amount` DECIMAL(16,2) COMMENT '一级评审通过状态项目申请金额', `level2_review_approved_count` BIGINT COMMENT '二级评审通过状态项目数', `level2_review_approved_amount` DECIMAL(16,2) COMMENT '二级评审通过状态项目申请金额', `review_meeting_approved_count` BIGINT COMMENT '项目评审会审核通过状态项目数', `review_meeting_approved_amount` DECIMAL(16,2) COMMENT '项目评审会审核通过状态项目申请金额', `general_manager_approved_count` BIGINT COMMENT '总经理/分管总审核通过状态项目数', `general_manager_approved_amount` DECIMAL(16,2) COMMENT '总经理/分管总审核通过状态项目申请金额', `reply_issued_count` BIGINT COMMENT '出具批复状态项目数', `reply_issued_apply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目申请金额', `reply_issued_reply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目批复金额' ) COMMENT '待审/在审项目综合统计' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION '/warehouse/financial_lease/ads/ads_unfinished_audit_stats' TBLPROPERTIES ('compression.codec' = 'org.apache.hadoop.io.compress.GzipCodec');
数据装载
--采用手动读取原始表格的数据 union合并加去重 从而实现幂等性 insert overwrite table ads_unfinished_audit_stats select dt, created_project_count, created_project_amount, risk_control_not_approved_count, risk_control_not_approved_amount, credit_audit_approved_count, credit_audit_approved_amount, feedback_submitted_count, feedback_submitted_amount, level1_review_approved_count, level1_review_approved_amount, level2_review_approved_count, level2_review_approved_amount, review_meeting_approved_count, review_meeting_approved_amount, general_manager_approved_count, general_manager_approved_amount, reply_issued_count, reply_issued_apply_amount, reply_issued_reply_amount from ads_unfinished_audit_stats union select '2023-05-09' dt, sum(if(undistributed_time is null and risk_manage_refused_time is null, 1, 0)) created_project_count, sum(if(undistributed_time is null and risk_manage_refused_time is null, credit_amount, 0)) created_project_amount, sum(if(risk_manage_refused_time is not null and undistributed_time is null, 1, 0)) risk_control_not_approved_count, sum(if(risk_manage_refused_time is not null and undistributed_time is null, credit_amount, 0)) risk_control_not_approved_amount, sum(if(credit_audit_approving_time is not null and feed_back_time is null, 1, 0)) credit_audit_approved_count, sum(if(credit_audit_approving_time is not null and feed_back_time is null, credit_amount, 0)) credit_audit_approved_amount, sum(if(feed_back_time is not null and first_level_review_approving_time is null, 1, 0)) feedback_submitted_count, sum(if(feed_back_time is not null and first_level_review_approving_time is null, credit_amount, 0)) feedback_submitted_amount, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, 1, 0)) level1_review_approved_count, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, credit_amount, 0)) level1_review_approved_amount, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, 1, 0)) level2_review_approved_count, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, credit_amount, 0)) level2_review_approved_amount, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, 1, 0)) review_meeting_approved_count, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, credit_amount, 0)) review_meeting_approved_amount, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, 1, 0)) general_manager_approved_count, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, credit_amount, 0)) general_manager_approved_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, 1, 0)) reply_issued_count, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_amount, 0)) reply_issued_apply_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_reply_amount, 0)) reply_issued_reply_amount from dwd_financial_lease_flow_acc where dt = '9999-12-31';
各业务方向统计
需求说明如下
统计粒度 |
指标 |
说明 |
业务方向 |
截至当日处于新建状态项目数 |
申请已发起,正交由风控员审核,未得出风控审核结果的项目数 |
业务方向 |
截至当日处于新建状态项目申请金额 |
略 |
业务方向 |
截至当日处于未达风控状态项目数 |
未通过风控员审核,正交由风控经理审核,未得出最终风控结论的项目数 |
业务方向 |
截至当日处于未达风控状态项目申请金额 |
略 |
业务方向 |
截至当日处于信审经办审核通过状态项目数 |
信审经办审核通过,业务经办尚未提交业务反馈的项目数 |
业务方向 |
截至当日处于信审经办审核通过状态项目申请金额 |
略 |
业务方向 |
截至当日处于已提交业务反馈状态项目数 |
业务经办已提交业务反馈,正交由一级评审员及一级评审加签人审核,尚未得出一级评审结论的项目数 |
业务方向 |
截至当日处于已提交业务反馈状态项目申请金额 |
略 |
业务方向 |
截至当日处于一级评审通过状态项目数 |
一级评审已通过,正交由二级评审员审核,尚未得出二级评审结论的项目数 |
业务方向 |
截至当日处于一级评审通过状态项目申请金额 |
略。 |
业务方向 |
截至当日处于二级评审通过状态项目数 |
二级评审已通过,正由项目评审会审核,尚未得出相应评审结论的项目数 |
业务方向 |
截至当日处于二级评审通过状态项目申请金额 |
略 |
业务方向 |
截至当日处于项目评审会审核通过状态项目数 |
项目评审会审核通过,正交由总经理/分管总审核,尚未得出相应评审结论的项目数 |
业务方向 |
截至当日处于项目评审会审核通过状态项目申请金额 |
略 |
业务方向 |
截至当日处于总经理/分管总审核通过状态项目数 |
总经理/分管总审核通过,批复文件尚未生成的项目数 |
业务方向 |
截至当日处于总经理/分管总审核通过状态项目申请金额 |
略 |
业务方向 |
截至当日处于已出具批复状态项目数 |
批复阶段审核通过,生成批复文件,包含了具体的批复金额、条款等,尚未新增授信的项目数 |
业务方向 |
截至当日处于已出具批复状态项目申请金额 |
略 |
业务方向 |
截至当日处于已出具批复状态项目批复金额 |
略 |
1)建表语句
DROP TABLE IF EXISTS ads_lease_org_unfinished_audit_stats; CREATE EXTERNAL TABLE IF NOT EXISTS ads_lease_org_unfinished_audit_stats( `dt` STRING COMMENT '统计日期', `lease_organization` STRING COMMENT '业务方向', `created_project_count` BIGINT COMMENT '新建状态项目数', `created_project_amount` DECIMAL(16,2) COMMENT '新建状态项目申请金额', `risk_control_not_approved_count` BIGINT COMMENT '未达风控状态项目数', `risk_control_not_approved_amount` DECIMAL(16,2) COMMENT '未达风控状态项目申请金额', `credit_audit_approved_count` BIGINT COMMENT '信审经办审核通过状态项目数', `credit_audit_approved_amount` DECIMAL(16,2) COMMENT '信审经办审核通过状态项目申请金额', `feedback_submitted_count` BIGINT COMMENT '已提交业务反馈状态项目数', `feedback_submitted_amount` DECIMAL(16,2) COMMENT '已提交业务反馈状态项目申请金额', `level1_review_approved_count` BIGINT COMMENT '一级评审通过状态项目数', `level1_review_approved_amount` DECIMAL(16,2) COMMENT '一级评审通过状态项目申请金额', `level2_review_approved_count` BIGINT COMMENT '二级评审通过状态项目数', `level2_review_approved_amount` DECIMAL(16,2) COMMENT '二级评审通过状态项目申请金额', `review_meeting_approved_count` BIGINT COMMENT '项目评审会审核通过状态项目数', `review_meeting_approved_amount` DECIMAL(16,2) COMMENT '项目评审会审核通过状态项目申请金额', `general_manager_approved_count` BIGINT COMMENT '总经理/分管总审核通过状态项目数', `general_manager_approved_amount` DECIMAL(16,2) COMMENT '总经理/分管总审核通过状态项目申请金额', `reply_issued_count` BIGINT COMMENT '出具批复状态项目数', `reply_issued_apply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目申请金额', `reply_issued_reply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目批复金额' ) COMMENT '各业务方向待审/在审项目统计' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION '/warehouse/financial_lease/ads/ads_lease_org_unfinished_audit_stats' TBLPROPERTIES ('compression.codec' = 'org.apache.hadoop.io.compress.GzipCodec');
数据装载
insert overwrite table ads_lease_org_unfinished_audit_stats select dt, lease_organization, created_project_count, created_project_amount, risk_control_not_approved_count, risk_control_not_approved_amount, credit_audit_approved_count, credit_audit_approved_amount, feedback_submitted_count, feedback_submitted_amount, level1_review_approved_count, level1_review_approved_amount, level2_review_approved_count, level2_review_approved_amount, review_meeting_approved_count, review_meeting_approved_amount, general_manager_approved_count, general_manager_approved_amount, reply_issued_count, reply_issued_apply_amount, reply_issued_reply_amount from ads_lease_org_unfinished_audit_stats union select '2023-05-09' dt, lease_organization, sum(if(undistributed_time is null and risk_manage_refused_time is null, 1, 0)) created_project_count, sum(if(undistributed_time is null and risk_manage_refused_time is null, credit_amount, 0)) created_project_amount, sum(if(risk_manage_refused_time is not null and undistributed_time is null, 1, 0)) risk_control_not_approved_count, sum(if(risk_manage_refused_time is not null and undistributed_time is null, credit_amount, 0)) risk_control_not_approved_amount, sum(if(credit_audit_approving_time is not null and feed_back_time is null, 1, 0)) credit_audit_approved_count, sum(if(credit_audit_approving_time is not null and feed_back_time is null, credit_amount, 0)) credit_audit_approved_amount, sum(if(feed_back_time is not null and first_level_review_approving_time is null, 1, 0)) feedback_submitted_count, sum(if(feed_back_time is not null and first_level_review_approving_time is null, credit_amount, 0)) feedback_submitted_amount, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, 1, 0)) level1_review_approved_count, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, credit_amount, 0)) level1_review_approved_amount, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, 1, 0)) level2_review_approved_count, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, credit_amount, 0)) level2_review_approved_amount, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, 1, 0)) review_meeting_approved_count, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, credit_amount, 0)) review_meeting_approved_amount, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, 1, 0)) general_manager_approved_count, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, credit_amount, 0)) general_manager_approved_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, 1, 0)) reply_issued_count, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_amount, 0)) reply_issued_apply_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_reply_amount, 0)) reply_issued_reply_amount from dwd_financial_lease_flow_acc where dt = '9999-12-31' group by lease_organization;
各部门统计
需求说明如下
统计粒度 |
指标 |
说明 |
部门 |
截至当日处于新建状态项目数 |
申请已发起,正交由风控员审核,未得出风控审核结果的项目数 |
部门 |
截至当日处于新建状态项目申请金额 |
略 |
部门 |
截至当日处于未达风控状态项目数 |
未通过风控员审核,正交由风控经理审核,未得出最终风控结论的项目数 |
部门 |
截至当日处于未达风控状态项目申请金额 |
略 |
部门 |
截至当日处于信审经办审核通过状态项目数 |
信审经办审核通过,业务经办尚未提交业务反馈的项目数 |
部门 |
截至当日处于信审经办审核通过状态项目申请金额 |
略 |
部门 |
截至当日处于已提交业务反馈状态项目数 |
业务经办已提交业务反馈,正交由一级评审员及一级评审加签人审核,尚未得出一级评审结论的项目数 |
部门 |
截至当日处于已提交业务反馈状态项目申请金额 |
略 |
部门 |
截至当日处于一级评审通过状态项目数 |
一级评审已通过,正交由二级评审员审核,尚未得出二级评审结论的项目数 |
部门 |
截至当日处于一级评审通过状态项目申请金额 |
略。 |
部门 |
截至当日处于二级评审通过状态项目数 |
二级评审已通过,正由项目评审会审核,尚未得出相应评审结论的项目数 |
部门 |
截至当日处于二级评审通过状态项目申请金额 |
略 |
部门 |
截至当日处于项目评审会审核通过状态项目数 |
项目评审会审核通过,正交由总经理/分管总审核,尚未得出相应评审结论的项目数 |
部门 |
截至当日处于项目评审会审核通过状态项目申请金额 |
略 |
部门 |
截至当日处于总经理/分管总审核通过状态项目数 |
总经理/分管总审核通过,批复文件尚未生成的项目数 |
部门 |
截至当日处于总经理/分管总审核通过状态项目申请金额 |
略 |
部门 |
截至当日处于已出具批复状态项目数 |
批复阶段审核通过,生成批复文件,包含了具体的批复金额、条款等,尚未新增授信的项目数 |
部门 |
截至当日处于已出具批复状态项目申请金额 |
略 |
部门 |
截至当日处于已出具批复状态项目批复金额 |
略 |
DROP TABLE IF EXISTS ads_department_unfinished_audit_stats; CREATE EXTERNAL TABLE IF NOT EXISTS ads_department_unfinished_audit_stats( `dt` STRING COMMENT '统计日期', `department3_id` STRING COMMENT '三级部门ID', `department3_name` STRING COMMENT '三级部门名称', `department2_id` STRING COMMENT '二级部门ID', `department2_name` STRING COMMENT '二级部门名称', `department1_id` STRING COMMENT '一级部门ID', `department1_name` STRING COMMENT '一级部门名称', `created_project_count` BIGINT COMMENT '新建状态项目数', `created_project_amount` DECIMAL(16,2) COMMENT '新建状态项目申请金额', `risk_control_not_approved_count` BIGINT COMMENT '未达风控状态项目数', `risk_control_not_approved_amount` DECIMAL(16,2) COMMENT '未达风控状态项目申请金额', `credit_audit_approved_count` BIGINT COMMENT '信审经办审核通过状态项目数', `credit_audit_approved_amount` DECIMAL(16,2) COMMENT '信审经办审核通过状态项目申请金额', `feedback_submitted_count` BIGINT COMMENT '已提交业务反馈状态项目数', `feedback_submitted_amount` DECIMAL(16,2) COMMENT '已提交业务反馈状态项目申请金额', `level1_review_approved_count` BIGINT COMMENT '一级评审通过状态项目数', `level1_review_approved_amount` DECIMAL(16,2) COMMENT '一级评审通过状态项目申请金额', `level2_review_approved_count` BIGINT COMMENT '二级评审通过状态项目数', `level2_review_approved_amount` DECIMAL(16,2) COMMENT '二级评审通过状态项目申请金额', `review_meeting_approved_count` BIGINT COMMENT '项目评审会审核通过状态项目数', `review_meeting_approved_amount` DECIMAL(16,2) COMMENT '项目评审会审核通过状态项目申请金额', `general_manager_approved_count` BIGINT COMMENT '总经理/分管总审核通过状态项目数', `general_manager_approved_amount` DECIMAL(16,2) COMMENT '总经理/分管总审核通过状态项目申请金额', `reply_issued_count` BIGINT COMMENT '出具批复状态项目数', `reply_issued_apply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目申请金额', `reply_issued_reply_amount` DECIMAL(16,2) COMMENT '出具批复状态项目批复金额' ) COMMENT '各部门待审/在审项目统计' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION '/warehouse/financial_lease/ads/ads_department_unfinished_audit_stats' TBLPROPERTIES ('compression.codec' = 'org.apache.hadoop.io.compress.GzipCodec'); 2)数据装载
insert overwrite table ads_department_unfinished_audit_stats select dt, department3_id, department3_name, department2_id, department2_name, department1_id, department1_name, created_project_count, created_project_amount, risk_control_not_approved_count, risk_control_not_approved_amount, credit_audit_approved_count, credit_audit_approved_amount, feedback_submitted_count, feedback_submitted_amount, level1_review_approved_count, level1_review_approved_amount, level2_review_approved_count, level2_review_approved_amount, review_meeting_approved_count, review_meeting_approved_amount, general_manager_approved_count, general_manager_approved_amount, reply_issued_count, reply_issued_apply_amount, reply_issued_reply_amount from ads_department_unfinished_audit_stats union select dt, agg.department3_id, department3_name, department2_id, department2_name, department1_id, department1_name, created_project_count, created_project_amount, risk_control_not_approved_count, risk_control_not_approved_amount, credit_audit_approved_count, credit_audit_approved_amount, feedback_submitted_count, feedback_submitted_amount, level1_review_approved_count, level1_review_approved_amount, level2_review_approved_count, level2_review_approved_amount, review_meeting_approved_count, review_meeting_approved_amount, general_manager_approved_count, general_manager_approved_amount, reply_issued_count, reply_issued_apply_amount, reply_issued_reply_amount from (select '2023-05-09' dt, department3_id, sum(if(undistributed_time is null and risk_manage_refused_time is null, 1, 0)) created_project_count, sum(if(undistributed_time is null and risk_manage_refused_time is null, credit_amount, 0)) created_project_amount, sum(if(risk_manage_refused_time is not null and undistributed_time is null, 1, 0)) risk_control_not_approved_count, sum(if(risk_manage_refused_time is not null and undistributed_time is null, credit_amount, 0)) risk_control_not_approved_amount, sum(if(credit_audit_approving_time is not null and feed_back_time is null, 1, 0)) credit_audit_approved_count, sum(if(credit_audit_approving_time is not null and feed_back_time is null, credit_amount, 0)) credit_audit_approved_amount, sum(if(feed_back_time is not null and first_level_review_approving_time is null, 1, 0)) feedback_submitted_count, sum(if(feed_back_time is not null and first_level_review_approving_time is null, credit_amount, 0)) feedback_submitted_amount, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, 1, 0)) level1_review_approved_count, sum(if(first_level_review_approving_time is not null and second_level_review_approving_time is null, credit_amount, 0)) level1_review_approved_amount, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, 1, 0)) level2_review_approved_count, sum(if(second_level_review_approving_time is not null and project_review_meeting_approving_time is null, credit_amount, 0)) level2_review_approved_amount, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, 1, 0)) review_meeting_approved_count, sum(if(project_review_meeting_approving_time is not null and general_manager_review_approving_time is null, credit_amount, 0)) review_meeting_approved_amount, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, 1, 0)) general_manager_approved_count, sum(if(general_manager_review_approving_time is not null and reply_review_approving_time is null, credit_amount, 0)) general_manager_approved_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, 1, 0)) reply_issued_count, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_amount, 0)) reply_issued_apply_amount, sum(if(reply_review_approving_time is not null and credit_create_time is null, credit_reply_amount, 0)) reply_issued_reply_amount from (select * from dwd_financial_lease_flow_acc where dt = '9999-12-31') acc left join (select id, department3_id from dim_employee_full where dt = '2023-05-09') emp on acc.salesman_id = emp.id group by department3_id) agg left join (select department3_id, department3_name, department2_id, department2_name, department1_id, department1_name from dim_department_full where dt = '2023-05-09') department on agg.department3_id = department.department3_id; msck repair table dim_department_full;