# 基于MaxCompute InformationSchema进行血缘关系分析

2、根据表上下游依赖推测出血缘关系。

（1）根据作业ID查询某表上下游依赖SQL处理如下：

select
t2.input_table,
t1.inst_id,
replace(replace(t1.output_tables,"[",""),"]","") as output_table
left join
(
select
---去除表开始和结尾的[ ]
trans_array(1,",",inst_id,
replace(replace(input_tables,"[",""),"]","")) as (inst_id,input_table)
from information_schema.tasks_history  where ds = 20190902
)t2
on t1.inst_id = t2.inst_id
where (replace(replace(t1.output_tables,"[",""),"]","")) <> ""
order by t2.input_table limit 1000;

（2）根据结果可以分析得出每张表张表的输入表输出表以及连接的作业ID，即每张表的血缘关系。

（1）设计存储结果表Schema

CREATE TABLE IF NOT EXISTS dim_meta_tasks_history_a
(
stat_date         STRING COMMENT '统计日期',
project_name      STRING COMMENT '项目名称',
start_time        STRING COMMENT '开始时间',
end_time          STRING COMMENT '结束时间',
input_table       STRING COMMENT '输入表',
output_table      STRING COMMENT '输出表',
etl_date          STRING COMMENT 'ETL运行时间'
);

（2）关键解析sql

SELECT
'${yesterday}' AS stat_date ,'project_name' AS project_name ,a.inst_id AS task_id ,start_time AS start_time ,end_time AS end_time ,a.input_table AS input_table ,a.output_table AS output_table ,GETDATE() AS etl_date FROM ( SELECT t2.input_table ,t1.inst_id ,replace(replace(t1.input_tables,"[",""),"]","") AS output_table ,start_time ,end_time FROM ( SELECT * ,ROW_NUMBER() OVER(PARTITION BY output_tables ORDER BY end_time DESC) AS rows FROM information_schema.tasks_history WHERE operation_text LIKE 'INSERT OVERWRITE TABLE%' AND ( start_time >= TO_DATE('${yesterday}','yyyy-mm-dd')
and
end_time <= DATEADD(TO_DATE('${yesterday}','yyyy-mm-dd'),8,'hh') ) AND(replace(replace(output_tables,"[",""),"]",""))<>"" AND ds = CONCAT(SUBSTR('${yesterday}',1,4),SUBSTR('${yesterday}',6,2),SUBSTR('${yesterday}',9,2))
)t1
LEFT JOIN(
SELECT TRANS_ARRAY(1,",",inst_id,replace(replace(input_tables,"[",""),"]","")) AS (inst_id,input_table)
WHERE ds = CONCAT(SUBSTR('${yesterday}',1,4),SUBSTR('${yesterday}',6,2),SUBSTR('\${yesterday}',9,2))
)t2
ON t1.inst_id = t2.inst_id
where t1.rows = 1
) a
WHERE a.input_table is not null
;

（3）任务依赖关系

（4）最终血缘关系

https://h5.dingtalk.com/invite-page/index.html?bizSource=____source____&corpId=dingb682fb31ec15e09f35c2f4657eb6378f&inviterUid=E3F28CD2308408A8&encodeDeptId=0054DC2B53AFE745

+ 订阅