前天收到一个任务,要统计每天ETL的运行情况,感觉每天从给的SQL语句一个一个的查询影响效率。
就行用写一个存储过程来实现,然后通过shell调用这个存储过程来自动跑数据,写到相应的文件里~
思路就是这样,废话不说,上一下写好的代码。
DELIMITER // create procedure sp_etl_info_day (in statis_date int) begin declare num_day int default 0; declare num_month int default 0; declare num_hour int default 0; declare num_total int default 0; declare num_add_lastday int default 0; declare num_schedule_run int default 0; declare num_run int default 0; declare num_succeed int default 0; declare num_failed int default 0; declare num_failed1 int default 0; declare num_killed int default 0; declare num_waited int default 0; declare num_norun int default 0; declare num_man_run int default 0; declare num_norun_lastday int default 0; declare rate_wz_etl FLOAT default 0; declare rate_js_etl FLOAT default 0; select count(*) into num_day from schedule where isOnline=1 and scheduleOption like "%1#day%"; select count(*) into num_month from schedule where isOnline=1 and scheduleOption like "%1#month%"; select count(*) into num_hour from schedule where isOnline=1 and scheduleOption like "%1#hour%"; set num_total=num_day+num_month+num_hour; select count(distinct scheduleId) into num_schedule_run from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1); select count(*) into num_run from (select count(*) as count, scheduleId,period from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) group by scheduleId) a; select count(*) into num_failed from (select count(*) as count, scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Failed" group by scheduleId) a where a.scheduleId not in (select scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Succeed" group by scheduleId); select count(*) into num_killed from (select count(*) as count, scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Killed" group by scheduleId) a where a.scheduleId not in (select scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Succeed" group by scheduleId); set num_succeed=num_run-num_failed-num_killed; set rate_wz_etl=round(num_succeed/num_run,4); set num_failed1=num_failed+num_killed; select CONCAT("日:",num_day,"个",",月:",num_month,"个",",小时:",num_hour,"个",",计划运行控制流",num_schedule_run,"个",",共",num_total,"个" ,',昨天共启动控制流:',num_run,'个',',正常执行成功',num_succeed,'个',',报错',num_failed1,'个',',昨天调度运行完整率:',rate_wz_etl) as "ETL日报"; select reserved2, flowName,source from flow where id in ( select originalFlowId from schedule where id in ( select distinct scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Failed" and scheduleId not in( select scheduleId from schedule_execution where DATE_FORMAT(startTime,'%Y%m%d')=statis_date and scheduleId in ( select id from schedule where isOnline=1) and status="Succeed" ))); end ;// DELIMITER ;
以上是mysql存储过程~很简单的~
过程写好,下面就开始编写shell调用该存储过程了~
#!/bin/sh ## 用来统计ETL日运行情况 ## the script is executed at 10 o'clock by every day. DATE=$(date +%Y%m%d --date '1 days ago') USER=user PASSWD=****** DATABASE=user time=$(date +%Y%m%d --date '1 days ago') FILE=/home/mysql/etl_log/log/ETL_$time.txt echo "the date:"${DATE} SQL="mysql -u${USER} -p${PASSWD} -D ${DATABASE} -e \"call sp_etl_info_day(${DATE})\"" echo ${SQL} echo $SQL>>${FILE} mysql -u${USER} -p${PASSWD} -D ${DATABASE} -e "call sp_etl_info_day(${DATE})">>$FILE exit
然后设置crontab就可以了~
以后每天只要去对应的目录下查看文件就行啦~~
省去了不少时间~
我想这大概就是思路,在工作期间,尽量避免重复的工作量,把程序能做的让程序自动做~这样可以更高效的工作~