# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_18808965/article/details/80017826
# NOTE(review): this pasted notice precedes the shebang, so the `#!/bin/bash` on the
# next line is NOT honored when the file is executed directly. Run it as
# `bash <script>` or move this header below the shebang.
#!/bin/bash
###### Load the application config sasp_public.cfg: MySQL connection settings and log-file directories ######
# /etc/profile supplies the system-wide environment (presumably the hadoop/sqoop/hive
# PATH entries — confirm on the target host).
source /etc/profile
# NOTE(review): assumes this cfg defines MyURL, MyUSER, MyPASS, SUCCESS_FILE and
# etl_group, which are used (but never set) later in this script — verify.
source /ODS/OPT/SHELL/ETL_CCS4/sasp_public.cfg
if [ -z "$1" ]; then
  ###### No argument: the extraction window covers yesterday's data ######
  work_st_date=$(date '+%Y-%m-%d' -d '-1 days')   # window start (yesterday), e.g. 2016-07-31
  work_ed_date=$(date '+%Y-%m-%d')                # window end (today), e.g. 2016-08-01
  v_year=$(date '+%Y')                            # current year, e.g. 2016
else
  ###### Explicit date argument: window is [$1, $1 + 1 day) ######
  work_st_date=$(date '+%Y-%m-%d' -d "$1")
  work_ed_date=$(date '+%Y-%m-%d' -d " +1 day $1")
  v_year=$(date '+%Y' -d "$1")                    # year of the requested date
fi
###### Load-time bookkeeping ######
##load_date=$(date '+%Y-%m-%d %H:%M:%S')
printf '%s\n' "${v_year}"
echo "第一张表抽取开始sasp_income_target_area $(date '+%Y-%m-%d %H:%M:%S')"
source_sys='SASP'        # source system name
version='ODS'            # root directory for external-table files
schema='SASP'            # owner of the source-side (MySQL) table
targer_owner='ODS_CCS4'  # owner of the hive-side table ("targer" spelling kept: referenced elsewhere)
## MySQL-side column list: ITEM_1..ITEM_12 are NULL-defaulted to 0 and folded
## into a single comma-joined column TARGET_INCOME.
sqoop_src_columns="ID,HQ_CODE,AREA_CODE,MANAGER_CODE,MANAGER_NAME, CONCAT(IFNULL(ITEM_1, 0),',',IFNULL(ITEM_2, 0),',',IFNULL(ITEM_3, 0),',',IFNULL(ITEM_4, 0),',',IFNULL(ITEM_5, 0),',',IFNULL(ITEM_6, 0),',',IFNULL(ITEM_7, 0),',',IFNULL(ITEM_8, 0),',',IFNULL(ITEM_9, 0),',',IFNULL(ITEM_10, 0),',',IFNULL(ITEM_11,0),',',IFNULL(ITEM_12,0)) AS TARGET_INCOME,YEAR,CREATER,CREATE_DATE"
echo "#starting time: $(date '+%Y-%m-%d %H:%M:%S')"
##### Record format: source table : version dir : database dir : hive table : split column : mapper count #####
# One colon-separated record per table to extract. The here-doc feeds the loop
# directly, which (a) replaces the original fragile `eval $(awk ...)` field
# parsing with `IFS=':' read -r`, and (b) keeps the loop in the CURRENT shell:
# in the original `echo | while` pipeline, the `exit 1` on sqoop failure only
# killed the pipeline subshell, and the script still ended with status 0.
# ($dest_db is parsed but unused, as in the original.)
while IFS=':' read -r src_table sys_no dest_db dest_table sqoop_split_by sqoop_m; do
  echo -e "#sqoop table: ${dest_table} of $(date '+%Y-%m-%d %H:%M:%S')"
  ### Job start timestamp, recorded in the log_imp_to_prism audit row below.
  start_dt=$(date '+%Y-%m-%d %H:%M:%S')
  ### HDFS directory backing the Hive partition for this table/year.
  targer_url="/${version}/${sys_no}/${dest_table}/${v_year}"
  where_condition=" year = '${v_year}'"
  echo "sqoop import start -D mapred.job.queue.name=udp 选择资源阵列"
  sqoop import -D mapred.job.queue.name=udp --connect "${MyURL}" --username "${MyUSER}" --password "${MyPASS}" --query "select ${sqoop_src_columns} from ${schema}.${src_table} WHERE \$CONDITIONS and ${where_condition} " --null-string '\\N' --null-non-string '\\N' --target-dir "${targer_url}" --fields-terminated-by '\001' --hive-drop-import-delims --split-by "${sqoop_split_by}" -m "${sqoop_m}" --delete-target-dir
  sqoop_rc=$?
  # BUGFIX: end_dt was never assigned in the original script, so both audit
  # inserts logged an empty end time and a NULL duration.
  end_dt=$(date '+%Y-%m-%d %H:%M:%S')
  if [ ${sqoop_rc} -eq 0 ]; then
    # NOTE(review): ${log} is never defined in this script, so this appends an
    # empty line to ${SUCCESS_FILE}. Kept for compatibility — confirm whether
    # sasp_public.cfg is expected to define it.
    echo -e ${log}>>${SUCCESS_FILE}
    echo "导入成功的话,判断添加分区"
    ${etl_group} hive -e "use ${targer_owner};alter table ${dest_table} add if not exists partition (inc_year='${v_year}') location '${targer_url}';"
    ### Record the successful import (row count, timing) in the audit table.
    echo "在hive log_imp_to_prism表记录导入的结果"
    hive -e "use ${targer_owner}; insert into table log_imp_to_prism select '${source_sys}', '${schema}', '${src_table}', '${targer_owner}','${dest_table}', count(1), '${targer_url}', '${start_dt}', '${end_dt}', unix_timestamp('${end_dt}', 'yyyy-MM-dd HH:mm:ss')-unix_timestamp('${start_dt}', 'yyyy-MM-dd HH:mm:ss'), '${dest_table}.sh', 'OK' , '${work_ed_date}' from ${dest_table} t1 where ${where_condition}"
  else
    echo "# export failure: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "在hive log_imp_to_prism表记录导入的结果"
    hive -e "use ${targer_owner}; insert into table log_imp_to_prism select '${source_sys}', '${schema}', '${src_table}', '${targer_owner}','${dest_table}', count(1), '${targer_url}', '${start_dt}', '${end_dt}', unix_timestamp('${end_dt}', 'yyyy-MM-dd HH:mm:ss')-unix_timestamp('${start_dt}', 'yyyy-MM-dd HH:mm:ss'), '${dest_table}.sh', 'NO' , '${work_ed_date}' from ${dest_table} t1 where ${where_condition}"
    # Abort with a non-zero status so the scheduler (hue) marks the task red.
    exit 1
  fi
done <<'TABLE_LIST'
SASP_INCOME_TARGET_AREA:ODS_CCS4:ODS_CCS4:SASP_INCOME_TARGET_AREA:ID:1
TABLE_LIST
# Final progress trailer for the job log.
printf '#end time: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
echo "第一张表抽取完成sasp_income_target_area"