总结以往参与的项目中,针对FTP服务器上离线文本数据抽取所使用的校验模板,涉及对文件大小、条数、加密、列数等的校验规则。
#!/bin/bash
###################################################
# Environment and path configuration
# author: yzg
export LANG=en_US
# NOTE(review): bash normally does not expand aliases in non-interactive
# scripts, so this alias probably has no effect here - confirm before relying on it.
alias ls='ls --color=auto --time-style=local'

# Directory the script changes into to look for incoming .csv files.
src_path='/data/XX/put_file'
# Transfer command; in this script it is referenced only by commented-out calls.
ftp_path='/data/XX/do_ftp'

# Log-file prefixes: one stamped with the current day, one with the current month.
log_file="/data/XX/log/ftp_d_$(date +%Y%m%d)"
log_file_mon="/data/XX/log/ftp_m_$(date +%Y%m)"

# Unit lists used by the size checks: units in allownull_path may deliver an
# empty file, units in isnull_path are rejected when their file is not empty.
#rowlen_path="/dataXX/cfg/rowlength.cfg"
allownull_path='/data/XX/cfg/allownull.cfg'
isnull_path='/data/XX/cfg/isnull.cfg'
#------------------------------------------------------------
# Exactly one positional argument is required (the message calls it the
# unit id); any other count prints a timestamped notice and ends with status 1.
if [[ $# -ne 1 ]]; then
  echo "$(date +%Y%m%d:%T) not one command(unit id)!"
  exit 1
fi
# Choose the log-file family from the length of the argument.
# Expected arguments look like 21001_20180728 (unit id + day, 14 characters)
# or 21002_201807 (unit id + month, 12 characters): 14 or more characters
# select the daily prefix (log_file), anything shorter the monthly prefix
# (log_file_mon).
# The length comes from ${#1}, so the argument is never word-split,
# glob-expanded or interpreted as an echo option while it is measured.
para_len=${#1}
if (( para_len >= 14 )); then
  log_prefix=${log_file}
else
  log_prefix=${log_file_mon}
fi
done_log_file="${log_prefix}_done.log"
err_log_file="${log_prefix}_err.log"
info_log_file="${log_prefix}_info.log"
# Validate, compress and register every batch whose file name contains $1.
#
# A batch is found through its file ending in "000.csv"; the name minus its
# last 8 characters is the batch version. Every "<version>*.csv" file is
# size-checked, counted, hashed and gzipped, and one fixed-width line per file
# (name, compressed size, row count, period, md5) is appended to
# "<version>.verf".
#
# Reads:  src_path, done_log_file, err_log_file, allownull_path, isnull_path
#         and sms_path (alert command; this script never assigns it).
# Exit status:
#    0  every batch processed
#    2  a file is 2 GiB or larger
#    6  the batch version is already present in the done log
#    7  a file is empty and its unit is not listed in allownull_path
#    8  transfer of the .verf file failed
#    9  no matching file found
#   10  transfer of a compressed file failed
#   11  a file is non-empty although its unit is listed in isnull_path
#   12  src_path cannot be entered
#   13  gzip failed

# Append one timestamped line to the error log.
_log_err() {
  printf '%s %s\n' "$(date +%Y%m%d:%T)" "$1" >>"${err_log_file}"
}

# Send an alert through sms_path; when it is not configured, say so on stderr
# instead of trying to execute the message text as a command.
_notify() {
  if [ -n "${sms_path:-}" ]; then
    # Deliberately unquoted so sms_path may carry its own options.
    ${sms_path} "$1"
  else
    printf 'sms_path is not set, alert not sent: %s\n' "$1" >&2
  fi
}

# Log the message, send the alert and terminate with the given status.
_fail() {
  _log_err "$1"
  _notify "$1"
  exit "$2"
}

if ! cd "${src_path}"; then
  echo "change file failed!"
  echo "change file failed!" >>"${err_log_file}"
  # Previously the script fell off the end here and reported success.
  exit 12
fi

# Collect the matches one by one: testing the whole glob with a single
# `[ -f ... ]` breaks as soon as more than one batch is present.
#a_20008_21001_20180728_00_000.csv
batch_heads=()
for candidate in *"$1"*000.csv; do
  if [ -f "${candidate}" ]; then
    batch_heads+=("${candidate}")
  fi
done
if (( ${#batch_heads[@]} == 0 )); then
  _fail "$1 文件未找到!" 9
fi
echo "${batch_heads[@]}"

for batch_head in "${batch_heads[@]}"; do
  # Batch version: the name without its last 8 characters ("_000.csv"),
  # e.g. s_20008_OLN_08001_20180729_00
  version=${batch_head%????????}
  echo "${version}"

  # Refuse a version that is already recorded in the done log.
  if [ -f "${done_log_file}" ] && grep -qF -- "${version}" "${done_log_file}"; then
    _fail "${version} 重复上传!" 6
  fi

  # Start from a fresh .verf file, e.g. s_20008_OLN_08001_20180729_00.verf
  verf_file="${version}.verf"
  rm -f -- "${verf_file}"

  # s_20008_OLN_21001_20180729_00_000.csv -> unit id 21001, period 20180729
  v_unit=$(awk -F_ '{print $(NF-3)}' <<<"${batch_head}")
  period=$(awk -F_ '{print $(NF-2)}' <<<"${batch_head}")

  for data_file in "${version}"*.csv; do
    # Byte size, taken without parsing `ls -l`.
    size=$(( $(wc -c <"${data_file}") ))

    if (( size >= 2147483648 )); then
      _fail "${data_file} 大小超过2G!" 2
    fi

    if (( size == 0 )); then
      # An empty file is always logged; it is fatal only for units that are
      # not listed in allownull_path.
      _log_err "${data_file} 文件大小为0!"
      if ! grep -qF -- "${v_unit}" "${allownull_path}"; then
        _notify "${data_file} 文件大小为0!"
        exit 7
      fi
    elif grep -qF -- "${v_unit}" "${isnull_path}"; then
      # Units listed in isnull_path must not deliver data.
      _fail "${data_file} 文件大小为非空!" 11
    fi

    # Row count and md5 describe the uncompressed file.
    row_count=$(( $(wc -l <"${data_file}") ))
    md5=$(md5sum <"${data_file}")
    md5=${md5%% *}

    if ! gzip -f -- "${data_file}"; then
      _fail "${data_file} 文件压缩失败!" 13
    fi
    packed_size=$(( $(wc -c <"${data_file}.gz") ))

    # The original awk format ended in a bare "%", which is not portable
    # across awk implementations; "%%" writes that character explicitly so
    # the record layout stays as it was.
    # NOTE(review): confirm the consumer of the .verf file expects the
    # trailing "%" before the CRLF.
    printf '%-50s%-20s%-20s%-12s%-32s%%\r\n' \
      "${data_file}.gz" "${packed_size}" "${row_count}" "${period}" "${md5}" \
      >>"${verf_file}"
  done

  for packed_file in "${version}"*.csv.gz; do
    # The transfer call is commented out, so the status tested below is the
    # placeholder echo's.
    echo "hello"
    #${ftp_path} $l
    if [ $? != 0 ]; then
      exit 10
    fi
  done

  # Same placeholder for the .verf transfer.
  echo "hello"
  #${ftp_path} $n".verf"
  if [ $? = 0 ]; then
    printf '%s %s\n' "$(date +%Y%m%d:%T)" "${version}" >>"${done_log_file}"
  else
    _fail "${version} 文件ftp中断!" 8
  fi
done
exit 0