#!/bin/bash
#
# Monitor application runtime on YARN and kill applications that run too long.
#
# Background: some Spark jobs finish their real work but keep holding cluster
# resources and never terminate.  This script lists the target job via
# `yarn application -list`, compares each application's start time with the
# current time, and kills any application running longer than THRESHOLD_MS.
# At most one application is killed per run (original behavior).
#
# Requires: yarn CLI on PATH; GNU date (for the %N nanoseconds format).
set -euo pipefail

readonly OUTFILE='./moniter_app_on_yarn.log'
readonly THRESHOLD_MS=600000   # 10 min expressed in milliseconds

# Extract the 13-digit epoch-millisecond start time embedded in the output of
# `yarn application -status <id>`.  Prints the timestamp on stdout; prints
# nothing and returns non-zero when no 13-digit number is present.
# NOTE(review): the greedy `.*` captures the LAST 13-digit run in the text,
# same as the original `expr` call — assumed to be Start-Time; verify if the
# status output ever contains a later 13-digit field.
extract_start_time() {
  expr "$1" : '.*\([0-9]\{13\}\).*'
}

# True when the token is shaped like a YARN application id
# (application_<clusterTimestamp>_<sequence>).  The original code matched the
# literal prefix "application_153", which silently stops working once the
# cluster timestamp rolls past 153xxxxxxxxxx (late 2018).
is_app_id() {
  [[ "$1" =~ ^application_[0-9]+_[0-9]+$ ]]
}

main() {
  # Current time in epoch milliseconds (%s%N gives nanoseconds; divide to ms).
  local now_ms
  now_ms=$(( $(date +'%s%N') / 1000000 ))
  echo "==current time:${now_ms}" >> "$OUTFILE"

  # Listing filtered to our target job; `|| true` so an empty match does not
  # abort the script under `set -e`.
  local app_list
  app_list=$(yarn application -list | grep app_test.py || true)

  local token app_status start_time diff_ms
  # Intentional word-splitting: scan every whitespace-separated token of the
  # listing and keep only those shaped like an application id.
  for token in $app_list; do
    is_app_id "$token" || continue
    echo "====active application: ${token}" >> "$OUTFILE"

    app_status=$(yarn application -status "$token" || true)
    start_time=$(extract_start_time "$app_status") || true
    if [[ -z "$start_time" ]]; then
      # Unparseable status output — skip rather than feed an empty value
      # into the arithmetic below.
      echo "======could not parse start time for ${token}" >> "$OUTFILE"
      continue
    fi
    echo "======application start time: ${start_time}" >> "$OUTFILE"

    diff_ms=$(( now_ms - start_time ))
    echo "======application have run time: ${diff_ms}" >> "$OUTFILE"

    if (( diff_ms > THRESHOLD_MS )); then
      # Discard the kill command's stdout, as the original did by capturing
      # it into an unused variable; ignore failures so one bad kill does not
      # abort the monitoring pass.
      yarn application -kill "$token" > /dev/null || true
      echo '============yarn application -kill '"$token" >> "$OUTFILE"
      # Kill at most one application per monitoring run (original behavior).
      break
    fi
  done
}

main "$@"