版权声明:技术专栏,转载请注明! https://blog.csdn.net/wankunde/article/details/79522973
背景
TimeLineServer v1存在什么问题
在我们的集群只保留 3.5 天运行日志的情况下,LevelDB 数据库的大小就已经达到了 93G。YARN 任务在 post entities 时,数据插入会卡得厉害,进而导致集群出现各种不确定性问题,包括 YARN application 的状态机转换出错、YARN application 无法正常退出、集群经常出现 Socket 连接失败、输入输出流异常关闭等。集群上还出现了因为无法写入 TimeLineServer 而一直僵死的程序,时间长的已有一两个月;高峰时,僵死的程序有上百个。
TimeLineServer v1 问题产生原因
来对比一下,
所以在 Hadoop 2.9 中对 TimeLineServer 进行了重构(即 Timeline Service v2)。详细地址:http://hadoop.apache.org/docs/r2.9.0/hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html
启停脚本(下面是停止 TimeLineServer 的命令;启动时把 stop 换成 start)
sudo su - yarn -c 'export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec;/usr/lib/hadoop-yarn/sbin/yarn-daemon.sh stop timelineserver'
Create Es Index
PUT /timelineserver
{
"settings": {
"number_of_shards": 12,
"number_of_replicas": 0,
"refresh_interval": "30s"
},
"mappings": {
"entity": {
"dynamic_templates": [
{
"strings": {
"match_mapping_type": "string",
"mapping": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
}
}
],
"_ttl": {
"enabled": true,
"default": "7d"
}
},
"domain": {
"dynamic_templates": [
{
"strings": {
"match_mapping_type": "string",
"mapping": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
}
}
],
"_ttl": {
"enabled": true,
"default": "7d"
}
}
}
}
tcpdump 抓包
sudo tcpdump tcp port 8188 -n -s 0 -w /tmp/8188.cap
sudo tcpdump -i eth5 -w /tmp/8188.cap dst port 8188 and src xx.xx.xx.xx