1.安装prometheus
1.官方下载地址
wget https://github.com/prometheus/prometheus/releases/download/v2.23.0/prometheus-2.23.0.linux-amd64.tar.gz
上传压缩包
[root@docker03 opt]# rz
[root@docker03 opt]# ls
prometheus-2.23.0.linux-amd64.tar.gz
解压
[root@docker03 opt]# tar xf prometheus-2.23.0.linux-amd64.tar.gz
[root@docker03 opt]# mv prometheus-2.23.0.linux-amd64 prometheus
启动
[root@docker03 opt]# cd prometheus
[root@docker03 prometheus]# ./prometheus --config.file="prometheus.yml" &
web页面
http://10.0.0.13:9090/
#客户端docker01 docker02节点
上传压缩包 docker_monitor_node.tar.gz
导入到docker中 docker load -i docker_monitor_node.tar.gz
#启动node-exporter ( node-exporter监控宿主机)
docker run -d -p 9100:9100 -v "/:/host:ro,rslave" --name=node_exporter quay.io/prometheus/node-exporter --path.rootfs /host
#启动cadvisor (cadvisor监控容器)
docker run --volume=/:/rootfs:ro --volume=/var/run:/var/run:rw --volume=/sys:/sys:ro --volume=/var/lib/docker/:/var/lib/docker:ro -p 8080:8080 -d --name=cadvisor google/cadvisor:latest
#prometheus节点
静态发现的配置
vim prometheus.yml
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ['localhost:9090']
动态发现的配置
vim prometheus.yml
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'cadvisor'
    file_sd_configs:
      - files:
          - /opt/prometheus/discovery/discovery_cadvisor.yml
        refresh_interval: 10s
  - job_name: 'node-exporter'
    file_sd_configs:
      - files:
          - /opt/prometheus/discovery/discovery_exporter.yml
        refresh_interval: 10s
或者是
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'cadvisor'
    static_configs:
      - targets: ['10.0.0.11:8080','10.0.0.12:8080']
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['10.0.0.11:9100','10.0.0.12:9100']
修改完重启prometheus
kill 7639    # 7639为示例PID,请以 pgrep prometheus 查到的实际PID为准
./prometheus --config.file="prometheus.yml"
2.Prometheus邮件报警
安装alertmanager
上传压缩包
[root@docker03 opt]# rz
[root@docker03 opt]# ls
alertmanager-0.21.0.linux-amd64.tar.gz
解压
[root@docker03 opt]# tar xf alertmanager-0.21.0.linux-amd64.tar.gz
[root@docker03 opt]# mv alertmanager-0.21.0.linux-amd64 alertmanager
vim alertmanager.yml
global:
  resolve_timeout: 5m
  smtp_from: 'xxxxxx@qq.com'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_auth_username: 'xxxxxx@qq.com'
  smtp_auth_password: 'xxxxxxxxxxxxxxx'  # 授权码
  smtp_require_tls: false
  smtp_hello: 'qq.com'
route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'
receivers:
  - name: 'email'
    email_configs:
      - to: 'xxxxxx@qq.com'
        send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
启动
[root@docker03 opt]# cd alertmanager
[root@docker03 alertmanager]# ./alertmanager --config.file="alertmanager.yml" &
prometheus报警规则
[root@docker03 prometheus]# pwd
/opt/prometheus
在/opt/prometheus下添加node-up.rules
vim node-up.rules
groups:
  - name: node-up
    rules:
      - alert: node-up
        expr: up{job="node-exporter"} == 0
        for: 15s
        labels:
          severity: 1
          team: node
        annotations:
          summary: "{{ $labels.instance }} 已停止运行超过 15s!"
修改prometheus.yml配置文件
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.0.0.13:9093
rule_files:
  - "node-up.rules"
修改完重启prometheus
kill 7639    # 7639为示例PID,请以 pgrep prometheus 查到的实际PID为准
./prometheus --config.file="prometheus.yml"
3.安装grafana
在prometheus节点
yum localinstall grafana-6.3.3-1.x86_64.rpm -y
systemctl start grafana-server.service
systemctl enable grafana-server.service
#访问grafana http://IP:3000,默认账号admin:admin
新建数据源–导入dashboard模板
在https://grafana.com/grafana/dashboards上查找dashboard模板