一、部署Prometheus
1.下载Prometheus安装包
wget https://github.com/prometheus/prometheus/releases/download/v2.27.1/prometheus-2.27.1.linux-amd64.tar.gz
2.解压至指定目录
tar -xvf prometheus-2.27.1.linux-amd64.tar.gz -C /usr/local/
cd /usr/local/
mv prometheus-2.27.1.linux-amd64/ prometheus
3.创建Prometheus用户
useradd -M -s /sbin/nologin prometheus
mkdir -p /data/prometheus
chown -R prometheus:prometheus /usr/local/prometheus/ /data/prometheus/
4.创建systemd启动服务
[root@localhost ~]# vim /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus
After=network.target
[Service]
Type=simple
Environment="GOMAXPROCS=4"
User=prometheus
Group=prometheus
ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/local/prometheus/prometheus \
  --config.file=/usr/local/prometheus/prometheus.yml \
  --storage.tsdb.path=/data/prometheus \
  --storage.tsdb.retention.time=30d \
  --web.console.libraries=/usr/local/prometheus/console_libraries \
  --web.console.templates=/usr/local/prometheus/consoles \
  --web.listen-address=0.0.0.0:9090 \
  --web.read-timeout=5m \
  --web.max-connections=10 \
  --query.max-concurrency=20 \
  --query.timeout=2m \
  --web.enable-lifecycle
PrivateTmp=true
PrivateDevices=true
ProtectHome=true
NoNewPrivileges=true
LimitNOFILE=infinity
ReadWriteDirectories=/data/prometheus
ProtectSystem=full
SyslogIdentifier=prometheus
Restart=always
[Install]
WantedBy=multi-user.target
5.启动Prometheus
systemctl daemon-reload
systemctl start prometheus
systemctl status prometheus
6.查看端口
[root@localhost ~]# netstat -tnpl | grep prometheus
tcp6 0 0 :::9090 :::* LISTEN 1745/prometheus
二、部署node_exporter (每台被监控机器需要部署)
1.下载node_exporter安装包
wget https://github.com/prometheus/node_exporter/releases/download/v1.0.1/node_exporter-1.0.1.linux-amd64.tar.gz
2.解压安装
tar -xvf node_exporter-1.0.1.linux-amd64.tar.gz -C /usr/local/
cd /usr/local/
mv node_exporter-1.0.1.linux-amd64/ node_exporter
3.创建Prometheus用户及目录属主
useradd -M -s /sbin/nologin prometheus #若已创建,可省略该步
chown -R prometheus:prometheus /usr/local/node_exporter
4.编写systemd服务
vim /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
After=network.target
[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/usr/local/node_exporter/node_exporter \
  --web.listen-address=0.0.0.0:9100 \
  --web.telemetry-path=/metrics \
  --log.level=info \
  --log.format=logfmt
Restart=always
[Install]
WantedBy=multi-user.target
5.开启node_exporter服务
systemctl daemon-reload
systemctl start node_exporter
systemctl status node_exporter
6.查看端口
[root@localhost ~]# netstat -tnlp | grep node_exporter
tcp6 0 0 :::9100 :::* LISTEN 1845/node_exporter
7.启动好node_exporter后,还需要配置prometheus才能访问node exporter指标
[root@localhost ~]# vim /usr/local/prometheus/prometheus.yml #修改 scrape_configs 内容
global:
  scrape_interval: 15s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        - targets:
rule_files:
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'node'
    static_configs:
      - targets: ['localhost:9100','192.168.0.169:9100','192.168.0.221:9100']
8.重启Prometheus
systemctl restart prometheus
三、AlertManager部署
1.下载alertmanager安装包
wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
2.解压安装
tar xf alertmanager-0.21.0.linux-amd64.tar.gz
mv alertmanager-0.21.0.linux-amd64 /usr/local/alertmanager
3.创建Prometheus用户及目录属主
useradd -M -s /sbin/nologin prometheus #若已创建,可省略该步
mkdir /usr/local/alertmanager/data
chown -R prometheus:prometheus /usr/local/alertmanager
4.创建systemd服务
vim /usr/lib/systemd/system/alertmanager.service
[Unit]
Description=Alertmanager
After=network.target
[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/usr/local/alertmanager/alertmanager \
  --config.file=/usr/local/alertmanager/alertmanager.yml \
  --storage.path=/usr/local/alertmanager/data \
  --web.listen-address=0.0.0.0:9093 \
  --cluster.listen-address=0.0.0.0:9094 \
  --log.level=info \
  --log.format=logfmt
Restart=always
[Install]
WantedBy=multi-user.target
5.启动alertmanager
systemctl daemon-reload
systemctl start alertmanager
6.配置alertmanager
vim /usr/local/prometheus/prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        # 填写Alertmanager地址,否则Prometheus不会把告警推送给Alertmanager
        - targets: ['192.168.0.169:9093']
rule_files:
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'node'
    static_configs:
      - targets: ['localhost:9100','192.168.0.169:9100','192.168.0.221:9100']
  - job_name: 'alertmanager'
    static_configs:
      - targets: ['192.168.0.169:9093']
7.重启Prometheus
[root@localhost ~]# systemctl restart prometheus