Alertmanager报警历史持久化
# 一.场景
Alertsnitch
是一个可以与 Alertmanager
配合使用的工具,用于记录警报历史和状态的变化。使用 alertsnitch
可以帮助你持久化Alertmanager
的警报数据,以便进行后续的审计或分析。官方地址 (opens new window)。
# 二.部署,准备一个mysql或者pg
# 2.1 mysql建库
mysql> create database alert;
1
# 2.2 创建alert的表结构,sql (opens new window)
-- Bootstrap the schema: create the `Model` table that records the schema
-- version, or abort with an error if a `Model` table already exists in the
-- current database.
DROP PROCEDURE IF EXISTS bootstrap;
DELIMITER //
CREATE PROCEDURE bootstrap()
BEGIN
    -- @exists stays NULL when no `Model` table is found in the current schema.
    -- String literals use single quotes so the script also parses when
    -- sql_mode contains ANSI_QUOTES (where "..." denotes an identifier).
    SET @exists := (
        SELECT 1
        FROM information_schema.tables I
        WHERE I.table_name = 'Model'
          AND I.table_schema = database()
    );
    IF @exists IS NULL THEN
        -- Primary key over a one-value enum: intended to keep this a
        -- single-row table.
        CREATE TABLE `Model` (
            `ID` enum('1') NOT NULL,
            `version` VARCHAR(20) NOT NULL,
            PRIMARY KEY (`ID`)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        -- `ID` is not supplied; it is expected to take the enum's only
        -- value '1' as its implicit default.
        INSERT INTO `Model` (`version`) VALUES ('0.0.1');
    ELSE
        SIGNAL SQLSTATE '42000' SET MESSAGE_TEXT='Model Table Exists, quitting...';
    END IF;
END;
//
DELIMITER ;
-- Execute the procedure
CALL bootstrap();
-- Drop the procedure
DROP PROCEDURE bootstrap;
-- Create the rest of the tables
-- `AlertGroup`: parent table; the label, annotation and `Alert` tables
-- reference its `ID` through their AlertGroupID foreign keys.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in stored text do not make inserts fail.
CREATE TABLE `AlertGroup` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `time` TIMESTAMP NOT NULL,
    `receiver` VARCHAR(100) NOT NULL,
    `status` VARCHAR(50) NOT NULL,
    `externalURL` TEXT NOT NULL,
    `groupKey` VARCHAR(255) NOT NULL,
    -- Secondary indexes: by time alone, and by status then time.
    KEY `idx_time` (`time`) USING BTREE,
    KEY `idx_status_ts` (`status`, `time`) USING BTREE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `GroupLabel`: label/value pairs attached to an `AlertGroup` row.
-- Rows are removed automatically when the parent group is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in label values do not make inserts fail.
CREATE TABLE `GroupLabel` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `AlertGroupID` INT NOT NULL,
    `GroupLabel` VARCHAR(100) NOT NULL,
    `Value` VARCHAR(1000) NOT NULL,
    FOREIGN KEY (AlertGroupID) REFERENCES AlertGroup (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `CommonLabel`: label/value pairs attached to an `AlertGroup` row.
-- Rows are removed automatically when the parent group is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in label values do not make inserts fail.
CREATE TABLE `CommonLabel` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `AlertGroupID` INT NOT NULL,
    `Label` VARCHAR(100) NOT NULL,
    `Value` VARCHAR(1000) NOT NULL,
    FOREIGN KEY (AlertGroupID) REFERENCES AlertGroup (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `CommonAnnotation`: annotation/value pairs attached to an `AlertGroup` row.
-- Rows are removed automatically when the parent group is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in annotation text do not make inserts fail.
CREATE TABLE `CommonAnnotation` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `AlertGroupID` INT NOT NULL,
    `Annotation` VARCHAR(100) NOT NULL,
    `Value` VARCHAR(1000) NOT NULL,
    FOREIGN KEY (AlertGroupID) REFERENCES AlertGroup (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `Alert`: individual alerts belonging to an `AlertGroup` row; parent of the
-- `AlertLabel` and `AlertAnnotation` tables.
-- Rows are removed automatically when the parent group is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in stored text do not make inserts fail.
CREATE TABLE `Alert` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `alertGroupID` INT NOT NULL,
    `status` VARCHAR(50) NOT NULL,
    `startsAt` DATETIME NOT NULL,
    -- Nullable, unlike startsAt.
    `endsAt` DATETIME DEFAULT NULL,
    `generatorURL` TEXT NOT NULL,
    FOREIGN KEY (alertGroupID) REFERENCES AlertGroup (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `AlertLabel`: label/value pairs attached to an `Alert` row.
-- Rows are removed automatically when the parent alert is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in label values do not make inserts fail.
CREATE TABLE `AlertLabel` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `AlertID` INT NOT NULL,
    `Label` VARCHAR(100) NOT NULL,
    `Value` VARCHAR(1000) NOT NULL,
    FOREIGN KEY (AlertID) REFERENCES Alert (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- `AlertAnnotation`: annotation/value pairs attached to an `Alert` row.
-- Rows are removed automatically when the parent alert is deleted.
-- utf8mb4 is used instead of `utf8` (3-byte utf8mb3) so that 4-byte UTF-8
-- characters in annotation text do not make inserts fail.
CREATE TABLE `AlertAnnotation` (
    `ID` INT NOT NULL AUTO_INCREMENT,
    `AlertID` INT NOT NULL,
    `Annotation` VARCHAR(100) NOT NULL,
    `Value` VARCHAR(1000) NOT NULL,
    FOREIGN KEY (AlertID) REFERENCES Alert (ID) ON DELETE CASCADE,
    PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# 2.3 修改Model源版本,sql (opens new window)
-- Migration to model version 0.1.0: add the `fingerprint` column to `Alert`
-- and record the new schema version in `Model`.
ALTER TABLE `Alert`
    ADD `fingerprint` TEXT NOT NULL
;
-- `Model` is keyed by a one-value enum, so `ID` = '1' addresses the version
-- row; the key-based WHERE keeps the statement accepted when the session
-- runs with sql_safe_updates enabled. The literal is single-quoted so it is
-- not parsed as an identifier under ANSI_QUOTES.
UPDATE `Model` SET `version` = '0.1.0' WHERE `ID` = '1';
1
2
3
4
2
3
4
# 2.4启动服务
[root@prod-manage alertmanager]# cat alertsnitch.yaml
# Deployment running a single alertsnitch replica in the kube-mon namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alertsnitch
  namespace: kube-mon
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: alertsnitch
  template:
    metadata:
      labels:
        app.kubernetes.io/name: alertsnitch
    spec:
      containers:
        - image: registry.cn-hangzhou.aliyuncs.com/s-ops/alertsnitch
          name: alertsnitch
          ports:
            - containerPort: 9567
              name: http
          env:
            - name: ALERTSNITCH_BACKEND
              value: mysql
            - name: ALERTSNITCH_DSN
              value: "DB_USER:DB_PASSWORD@(DB_IP:DB_PORT)/alert" # NOTE: replace with the real user, password, host, port and database name
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9567
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /-/health
              port: 9567
            initialDelaySeconds: 60
            periodSeconds: 10
---
# Service exposing the alertsnitch pods' named "http" port on 9567.
apiVersion: v1
kind: Service
metadata:
  name: alertsnitch
  namespace: kube-mon
spec:
  ports:
    - name: http
      port: 9567
      targetPort: http
  selector:
    app.kubernetes.io/name: alertsnitch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# 三.配置
# 3.1 配置alertmanager
[root@prod-manage alertmanager]# cat alertmanager-config.yml
# ConfigMap carrying the Alertmanager configuration as `config.yml`.
# The root route sends every alert to the 'default' webhook receiver and,
# because that child route sets `continue: true`, also to the alertsnitch
# webhook receiver.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alert-config
  namespace: kube-mon
data:
  config.yml: |-
    global:
      # 当 alertmanager 持续多长时间未接收到告警后标记告警状态为 resolved
      resolve_timeout: 5m
      http_config:
        follow_redirects: true
    # 所有报警信息进入后的根路由,用来设置报警的分发策略
    route:
      group_by: ['alertname','instance']
      # 比如 alertname=A cluster=area1 分组发送一个邮件,alertname=A cluster=area2 又会发送一个邮件
      group_wait: 30s # 当收到(某个分组的)告警的时候,等待 N 秒看(这个分组下)是否还有告警,如果有就一起发出去
      group_interval: 120s
      repeat_interval: 15m
      receiver: 'default'
      routes:
        - receiver: 'default'
          continue: true
        - receiver: alertsnitch
          continue: false
    receivers:
      - name: 'default'
        webhook_configs:
          - url: 'http://webhook-dingtalk/dingtalk/send/'
            send_resolved: true
      - name: alertsnitch
        webhook_configs:
          - send_resolved: true
            http_config:
              follow_redirects: true
            url: http://alertsnitch:9567/webhook
            max_alerts: 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# 3.2 grafana安装mysql插件
# 3.3 配置mysql数据源
# 3.4查看效果图
编辑 (opens new window)
上次更新: 2024/03/22, 09:14:33