# alertmanager.yaml
---
  # Settings in the global block are visible to every other section of this
  # configuration file and act as their defaults.
  global:
    # An alert is either "firing" or "resolved". resolve_timeout is the default
    # used when an incoming alert carries no end time: if the alert is not
    # updated again within this period, Alertmanager declares it resolved and
    # stops sending firing notifications for it.
    resolve_timeout: 1h
    # Email (SMTP) notification settings.
    # NOTE(review): credentials are stored in plain text in this file; prefer
    # loading them from a secret store.
    smtp_smarthost: 'smtp.exmail.qq.com:25'
    smtp_from: 'dukuan@xxx.com'
    smtp_auth_username: 'dukuan@xxx.com'
    smtp_auth_password: 'DKxxx'
    # HipChat notification settings (disabled).
    # hipchat_auth_token: '123456789'
    # hipchat_auth_url: 'https://hipchat.foobar.org/'
    # WeChat Work notification settings.
    wechat_api_url: 'https://qyapi.weixin.qq.com/cgi-bin/'
    wechat_api_secret: 'JJ'
    wechat_api_corp_id: 'ww'
  # Notification template files; the glob loads every *.tmpl file in the
  # directory.
  templates:
    - '/etc/alertmanager/config/*.tmpl'
  # route: the root of the routing tree. Every alert enters here, so the root
  # carries no matchers. Child routes inherit any option they do not set from
  # their parent. An alert descends into a child route when every label pair
  # listed under that child's match / match_re is present in the alert's labels
  # (match compares values exactly, match_re compares them against a regular
  # expression).
  # NOTE(review): newer Alertmanager releases deprecate match / match_re in
  # favour of `matchers`; confirm against the deployed version before migrating.
  route:
    # Alerts sharing the same values for all of these labels are batched into
    # one group, e.g. every alert with cluster=A and alertname=LatencyHigh (and
    # equal job, service and severity) ends up in a single notification.
    group_by: ['job', 'alertname', 'cluster', 'service', 'severity']
    # When a new group is created, wait this long before sending the first
    # notification so that alerts firing in quick succession are merged into
    # one message.
    group_wait: 30s
    # Minimum wait before notifying about alerts newly added to a group that
    # has already been notified.
    group_interval: 5m
    # If a notification was sent successfully and the alert is still not
    # resolved after this interval, the notification is sent again.
    repeat_interval: 12h
    # Default receiver: alerts that match none of the child routes go here.
    receiver: 'wechat'
    # All options above are inherited by the child routes below unless a child
    # overrides them.
    # Child routes.
    routes:
      # Regular-expression match on the `service` label; matching alerts are
      # sent to this route's receiver.
      - match_re:
          service: '^(foo1|foo2|baz)$'
        receiver: 'wechat'
        # Alerts for these services that also carry severity=critical take the
        # nested route below; all others stay with the parent's receiver.
        routes:
          - match:
              severity: critical
            receiver: 'wechat'
      # Alerts for the database service.
      - match:
          service: database
        receiver: 'wechat'
      # Any remaining alert with severity=critical.
      - match:
          severity: critical
        receiver: 'wechat'
  # Inhibition rules: while a critical alert is firing, mute warning alerts.
  inhibit_rules:
    - source_match:
        severity: 'critical'
      target_match:
        severity: 'warning'
      # Apply inhibition only if these labels are the same on both alerts.
      # NOTE(review): with `equal` commented out, any firing critical alert
      # mutes every warning alert regardless of origin; confirm this is
      # intended.
      # equal: ['alertname', 'cluster', 'service']
  # Receiver definitions.
  receivers:
    - name: 'team-ops-mails'
      email_configs:
        - to: 'dukuan@xxx.com'
    - name: 'wechat'
      wechat_configs:
        - send_resolved: true
          corp_id: 'ww'
          api_secret: 'JJ'
          to_tag: '1'
          agent_id: '1000002'
          api_url: 'https://qyapi.weixin.qq.com/cgi-bin/'
          message: '{{ template "wechat.default.message" . }}'
    # Example receivers (disabled).
    # - name: 'team-X-pager'
    #   email_configs:
    #     - to: 'team-X+alerts-critical@example.org'
    #   pagerduty_configs:
    #     - service_key: <team-X-key>
    #
    # - name: 'team-Y-mails'
    #   email_configs:
    #     - to: 'team-Y+alerts@example.org'
    #
    # - name: 'team-Y-pager'
    #   pagerduty_configs:
    #     - service_key: <team-Y-key>
    #
    # - name: 'team-DB-pager'
    #   pagerduty_configs:
    #     - service_key: <team-DB-key>
    #
    # - name: 'team-X-hipchat'
    #   hipchat_configs:
    #     - auth_token: <auth_token>
    #       room_id: 85
    #       message_format: html
    #       notify: true