diff --git a/docs/Service Monitor and Error Alter Integration Guideline.md b/docs/Service Monitor and Error Alter Integration Guideline.md index 0d9369b9..a45c2353 100644 --- a/docs/Service Monitor and Error Alter Integration Guideline.md +++ b/docs/Service Monitor and Error Alter Integration Guideline.md @@ -1,32 +1,91 @@ -# Prometheus Alter Rule Config +# 1. Prometheus Alert Rule Configuration -Add `prometheusrule.yaml` to `/templates`. -see -``` +## 1.1. Add `prometheusrule.yaml` to `/templates` + +Example: + +> Replace `metrics` (as in `.Values.metrics`) with your service name. For a working example, see freeleaps-ops/freeleaps/helm-pkg/metrics +```yaml {{- /* Copyright Broadcom, Inc. All Rights Reserved. SPDX-License-Identifier: APACHE-2.0 */}} -{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }} +{{- if .Values.metrics.prometheusRule.enabled }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: - name: {{ include "common.names.fullname" . }} - namespace: {{ default (include "common.names.namespace" .) .Values.metrics.prometheusRule.namespace | quote}} - labels: {{- include "common.labels.standard" ( dict "customLabels" .Values.commonLabels "context" $ ) | nindent 4 }} - {{- if .Values.metrics.prometheusRule.additionalLabels }} - {{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 4 }} - {{- end }} - {{- if .Values.commonAnnotations }} - annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + name: {{ .Values.metrics.prometheusRule.name }} + namespace: {{ .Values.metrics.prometheusRule.namespace | quote }} + {{- with .Values.metrics.prometheusRule.labels }} + labels: + {{- toYaml . | nindent 4 }} {{- end }} spec: groups: {{- with .Values.metrics.prometheusRule.rules }} - - name: {{ template "common.names.name" $ }} - rules: {{- include "common.tplvalues.render" (dict "value" .
"context" $) | nindent 8 }} + - name: {{ $.Values.metrics.prometheusRule.name }} + rules: + {{- range . }} + - alert: {{ .alert }} + expr: {{ .expr | quote }} + {{- if .for }} + for: {{ .for }} + {{- end }} + {{- if .labels }} + labels: + {{- toYaml .labels | nindent 12 }} + {{- end }} + {{- if .annotations }} + annotations: + {{- toYaml .annotations | nindent 12 }} + {{- end }} + {{- end }} {{- end }} {{- end }} +``` -``` \ No newline at end of file +## 1.2. Add the `prometheusRule` configuration to values.{alpha/prod}.yaml +Example: + +> Nest this block under your service's key (`metrics` in the template above, which reads `.Values.metrics.prometheusRule`). See freeleaps-ops/freeleaps/helm-pkg/metrics + +```yaml +prometheusRule: + name: freeleaps-metrics + enabled: true + namespace: "freeleaps-monitoring-system" + labels: + release: kube-prometheus-stack + rules: + - alert: FreeleapsMetricsServiceDown + expr: up{job="metrics-service"} == 0 + for: 1m + labels: + severity: critical + service: metrics-service + annotations: + summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service has been down for more than 1 minute." + runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + + - alert: FreeleapsMetricsServiceHighErrorRate + expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: metrics-service + annotations: + summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service error rate is {{ $value }} errors per second." + runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" +``` + +## 1.3.
Verify That the Alert Rule Configuration Is in Effect + +> Redirect the Prometheus UI to your local machine: +![Redirect to local](asserts/image4.png) + +> The newly added rules are listed, which indicates that they are in effect: + +![Newly added alert rules](asserts/image5.png) \ No newline at end of file diff --git a/docs/asserts/image4.png b/docs/asserts/image4.png new file mode 100644 index 00000000..c887606e Binary files /dev/null and b/docs/asserts/image4.png differ diff --git a/docs/asserts/image5.png b/docs/asserts/image5.png new file mode 100644 index 00000000..45cc0088 Binary files /dev/null and b/docs/asserts/image5.png differ