From cc08c4bbc9f884a451f321c986a5b6ab10bb689d Mon Sep 17 00:00:00 2001
From: Tanguy Herbron
Date: Thu, 15 Sep 2022 00:17:21 +0200
Subject: [PATCH] feat(monitoring): Manifests for Grafana/Prometheus/Loki/Promtail

Add manifests and basic configuration for all monitoring and visualization services
---
Review notes (this section is ignored by `git am`):
  - monitoring/grafana/datasources.yaml: the Prometheus datasource URL now
    targets prometheus-svc:9090, the port exposed by
    monitoring/prometheus/service.yaml. It previously pointed at 3100, which
    is Loki's port. The blob id on that file's `index` line is therefore stale.
  - Line breaks and YAML indentation were reconstructed from a
    whitespace-collapsed copy of this patch; verify nesting against the
    original commit before applying.

 helm/adguard/values.yaml                    |   0
 helm/traefik/dashboard.yaml                 |  27 ----
 helm/traefik/values.yaml                    |  54 -------
 monitoring/grafana/clusterrole.yaml         |  16 ++
 monitoring/grafana/clusterrolebinding.yaml  |  20 +++
 monitoring/grafana/configmap.yaml           |  26 ++++
 monitoring/grafana/datasources.yaml         |  29 ++++
 monitoring/grafana/deployment.yaml          | 131 ++++++++++++++++
 monitoring/grafana/podsecuritypolicy.yaml   |  51 +++++++
 monitoring/grafana/role.yaml                |  18 +++
 monitoring/grafana/rolebinding.yaml         |  21 +++
 monitoring/grafana/service.yaml             |  24 +++
 monitoring/grafana/serviceaccount.yaml      |  13 ++
 monitoring/loki/role.yaml                   |  17 +++
 monitoring/loki/rolebinding.yaml            |  19 +++
 monitoring/loki/service-headless.yaml       |  23 +++
 monitoring/loki/service-memberlist.yaml     |  24 +++
 monitoring/loki/service.yaml                |  24 +++
 monitoring/loki/serviceaccount.yaml         |  15 ++
 monitoring/loki/statefulset.yaml            |  97 ++++++++++++
 monitoring/namespace.yaml                   |   4 +
 monitoring/prometheus/clusterRole.yaml      |  34 +++++
 monitoring/prometheus/config-map.yaml       | 159 ++++++++++++++++++++
 monitoring/prometheus/deployment.yaml       |  45 ++++++
 monitoring/prometheus/service.yaml          |  13 ++
 monitoring/promtail/clusterrole.yaml        |  25 +++
 monitoring/promtail/clusterrolebinding.yaml |  20 +++
 monitoring/promtail/daemonset.yaml          |  93 ++++++++++++
 monitoring/promtail/serviceaccount.yaml     |  13 ++
 29 files changed, 974 insertions(+), 81 deletions(-)
 delete mode 100644 helm/adguard/values.yaml
 delete mode 100644 helm/traefik/dashboard.yaml
 delete mode 100644 helm/traefik/values.yaml
 create mode 100644 monitoring/grafana/clusterrole.yaml
 create mode 100644 monitoring/grafana/clusterrolebinding.yaml
 create mode 100644 monitoring/grafana/configmap.yaml
 create mode 100644 monitoring/grafana/datasources.yaml
 create mode 100644 monitoring/grafana/deployment.yaml
 create mode 100644 monitoring/grafana/podsecuritypolicy.yaml
 create mode 100644 monitoring/grafana/role.yaml
 create mode 100644 monitoring/grafana/rolebinding.yaml
 create mode 100644 monitoring/grafana/service.yaml
 create mode 100644 monitoring/grafana/serviceaccount.yaml
 create mode 100644 monitoring/loki/role.yaml
 create mode 100644 monitoring/loki/rolebinding.yaml
 create mode 100644 monitoring/loki/service-headless.yaml
 create mode 100644 monitoring/loki/service-memberlist.yaml
 create mode 100644 monitoring/loki/service.yaml
 create mode 100644 monitoring/loki/serviceaccount.yaml
 create mode 100644 monitoring/loki/statefulset.yaml
 create mode 100644 monitoring/namespace.yaml
 create mode 100644 monitoring/prometheus/clusterRole.yaml
 create mode 100644 monitoring/prometheus/config-map.yaml
 create mode 100644 monitoring/prometheus/deployment.yaml
 create mode 100644 monitoring/prometheus/service.yaml
 create mode 100644 monitoring/promtail/clusterrole.yaml
 create mode 100644 monitoring/promtail/clusterrolebinding.yaml
 create mode 100644 monitoring/promtail/daemonset.yaml
 create mode 100644 monitoring/promtail/serviceaccount.yaml

diff --git a/helm/adguard/values.yaml b/helm/adguard/values.yaml
deleted file mode 100644
index e69de29..0000000
diff --git a/helm/traefik/dashboard.yaml b/helm/traefik/dashboard.yaml
deleted file mode 100644
index 34d8abb..0000000
--- a/helm/traefik/dashboard.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: traefik.containo.us/v1alpha1
-kind: Middleware
-metadata:
-  name: internal-ipwhitelist
-spec:
-  ipWhiteList:
-    sourceRange:
-      - 10.10.0.1/24
-      - 10.20.0.1/24
-      - 10.42.1.1/24
-    ipStrategy:
-      depth: 0
-
----
-apiVersion: traefik.containo.us/v1alpha1
-kind: IngressRoute
-metadata:
-  name: traefik-dashboard
-spec:
-  entryPoints:
-    - websecure
-  routes:
-    - kind: Rule
-      match: Host(`traefik.k3s.beta`)
-      services:
-        - name: api@internal
-          kind: TraefikService
diff --git a/helm/traefik/values.yaml b/helm/traefik/values.yaml
deleted file mode 100644
index 5b9cbda..0000000
--- a/helm/traefik/values.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-ports:
-  admin:
-    port: 8080
-    expose: true
-    exposePort: 8080
-    protocol: TCP
-  minecrafttcp:
-    port: 25565
-    expose: true
-    exposePort: 25565
-    protocol: TCP
-  web:
-    redirectTo: websecure
-
-additionalArguments:
-  - --log.level=DEBUG
-  - --entrypoints.websecure.http.tls=true
-  - --entrypoints.websecure.http.tls.certresolver=letsencrypt
-  - --entrypoints.websecure.http.tls.domains[0].main=beta.halia.dev
-  - --entrypoints.websecure.http.tls.domains[0].sans=*.beta.halia.dev
-  - --certificatesresolvers.letsencrypt.acme.tlschallenge=true
-  - --certificatesresolvers.letsencrypt.acme.dnschallenge=true
-  - --certificatesresolvers.letsencrypt.acme.dnschallenge.provider=ovh
-  - --certificatesresolvers.letsencrypt.acme.dnschallenge.resolvers=1.1.1.1
-  - --certificatesresolvers.letsencrypt.acme.email=tanguy.herbron@outlook.com
-  - --certificatesresolvers.letsencrypt.acme.storage=/certs/acme.json
-
-env:
-  - name: OVH_APPLICATION_KEY
-    valueFrom:
-      secretKeyRef:
-        key: appKey
-        name: ovh-api-credentials
-  - name: OVH_APPLICATION_SECRET
-    valueFrom:
-      secretKeyRef:
-        key: appSecret
-        name: ovh-api-credentials
-  - name: OVH_CONSUMER_KEY
-    valueFrom:
-      secretKeyRef:
-        key: consumerKey
-        name: ovh-api-credentials
-  - name: OVH_ENDPOINT
-    valueFrom:
-      secretKeyRef:
-        key: endpoint
-        name: ovh-api-credentials
-
-persistence:
-  enabled: true
-  path: /certs
-  size: 128Mi
-  storageClass: "local-path"
diff --git a/monitoring/grafana/clusterrole.yaml b/monitoring/grafana/clusterrole.yaml
new file mode 100644
index 0000000..856c655
--- /dev/null
+++ b/monitoring/grafana/clusterrole.yaml
@@ -0,0 +1,16 @@
+---
+# Source: loki-stack/charts/grafana/templates/clusterrole.yaml
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+  name: loki-grafana-clusterrole
+rules:
+- apiGroups: [""] # "" indicates the core API group
+  resources: ["configmaps", "secrets"]
+  verbs: ["get", "watch", "list"]
diff --git a/monitoring/grafana/clusterrolebinding.yaml b/monitoring/grafana/clusterrolebinding.yaml
new file mode 100644
index 0000000..042f78a
--- /dev/null
+++ b/monitoring/grafana/clusterrolebinding.yaml
@@ -0,0 +1,20 @@
+---
+# Source: loki-stack/charts/grafana/templates/clusterrolebinding.yaml
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: loki-grafana-clusterrolebinding
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+subjects:
+  - kind: ServiceAccount
+    name: loki-grafana
+    namespace: monitoring
+roleRef:
+  kind: ClusterRole
+  name: loki-grafana-clusterrole
+  apiGroup: rbac.authorization.k8s.io
diff --git a/monitoring/grafana/configmap.yaml b/monitoring/grafana/configmap.yaml
new file mode 100644
index 0000000..694b4fc
--- /dev/null
+++ b/monitoring/grafana/configmap.yaml
@@ -0,0 +1,26 @@
+---
+# Source: loki-stack/charts/grafana/templates/configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: loki-grafana
+  namespace: monitoring
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+data:
+  grafana.ini: |
+    [analytics]
+    check_for_updates = true
+    [grafana_net]
+    url = https://grafana.net
+    [log]
+    mode = console
+    [paths]
+    data = /var/lib/grafana/
+    logs = /var/log/grafana
+    plugins = /var/lib/grafana/plugins
+    provisioning = /etc/grafana/provisioning
diff --git a/monitoring/grafana/datasources.yaml b/monitoring/grafana/datasources.yaml
new file mode 100644
index 0000000..3a7be3d
--- /dev/null
+++ b/monitoring/grafana/datasources.yaml
@@ -0,0 +1,29 @@
+---
+# Source: loki-stack/templates/datasources.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: loki-loki-stack
+  namespace: monitoring
+  labels:
+    app: loki-stack
+    chart: loki-stack-2.8.2
+    release: loki
+    heritage: Helm
+    grafana_datasource: "1"
+data:
+  loki-stack-datasource.yaml: |-
+    apiVersion: 1
+    datasources:
+    - name: Loki
+      type: loki
+      access: proxy
+      url: "http://loki:3100"
+      version: 1
+      isDefault: true
+    - name: Prometheus
+      type: prometheus
+      access: proxy
+      url: "http://prometheus-svc:9090"
+      version: 1
+      isDefault: false
diff --git a/monitoring/grafana/deployment.yaml b/monitoring/grafana/deployment.yaml
new file mode 100644
index 0000000..ff581c2
--- /dev/null
+++ b/monitoring/grafana/deployment.yaml
@@ -0,0 +1,131 @@
+---
+# Source: loki-stack/charts/grafana/templates/deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: loki-grafana
+  namespace: monitoring
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: grafana
+      app.kubernetes.io/instance: loki
+  strategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: grafana
+        app.kubernetes.io/instance: loki
+      annotations:
+        checksum/config: ab83ab2703f4417b0cae9771e0b48e1607056d6adac4d9d92f9b1960779034f5
+        checksum/dashboards-json-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
+        checksum/sc-dashboard-provider-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
+        checksum/secret: a8dec7c19ea590ef9d5a0075b8ed84bdf3a82ce47d9c86f5caada045396ab392
+    spec:
+
+      serviceAccountName: loki-grafana
+      automountServiceAccountToken: true
+      securityContext:
+        fsGroup: 472
+        runAsGroup: 472
+        runAsUser: 472
+      enableServiceLinks: true
+      containers:
+        - name: grafana-sc-datasources
+          image: "quay.io/kiwigrid/k8s-sidecar:1.15.6"
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: METHOD
+              value: WATCH
+            - name: LABEL
+              value: "grafana_datasource"
+            - name: FOLDER
+              value: "/etc/grafana/provisioning/datasources"
+            - name: RESOURCE
+              value: "both"
+            - name: REQ_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: loki-grafana
+                  key: admin-user
+            - name: REQ_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: loki-grafana
+                  key: admin-password
+            - name: REQ_URL
+              value: http://localhost:3000/api/admin/provisioning/datasources/reload
+            - name: REQ_METHOD
+              value: POST
+          resources:
+            {}
+          volumeMounts:
+            - name: sc-datasources-volume
+              mountPath: "/etc/grafana/provisioning/datasources"
+        - name: grafana
+          image: "grafana/grafana:8.3.5"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: config
+              mountPath: "/etc/grafana/grafana.ini"
+              subPath: grafana.ini
+            - name: storage
+              mountPath: "/var/lib/grafana"
+            - name: sc-datasources-volume
+              mountPath: "/etc/grafana/provisioning/datasources"
+          ports:
+            - name: service
+              containerPort: 80
+              protocol: TCP
+            - name: grafana
+              containerPort: 3000
+              protocol: TCP
+          env:
+            - name: GF_SECURITY_ADMIN_USER
+              valueFrom:
+                secretKeyRef:
+                  name: loki-grafana
+                  key: admin-user
+            - name: GF_SECURITY_ADMIN_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: loki-grafana
+                  key: admin-password
+            - name: GF_PATHS_DATA
+              value: /var/lib/grafana/
+            - name: GF_PATHS_LOGS
+              value: /var/log/grafana
+            - name: GF_PATHS_PLUGINS
+              value: /var/lib/grafana/plugins
+            - name: GF_PATHS_PROVISIONING
+              value: /etc/grafana/provisioning
+          livenessProbe:
+            failureThreshold: 10
+            httpGet:
+              path: /api/health
+              port: 3000
+            initialDelaySeconds: 60
+            timeoutSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/health
+              port: 3000
+          resources:
+            {}
+      volumes:
+        - name: config
+          configMap:
+            name: loki-grafana
+        - name: storage
+          emptyDir: {}
+        - name: sc-datasources-volume
+          emptyDir: {}
diff --git a/monitoring/grafana/podsecuritypolicy.yaml b/monitoring/grafana/podsecuritypolicy.yaml
new file mode 100644
index 0000000..fd443fd
--- /dev/null
+++ b/monitoring/grafana/podsecuritypolicy.yaml
@@ -0,0 +1,51 @@
+---
+# Source: loki-stack/charts/grafana/templates/podsecuritypolicy.yaml
+apiVersion: policy/v1beta1
+kind: PodSecurityPolicy
+metadata:
+  name: loki-grafana
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+  annotations:
+    seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'docker/default,runtime/default'
+    seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
+    apparmor.security.beta.kubernetes.io/allowedProfileNames: 'runtime/default'
+    apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
+spec:
+  privileged: false
+  allowPrivilegeEscalation: false
+  requiredDropCapabilities:
+    # Default set from Docker, with DAC_OVERRIDE and CHOWN
+    - ALL
+  volumes:
+    - 'configMap'
+    - 'emptyDir'
+    - 'projected'
+    - 'csi'
+    - 'secret'
+    - 'downwardAPI'
+    - 'persistentVolumeClaim'
+  hostNetwork: false
+  hostIPC: false
+  hostPID: false
+  runAsUser:
+    rule: 'RunAsAny'
+  seLinux:
+    rule: 'RunAsAny'
+  supplementalGroups:
+    rule: 'MustRunAs'
+    ranges:
+      # Forbid adding the root group.
+      - min: 1
+        max: 65535
+  fsGroup:
+    rule: 'MustRunAs'
+    ranges:
+      # Forbid adding the root group.
+      - min: 1
+        max: 65535
+  readOnlyRootFilesystem: false
diff --git a/monitoring/grafana/role.yaml b/monitoring/grafana/role.yaml
new file mode 100644
index 0000000..8e073a5
--- /dev/null
+++ b/monitoring/grafana/role.yaml
@@ -0,0 +1,18 @@
+---
+# Source: loki-stack/charts/grafana/templates/role.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: loki-grafana
+  namespace: monitoring
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+rules:
+- apiGroups: ['extensions']
+  resources: ['podsecuritypolicies']
+  verbs: ['use']
+  resourceNames: [loki-grafana]
diff --git a/monitoring/grafana/rolebinding.yaml b/monitoring/grafana/rolebinding.yaml
new file mode 100644
index 0000000..b78e956
--- /dev/null
+++ b/monitoring/grafana/rolebinding.yaml
@@ -0,0 +1,21 @@
+---
+# Source: loki-stack/charts/grafana/templates/rolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: loki-grafana
+  namespace: monitoring
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: loki-grafana
+subjects:
+- kind: ServiceAccount
+  name: loki-grafana
+  namespace: monitoring
diff --git a/monitoring/grafana/service.yaml b/monitoring/grafana/service.yaml
new file mode 100644
index 0000000..d878dac
--- /dev/null
+++ b/monitoring/grafana/service.yaml
@@ -0,0 +1,24 @@
+---
+# Source: loki-stack/charts/grafana/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: loki-grafana
+  namespace: monitoring
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - name: service
+      port: 80
+      protocol: TCP
+      targetPort: 3000
+
+  selector:
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
diff --git a/monitoring/grafana/serviceaccount.yaml b/monitoring/grafana/serviceaccount.yaml
new file mode 100644
index 0000000..a523936
--- /dev/null
+++ b/monitoring/grafana/serviceaccount.yaml
@@ -0,0 +1,13 @@
+---
+# Source: loki-stack/charts/grafana/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    helm.sh/chart: grafana-6.24.1
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "8.3.5"
+    app.kubernetes.io/managed-by: Helm
+  name: loki-grafana
+  namespace: monitoring
diff --git a/monitoring/loki/role.yaml b/monitoring/loki/role.yaml
new file mode 100644
index 0000000..bfba027
--- /dev/null
+++ b/monitoring/loki/role.yaml
@@ -0,0 +1,17 @@
+---
+# Source: loki-stack/charts/loki/templates/role.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: loki
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+rules:
+- apiGroups: ['extensions']
+  resources: ['podsecuritypolicies']
+  verbs: ['use']
+  resourceNames: [loki]
diff --git a/monitoring/loki/rolebinding.yaml b/monitoring/loki/rolebinding.yaml
new file mode 100644
index 0000000..e06e352
--- /dev/null
+++ b/monitoring/loki/rolebinding.yaml
@@ -0,0 +1,19 @@
+---
+# Source: loki-stack/charts/loki/templates/rolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: loki
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: loki
+subjects:
+- kind: ServiceAccount
+  name: loki
diff --git a/monitoring/loki/service-headless.yaml b/monitoring/loki/service-headless.yaml
new file mode 100644
index 0000000..9ed57f8
--- /dev/null
+++ b/monitoring/loki/service-headless.yaml
@@ -0,0 +1,23 @@
+---
+# Source: loki-stack/charts/loki/templates/service-headless.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: loki-headless
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+    variant: headless
+spec:
+  clusterIP: None
+  ports:
+    - port: 3100
+      protocol: TCP
+      name: http-metrics
+      targetPort: http-metrics
+  selector:
+    app: loki
+    release: loki
diff --git a/monitoring/loki/service-memberlist.yaml b/monitoring/loki/service-memberlist.yaml
new file mode 100644
index 0000000..b6ced42
--- /dev/null
+++ b/monitoring/loki/service-memberlist.yaml
@@ -0,0 +1,24 @@
+---
+# Source: loki-stack/charts/loki/templates/service-memberlist.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: loki-memberlist
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+spec:
+  type: ClusterIP
+  clusterIP: None
+  publishNotReadyAddresses: true
+  ports:
+    - name: http
+      port: 7946
+      targetPort: memberlist-port
+      protocol: TCP
+  selector:
+    app: loki
+    release: loki
diff --git a/monitoring/loki/service.yaml b/monitoring/loki/service.yaml
new file mode 100644
index 0000000..7a20d8c
--- /dev/null
+++ b/monitoring/loki/service.yaml
@@ -0,0 +1,24 @@
+---
+# Source: loki-stack/charts/loki/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: loki
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+  annotations:
+    {}
+spec:
+  type: ClusterIP
+  ports:
+    - port: 3100
+      protocol: TCP
+      name: http-metrics
+      targetPort: http-metrics
+  selector:
+    app: loki
+    release: loki
diff --git a/monitoring/loki/serviceaccount.yaml b/monitoring/loki/serviceaccount.yaml
new file mode 100644
index 0000000..8dd4098
--- /dev/null
+++ b/monitoring/loki/serviceaccount.yaml
@@ -0,0 +1,15 @@
+---
+# Source: loki-stack/charts/loki/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+  annotations:
+    {}
+  name: loki
+  namespace: monitoring
+automountServiceAccountToken: true
diff --git a/monitoring/loki/statefulset.yaml b/monitoring/loki/statefulset.yaml
new file mode 100644
index 0000000..c53b163
--- /dev/null
+++ b/monitoring/loki/statefulset.yaml
@@ -0,0 +1,97 @@
+---
+# Source: loki-stack/charts/loki/templates/statefulset.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: loki
+  namespace: monitoring
+  labels:
+    app: loki
+    chart: loki-2.16.0
+    release: loki
+    heritage: Helm
+  annotations:
+    {}
+spec:
+  podManagementPolicy: OrderedReady
+  replicas: 1
+  selector:
+    matchLabels:
+      app: loki
+      release: loki
+  serviceName: loki-headless
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: loki
+        name: loki
+        release: loki
+      annotations:
+        checksum/config: 70f817aa5a2dd5f771aca66233ce0b140c925212f36795fdeb95102ca96db046
+        prometheus.io/port: http-metrics
+        prometheus.io/scrape: "true"
+    spec:
+      serviceAccountName: loki
+      securityContext:
+        fsGroup: 10001
+        runAsGroup: 10001
+        runAsNonRoot: true
+        runAsUser: 10001
+      initContainers:
+        []
+      containers:
+        - name: loki
+          image: "grafana/loki:2.6.1"
+          imagePullPolicy: IfNotPresent
+          args:
+            - "-config.file=/etc/loki/loki.yaml"
+          volumeMounts:
+            - name: tmp
+              mountPath: /tmp
+            - name: config
+              mountPath: /etc/loki
+            - name: storage
+              mountPath: "/data"
+              subPath:
+          ports:
+            - name: http-metrics
+              containerPort: 3100
+              protocol: TCP
+            - name: grpc
+              containerPort: 9095
+              protocol: TCP
+            - name: memberlist-port
+              containerPort: 7946
+              protocol: TCP
+          livenessProbe:
+            httpGet:
+              path: /ready
+              port: http-metrics
+            initialDelaySeconds: 45
+          readinessProbe:
+            httpGet:
+              path: /ready
+              port: http-metrics
+            initialDelaySeconds: 45
+          resources:
+            {}
+          securityContext:
+            readOnlyRootFilesystem: true
+          env:
+      nodeSelector:
+        {}
+      affinity:
+        {}
+      tolerations:
+        []
+      terminationGracePeriodSeconds: 4800
+      volumes:
+        - name: tmp
+          emptyDir: {}
+        - name: config
+          secret:
+            secretName: loki
+        - name: storage
+          emptyDir: {}
diff --git a/monitoring/namespace.yaml b/monitoring/namespace.yaml
new file mode 100644
index 0000000..d325236
--- /dev/null
+++ b/monitoring/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: monitoring
diff --git a/monitoring/prometheus/clusterRole.yaml b/monitoring/prometheus/clusterRole.yaml
new file mode 100644
index 0000000..cb1bf47
--- /dev/null
+++ b/monitoring/prometheus/clusterRole.yaml
@@ -0,0 +1,34 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs: ["get", "list", "watch"]
+  - apiGroups:
+      - extensions
+    resources:
+      - ingresses
+    verbs: ["get", "list", "watch"]
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: monitoring
diff --git a/monitoring/prometheus/config-map.yaml b/monitoring/prometheus/config-map.yaml
new file mode 100644
index 0000000..e009197
--- /dev/null
+++ b/monitoring/prometheus/config-map.yaml
@@ -0,0 +1,159 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-server-conf
+  labels:
+    name: prometheus-server-conf
+  namespace: monitoring
+data:
+  prometheus.rules: |-
+    groups:
+    - name: devopscube demo alert
+      rules:
+      - alert: High Pod Memory
+        expr: sum(container_memory_usage_bytes) > 1
+        for: 1m
+        labels:
+          severity: slack
+        annotations:
+          summary: High Memory Usage
+  prometheus.yml: |-
+    global:
+      scrape_interval: 5s
+      evaluation_interval: 5s
+    rule_files:
+      - /etc/prometheus/prometheus.rules
+    alerting:
+      alertmanagers:
+      - scheme: http
+        static_configs:
+        - targets:
+          - "alertmanager.monitoring.svc:9093"
+
+    scrape_configs:
+      - job_name: 'node-exporter'
+        kubernetes_sd_configs:
+          - role: endpoints
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_endpoints_name]
+          regex: 'node-exporter'
+          action: keep
+
+      - job_name: 'kubernetes-apiservers'
+
+        kubernetes_sd_configs:
+        - role: endpoints
+        scheme: https
+
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+          action: keep
+          regex: default;kubernetes;https
+
+      - job_name: 'kubernetes-nodes'
+
+        scheme: https
+
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        kubernetes_sd_configs:
+        - role: node
+
+        relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+        - target_label: __address__
+          replacement: kubernetes.default.svc:443
+        - source_labels: [__meta_kubernetes_node_name]
+          regex: (.+)
+          target_label: __metrics_path__
+          replacement: /api/v1/nodes/${1}/proxy/metrics
+
+      - job_name: 'kubernetes-pods'
+
+        kubernetes_sd_configs:
+        - role: pod
+
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+          action: keep
+          regex: true
+        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+          action: replace
+          target_label: __metrics_path__
+          regex: (.+)
+        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+          action: replace
+          regex: ([^:]+)(?::\d+)?;(\d+)
+          replacement: $1:$2
+          target_label: __address__
+        - action: labelmap
+          regex: __meta_kubernetes_pod_label_(.+)
+        - source_labels: [__meta_kubernetes_namespace]
+          action: replace
+          target_label: kubernetes_namespace
+        - source_labels: [__meta_kubernetes_pod_name]
+          action: replace
+          target_label: kubernetes_pod_name
+
+      - job_name: 'kube-state-metrics'
+        static_configs:
+          - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']
+
+      - job_name: 'kubernetes-cadvisor'
+
+        scheme: https
+
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        kubernetes_sd_configs:
+        - role: node
+
+        relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+        - target_label: __address__
+          replacement: kubernetes.default.svc:443
+        - source_labels: [__meta_kubernetes_node_name]
+          regex: (.+)
+          target_label: __metrics_path__
+          replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
+
+      - job_name: 'kubernetes-service-endpoints'
+
+        kubernetes_sd_configs:
+        - role: endpoints
+
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+          action: keep
+          regex: true
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+          action: replace
+          target_label: __scheme__
+          regex: (https?)
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+          action: replace
+          target_label: __metrics_path__
+          regex: (.+)
+        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+          action: replace
+          target_label: __address__
+          regex: ([^:]+)(?::\d+)?;(\d+)
+          replacement: $1:$2
+        - action: labelmap
+          regex: __meta_kubernetes_service_label_(.+)
+        - source_labels: [__meta_kubernetes_namespace]
+          action: replace
+          target_label: kubernetes_namespace
+        - source_labels: [__meta_kubernetes_service_name]
+          action: replace
+          target_label: kubernetes_name
diff --git a/monitoring/prometheus/deployment.yaml b/monitoring/prometheus/deployment.yaml
new file mode 100644
index 0000000..a6dfebd
--- /dev/null
+++ b/monitoring/prometheus/deployment.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: monitoring
+  labels:
+    app: prometheus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      containers:
+        - name: prometheus
+          image: prom/prometheus
+          args:
+            - "--storage.tsdb.retention.time=12h"
+            - "--config.file=/etc/prometheus/prometheus.yml"
+            - "--storage.tsdb.path=/prometheus/"
+          ports:
+            - containerPort: 9090
+          resources:
+            requests:
+              cpu: 500m
+              memory: 500M
+            limits:
+              cpu: 1
+              memory: 1Gi
+          volumeMounts:
+            - name: prometheus-config-volume
+              mountPath: /etc/prometheus
+            - name: prometheus-storage-volume
+              mountPath: /prometheus/
+      volumes:
+        - name: prometheus-config-volume
+          configMap:
+            defaultMode: 420
+            name: prometheus-server-conf
+        - name: prometheus-storage-volume
+          emptyDir: {}
diff --git a/monitoring/prometheus/service.yaml b/monitoring/prometheus/service.yaml
new file mode 100644
index 0000000..e4923d0
--- /dev/null
+++ b/monitoring/prometheus/service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus-svc
+  namespace: monitoring
+spec:
+  ports:
+    - name: http
+      port: 9090
+      protocol: TCP
+      targetPort: 9090
+  selector:
+    app: prometheus
diff --git a/monitoring/promtail/clusterrole.yaml b/monitoring/promtail/clusterrole.yaml
new file mode 100644
index 0000000..818422b
--- /dev/null
+++ b/monitoring/promtail/clusterrole.yaml
@@ -0,0 +1,25 @@
+---
+# Source: loki-stack/charts/promtail/templates/clusterrole.yaml
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: loki-promtail
+  labels:
+    helm.sh/chart: promtail-6.3.0
+    app.kubernetes.io/name: promtail
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "2.6.1"
+    app.kubernetes.io/managed-by: Helm
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs:
+      - get
+      - watch
+      - list
diff --git a/monitoring/promtail/clusterrolebinding.yaml b/monitoring/promtail/clusterrolebinding.yaml
new file mode 100644
index 0000000..7e7a358
--- /dev/null
+++ b/monitoring/promtail/clusterrolebinding.yaml
@@ -0,0 +1,20 @@
+---
+# Source: loki-stack/charts/promtail/templates/clusterrolebinding.yaml
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: loki-promtail
+  labels:
+    helm.sh/chart: promtail-6.3.0
+    app.kubernetes.io/name: promtail
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "2.6.1"
+    app.kubernetes.io/managed-by: Helm
+subjects:
+  - kind: ServiceAccount
+    name: loki-promtail
+    namespace: monitoring
+roleRef:
+  kind: ClusterRole
+  name: loki-promtail
+  apiGroup: rbac.authorization.k8s.io
diff --git a/monitoring/promtail/daemonset.yaml b/monitoring/promtail/daemonset.yaml
new file mode 100644
index 0000000..8583d39
--- /dev/null
+++ b/monitoring/promtail/daemonset.yaml
@@ -0,0 +1,93 @@
+---
+# Source: loki-stack/charts/promtail/templates/daemonset.yaml
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: loki-promtail
+  namespace: monitoring
+  labels:
+    helm.sh/chart: promtail-6.3.0
+    app.kubernetes.io/name: promtail
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "2.6.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: promtail
+      app.kubernetes.io/instance: loki
+  updateStrategy:
+    {}
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: promtail
+        app.kubernetes.io/instance: loki
+      annotations:
+        checksum/config: 807310f261dd2585fdcb196f53c15ad3295af56ceac4869de7beaa331ecc9a3c
+    spec:
+      serviceAccountName: loki-promtail
+      securityContext:
+        runAsGroup: 0
+        runAsUser: 0
+      containers:
+        - name: promtail
+          image: "docker.io/grafana/promtail:2.6.1"
+          imagePullPolicy: IfNotPresent
+          args:
+            - "-config.file=/etc/promtail/promtail.yaml"
+          volumeMounts:
+            - name: config
+              mountPath: /etc/promtail
+            - mountPath: /run/promtail
+              name: run
+            - mountPath: /var/lib/docker/containers
+              name: containers
+              readOnly: true
+            - mountPath: /var/log/pods
+              name: pods
+              readOnly: true
+          env:
+            - name: HOSTNAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          ports:
+            - name: http-metrics
+              containerPort: 3101
+              protocol: TCP
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+          readinessProbe:
+            failureThreshold: 5
+            httpGet:
+              path: /ready
+              port: http-metrics
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+      tolerations:
+        - effect: NoSchedule
+          key: node-role.kubernetes.io/master
+          operator: Exists
+        - effect: NoSchedule
+          key: node-role.kubernetes.io/control-plane
+          operator: Exists
+      volumes:
+        - name: config
+          secret:
+            secretName: loki-promtail
+        - hostPath:
+            path: /run/promtail
+          name: run
+        - hostPath:
+            path: /var/lib/docker/containers
+          name: containers
+        - hostPath:
+            path: /var/log/pods
+          name: pods
diff --git a/monitoring/promtail/serviceaccount.yaml b/monitoring/promtail/serviceaccount.yaml
new file mode 100644
index 0000000..1cec5ac
--- /dev/null
+++ b/monitoring/promtail/serviceaccount.yaml
@@ -0,0 +1,13 @@
+---
+# Source: loki-stack/charts/promtail/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: loki-promtail
+  namespace: monitoring
+  labels:
+    helm.sh/chart: promtail-6.3.0
+    app.kubernetes.io/name: promtail
+    app.kubernetes.io/instance: loki
+    app.kubernetes.io/version: "2.6.1"
+    app.kubernetes.io/managed-by: Helm