commit 57712751f160fe188703b4f58d6a66f0af6d54ac Author: Nikolai Rodionov Date: Fri Feb 21 09:42:13 2025 +0100 chore: mirror k8s-monitoring-2.0.12 upstream_repo: diff --git a/charts/k8s-monitoring/.helmignore b/charts/k8s-monitoring/.helmignore new file mode 100644 index 0000000..d93605e --- /dev/null +++ b/charts/k8s-monitoring/.helmignore @@ -0,0 +1,8 @@ +.ct.yaml +data-alloy +docs +schema-mods +tests +Makefile +README.md +README.md.gotmpl diff --git a/charts/k8s-monitoring/.updatecli-alloy.yaml b/charts/k8s-monitoring/.updatecli-alloy.yaml new file mode 100644 index 0000000..13d2873 --- /dev/null +++ b/charts/k8s-monitoring/.updatecli-alloy.yaml @@ -0,0 +1,67 @@ +--- +name: Update dependency "alloy" for Helm chart "k8s-monitoring" +sources: + alloy: + name: Get latest "alloy" Helm chart version + kind: helmchart + spec: + name: alloy + url: https://grafana.github.io/helm-charts + versionfilter: + kind: semver + pattern: '*' +conditions: + alloy: + name: Ensure Helm chart dependency "alloy" is specified + kind: yaml + spec: + file: charts/k8s-monitoring/Chart.yaml + key: $.dependencies[10].name + value: alloy + disablesourceinput: true +targets: + alloy-metrics: + name: Bump Helm chart dependency "alloy-metrics" for Helm chart "k8s-monitoring" + kind: helmchart + spec: + file: Chart.yaml + key: $.dependencies[10].version + name: charts/k8s-monitoring + versionincrement: none + sourceid: alloy + alloy-singleton: + name: Bump Helm chart dependency "alloy-singleton" for Helm chart "k8s-monitoring" + kind: helmchart + spec: + file: Chart.yaml + key: $.dependencies[11].version + name: charts/k8s-monitoring + versionincrement: none + sourceid: alloy + alloy-logs: + name: Bump Helm chart dependency "alloy-logs" for Helm chart "k8s-monitoring" + kind: helmchart + spec: + file: Chart.yaml + key: $.dependencies[12].version + name: charts/k8s-monitoring + versionincrement: none + sourceid: alloy + alloy-receiver: + name: Bump Helm chart dependency "alloy-receiver" for Helm chart "k8s-monitoring" + kind: helmchart + spec: + file: Chart.yaml + key: $.dependencies[13].version + name: charts/k8s-monitoring + versionincrement: none + sourceid: alloy + alloy-profiles: + name: Bump Helm chart dependency "alloy-profiles" for Helm chart "k8s-monitoring" + kind: helmchart + spec: + file: Chart.yaml + key: $.dependencies[14].version + name: charts/k8s-monitoring + versionincrement: none + sourceid: alloy diff --git a/charts/k8s-monitoring/Chart.lock b/charts/k8s-monitoring/Chart.lock new file mode 100644 index 0000000..694f7e3 --- /dev/null +++ b/charts/k8s-monitoring/Chart.lock @@ -0,0 +1,48 @@ +dependencies: +- name: feature-annotation-autodiscovery + repository: "" + version: 1.0.0 +- name: feature-application-observability + repository: "" + version: 1.0.0 +- name: feature-auto-instrumentation + repository: "" + version: 1.0.0 +- name: feature-cluster-events + repository: "" + version: 1.0.0 +- name: feature-cluster-metrics + repository: "" + version: 1.0.0 +- name: feature-integrations + repository: "" + version: 1.0.0 +- name: feature-node-logs + repository: "" + version: 1.0.0 +- name: feature-pod-logs + repository: "" + version: 1.0.0 +- name: feature-profiling + repository: "" + version: 1.0.0 +- name: feature-prometheus-operator-objects + repository: "" + version: 1.0.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- name: alloy + repository: 
https://grafana.github.io/helm-charts + version: 0.11.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +digest: sha256:84065dcd958d8eefc9b179f4f27a814f79a35ab687fd5f624e90f443d59bac64 +generated: "2025-01-24T13:18:52.317472-06:00" diff --git a/charts/k8s-monitoring/Chart.yaml b/charts/k8s-monitoring/Chart.yaml new file mode 100644 index 0000000..08f5cb7 --- /dev/null +++ b/charts/k8s-monitoring/Chart.yaml @@ -0,0 +1,89 @@ +apiVersion: v2 +appVersion: 2.0.12 +dependencies: +- alias: annotationAutodiscovery + condition: annotationAutodiscovery.enabled + name: feature-annotation-autodiscovery + repository: "" + version: 1.0.0 +- alias: applicationObservability + condition: applicationObservability.enabled + name: feature-application-observability + repository: "" + version: 1.0.0 +- alias: autoInstrumentation + condition: autoInstrumentation.enabled + name: feature-auto-instrumentation + repository: "" + version: 1.0.0 +- alias: clusterEvents + condition: clusterEvents.enabled + name: feature-cluster-events + repository: "" + version: 1.0.0 +- alias: clusterMetrics + condition: clusterMetrics.enabled + name: feature-cluster-metrics + repository: "" + version: 1.0.0 +- alias: integrations + name: feature-integrations + repository: "" + version: 1.0.0 +- alias: nodeLogs + condition: nodeLogs.enabled + name: feature-node-logs + repository: "" + version: 1.0.0 +- alias: podLogs + condition: podLogs.enabled + name: feature-pod-logs + repository: "" + version: 1.0.0 +- alias: profiling + condition: profiling.enabled + name: feature-profiling + repository: "" + version: 1.0.0 +- alias: prometheusOperatorObjects + condition: prometheusOperatorObjects.enabled + name: feature-prometheus-operator-objects + repository: "" + version: 1.0.0 +- alias: alloy-metrics + condition: alloy-metrics.enabled + name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- alias: alloy-singleton + condition: alloy-singleton.enabled + name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- alias: alloy-logs + condition: alloy-logs.enabled + name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- alias: alloy-receiver + condition: alloy-receiver.enabled + name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +- alias: alloy-profiles + condition: alloy-profiles.enabled + name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.11.0 +description: Capture all telemetry data from your Kubernetes cluster. +icon: https://raw.githubusercontent.com/grafana/grafana/main/public/img/grafana_icon.svg +maintainers: +- email: pete.wall@grafana.com + name: petewall +- email: robert.lankford@grafana.com + name: rlankfo +name: k8s-monitoring +sources: +- https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring +type: application +version: 2.0.12 diff --git a/charts/k8s-monitoring/alloyModules/LICENSE b/charts/k8s-monitoring/alloyModules/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/charts/k8s-monitoring/alloyModules/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
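For context on the vendored Alloy modules that follow: each module file declares reusable components (typically a "kubernetes" discovery component and a "scrape" component) whose optional arguments fall back to defaults via coalesce(). A consuming Alloy configuration imports the module file and wires the declared components together. Below is a minimal sketch of how the etcd module might be consumed; the mounted module path and the remote_write endpoint URL are illustrative assumptions, not values taken from this chart.

// Import the vendored module; this filename is an assumed mount path for illustration.
import.file "etcd" {
  filename = "/etc/alloy/modules/databases/kv/etcd/metrics.alloy"
}

// Discover etcd pods using the module's defaults
// (label selector "app.kubernetes.io/component=etcd", port name "metrics", all namespaces).
etcd.kubernetes "pods" { }

// Scrape the discovered targets and forward the kept metrics to a destination.
etcd.scrape "metrics" {
  targets    = etcd.kubernetes.pods.output
  forward_to = [prometheus.remote_write.default.receiver]
}

// Hypothetical destination; replace the URL with a real Prometheus-compatible endpoint.
prometheus.remote_write "default" {
  endpoint {
    url = "https://prometheus.example.com/api/v1/write"
  }
}

Because every argument other than "targets" and "forward_to" is optional and resolved with coalesce(), passing null (or omitting an argument entirely) yields the documented default rather than overriding it with null.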
diff --git a/charts/k8s-monitoring/alloyModules/modules/databases/kv/etcd/metrics.alloy b/charts/k8s-monitoring/alloyModules/modules/databases/kv/etcd/metrics.alloy new file mode 100644 index 0000000..1399d72 --- /dev/null +++ b/charts/k8s-monitoring/alloyModules/modules/databases/kv/etcd/metrics.alloy @@ -0,0 +1,244 @@ +/* +Module: job-etcd +Description: Scrapes etcd + +Note: Every argument except for "forward_to" is optional, and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using the coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will + does not override the value passed in, where coalesce() will return the first non-null value. +*/ +declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/component=etcd\"])" + optional = true + } + + argument "port_name" { + comment = "The of the port to scrape metrics from (default: metrics)" + optional = true + } + + // etcd service discovery for all of the pods + discovery.kubernetes "etcd" { + role = "pod" + + selectors { + role = "pod" + field = string.join(coalesce(argument.field_selectors.value, []), ",") + label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/component=etcd"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // etcd relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.etcd.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "metrics") + "@Running@true" + action = "keep" + } + + // drop any init containers + rule { + source_labels = ["__meta_kubernetes_pod_container_init"] + regex = "true" + action = "drop" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // set a workload label + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", 
+ "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + argument "port" { + comment = "The port to use (default: 9150)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + string.format("%s", coalesce(argument.port.value, "9150")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all etcd metric (default: integrations/etcd)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // etcd scrape job + prometheus.scrape "etcd" { + job_name = coalesce(argument.job_label.value, "integrations/etcd") + forward_to = [prometheus.relabel.etcd.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // etcd metric relabelings (post-scrape) + prometheus.relabel "etcd" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(up|etcd_(commands_total|connections_total|current_(bytes|connections|items)|items_(evicted_total|total)|max_connections|read_bytes_total|up|uptime_seconds|version|written_bytes_total))") + action = "keep" + } + } +} diff --git a/charts/k8s-monitoring/alloyModules/modules/kubernetes/cert-manager/metrics.alloy b/charts/k8s-monitoring/alloyModules/modules/kubernetes/cert-manager/metrics.alloy new file mode 100644 index 0000000..b5d83b4 --- /dev/null +++ b/charts/k8s-monitoring/alloyModules/modules/kubernetes/cert-manager/metrics.alloy @@ -0,0 +1,223 @@ +/* +Module: job-cert-manager +Description: Scrapes cert-manager + +Note: Every argument except for "forward_to" is optional, and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using the coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will + does not override the value passed in, where coalesce() will return the first non-null value. 
+*/ +declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=cert-manager\"])" + optional = true + } + + argument "port_name" { + comment = "The of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // cert-manager service discovery for all of the pods + discovery.kubernetes "cert_manager" { + role = "pod" + + selectors { + role = "pod" + field = string.join(coalesce(argument.field_selectors.value, []), ",") + label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=cert-manager"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // cert-manager relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.cert_manager.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true" + action = "keep" + } + + // drop any init containers + rule { + source_labels = ["__meta_kubernetes_pod_container_init"] + regex = "true" + action = "drop" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // set a workload label + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare 
"scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all cert-manager metric (default: integrations/cert-manager)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // cert-manager scrape job + prometheus.scrape "cert_manager" { + job_name = coalesce(argument.job_label.value, "integrations/cert-manager") + forward_to = [prometheus.relabel.cert_manager.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // cert-manager metric relabelings (post-scrape) + prometheus.relabel "cert_manager" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(up|(certmanager_(certificate_(expiration_timestamp_seconds|ready_status)|clock_time_seconds|controller_sync_call_count|http_acme_client_request_(count|duration_seconds_(count|sum)))|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|memory_usage_bytes|network_(receive|transmit)_bytes_total)|kube_pod_container_resource_(limits|requests)_(cpu_cores|memory_bytes)))") + action = "keep" + } + } +} diff --git a/charts/k8s-monitoring/alloyModules/modules/kubernetes/core/metrics.alloy b/charts/k8s-monitoring/alloyModules/modules/kubernetes/core/metrics.alloy new file mode 100644 index 0000000..13a9ed4 --- /dev/null +++ b/charts/k8s-monitoring/alloyModules/modules/kubernetes/core/metrics.alloy @@ -0,0 +1,1035 @@ +/* +Module: job-cadvisor +Description: Scrapes cadvisor + +Note: Every argument except for "forward_to" is optional, and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using the coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will + does not override the value passed in, where coalesce() will return the first non-null value. 
+*/ +declare "cadvisor" { + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"metadata.name=kubernetes\"])" + optional = true + } + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + argument "job_label" { + comment = "The job label to add for all cadvisor metric (default: integrations/kubernetes/cadvisor)" + optional = true + } + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + export "output" { + value = discovery.relabel.cadvisor.output + } + + // cadvisor service discovery for all of the nodes + discovery.kubernetes "cadvisor" { + role = "node" + + selectors { + role = "node" + field = string.join(coalesce(argument.field_selectors.value, []), ",") + label = string.join(coalesce(argument.label_selectors.value, []), ",") + } + } + + // cadvisor relabelings (pre-scrape) + discovery.relabel "cadvisor" { + targets = discovery.kubernetes.cadvisor.targets + + // set the address to use the kubernetes service dns name + rule { + target_label = "__address__" + replacement = "kubernetes.default.svc.cluster.local:443" + } + + // set the metrics path to use the proxy path to the nodes cadvisor metrics endpoint + rule { + source_labels = ["__meta_kubernetes_node_name"] + regex = "(.+)" + replacement = "/api/v1/nodes/${1}/proxy/metrics/cadvisor" + target_label = "__metrics_path__" + } + + // set the node label + rule { + source_labels = ["__meta_kubernetes_node_name"] + target_label = "node" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_node_label_app_kubernetes_io_name", + "__meta_kubernetes_node_label_k8s_app", + "__meta_kubernetes_node_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + // cadvisor scrape job + prometheus.scrape "cadvisor" { + job_name = coalesce(argument.job_label.value, "integrations/kubernetes/cadvisor") + forward_to = [prometheus.relabel.cadvisor.receiver] + targets = discovery.relabel.cadvisor.output + scheme = "https" + scrape_interval = coalesce(argument.scrape_interval.value, 
"60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + tls_config { + ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify = false + server_name = "kubernetes" + } + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // cadvisor metric relabelings (post-scrape) + prometheus.relabel "cadvisor" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(up|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|fs_(reads|writes)(_bytes)?_total|memory_(cache|rss|swap|working_set_bytes)|network_(receive|transmit)_(bytes|packets(_dropped)?_total))|machine_memory_bytes)") + action = "keep" + } + + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + + // Normalizing unimportant labels (not deleting to continue satisfying