Nikolai Rodionov 57712751f1
chore: mirror k8s-monitoring-2.0.12
upstream_repo:
2025-02-21 09:42:13 +01:00

224 lines
7.0 KiB
Plaintext

/*
Module: job-cert-manager
Description: Scrapes cert-manager
Note: Every argument except for "targets" and "forward_to" is optional and has a defined default value. However, the values
for these arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax
does not override the value passed in, whereas coalesce() returns the first non-null value.
*/
// Custom component "kubernetes": discovers cert-manager pods through the Kubernetes API,
// applies pre-scrape relabeling to the discovered targets, and exports the result as "output".
// All arguments are optional; defaults are applied with coalesce() at the point of use.
declare "kubernetes" {
  // arguments for kubernetes discovery
  argument "namespaces" {
    comment  = "The namespaces to look for targets in (default: [] is all namespaces)"
    optional = true
  }

  argument "field_selectors" {
    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
    comment  = "The field selectors to use to find matching targets (default: [])"
    optional = true
  }

  argument "label_selectors" {
    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
    comment  = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=cert-manager\"])"
    optional = true
  }

  argument "port_name" {
    comment  = "The name of the port to scrape metrics from (default: http-metrics)"
    optional = true
  }

  // cert-manager service discovery for all of the pods
  discovery.kubernetes "cert_manager" {
    role = "pod"

    // the selector lists are joined into single comma-separated strings
    selectors {
      role  = "pod"
      field = string.join(coalesce(argument.field_selectors.value, []), ",")
      label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=cert-manager"]), ",")
    }

    namespaces {
      names = coalesce(argument.namespaces.value, [])
    }
  }

  // cert-manager relabelings (pre-scrape)
  // NOTE: rules are applied in the order written; do not reorder them.
  discovery.relabel "kubernetes" {
    targets = discovery.kubernetes.cert_manager.targets

    // keep only the specified metrics port name, and pods that are Running and ready
    // (the three source labels are joined with "@" and matched as "<port_name>@Running@true")
    rule {
      source_labels = [
        "__meta_kubernetes_pod_container_port_name",
        "__meta_kubernetes_pod_phase",
        "__meta_kubernetes_pod_ready",
      ]
      separator = "@"
      regex     = coalesce(argument.port_name.value, "http-metrics") + "@Running@true"
      action    = "keep"
    }

    // drop any init containers
    rule {
      source_labels = ["__meta_kubernetes_pod_container_init"]
      regex         = "true"
      action        = "drop"
    }

    // set the namespace label
    rule {
      source_labels = ["__meta_kubernetes_namespace"]
      target_label  = "namespace"
    }

    // set the pod label
    rule {
      source_labels = ["__meta_kubernetes_pod_name"]
      target_label  = "pod"
    }

    // set the container label
    rule {
      source_labels = ["__meta_kubernetes_pod_container_name"]
      target_label  = "container"
    }

    // set a workload label in the form "<controller kind>/<controller name>"
    rule {
      source_labels = [
        "__meta_kubernetes_pod_controller_kind",
        "__meta_kubernetes_pod_controller_name",
      ]
      separator    = "/"
      target_label = "workload"
    }

    // remove the hash from the ReplicaSet
    // (only rewrites workloads matching "ReplicaSet/<name>-<suffix>"; others are left as-is)
    rule {
      source_labels = ["workload"]
      regex         = "(ReplicaSet/.+)-.+"
      target_label  = "workload"
    }

    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
    // (the regex skips leading ";" left by empty labels and captures the first non-empty value)
    rule {
      action        = "replace"
      source_labels = [
        "__meta_kubernetes_pod_label_app_kubernetes_io_name",
        "__meta_kubernetes_pod_label_k8s_app",
        "__meta_kubernetes_pod_label_app",
      ]
      separator    = ";"
      regex        = "^(?:;*)?([^;]+).*$"
      replacement  = "$1"
      target_label = "app"
    }

    // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
    rule {
      action        = "replace"
      source_labels = [
        "__meta_kubernetes_pod_label_app_kubernetes_io_component",
        "__meta_kubernetes_pod_label_k8s_component",
        "__meta_kubernetes_pod_label_component",
      ]
      // explicit separator, consistent with the "app" rule above; the regex below
      // depends on ";" being the join character (it is also the documented default)
      separator    = ";"
      regex        = "^(?:;*)?([^;]+).*$"
      replacement  = "$1"
      target_label = "component"
    }

    // set a source label
    rule {
      action       = "replace"
      replacement  = "kubernetes"
      target_label = "source"
    }
  }

  // relabeled targets, for use as the "targets" input of a scrape component
  export "output" {
    value = discovery.relabel.kubernetes.output
  }
}
// Custom component "scrape": scrapes the supplied targets, filters the collected
// metrics by name (drop list first, then keep list), and forwards what remains to
// the receivers given in "forward_to".
declare "scrape" {
  // ---- required arguments (no "optional = true") ----
  argument "targets" {
    comment = "Must be a list() of targets"
  }

  argument "forward_to" {
    comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
  }

  // ---- optional arguments; defaults are supplied via coalesce() where each value is used ----
  argument "job_label" {
    optional = true
    comment  = "The job label to add for all cert-manager metric (default: integrations/cert-manager)"
  }

  argument "keep_metrics" {
    optional = true
    comment  = "A regular expression of metrics to keep (default: see below)"
  }

  argument "drop_metrics" {
    optional = true
    comment  = "A regular expression of metrics to drop (default: see below)"
  }

  argument "scrape_interval" {
    optional = true
    comment  = "How often to scrape metrics from the targets (default: 60s)"
  }

  argument "scrape_timeout" {
    optional = true
    comment  = "How long before a scrape times out (default: 10s)"
  }

  argument "max_cache_size" {
    optional = true
    comment  = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
  }

  argument "clustering" {
    // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
    optional = true
    comment  = "Whether or not clustering should be enabled (default: false)"
  }

  // Post-scrape metric filtering. Receives samples from the scrape job declared
  // further down and hands the survivors to the caller-provided receivers.
  prometheus.relabel "cert_manager" {
    max_cache_size = coalesce(argument.max_cache_size.value, 100000)
    forward_to     = argument.forward_to.value

    // first: discard any metric whose name matches the drop_metrics expression
    rule {
      action        = "drop"
      source_labels = ["__name__"]
      regex         = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
    }

    // second: of what is left, retain only names matching the keep_metrics expression
    rule {
      action        = "keep"
      source_labels = ["__name__"]
      regex         = coalesce(argument.keep_metrics.value, "(up|(certmanager_(certificate_(expiration_timestamp_seconds|ready_status)|clock_time_seconds|controller_sync_call_count|http_acme_client_request_(count|duration_seconds_(count|sum)))|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|memory_usage_bytes|network_(receive|transmit)_bytes_total)|kube_pod_container_resource_(limits|requests)_(cpu_cores|memory_bytes)))")
    }
  }

  // The cert-manager scrape job itself. Everything it collects is sent through
  // the prometheus.relabel component above rather than directly to forward_to.
  prometheus.scrape "cert_manager" {
    targets         = argument.targets.value
    forward_to      = [prometheus.relabel.cert_manager.receiver]
    job_name        = coalesce(argument.job_label.value, "integrations/cert-manager")
    scrape_timeout  = coalesce(argument.scrape_timeout.value, "10s")
    scrape_interval = coalesce(argument.scrape_interval.value, "60s")

    clustering {
      enabled = coalesce(argument.clustering.value, false)
    }
  }
}