Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: add ability to give tls certificates to metrics server #4385

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions charts/kueue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,4 @@ The following table lists the configurable parameters of the kueue chart and the
| `managerConfig.controllerManagerConfigYaml` | controllerManagerConfigYaml | abbr. |
| `metricsService` | metricsService's ports | abbr. |
| `webhookService` | webhookService's ports | abbr. |
| `metrics.prometheusNamespace` | prometheus namespace | `monitoring` |
17 changes: 17 additions & 0 deletions charts/kueue/templates/certmanager/certificatemetrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{{- if .Values.enableCertManager }}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: {{ include "kueue.fullname" . }}-metrics-cert
namespace: '{{ .Release.Namespace }}'
labels:
{{- include "kueue.labels" . | nindent 4 }}
spec:
dnsNames:
- '{{ include "kueue.fullname" . }}-controller-manager-metrics-service.{{ .Release.Namespace }}.svc'
- '{{ include "kueue.fullname" . }}-controller-manager-metrics-service.{{ .Release.Namespace }}.svc.{{ .Values.kubernetesClusterDomain }}'
issuerRef:
kind: Issuer
name: {{ include "kueue.fullname" . }}-selfsigned-issuer
secretName: {{ include "kueue.fullname" . }}-metrics-server-cert
{{- end }}
25 changes: 23 additions & 2 deletions charts/kueue/templates/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ spec:
- containerPort: 9443
name: webhook-server
protocol: TCP
- containerPort: 8443
name: metrics
protocol: TCP
readinessProbe:
httpGet:
path: /readyz
Expand All @@ -67,6 +70,11 @@ spec:
- mountPath: /controller_manager_config.yaml
name: manager-config
subPath: controller_manager_config.yaml
{{- if .Values.enableCertManager }}
- mountPath: /etc/kueue/metrics-certs
name: metrics-certs
readOnly: true
{{- end }}
securityContext:
{{- toYaml .Values.controllerManager.manager.podSecurityContext | nindent 8 }}
serviceAccountName: {{ include "kueue.fullname" . }}-controller-manager
Expand All @@ -84,6 +92,19 @@ spec:
secret:
defaultMode: 420
secretName: {{ include "kueue.fullname" . }}-webhook-server-cert
- configMap:
- name: manager-config
configMap:
name: {{ include "kueue.fullname" . }}-manager-config
name: manager-config
{{- if .Values.enableCertManager }}
- name: metrics-certs
secret:
secretName: {{ include "kueue.fullname" . }}-metrics-server-cert
optional: false
items:
- key: ca.crt
path: ca.crt
- key: tls.crt
path: tls.crt
- key: tls.key
path: tls.key
{{- end }}
3 changes: 1 addition & 2 deletions charts/kueue/templates/prometheus/monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ spec:
path: /metrics
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
tlsConfig: {{- toYaml .Values.metrics.serviceMonitor.tlsConfig | nindent 6 }}
selector:
matchLabels:
{{- include "kueue.labels" . | nindent 6 }}
Expand Down
4 changes: 2 additions & 2 deletions charts/kueue/templates/prometheus/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ roleRef:
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: '{{ .Release.Namespace }}'
namespace: '{{ .Values.metrics.prometheusNamespace }}'
- kind: ServiceAccount
name: prometheus-operator
namespace: '{{ .Release.Namespace }}'
namespace: '{{ .Values.metrics.prometheusNamespace }}'
{{- end }}
25 changes: 22 additions & 3 deletions charts/kueue/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
nameOverride: ""
fullnameOverride: ""
# Enable each function, like kustomize https://github.com/kubernetes-sigs/kueue/blob/main/config/default/kustomization.yaml
enablePrometheus: false
enablePrometheus: true
# Enable x509 automated certificate management using cert-manager (cert-manager.io)
enableCertManager: false
enableCertManager: true
# Enable API Priority and Fairness configuration for the visibility API
enableVisibilityAPF: false
# Customize controllerManager
Expand All @@ -16,9 +16,10 @@ controllerManager:
# enabled: true
manager:
image:
repository: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue
repository: quay.io/kevin-oss/kueue
# This should be set to 'IfNotPresent' for released version
pullPolicy: Always
tag: feb28-up
podAnnotations: {}
resources:
limits:
Expand Down Expand Up @@ -153,3 +154,21 @@ webhookService:

# kueue-viz dashboard
enableKueueViz: false

metrics:
prometheusNamespace: monitoring
serviceMonitor:
tlsConfig:
serverName: kueue-controller-metrics-service.kueue-system.svc
insecureSkipVerify: false
ca:
secret:
name: metrics-server-cert
key: ca.crt
cert:
secret:
name: metrics-server-cert
key: tls.crt
keySecret:
name: metrics-server-cert
key: tls.key
23 changes: 23 additions & 0 deletions cmd/kueue/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ package main

import (
"context"
"crypto/tls"
"errors"
"flag"
"net/http"
"os"
"path/filepath"

zaplog "go.uber.org/zap"
"go.uber.org/zap/zapcore"
Expand All @@ -39,6 +41,7 @@ import (
"k8s.io/client-go/util/flowcontrol"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
Expand Down Expand Up @@ -153,6 +156,26 @@ func main() {
SecureServing: true,
FilterProvider: filters.WithAuthenticationAndAuthorization,
}

if cfg.InternalCertManagement == nil || !*cfg.InternalCertManagement.Enable {
metricsCertPath := "/etc/kueue/metrics-certs"
setupLog.Info("Initializing metrics certificate watcher using provided certificates",
"metrics-cert-path", metricsCertPath)

var err error
metricsCertWatcher, err := certwatcher.New(
filepath.Join(metricsCertPath, "tls.crt"),
filepath.Join(metricsCertPath, "tls.key"),
)
if err != nil {
setupLog.Error(err, "to initialize metrics certificate watcher", "error", err)
os.Exit(1)
}

metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) {
config.GetCertificate = metricsCertWatcher.GetCertificate
})
}
options.Metrics = metricsServerOptions

metrics.Register()
Expand Down
17 changes: 17 additions & 0 deletions config/components/certmanager/certificate-metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# The following manifests contain a metrics certificate CR.
# More document can be found at https://docs.cert-manager.io
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: metrics-certs # this name should match the one appeared in kustomizeconfig.yaml
namespace: system
spec:
dnsNames:
# SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize
# replacements in the config/default/kustomization.yaml file.
- SERVICE_NAME.SERVICE_NAMESPACE.svc
- SERVICE_NAME.SERVICE_NAMESPACE.svc.cluster.local
issuerRef:
kind: Issuer
name: selfsigned-issuer
secretName: metrics-server-cert
2 changes: 2 additions & 0 deletions config/components/certmanager/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
resources:
- certificate.yaml
- certificate-metrics.yaml


configurations:
- kustomizeconfig.yaml
4 changes: 3 additions & 1 deletion config/components/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ health:
healthProbeBindAddress: :8081
metrics:
bindAddress: :8443
# enableClusterQueueResources: true
# Uncommented if you are using certificates with prometheus
protectMetricsWithTLS: true
enableClusterQueueResources: true
webhook:
port: 9443
leaderElection:
Expand Down
8 changes: 8 additions & 0 deletions config/components/prometheus/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
resources:
- monitor.yaml
- role.yaml
# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus
# to securely reference certificates created and managed by cert-manager.
# Additionally, ensure that you uncomment the [METRICS WITH CERTMANAGER] patch under config/default/kustomization.yaml
# to mount the "metrics-server-cert" secret in the Manager Deployment.
patches:
- path: monitor_tls_patch.yaml
target:
kind: ServiceMonitor
19 changes: 19 additions & 0 deletions config/components/prometheus/monitor_tls_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Patch for Prometheus ServiceMonitor to enable secure TLS configuration
# using certificates managed by cert-manager
- op: replace
path: /spec/endpoints/0/tlsConfig
value:
# SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize
serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc
insecureSkipVerify: false
ca:
secret:
name: metrics-server-cert
key: ca.crt
cert:
secret:
name: metrics-server-cert
key: tls.crt
keySecret:
name: metrics-server-cert
key: tls.key
25 changes: 25 additions & 0 deletions config/default/cert_metrics_manager_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This patch adds the args, volumes, and ports to allow the manager to use the metrics-server certs.

# Add the volumeMount for the metrics-server certs
- op: add
path: /spec/template/spec/containers/0/volumeMounts/-
value:
mountPath: /etc/kueue/metrics-certs
name: metrics-certs
readOnly: true

# Add the metrics-server certs volume configuration
- op: add
path: /spec/template/spec/volumes/-
value:
name: metrics-certs
secret:
secretName: metrics-server-cert
optional: false
items:
- key: ca.crt
path: ca.crt
- key: tls.crt
path: tls.crt
- key: tls.key
path: tls.key
12 changes: 11 additions & 1 deletion config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ resources:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
# - ../components/certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../components/prometheus
# - ../components/prometheus
# [METRICS] Expose the controller manager metrics service.
- metrics_service.yaml

Expand All @@ -49,12 +49,22 @@ patches:
# Expose port used by the visibility server
- path: manager_visibility_patch.yaml

# Expose port used by the metrics service
- path: manager_metrics_patch.yaml

# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'.
# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks.
# 'CERTMANAGER' needs to be enabled to use ca injection
# - path: mutating_webhookcainjection_patch.yaml
# - path: validating_webhookcainjection_patch.yaml

# Uncomment the patches line if you enable Metrics and CertManager
# [CERTMANAGER] To enable metrics protected with certManager, uncomment the following line.
# This patch will protect the metrics with certManager self-signed certs.
# - path: cert_metrics_manager_patch.yaml
# target:
# kind: Deployment


# the following config is for teaching kustomize how to do var substitution
# vars:
Expand Down
15 changes: 15 additions & 0 deletions config/default/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This patch exposes 8443 port used by metrics service
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
containers:
- name: manager
ports:
- containerPort: 8443
name: metrics
protocol: TCP