kubernetes-sigs · k8s-ci-robot · Sep 12, 2025 · Aug 20, 2025 · Sep 4, 2025 · liu-cong
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -117,6 +117,30 @@ Then apply it with:
 helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
 ```
 
+### Install with Monitoring
+
+To collect metrics from the EndpointPicker (EPP), enable creation of a Prometheus ServiceMonitor in your `values.yaml`:
+
+```yaml
+inferenceExtension:
+  monitoring:
+    interval: "10s"
+    prometheus:
+      enabled: true
+    secret:
+      name: inference-gateway-sa-metrics-reader-secret
+```
+
+**Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster.
+
+For GKE environments, monitoring is automatically configured when `provider.name` is set to `gke`.
+
+Then apply it with:
+
+```txt
+helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
+```
+
 ## Uninstall
 
 Run the following command to uninstall the chart:
@@ -146,6 +170,9 @@ The following table list the configurable parameters of the chart.
 | `inferenceExtension.affinity`               | Affinity for the endpoint picker. Defaults to `{}`.                                                                    |
 | `inferenceExtension.tolerations`            | Tolerations for the endpoint picker. Defaults to `[]`.                                                                 |
 | `inferenceExtension.flags.has-enable-leader-election` | Enable leader election for high availability. When enabled, only one EPP pod (the leader) will be ready to serve traffic.       |
+| `inferenceExtension.monitoring.interval`   | Metrics scraping interval for monitoring. Defaults to `10s`.                                                           |
+| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
+| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                      |
 | `inferenceExtension.pluginsCustomConfig`    | Custom config that is passed to EPP as inline yaml.      |
 | `provider.name`                             | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`.                   |
 

diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
@@ -0,0 +1,14 @@
+{{- /* Token Secret that the metrics scrape configs (ServiceMonitor, gke.yaml) read their bearer token from. */ -}}
+{{- /* GKE is detected through provider.name: values.yaml defines no monitoring.gke key, and dereferencing it aborts rendering. */ -}}
+{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled (eq .Values.provider.name "gke") }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Values.inferenceExtension.monitoring.secret.name | quote }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+  annotations:
+    kubernetes.io/service-account.name: {{ include "gateway-api-inference-extension.name" . }}
+type: kubernetes.io/service-account-token
+{{- end }}
diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml
@@ -0,0 +1,27 @@
+{{- /* ServiceMonitor for the endpoint picker; rendered only when inferenceExtension.monitoring.prometheus.enabled is set. */ -}}
+{{- /* NOTE(review): jobLabel names a Service label key in the Prometheus Operator API; confirm a label with this key exists, otherwise the job name falls back to the Service name. */ -}}
+{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}-monitor
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  jobLabel: {{ include "gateway-api-inference-extension.name" . }}
+  namespaceSelector:
+    matchNames:
+    - {{ .Release.Namespace }}
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.labels" . | nindent 6 }}
+  endpoints:
+  - port: http-metrics
+    path: /metrics
+    interval: {{ .Values.inferenceExtension.monitoring.interval }}
+    authorization:
+      credentials:
+        name: {{ .Values.inferenceExtension.monitoring.secret.name }}
+        key: token
+{{- end }}
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
@@ -46,15 +46,15 @@ spec:
   endpoints:
   - port: metrics
     scheme: http
-    interval: 5s
+    interval: {{ .Values.inferenceExtension.monitoring.interval }}
     path: /metrics
     authorization:
       type: Bearer
       credentials:
         secret:
-          name: {{ .Values.gke.monitoringSecret.name }}
+          name: {{ .Values.inferenceExtension.monitoring.secret.name }}
           key: token
-          namespace: {{ .Values.gke.monitoringSecret.namespace }}
+          namespace: {{ .Release.Namespace }}
   selector:
     matchLabels:
       {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}

diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
@@ -17,6 +17,12 @@ rules:
   - subjectaccessreviews
   verbs:
   - create
+{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
+- nonResourceURLs:
+  - "/metrics"
+  verbs:
+  - get
+{{- end }}
 ---
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1

diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -40,6 +40,17 @@ inferenceExtension:
 
   tolerations: []
 
+  # Monitoring settings for the endpoint picker (EPP)
+  monitoring:
+    interval: "10s"  # scrape interval for the EPP metrics endpoint
+    # When true, a Prometheus ServiceMonitor is rendered for EPP metrics
+    prometheus:
+      enabled: false
+
+    # Service account token secret the scraper authenticates with
+    secret:
+      name: inference-gateway-sa-metrics-reader-secret
+
 inferencePool:
   targetPorts:
     - number: 8000
@@ -56,7 +67,3 @@ inferencePool:
 provider:
   name: none
 
-gke:
-  monitoringSecret:
-    name: inference-gateway-sa-metrics-reader-secret
-    namespace: default