diff --git a/chart/templates/frontproxy/configmap.yaml b/chart/templates/frontproxy/configmap.yaml
index 315d125..d56e9ef 100644
--- a/chart/templates/frontproxy/configmap.yaml
+++ b/chart/templates/frontproxy/configmap.yaml
@@ -20,6 +20,7 @@ data:
       timeout connect {{ .Values.frontproxy.timeouts.connect }}
       timeout client {{ .Values.frontproxy.timeouts.client }}
       timeout server {{ .Values.frontproxy.timeouts.server }}
+      timeout queue {{ .Values.frontproxy.timeouts.queue }}
 
     frontend http_in
       bind *:{{ .Values.frontproxy.containerPort }}
@@ -32,7 +33,10 @@ data:
       # Use the FQDN: HAProxy's resolver does NOT apply /etc/resolv.conf search
       # domains, so a bare service name NXDOMAINs and leaves 0 backends (503 ).
       option redispatch
-      server-template pod 1-{{ int .Values.replicaCount }} {{ .Chart.Name }}-headless.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.service.port }} check resolvers k8s init-addr none
+      # maxconn per pod bounds in-flight requests on each backend so uvicorn can't
+      # accumulate request-body buffers and OOM; excess connections queue (timeout
+      # queue) and redispatch instead of overrunning a pod.
+      server-template pod 1-{{ int .Values.replicaCount }} {{ .Chart.Name }}-headless.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.service.port }} check resolvers k8s init-addr none maxconn {{ int .Values.frontproxy.maxConnPerPod }}
 
     resolvers k8s
       parse-resolv-conf
diff --git a/chart/values.yaml b/chart/values.yaml
index fabc1d9..42866a6 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -209,6 +209,15 @@ frontproxy:
     client: "1h"
     server: "1h"
     connect: "10s"
+    # HAProxy "timeout queue": max wait for a free per-pod slot before a 503.
+    queue: "30s"
+  # Max concurrent connections HAProxy sends to EACH backend pod (server maxconn).
+  # This is the real bound on a pod's memory: uvicorn buffers each in-flight
+  # request body off the socket before our app's limiter runs, so without a
+  # per-pod cap a backup flood piles up bodies and OOMs the pod. Excess connections
+  # queue in HAProxy (up to timeouts.queue) and redispatch to a less-loaded pod
+  # instead of hard-failing. ~40 x worst-case part size stays under the pod limit.
+  maxConnPerPod: 40
   resources:
     requests:
       cpu: "50m"