coder
diff --git a/‎cli/testdata/coder_server_--help.golden‎
Lines changed: 8 additions & 0 deletions b/‎cli/testdata/coder_server_--help.golden‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎cli/testdata/server-config.yaml.golden‎
Lines changed: 8 additions & 0 deletions b/‎cli/testdata/server-config.yaml.golden‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎coderd/aibridge/aibridge.go‎
Lines changed: 24 additions & 0 deletions b/‎coderd/aibridge/aibridge.go‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎coderd/apidoc/docs.go‎
Lines changed: 6 additions & 0 deletions b/‎coderd/apidoc/docs.go‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎coderd/apidoc/swagger.json‎
Lines changed: 6 additions & 0 deletions b/‎coderd/apidoc/swagger.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎coderd/httpmw/ratelimit.go‎
Lines changed: 71 additions & 0 deletions b/‎coderd/httpmw/ratelimit.go‎
Lines changed: 71 additions & 0 deletions
@@ -125,12 +125,20 @@ AI BRIDGE OPTIONS:
           requests (requires the "oauth2" and "mcp-server-http" experiments to
           be enabled).
 
+      --aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
+          Maximum number of concurrent AI Bridge requests per replica. Set to 0
+          to disable (unlimited).
+
       --aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
           The base URL of the OpenAI API.
 
       --aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
           The key to authenticate against the OpenAI API.
 
+      --aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
+          Maximum number of AI Bridge requests per second per replica. Set to 0
+          to disable (unlimited).
+
 CLIENT OPTIONS: 
 These options change the behavior of how clients interact with the Coder.
 Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
 
@@ -748,6 +748,14 @@ aibridge:
   # (token, prompt, tool use).
   # (default: 60d, type: duration)
   retention: 1440h0m0s
+  # Maximum number of concurrent AI Bridge requests per replica. Set to 0 to disable
+  # (unlimited).
+  # (default: 0, type: int)
+  maxConcurrency: 0
+  # Maximum number of AI Bridge requests per second per replica. Set to 0 to disable
+  # (unlimited).
+  # (default: 0, type: int)
+  rateLimit: 0
 # Configure data retention policies for various database tables. Retention
 # policies automatically purge old data to reduce database size and improve
 # performance. Setting a retention duration to 0 disables automatic purging for
 
@@ -0,0 +1,24 @@
+// Package aibridge provides utilities for the AI Bridge feature.
+package aibridge
+
+import (
+	"net/http"
+	"strings"
+)
+
+// ExtractAuthToken returns the credential a client supplied in its request
+// headers, or an empty string when no usable credential is present.
+//
+// Two header forms are recognized, checked in this order:
+//   - "Authorization: Bearer <token>": the scheme is matched
+//     case-insensitively and the value must consist of exactly two
+//     whitespace-separated fields.
+//   - "X-Api-Key: <key>": returned with surrounding whitespace trimmed.
+//
+// An Authorization header that is not a well-formed Bearer credential does
+// not end the lookup; X-Api-Key is still consulted.
+func ExtractAuthToken(header http.Header) string {
+	// strings.Fields ignores leading/trailing whitespace and yields no fields
+	// for a missing or blank value, so the length check also covers "absent".
+	parts := strings.Fields(header.Get("Authorization"))
+	if len(parts) == 2 && strings.EqualFold(parts[0], "Bearer") {
+		return parts[1]
+	}
+
+	// A missing or blank X-Api-Key trims down to "", which is exactly the
+	// "nothing found" result.
+	return strings.TrimSpace(header.Get("X-Api-Key"))
+}
@@ -4,11 +4,13 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"sync/atomic"
 	"time"
 
 	"github.com/go-chi/httprate"
 	"golang.org/x/xerrors"
 
+	"github.com/coder/coder/v2/coderd/aibridge"
 	"github.com/coder/coder/v2/coderd/database"
 	"github.com/coder/coder/v2/coderd/httpapi"
 	"github.com/coder/coder/v2/coderd/rbac"
@@ -70,3 +72,72 @@ func RateLimit(count int, window time.Duration) func(http.Handler) http.Handler
 		}),
 	)
 }
+
+// RateLimitByAuthToken returns a handler that limits requests based on the
+// authentication token in the request.
+//
+// This differs from [RateLimit] in several ways:
+//   - It extracts the token directly from request headers (Authorization Bearer
+//     or X-Api-Key) rather than from the request context, making it suitable for
+//     endpoints that handle authentication internally (like AI Bridge) rather than
+//     via [ExtractAPIKeyMW] middleware.
+//   - It does not support the bypass header for Owners.
+//   - It does not key by endpoint, so the limit applies across all endpoints using
+//     this middleware.
+//   - It includes a Retry-After header in 429 responses for backpressure signaling.
+//
+// If no token is found in the headers, it falls back to rate limiting by IP address.
+//
+// count is the number of requests allowed per key within window. A count of
+// zero or less disables limiting and the returned middleware passes the
+// handler through unchanged.
+//
+// The Retry-After value is window rounded up to whole seconds, and is never
+// less than 1.
+func RateLimitByAuthToken(count int, window time.Duration) func(http.Handler) http.Handler {
+	if count <= 0 {
+		return func(handler http.Handler) http.Handler {
+			return handler
+		}
+	}
+
+	// Retry-After is expressed in whole seconds. Round up rather than
+	// truncate: truncation would advertise "0" for sub-second windows and
+	// would tell clients to retry before a fractional window has elapsed.
+	// The value depends only on window, so compute it once here instead of
+	// on every limited request.
+	retryAfterSecs := int64((window + time.Second - 1) / time.Second)
+	if retryAfterSecs < 1 {
+		retryAfterSecs = 1
+	}
+	retryAfter := strconv.FormatInt(retryAfterSecs, 10)
+
+	return httprate.Limit(
+		count,
+		window,
+		httprate.WithKeyFuncs(func(r *http.Request) (string, error) {
+			// Try to extract auth token for per-user rate limiting using
+			// AI provider authentication headers (Authorization Bearer or X-Api-Key).
+			if token := aibridge.ExtractAuthToken(r.Header); token != "" {
+				return token, nil
+			}
+			// Fall back to IP-based rate limiting if no token present.
+			return httprate.KeyByIP(r)
+		}),
+		httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
+			// Add Retry-After header for backpressure signaling. It must be
+			// set before httpapi.Write sends the response.
+			w.Header().Set("Retry-After", retryAfter)
+			httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
+				Message: "You've been rate limited. Please try again later.",
+			})
+		}),
+	)
+}
+
+// ConcurrencyLimit returns a handler that limits the number of concurrent
+// requests. When the limit is exceeded, it returns HTTP 503 Service Unavailable.
+//
+// maxConcurrent is the number of requests allowed in flight at once across
+// every handler wrapped by the returned middleware (they share one counter).
+// A value of zero or less disables limiting and the returned middleware
+// passes the handler through unchanged.
+//
+// resourceName is interpolated into the message of the 503 response.
+func ConcurrencyLimit(maxConcurrent int64, resourceName string) func(http.Handler) http.Handler {
+	if maxConcurrent <= 0 {
+		return func(handler http.Handler) http.Handler {
+			return handler
+		}
+	}
+
+	var current atomic.Int64
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if current.Add(1) > maxConcurrent {
+				// Give the slot back before writing the response. Holding it
+				// while the 503 body is written would let a burst of rejected
+				// requests keep the counter above the limit and cause
+				// requests to be turned away after real capacity has freed.
+				current.Add(-1)
+				httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
+					Message: fmt.Sprintf("%s is currently at capacity. Please try again later.", resourceName),
+				})
+				return
+			}
+			// Admitted: release the slot when the handler returns, including
+			// when it panics.
+			defer current.Add(-1)
+
+			next.ServeHTTP(w, r)
+		})
+	}
+}