Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cli/testdata/coder_server_--help.golden
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,20 @@ AI BRIDGE OPTIONS:
requests (requires the "oauth2" and "mcp-server-http" experiments to
be enabled).

--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
Maximum number of concurrent AI Bridge requests per replica. Set to 0
to disable (unlimited).

--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
The base URL of the OpenAI API.

--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
The key to authenticate against the OpenAI API.

--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
Maximum number of AI Bridge requests per second per replica. Set to 0
to disable (unlimited).

CLIENT OPTIONS:
These options change the behavior of how clients interact with the Coder.
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
Expand Down
8 changes: 8 additions & 0 deletions cli/testdata/server-config.yaml.golden
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,14 @@ aibridge:
# (token, prompt, tool use).
# (default: 60d, type: duration)
retention: 1440h0m0s
# Maximum number of concurrent AI Bridge requests per replica. Set to 0 to disable
# (unlimited).
# (default: 0, type: int)
maxConcurrency: 0
# Maximum number of AI Bridge requests per second per replica. Set to 0 to disable
# (unlimited).
# (default: 0, type: int)
rateLimit: 0
# Configure data retention policies for various database tables. Retention
# policies automatically purge old data to reduce database size and improve
# performance. Setting a retention duration to 0 disables automatic purging for
Expand Down
24 changes: 24 additions & 0 deletions coderd/aibridge/aibridge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Package aibridge provides utilities for the AI Bridge feature.
package aibridge

import (
"net/http"
"strings"
)

// ExtractAuthToken returns the credential a client presented in the given
// HTTP headers, or an empty string when no usable credential is found.
//
// Two header forms are recognized, checked in this order:
//   - Authorization: the value must consist of exactly two
//     whitespace-separated fields, the first being "Bearer" (compared
//     case-insensitively). The second field is returned.
//   - X-Api-Key: the value is returned with surrounding whitespace removed.
//
// An Authorization header that does not match the Bearer form is ignored and
// the X-Api-Key header is consulted instead.
func ExtractAuthToken(header http.Header) string {
	// strings.Fields already ignores leading and trailing whitespace, so an
	// empty or blank header simply yields zero fields.
	parts := strings.Fields(header.Get("Authorization"))
	if len(parts) == 2 && strings.EqualFold(parts[0], "Bearer") {
		return parts[1]
	}

	// A missing or blank X-Api-Key trims down to "", which is also the
	// "nothing found" result.
	return strings.TrimSpace(header.Get("X-Api-Key"))
}
6 changes: 6 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

71 changes: 71 additions & 0 deletions coderd/httpmw/ratelimit.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"fmt"
"net/http"
"strconv"
"sync/atomic"
"time"

"github.com/go-chi/httprate"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/aibridge"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/rbac"
Expand Down Expand Up @@ -70,3 +72,72 @@ func RateLimit(count int, window time.Duration) func(http.Handler) http.Handler
}),
)
}

// RateLimitByAuthToken returns a handler that limits requests based on the
// authentication token in the request.
//
// This differs from [RateLimit] in several ways:
// - It extracts the token directly from request headers (Authorization Bearer
// or X-Api-Key) rather than from the request context, making it suitable for
// endpoints that handle authentication internally (like AI Bridge) rather than
// via [ExtractAPIKeyMW] middleware.
// - It does not support the bypass header for Owners.
// - It does not key by endpoint, so the limit applies across all endpoints using
// this middleware.
// - It includes a Retry-After header in 429 responses for backpressure signaling.
//
// If no token is found in the headers, it falls back to rate limiting by IP address.
func RateLimitByAuthToken(count int, window time.Duration) func(http.Handler) http.Handler {
if count <= 0 {
return func(handler http.Handler) http.Handler {
return handler
}
}

return httprate.Limit(
count,
window,
httprate.WithKeyFuncs(func(r *http.Request) (string, error) {
// Try to extract auth token for per-user rate limiting using
// AI provider authentication headers (Authorization Bearer or X-Api-Key).
if token := aibridge.ExtractAuthToken(r.Header); token != "" {
return token, nil
}
// Fall back to IP-based rate limiting if no token present.
return httprate.KeyByIP(r)
}),
httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
// Add Retry-After header for backpressure signaling.
w.Header().Set("Retry-After", fmt.Sprintf("%d", int(window.Seconds())))
httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
Message: "You've been rate limited. Please try again later.",
})
}),
)
}

// ConcurrencyLimit returns middleware that caps how many requests may be
// inside the wrapped handlers at the same time.
//
// maxConcurrent is the cap; a value of zero or less disables the limit and the
// returned middleware passes handlers through unchanged. resourceName is
// interpolated into the error message sent to rejected clients.
//
// A request that arrives while maxConcurrent requests are already in flight is
// answered with HTTP 503 Service Unavailable and never reaches the wrapped
// handler. The in-flight counter is created once per ConcurrencyLimit call, so
// every handler wrapped by the same returned middleware shares one budget.
func ConcurrencyLimit(maxConcurrent int64, resourceName string) func(http.Handler) http.Handler {
	if maxConcurrent <= 0 {
		return func(handler http.Handler) http.Handler {
			return handler
		}
	}

	var current atomic.Int64
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if current.Add(1) > maxConcurrent {
				// Release the tentative slot before writing the response.
				// Otherwise rejected requests keep the counter inflated while
				// their 503 is written, and a request arriving after a real
				// slot has freed up could be turned away needlessly.
				current.Add(-1)
				httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
					Message: fmt.Sprintf("%s is currently at capacity. Please try again later.", resourceName),
				})
				return
			}
			// Admitted: hold the slot until the handler returns. The defer
			// also releases it if the handler panics.
			defer current.Add(-1)

			next.ServeHTTP(w, r)
		})
	}
}
Loading
Loading