Skip to content

Commit b0e8384

Browse files
authored
perf: reduce DB calls to GetWorkspaceByAgentID via caching workspace info (#20662)
--------- Signed-off-by: Callum Styan <callumstyan@gmail.com>
1 parent 956cbe7 commit b0e8384

File tree

13 files changed

+839
-52
lines changed

13 files changed

+839
-52
lines changed

coderd/agentapi/api.go

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ import (
3636
"github.com/coder/quartz"
3737
)
3838

39+
const workspaceCacheRefreshInterval = 5 * time.Minute
40+
3941
// API implements the DRPC agent API interface from agent/proto. This struct is
4042
// instantiated once per agent connection and kept alive for the duration of the
4143
// session.
@@ -54,6 +56,8 @@ type API struct {
5456
*SubAgentAPI
5557
*tailnet.DRPCService
5658

59+
cachedWorkspaceFields *CachedWorkspaceFields
60+
5761
mu sync.Mutex
5862
}
5963

@@ -92,7 +96,7 @@ type Options struct {
9296
UpdateAgentMetricsFn func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric)
9397
}
9498

95-
func New(opts Options) *API {
99+
func New(opts Options, workspace database.Workspace) *API {
96100
if opts.Clock == nil {
97101
opts.Clock = quartz.NewReal()
98102
}
@@ -114,6 +118,13 @@ func New(opts Options) *API {
114118
WorkspaceID: opts.WorkspaceID,
115119
}
116120

121+
// Don't cache details for prebuilds, though the cached fields will eventually be updated
122+
// by the refresh routine once the prebuild workspace is claimed.
123+
api.cachedWorkspaceFields = &CachedWorkspaceFields{}
124+
if !workspace.IsPrebuild() {
125+
api.cachedWorkspaceFields.UpdateValues(workspace)
126+
}
127+
117128
api.AnnouncementBannerAPI = &AnnouncementBannerAPI{
118129
appearanceFetcher: opts.AppearanceFetcher,
119130
}
@@ -139,6 +150,7 @@ func New(opts Options) *API {
139150

140151
api.StatsAPI = &StatsAPI{
141152
AgentFn: api.agent,
153+
Workspace: api.cachedWorkspaceFields,
142154
Database: opts.Database,
143155
Log: opts.Log,
144156
StatsReporter: opts.StatsReporter,
@@ -162,10 +174,11 @@ func New(opts Options) *API {
162174
}
163175

164176
api.MetadataAPI = &MetadataAPI{
165-
AgentFn: api.agent,
166-
Database: opts.Database,
167-
Pubsub: opts.Pubsub,
168-
Log: opts.Log,
177+
AgentFn: api.agent,
178+
Workspace: api.cachedWorkspaceFields,
179+
Database: opts.Database,
180+
Pubsub: opts.Pubsub,
181+
Log: opts.Log,
169182
}
170183

171184
api.LogsAPI = &LogsAPI{
@@ -205,6 +218,10 @@ func New(opts Options) *API {
205218
Database: opts.Database,
206219
}
207220

221+
// Start background cache refresh loop to handle workspace changes
222+
// like prebuild claims where owner_id and other fields may be modified in the DB.
223+
go api.startCacheRefreshLoop(opts.Ctx)
224+
208225
return api
209226
}
210227

@@ -254,6 +271,56 @@ func (a *API) agent(ctx context.Context) (database.WorkspaceAgent, error) {
254271
return agent, nil
255272
}
256273

274+
// refreshCachedWorkspace periodically updates the cached workspace fields.
275+
// This ensures that changes like prebuild claims (which modify owner_id, name, etc.)
276+
// are eventually reflected in the cache without requiring agent reconnection.
277+
func (a *API) refreshCachedWorkspace(ctx context.Context) {
278+
ws, err := a.opts.Database.GetWorkspaceByID(ctx, a.opts.WorkspaceID)
279+
if err != nil {
280+
a.opts.Log.Warn(ctx, "failed to refresh cached workspace fields", slog.Error(err))
281+
a.cachedWorkspaceFields.Clear()
282+
return
283+
}
284+
285+
if ws.IsPrebuild() {
286+
return
287+
}
288+
289+
// If we still have the same values, skip the update and logging calls.
290+
if a.cachedWorkspaceFields.identity.Equal(database.WorkspaceIdentityFromWorkspace(ws)) {
291+
return
292+
}
293+
// Update fields that can change during workspace lifecycle (e.g., AutostartSchedule)
294+
a.cachedWorkspaceFields.UpdateValues(ws)
295+
296+
a.opts.Log.Debug(ctx, "refreshed cached workspace fields",
297+
slog.F("workspace_id", ws.ID),
298+
slog.F("owner_id", ws.OwnerID),
299+
slog.F("name", ws.Name))
300+
}
301+
302+
// startCacheRefreshLoop runs a background goroutine that periodically refreshes
303+
// the cached workspace fields. This is primarily needed to handle prebuild claims
304+
// where the owner_id and other fields change while the agent connection persists.
305+
func (a *API) startCacheRefreshLoop(ctx context.Context) {
306+
// Refresh every 5 minutes. This provides a reasonable balance between:
307+
// - Keeping cache fresh for prebuild claims and other workspace updates
308+
// - Minimizing unnecessary database queries
309+
ticker := a.opts.Clock.TickerFunc(ctx, workspaceCacheRefreshInterval, func() error {
310+
a.refreshCachedWorkspace(ctx)
311+
return nil
312+
}, "cache_refresh")
313+
314+
// We need to wait on the ticker exiting.
315+
_ = ticker.Wait()
316+
317+
a.opts.Log.Debug(ctx, "cache refresh loop exited, invalidating the workspace cache on agent API",
318+
slog.F("workspace_id", a.cachedWorkspaceFields.identity.ID),
319+
slog.F("owner_id", a.cachedWorkspaceFields.identity.OwnerUsername),
320+
slog.F("name", a.cachedWorkspaceFields.identity.Name))
321+
a.cachedWorkspaceFields.Clear()
322+
}
323+
257324
func (a *API) publishWorkspaceUpdate(ctx context.Context, agent *database.WorkspaceAgent, kind wspubsub.WorkspaceEventKind) error {
258325
a.opts.PublishWorkspaceUpdateFn(ctx, a.opts.OwnerID, wspubsub.WorkspaceEvent{
259326
Kind: kind,
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package agentapi
2+
3+
import (
4+
"sync"
5+
6+
"github.com/coder/coder/v2/coderd/database"
7+
)
8+
9+
// CachedWorkspaceFields contains workspace data that is safe to cache for the
10+
// duration of an agent connection. These fields are used to reduce database calls
11+
// in high-frequency operations like stats reporting and metadata updates.
12+
// Prebuild workspaces should not be cached using this struct within the API struct,
13+
// however some of these fields for a workspace can be updated live so there is a
14+
// routine in the API for refreshing the workspace on a timed interval.
15+
//
16+
// IMPORTANT: ACL fields (GroupACL, UserACL) are NOT cached because they can be
17+
// modified in the database and we must use fresh data for authorization checks.
18+
type CachedWorkspaceFields struct {
19+
lock sync.RWMutex
20+
21+
identity database.WorkspaceIdentity
22+
}
23+
24+
func (cws *CachedWorkspaceFields) Clear() {
25+
cws.lock.Lock()
26+
defer cws.lock.Unlock()
27+
cws.identity = database.WorkspaceIdentity{}
28+
}
29+
30+
func (cws *CachedWorkspaceFields) UpdateValues(ws database.Workspace) {
31+
cws.lock.Lock()
32+
defer cws.lock.Unlock()
33+
cws.identity.ID = ws.ID
34+
cws.identity.OwnerID = ws.OwnerID
35+
cws.identity.OrganizationID = ws.OrganizationID
36+
cws.identity.TemplateID = ws.TemplateID
37+
cws.identity.Name = ws.Name
38+
cws.identity.OwnerUsername = ws.OwnerUsername
39+
cws.identity.TemplateName = ws.TemplateName
40+
cws.identity.AutostartSchedule = ws.AutostartSchedule
41+
}
42+
43+
// Returns the Workspace, true, unless the workspace has not been cached (nuked or was a prebuild).
44+
func (cws *CachedWorkspaceFields) AsWorkspaceIdentity() (database.WorkspaceIdentity, bool) {
45+
cws.lock.RLock()
46+
defer cws.lock.RUnlock()
47+
// Should we be more explicit about all fields being set to be valid?
48+
if cws.identity.Equal(database.WorkspaceIdentity{}) {
49+
return database.WorkspaceIdentity{}, false
50+
}
51+
return cws.identity, true
52+
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package agentapi_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/google/uuid"
7+
"github.com/stretchr/testify/require"
8+
9+
"github.com/coder/coder/v2/coderd/agentapi"
10+
"github.com/coder/coder/v2/coderd/database"
11+
)
12+
13+
func TestCacheClear(t *testing.T) {
14+
t.Parallel()
15+
16+
var (
17+
user = database.User{
18+
ID: uuid.New(),
19+
Username: "bill",
20+
}
21+
template = database.Template{
22+
ID: uuid.New(),
23+
Name: "tpl",
24+
}
25+
workspace = database.Workspace{
26+
ID: uuid.New(),
27+
OwnerID: user.ID,
28+
OwnerUsername: user.Username,
29+
TemplateID: template.ID,
30+
Name: "xyz",
31+
TemplateName: template.Name,
32+
}
33+
workspaceAsCacheFields = agentapi.CachedWorkspaceFields{}
34+
)
35+
36+
workspaceAsCacheFields.UpdateValues(database.Workspace{
37+
ID: workspace.ID,
38+
OwnerID: workspace.OwnerID,
39+
OwnerUsername: workspace.OwnerUsername,
40+
TemplateID: workspace.TemplateID,
41+
Name: workspace.Name,
42+
TemplateName: workspace.TemplateName,
43+
AutostartSchedule: workspace.AutostartSchedule,
44+
},
45+
)
46+
47+
emptyCws := agentapi.CachedWorkspaceFields{}
48+
workspaceAsCacheFields.Clear()
49+
wsi, ok := workspaceAsCacheFields.AsWorkspaceIdentity()
50+
require.False(t, ok)
51+
ecwsi, ok := emptyCws.AsWorkspaceIdentity()
52+
require.False(t, ok)
53+
require.True(t, ecwsi.Equal(wsi))
54+
}
55+
56+
func TestCacheUpdate(t *testing.T) {
57+
t.Parallel()
58+
59+
var (
60+
user = database.User{
61+
ID: uuid.New(),
62+
Username: "bill",
63+
}
64+
template = database.Template{
65+
ID: uuid.New(),
66+
Name: "tpl",
67+
}
68+
workspace = database.Workspace{
69+
ID: uuid.New(),
70+
OwnerID: user.ID,
71+
OwnerUsername: user.Username,
72+
TemplateID: template.ID,
73+
Name: "xyz",
74+
TemplateName: template.Name,
75+
}
76+
workspaceAsCacheFields = agentapi.CachedWorkspaceFields{}
77+
)
78+
79+
workspaceAsCacheFields.UpdateValues(database.Workspace{
80+
ID: workspace.ID,
81+
OwnerID: workspace.OwnerID,
82+
OwnerUsername: workspace.OwnerUsername,
83+
TemplateID: workspace.TemplateID,
84+
Name: workspace.Name,
85+
TemplateName: workspace.TemplateName,
86+
AutostartSchedule: workspace.AutostartSchedule,
87+
},
88+
)
89+
90+
cws := agentapi.CachedWorkspaceFields{}
91+
cws.UpdateValues(workspace)
92+
wsi, ok := workspaceAsCacheFields.AsWorkspaceIdentity()
93+
require.True(t, ok)
94+
cwsi, ok := cws.AsWorkspaceIdentity()
95+
require.True(t, ok)
96+
require.True(t, wsi.Equal(cwsi))
97+
}

coderd/agentapi/metadata.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,17 @@ import (
1212
"cdr.dev/slog"
1313
agentproto "github.com/coder/coder/v2/agent/proto"
1414
"github.com/coder/coder/v2/coderd/database"
15+
"github.com/coder/coder/v2/coderd/database/dbauthz"
1516
"github.com/coder/coder/v2/coderd/database/dbtime"
1617
"github.com/coder/coder/v2/coderd/database/pubsub"
1718
)
1819

1920
type MetadataAPI struct {
20-
AgentFn func(context.Context) (database.WorkspaceAgent, error)
21-
Database database.Store
22-
Pubsub pubsub.Pubsub
23-
Log slog.Logger
21+
AgentFn func(context.Context) (database.WorkspaceAgent, error)
22+
Workspace *CachedWorkspaceFields
23+
Database database.Store
24+
Pubsub pubsub.Pubsub
25+
Log slog.Logger
2426

2527
TimeNowFn func() time.Time // defaults to dbtime.Now()
2628
}
@@ -107,7 +109,19 @@ func (a *MetadataAPI) BatchUpdateMetadata(ctx context.Context, req *agentproto.B
107109
)
108110
}
109111

110-
err = a.Database.UpdateWorkspaceAgentMetadata(ctx, dbUpdate)
112+
// Inject RBAC object into context for dbauthz fast path, avoid having to
113+
// call GetWorkspaceByAgentID on every metadata update.
114+
rbacCtx := ctx
115+
if dbws, ok := a.Workspace.AsWorkspaceIdentity(); ok {
116+
rbacCtx, err = dbauthz.WithWorkspaceRBAC(ctx, dbws.RBACObject())
117+
if err != nil {
118+
// Don't error level log here, will exit the function. We want to fall back to GetWorkspaceByAgentID.
119+
//nolint:gocritic
120+
a.Log.Debug(ctx, "Cached workspace was present but RBAC object was invalid", slog.F("err", err))
121+
}
122+
}
123+
124+
err = a.Database.UpdateWorkspaceAgentMetadata(rbacCtx, dbUpdate)
111125
if err != nil {
112126
return nil, xerrors.Errorf("update workspace agent metadata in database: %w", err)
113127
}

0 commit comments

Comments
 (0)