Skip to content

Commit 80049c4

Browse files
committed
feat(coderd): add workspace_agent_logs retention configuration
This commit adds a configurable retention policy for workspace agent logs, allowing administrators to control how long agent logs are retained before being purged.

Key changes:
- Add WorkspaceAgentLogs field to RetentionConfig struct
- Add --workspace-agent-logs-retention CLI flag (default: 7d)
- Add CODER_WORKSPACE_AGENT_LOGS_RETENTION environment variable
- Update dbpurge to use the configurable retention (a value of 0 falls back to the 7d default)
- Add tests for custom retention periods
- Update data-retention.md documentation

The retention behavior preserves logs from the latest workspace build regardless of age, ensuring administrators can always debug active workspaces. Only logs from non-latest builds older than the retention period are purged.

Follows the pattern established in the data retention PR stack.
1 parent 7e8a005 commit 80049c4

File tree

10 files changed

+2227
-2010
lines changed

10 files changed

+2227
-2010
lines changed

cli/testdata/coder_server_--help.golden

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,11 @@ that data type.
725725
disable (data is kept indefinitely unless individual settings are
726726
configured).
727727

728+
--workspace-agent-logs-retention duration, $CODER_WORKSPACE_AGENT_LOGS_RETENTION (default: 7d)
729+
How long workspace agent logs are retained. Logs from non-latest
730+
workspace builds are deleted after this period to free up storage
731+
space. Set to 0 to disable automatic deletion of workspace agent logs.
732+
728733
TELEMETRY OPTIONS:
729734
Telemetry is critical to our ability to improve Coder. We strip all personal
730735
information before sending data to our servers. Please only disable telemetry

cli/testdata/server-config.yaml.golden

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,3 +780,8 @@ retention:
780780
# an expired key. Set to 0 to disable automatic deletion of expired keys.
781781
# (default: 7d, type: duration)
782782
api_keys: 168h0m0s
783+
# How long workspace agent logs are retained. Logs from non-latest workspace
784+
# builds are deleted after this period to free up storage space. Set to 0 to
785+
# disable automatic deletion of workspace agent logs.
786+
# (default: 7d, type: duration)
787+
workspace_agent_logs: 168h0m0s

coderd/database/dbpurge/dbpurge.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ import (
1818
)
1919

2020
const (
21-
delay = 10 * time.Minute
22-
maxAgentLogAge = 7 * 24 * time.Hour
21+
delay = 10 * time.Minute
2322
// Connection events are now inserted into the `connection_logs` table.
2423
// We'll slowly remove old connection events from the `audit_logs` table,
2524
// but we won't touch the `connection_logs` table.
@@ -36,6 +35,8 @@ const (
3635
// long enough to cover the maximum interval of a heartbeat event (currently
3736
// 1 hour) plus some buffer.
3837
maxTelemetryHeartbeatAge = 24 * time.Hour
38+
// Default retention period for workspace agent logs, applied when the configured retention is 0.
39+
defaultWorkspaceAgentLogsRetention = 7 * 24 * time.Hour
3940
)
4041

4142
// New creates a new periodically purging database instance.
@@ -67,7 +68,11 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
6768
return nil
6869
}
6970

70-
deleteOldWorkspaceAgentLogsBefore := start.Add(-maxAgentLogAge)
71+
workspaceAgentLogsRetention := vals.Retention.WorkspaceAgentLogs.Value()
72+
if workspaceAgentLogsRetention == 0 {
73+
workspaceAgentLogsRetention = defaultWorkspaceAgentLogsRetention
74+
}
75+
deleteOldWorkspaceAgentLogsBefore := start.Add(-workspaceAgentLogsRetention)
7176
if err := tx.DeleteOldWorkspaceAgentLogs(ctx, deleteOldWorkspaceAgentLogsBefore); err != nil {
7277
return xerrors.Errorf("failed to delete old workspace agent logs: %w", err)
7378
}

coderd/database/dbpurge/dbpurge_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,103 @@ func mustCreateAgentLogs(ctx context.Context, t *testing.T, db database.Store, a
392392
require.NotEmpty(t, agentLogs, "agent logs must be present")
393393
}
394394

395+
//nolint:paralleltest // It uses LockIDDBPurge.
396+
func TestDeleteOldWorkspaceAgentLogsWithCustomRetention(t *testing.T) {
397+
t.Run("CustomRetention30Days", func(t *testing.T) {
398+
ctx := testutil.Context(t, testutil.WaitShort)
399+
clk := quartz.NewMock(t)
400+
now := dbtime.Now()
401+
retentionPeriod := 30 * 24 * time.Hour
402+
threshold := now.Add(-retentionPeriod)
403+
beforeThreshold := threshold.Add(-24 * time.Hour) // 31 days ago
404+
afterThreshold := threshold.Add(24 * time.Hour) // 29 days ago
405+
clk.Set(now).MustWait(ctx)
406+
407+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
408+
org := dbgen.Organization(t, db, database.Organization{})
409+
user := dbgen.User(t, db, database.User{})
410+
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user.ID, OrganizationID: org.ID})
411+
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{OrganizationID: org.ID, CreatedBy: user.ID})
412+
tmpl := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, ActiveVersionID: tv.ID, CreatedBy: user.ID})
413+
414+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
415+
416+
// Workspace with two builds, both before the 30-day threshold.
417+
ws := dbgen.Workspace(t, db, database.WorkspaceTable{Name: "test-ws", OwnerID: user.ID, OrganizationID: org.ID, TemplateID: tmpl.ID})
418+
wb1 := mustCreateWorkspaceBuild(t, db, org, tv, ws.ID, beforeThreshold, 1)
419+
wb2 := mustCreateWorkspaceBuild(t, db, org, tv, ws.ID, beforeThreshold, 2)
420+
agent1 := mustCreateAgent(t, db, wb1)
421+
agent2 := mustCreateAgent(t, db, wb2)
422+
mustCreateAgentLogs(ctx, t, db, agent1, &beforeThreshold, "agent 1 logs should be deleted")
423+
mustCreateAgentLogs(ctx, t, db, agent2, &beforeThreshold, "agent 2 logs should be retained")
424+
425+
// Workspace with build after the 30-day threshold.
426+
wsRecent := dbgen.Workspace(t, db, database.WorkspaceTable{Name: "recent-ws", OwnerID: user.ID, OrganizationID: org.ID, TemplateID: tmpl.ID})
427+
wbRecent := mustCreateWorkspaceBuild(t, db, org, tv, wsRecent.ID, afterThreshold, 1)
428+
agentRecent := mustCreateAgent(t, db, wbRecent)
429+
mustCreateAgentLogs(ctx, t, db, agentRecent, &afterThreshold, "recent agent logs should be retained")
430+
431+
done := awaitDoTick(ctx, t, clk)
432+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
433+
Retention: codersdk.RetentionConfig{
434+
WorkspaceAgentLogs: serpent.Duration(retentionPeriod),
435+
},
436+
}, clk)
437+
defer closer.Close()
438+
testutil.TryReceive(ctx, t, done)
439+
440+
// Agent 1 logs should be deleted (non-latest build, older than 30 days).
441+
assertNoWorkspaceAgentLogs(ctx, t, db, agent1.ID)
442+
// Agent 2 logs should be retained (latest build).
443+
assertWorkspaceAgentLogs(ctx, t, db, agent2.ID, "agent 2 logs should be retained")
444+
// Recent agent logs should be retained (within 30-day threshold).
445+
assertWorkspaceAgentLogs(ctx, t, db, agentRecent.ID, "recent agent logs should be retained")
446+
})
447+
448+
t.Run("RetentionDisabled", func(t *testing.T) {
449+
ctx := testutil.Context(t, testutil.WaitShort)
450+
clk := quartz.NewMock(t)
451+
now := dbtime.Now()
452+
// Very old logs (60 days ago).
453+
veryOld := now.Add(-60 * 24 * time.Hour)
454+
clk.Set(now).MustWait(ctx)
455+
456+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
457+
org := dbgen.Organization(t, db, database.Organization{})
458+
user := dbgen.User(t, db, database.User{})
459+
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user.ID, OrganizationID: org.ID})
460+
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{OrganizationID: org.ID, CreatedBy: user.ID})
461+
tmpl := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, ActiveVersionID: tv.ID, CreatedBy: user.ID})
462+
463+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
464+
465+
// Workspace with old builds.
466+
ws := dbgen.Workspace(t, db, database.WorkspaceTable{Name: "test-ws", OwnerID: user.ID, OrganizationID: org.ID, TemplateID: tmpl.ID})
467+
wb1 := mustCreateWorkspaceBuild(t, db, org, tv, ws.ID, veryOld, 1)
468+
wb2 := mustCreateWorkspaceBuild(t, db, org, tv, ws.ID, veryOld, 2)
469+
agent1 := mustCreateAgent(t, db, wb1)
470+
agent2 := mustCreateAgent(t, db, wb2)
471+
mustCreateAgentLogs(ctx, t, db, agent1, &veryOld, "agent 1 logs should be deleted with default retention")
472+
mustCreateAgentLogs(ctx, t, db, agent2, &veryOld, "agent 2 logs should be retained")
473+
474+
// Note: When retention is set to 0, we fall back to the default 7-day retention.
475+
// The logs are 60 days old, so non-latest build logs will still be deleted.
476+
done := awaitDoTick(ctx, t, clk)
477+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
478+
Retention: codersdk.RetentionConfig{
479+
WorkspaceAgentLogs: serpent.Duration(0), // Falls back to default 7 days
480+
},
481+
}, clk)
482+
defer closer.Close()
483+
testutil.TryReceive(ctx, t, done)
484+
485+
// Agent 1 logs should be deleted (non-latest build, older than default 7 days).
486+
assertNoWorkspaceAgentLogs(ctx, t, db, agent1.ID)
487+
// Agent 2 logs should be retained (latest build).
488+
assertWorkspaceAgentLogs(ctx, t, db, agent2.ID, "agent 2 logs should be retained")
489+
})
490+
}
491+
395492
//nolint:paralleltest // It uses LockIDDBPurge.
396493
func TestDeleteOldProvisionerDaemons(t *testing.T) {
397494
// TODO: must refactor DeleteOldProvisionerDaemons to allow passing in cutoff

codersdk/deployment.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,9 @@ type RetentionConfig struct {
834834
// Keys are only deleted if they have been expired for at least this duration.
835835
// Defaults to 7 days to preserve existing behavior.
836836
APIKeys serpent.Duration `json:"api_keys" typescript:",notnull"`
837+
// WorkspaceAgentLogs controls how long workspace agent logs are retained.
838+
// Defaults to 7 days to preserve existing behavior.
839+
WorkspaceAgentLogs serpent.Duration `json:"workspace_agent_logs" typescript:",notnull"`
837840
}
838841

839842
type NotificationsConfig struct {
@@ -3436,6 +3439,17 @@ Write out the current server config as YAML to stdout.`,
34363439
YAML: "api_keys",
34373440
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
34383441
},
3442+
{
3443+
Name: "Workspace Agent Logs Retention",
3444+
Description: "How long workspace agent logs are retained. Logs from non-latest workspace builds are deleted after this period to free up storage space. Set to 0 to disable automatic deletion of workspace agent logs.",
3445+
Flag: "workspace-agent-logs-retention",
3446+
Env: "CODER_WORKSPACE_AGENT_LOGS_RETENTION",
3447+
Value: &c.Retention.WorkspaceAgentLogs,
3448+
Default: "7d",
3449+
Group: &deploymentGroupRetention,
3450+
YAML: "workspace_agent_logs",
3451+
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
3452+
},
34393453
{
34403454
Name: "Enable Authorization Recordings",
34413455
Description: "All api requests will have a header including all authorization calls made during the request. " +

docs/admin/setup/data-retention.md

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Data Retention
22

33
Coder supports configurable retention policies that automatically purge old
4-
Audit Logs, Connection Logs, and API keys. These policies help manage database
5-
growth by removing records older than a specified duration.
4+
Audit Logs, Connection Logs, Workspace Agent Logs, and API keys. These policies
5+
help manage database growth by removing records older than a specified duration.
66

77
## Overview
88

@@ -25,12 +25,13 @@ a YAML configuration file.
2525

2626
### Settings
2727

28-
| Setting | CLI Flag | Environment Variable | Default | Description |
29-
|-----------------|-------------------------------|-----------------------------------|------------------|--------------------------------------------------------------------------|
30-
| Global | `--global-retention` | `CODER_GLOBAL_RETENTION` | `0` (disabled) | Default retention for all data types. Individual settings override this. |
31-
| Audit Logs | `--audit-logs-retention` | `CODER_AUDIT_LOGS_RETENTION` | `0` (use global) | How long to retain Audit Log entries. |
32-
| Connection Logs | `--connection-logs-retention` | `CODER_CONNECTION_LOGS_RETENTION` | `0` (use global) | How long to retain Connection Log entries. |
33-
| API Keys | `--api-keys-retention` | `CODER_API_KEYS_RETENTION` | `7d` | How long to retain expired API keys. |
28+
| Setting | CLI Flag | Environment Variable | Default | Description |
29+
|----------------------|------------------------------------|----------------------------------------|------------------|--------------------------------------------------------------------------|
30+
| Global | `--global-retention` | `CODER_GLOBAL_RETENTION` | `0` (disabled) | Default retention for all data types. Individual settings override this. |
31+
| Audit Logs | `--audit-logs-retention` | `CODER_AUDIT_LOGS_RETENTION` | `0` (use global) | How long to retain Audit Log entries. |
32+
| Connection Logs | `--connection-logs-retention` | `CODER_CONNECTION_LOGS_RETENTION` | `0` (use global) | How long to retain Connection Log entries. |
33+
| API Keys | `--api-keys-retention` | `CODER_API_KEYS_RETENTION` | `7d` | How long to retain expired API keys. |
34+
| Workspace Agent Logs | `--workspace-agent-logs-retention` | `CODER_WORKSPACE_AGENT_LOGS_RETENTION` | `7d` | How long to retain workspace agent logs. |
3435

3536
### Duration Format
3637

@@ -68,6 +69,7 @@ retention:
6869
audit_logs: 365d
6970
connection_logs: 0s
7071
api_keys: 7d
72+
workspace_agent_logs: 7d
7173
```
7274
7375
## How Retention Works
@@ -103,6 +105,16 @@ ago. Active keys are never deleted by the retention policy.
103105
Keeping expired keys for a short period allows Coder to return a more helpful
104106
error message when users attempt to use an expired key.
105107

108+
### Workspace Agent Logs Behavior
109+
110+
Workspace agent logs are retained based on the retention period, but **logs from
111+
the latest build of each workspace are always retained** regardless of age. This
112+
ensures you can always debug issues with active workspaces.
113+
114+
Only logs from non-latest workspace builds that are older than the retention
115+
period are deleted. Setting `--workspace-agent-logs-retention=7d` keeps all logs
116+
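from the latest build plus logs from previous builds for up to 7 days. Setting the value to `0` does not turn purging off: the purge job treats `0` as unset and applies the 7-day default.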
117+
106118
## Best Practices
107119

108120
### Recommended Starting Configuration
@@ -115,6 +127,7 @@ retention:
115127
audit_logs: 365d
116128
connection_logs: 0s # Use global
117129
api_keys: 7d
130+
workspace_agent_logs: 7d
118131
```
119132

120133
### Compliance Considerations
@@ -160,6 +173,7 @@ retention:
160173
audit_logs: 0s
161174
connection_logs: 0s
162175
api_keys: 0s
176+
workspace_agent_logs: 0s
163177
```
164178

165179
There is no way to disable retention for a specific data type while global

0 commit comments

Comments
 (0)