Skip to content

Commit 0339e55

Browse files
committed
feat(coderd/database/dbpurge): add retention for audit logs
Add a configurable retention policy for audit logs. The DeleteOldAuditLogs query excludes deprecated connection events (connect, disconnect, open, close), which are handled separately by DeleteOldAuditLogConnectionEvents. Falls back to global retention if audit logs retention is unset. Disabled (0) by default. Depends on #21021. Updates #20743.
1 parent 0fb3b51 commit 0339e55

File tree

8 files changed

+369
-0
lines changed

8 files changed

+369
-0
lines changed

coderd/database/dbauthz/dbauthz.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,13 @@ func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, thresho
17491749
return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold)
17501750
}
17511751

1752+
// DeleteOldAuditLogs is the authorization-checked wrapper around the
// underlying store's DeleteOldAuditLogs. The caller in ctx must be allowed
// policy.ActionDelete on rbac.ResourceSystem; otherwise 0 and the
// authorization error are returned and the store is never called. On success
// the store's (int64, error) result is returned unchanged.
func (q *querier) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
1753+
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
1754+
return 0, err
1755+
}
1756+
return q.db.DeleteOldAuditLogs(ctx, arg)
1757+
}
1758+
17521759
func (q *querier) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
17531760
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
17541761
return 0, err

coderd/database/dbmetrics/querymetrics.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbpurge/dbpurge.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ const (
2828
// Batch size for connection log deletion. Smaller batches prevent long-held
2929
// locks that could impact concurrent database operations.
3030
connectionLogsBatchSize = 1000
31+
// Batch size for audit log deletion.
32+
auditLogsBatchSize = 1000
3133
// Telemetry heartbeats are used to deduplicate events across replicas. We
3234
// don't need to persist heartbeat rows for longer than 24 hours, as they
3335
// are only used for deduplication across replicas. The time needs to be
@@ -130,10 +132,27 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
130132
}
131133
}
132134

135+
var purgedAuditLogs int64
136+
auditLogsRetention := vals.Retention.AuditLogs.Value()
137+
if auditLogsRetention == 0 {
138+
auditLogsRetention = vals.Retention.Global.Value()
139+
}
140+
if auditLogsRetention > 0 {
141+
deleteAuditLogsBefore := start.Add(-auditLogsRetention)
142+
purgedAuditLogs, err = tx.DeleteOldAuditLogs(ctx, database.DeleteOldAuditLogsParams{
143+
BeforeTime: deleteAuditLogsBefore,
144+
LimitCount: auditLogsBatchSize,
145+
})
146+
if err != nil {
147+
return xerrors.Errorf("failed to delete old audit logs: %w", err)
148+
}
149+
}
150+
133151
logger.Debug(ctx, "purged old database entries",
134152
slog.F("expired_api_keys", expiredAPIKeys),
135153
slog.F("aibridge_records", purgedAIBridgeRecords),
136154
slog.F("connection_logs", purgedConnectionLogs),
155+
slog.F("audit_logs", purgedAuditLogs),
137156
slog.F("duration", clk.Since(start)),
138157
)
139158

coderd/database/dbpurge/dbpurge_test.go

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,3 +1144,265 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
11441144
require.NoError(t, err)
11451145
require.Len(t, newToolUsages, 1, "near threshold tool usages should not be deleted")
11461146
}
1147+
1148+
// TestDeleteOldAuditLogs exercises audit log purging through dbpurge.New with
// a mock clock. Each subtest seeds audit logs, triggers one purge tick, and
// then inspects the remaining rows via GetAuditLogsOffset:
//   - RetentionEnabled: AuditLogs retention set; old log removed, recent kept.
//   - RetentionDisabled: AuditLogs retention is 0; old log is kept.
//   - GlobalRetentionFallback: AuditLogs is 0 and Global is set; old log
//     removed, recent kept.
//   - ConnectionEventsNotDeleted: old connect/disconnect/open/close logs are
//     kept while an old create log is removed.
func TestDeleteOldAuditLogs(t *testing.T) {
1149+
t.Parallel()
1150+
1151+
t.Run("RetentionEnabled", func(t *testing.T) {
1152+
t.Parallel()
1153+
1154+
ctx := testutil.Context(t, testutil.WaitShort)
1155+
1156+
clk := quartz.NewMock(t)
1157+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1158+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1159+
// NOTE: despite the names, afterThreshold is the OLDER timestamp (past the
// retention cutoff, expected to be purged) and beforeThreshold is the NEWER
// one (within retention, expected to be kept).
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1160+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1161+
clk.Set(now).MustWait(ctx)
1162+
1163+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1164+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1165+
user := dbgen.User(t, db, database.User{})
1166+
org := dbgen.Organization(t, db, database.Organization{})
1167+
1168+
// Create old audit log (should be deleted)
1169+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1170+
UserID: user.ID,
1171+
OrganizationID: org.ID,
1172+
Time: afterThreshold,
1173+
Action: database.AuditActionCreate,
1174+
ResourceType: database.ResourceTypeWorkspace,
1175+
})
1176+
1177+
// Create recent audit log (should be kept)
1178+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1179+
UserID: user.ID,
1180+
OrganizationID: org.ID,
1181+
Time: beforeThreshold,
1182+
Action: database.AuditActionCreate,
1183+
ResourceType: database.ResourceTypeWorkspace,
1184+
})
1185+
1186+
// Run the purge with configured retention period
1187+
done := awaitDoTick(ctx, t, clk)
1188+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1189+
Retention: codersdk.RetentionConfig{
1190+
AuditLogs: serpent.Duration(retentionPeriod),
1191+
},
1192+
}, clk)
1193+
defer closer.Close()
1194+
testutil.TryReceive(ctx, t, done)
1195+
1196+
// Verify results by querying all audit logs
1197+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1198+
LimitOpt: 100,
1199+
})
1200+
require.NoError(t, err)
1201+
1202+
logIDs := make([]uuid.UUID, len(logs))
1203+
for i, log := range logs {
1204+
logIDs[i] = log.AuditLog.ID
1205+
}
1206+
1207+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted")
1208+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1209+
})
1210+
1211+
t.Run("RetentionDisabled", func(t *testing.T) {
1212+
t.Parallel()
1213+
1214+
ctx := testutil.Context(t, testutil.WaitShort)
1215+
1216+
clk := quartz.NewMock(t)
1217+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1218+
oldTime := now.Add(-365 * 24 * time.Hour) // 1 year ago
1219+
clk.Set(now).MustWait(ctx)
1220+
1221+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1222+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1223+
user := dbgen.User(t, db, database.User{})
1224+
org := dbgen.Organization(t, db, database.Organization{})
1225+
1226+
// Create old audit log (should NOT be deleted when retention is 0)
1227+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1228+
UserID: user.ID,
1229+
OrganizationID: org.ID,
1230+
Time: oldTime,
1231+
Action: database.AuditActionCreate,
1232+
ResourceType: database.ResourceTypeWorkspace,
1233+
})
1234+
1235+
// Run the purge with retention disabled (0)
1236+
done := awaitDoTick(ctx, t, clk)
1237+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1238+
Retention: codersdk.RetentionConfig{
1239+
AuditLogs: serpent.Duration(0), // disabled
1240+
},
1241+
}, clk)
1242+
defer closer.Close()
1243+
testutil.TryReceive(ctx, t, done)
1244+
1245+
// Verify old log is still present
1246+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1247+
LimitOpt: 100,
1248+
})
1249+
require.NoError(t, err)
1250+
1251+
logIDs := make([]uuid.UUID, len(logs))
1252+
for i, log := range logs {
1253+
logIDs[i] = log.AuditLog.ID
1254+
}
1255+
1256+
require.Contains(t, logIDs, oldLog.ID, "old audit log should NOT be deleted when retention is disabled")
1257+
})
1258+
1259+
t.Run("GlobalRetentionFallback", func(t *testing.T) {
1260+
t.Parallel()
1261+
1262+
ctx := testutil.Context(t, testutil.WaitShort)
1263+
1264+
clk := quartz.NewMock(t)
1265+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1266+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1267+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1268+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1269+
clk.Set(now).MustWait(ctx)
1270+
1271+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1272+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1273+
user := dbgen.User(t, db, database.User{})
1274+
org := dbgen.Organization(t, db, database.Organization{})
1275+
1276+
// Create old audit log (should be deleted)
1277+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1278+
UserID: user.ID,
1279+
OrganizationID: org.ID,
1280+
Time: afterThreshold,
1281+
Action: database.AuditActionCreate,
1282+
ResourceType: database.ResourceTypeWorkspace,
1283+
})
1284+
1285+
// Create recent audit log (should be kept)
1286+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1287+
UserID: user.ID,
1288+
OrganizationID: org.ID,
1289+
Time: beforeThreshold,
1290+
Action: database.AuditActionCreate,
1291+
ResourceType: database.ResourceTypeWorkspace,
1292+
})
1293+
1294+
// Run the purge with global retention (audit logs retention is 0, so it falls back)
1295+
done := awaitDoTick(ctx, t, clk)
1296+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1297+
Retention: codersdk.RetentionConfig{
1298+
Global: serpent.Duration(retentionPeriod), // Use global
1299+
AuditLogs: serpent.Duration(0), // Not set, should fall back to global
1300+
},
1301+
}, clk)
1302+
defer closer.Close()
1303+
testutil.TryReceive(ctx, t, done)
1304+
1305+
// Verify results
1306+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1307+
LimitOpt: 100,
1308+
})
1309+
require.NoError(t, err)
1310+
1311+
logIDs := make([]uuid.UUID, len(logs))
1312+
for i, log := range logs {
1313+
logIDs[i] = log.AuditLog.ID
1314+
}
1315+
1316+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted via global retention")
1317+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1318+
})
1319+
1320+
t.Run("ConnectionEventsNotDeleted", func(t *testing.T) {
1321+
t.Parallel()
1322+
1323+
ctx := testutil.Context(t, testutil.WaitShort)
1324+
1325+
clk := quartz.NewMock(t)
1326+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1327+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1328+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1329+
clk.Set(now).MustWait(ctx)
1330+
1331+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1332+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1333+
user := dbgen.User(t, db, database.User{})
1334+
org := dbgen.Organization(t, db, database.Organization{})
1335+
1336+
// Create old connection events (should NOT be deleted by audit logs retention)
1337+
oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1338+
UserID: user.ID,
1339+
OrganizationID: org.ID,
1340+
Time: afterThreshold,
1341+
Action: database.AuditActionConnect,
1342+
ResourceType: database.ResourceTypeWorkspace,
1343+
})
1344+
1345+
oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1346+
UserID: user.ID,
1347+
OrganizationID: org.ID,
1348+
Time: afterThreshold,
1349+
Action: database.AuditActionDisconnect,
1350+
ResourceType: database.ResourceTypeWorkspace,
1351+
})
1352+
1353+
oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{
1354+
UserID: user.ID,
1355+
OrganizationID: org.ID,
1356+
Time: afterThreshold,
1357+
Action: database.AuditActionOpen,
1358+
ResourceType: database.ResourceTypeWorkspace,
1359+
})
1360+
1361+
oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{
1362+
UserID: user.ID,
1363+
OrganizationID: org.ID,
1364+
Time: afterThreshold,
1365+
Action: database.AuditActionClose,
1366+
ResourceType: database.ResourceTypeWorkspace,
1367+
})
1368+
1369+
// Create old non-connection audit log (should be deleted)
1370+
oldCreateLog := dbgen.AuditLog(t, db, database.AuditLog{
1371+
UserID: user.ID,
1372+
OrganizationID: org.ID,
1373+
Time: afterThreshold,
1374+
Action: database.AuditActionCreate,
1375+
ResourceType: database.ResourceTypeWorkspace,
1376+
})
1377+
1378+
// Run the purge with audit logs retention enabled
1379+
done := awaitDoTick(ctx, t, clk)
1380+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1381+
Retention: codersdk.RetentionConfig{
1382+
AuditLogs: serpent.Duration(retentionPeriod),
1383+
},
1384+
}, clk)
1385+
defer closer.Close()
1386+
testutil.TryReceive(ctx, t, done)
1387+
1388+
// Verify results
1389+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1390+
LimitOpt: 100,
1391+
})
1392+
require.NoError(t, err)
1393+
1394+
logIDs := make([]uuid.UUID, len(logs))
1395+
for i, log := range logs {
1396+
logIDs[i] = log.AuditLog.ID
1397+
}
1398+
1399+
// Connection events should NOT be deleted by audit logs retention
1400+
require.Contains(t, logIDs, oldConnectLog.ID, "old connect log should NOT be deleted by audit logs retention")
1401+
require.Contains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should NOT be deleted by audit logs retention")
1402+
require.Contains(t, logIDs, oldOpenLog.ID, "old open log should NOT be deleted by audit logs retention")
1403+
require.Contains(t, logIDs, oldCloseLog.ID, "old close log should NOT be deleted by audit logs retention")
1404+
1405+
// Non-connection event should be deleted
1406+
require.NotContains(t, logIDs, oldCreateLog.ID, "old create log should be deleted by audit logs retention")
1407+
})
1408+
}

coderd/database/querier.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)