Skip to content

Commit eea3b10

Browse files
committed
feat(coderd/database/dbpurge): add retention for audit logs
Add a configurable retention policy for audit logs. The DeleteOldAuditLogs query excludes deprecated connection events (connect, disconnect, open, close), which are handled separately by DeleteOldAuditLogConnectionEvents. If the audit logs retention is unset, it falls back to the global retention. Disabled (0) by default. Depends on #21021. Updates #20743.
1 parent 782f1f7 commit eea3b10

File tree

8 files changed

+369
-0
lines changed

8 files changed

+369
-0
lines changed

coderd/database/dbauthz/dbauthz.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,13 @@ func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, thresho
17491749
return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold)
17501750
}
17511751

1752+
func (q *querier) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
1753+
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
1754+
return 0, err
1755+
}
1756+
return q.db.DeleteOldAuditLogs(ctx, arg)
1757+
}
1758+
17521759
func (q *querier) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
17531760
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
17541761
return 0, err

coderd/database/dbmetrics/querymetrics.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbpurge/dbpurge.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ const (
2828
// Batch size for connection log deletion. Smaller batches prevent long-held
2929
// locks that could impact concurrent database operations.
3030
connectionLogsBatchSize = 1000
31+
// Batch size for audit log deletion.
32+
auditLogsBatchSize = 1000
3133
// Telemetry heartbeats are used to deduplicate events across replicas. We
3234
// don't need to persist heartbeat rows for longer than 24 hours, as they
3335
// are only used for deduplication across replicas. The time needs to be
@@ -130,10 +132,27 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
130132
}
131133
}
132134

135+
var purgedAuditLogs int64
136+
auditLogsRetention := vals.Retention.AuditLogs.Value()
137+
if auditLogsRetention == 0 {
138+
auditLogsRetention = vals.Retention.Global.Value()
139+
}
140+
if auditLogsRetention > 0 {
141+
deleteAuditLogsBefore := start.Add(-auditLogsRetention)
142+
purgedAuditLogs, err = tx.DeleteOldAuditLogs(ctx, database.DeleteOldAuditLogsParams{
143+
BeforeTime: deleteAuditLogsBefore,
144+
LimitCount: auditLogsBatchSize,
145+
})
146+
if err != nil {
147+
return xerrors.Errorf("failed to delete old audit logs: %w", err)
148+
}
149+
}
150+
133151
logger.Debug(ctx, "purged old database entries",
134152
slog.F("expired_api_keys", expiredAPIKeys),
135153
slog.F("aibridge_records", purgedAIBridgeRecords),
136154
slog.F("connection_logs", purgedConnectionLogs),
155+
slog.F("audit_logs", purgedAuditLogs),
137156
slog.F("duration", clk.Since(start)),
138157
)
139158

coderd/database/dbpurge/dbpurge_test.go

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,3 +1061,265 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
10611061
require.NoError(t, err)
10621062
require.Len(t, newToolUsages, 1, "near threshold tool usages should not be deleted")
10631063
}
1064+
1065+
func TestDeleteOldAuditLogs(t *testing.T) {
1066+
t.Parallel()
1067+
1068+
t.Run("RetentionEnabled", func(t *testing.T) {
1069+
t.Parallel()
1070+
1071+
ctx := testutil.Context(t, testutil.WaitShort)
1072+
1073+
clk := quartz.NewMock(t)
1074+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1075+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1076+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1077+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1078+
clk.Set(now).MustWait(ctx)
1079+
1080+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1081+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1082+
user := dbgen.User(t, db, database.User{})
1083+
org := dbgen.Organization(t, db, database.Organization{})
1084+
1085+
// Create old audit log (should be deleted)
1086+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1087+
UserID: user.ID,
1088+
OrganizationID: org.ID,
1089+
Time: afterThreshold,
1090+
Action: database.AuditActionCreate,
1091+
ResourceType: database.ResourceTypeWorkspace,
1092+
})
1093+
1094+
// Create recent audit log (should be kept)
1095+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1096+
UserID: user.ID,
1097+
OrganizationID: org.ID,
1098+
Time: beforeThreshold,
1099+
Action: database.AuditActionCreate,
1100+
ResourceType: database.ResourceTypeWorkspace,
1101+
})
1102+
1103+
// Run the purge with configured retention period
1104+
done := awaitDoTick(ctx, t, clk)
1105+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1106+
Retention: codersdk.RetentionConfig{
1107+
AuditLogs: serpent.Duration(retentionPeriod),
1108+
},
1109+
}, clk)
1110+
defer closer.Close()
1111+
testutil.TryReceive(ctx, t, done)
1112+
1113+
// Verify results by querying all audit logs
1114+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1115+
LimitOpt: 100,
1116+
})
1117+
require.NoError(t, err)
1118+
1119+
logIDs := make([]uuid.UUID, len(logs))
1120+
for i, log := range logs {
1121+
logIDs[i] = log.AuditLog.ID
1122+
}
1123+
1124+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted")
1125+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1126+
})
1127+
1128+
t.Run("RetentionDisabled", func(t *testing.T) {
1129+
t.Parallel()
1130+
1131+
ctx := testutil.Context(t, testutil.WaitShort)
1132+
1133+
clk := quartz.NewMock(t)
1134+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1135+
oldTime := now.Add(-365 * 24 * time.Hour) // 1 year ago
1136+
clk.Set(now).MustWait(ctx)
1137+
1138+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1139+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1140+
user := dbgen.User(t, db, database.User{})
1141+
org := dbgen.Organization(t, db, database.Organization{})
1142+
1143+
// Create old audit log (should NOT be deleted when retention is 0)
1144+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1145+
UserID: user.ID,
1146+
OrganizationID: org.ID,
1147+
Time: oldTime,
1148+
Action: database.AuditActionCreate,
1149+
ResourceType: database.ResourceTypeWorkspace,
1150+
})
1151+
1152+
// Run the purge with retention disabled (0)
1153+
done := awaitDoTick(ctx, t, clk)
1154+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1155+
Retention: codersdk.RetentionConfig{
1156+
AuditLogs: serpent.Duration(0), // disabled
1157+
},
1158+
}, clk)
1159+
defer closer.Close()
1160+
testutil.TryReceive(ctx, t, done)
1161+
1162+
// Verify old log is still present
1163+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1164+
LimitOpt: 100,
1165+
})
1166+
require.NoError(t, err)
1167+
1168+
logIDs := make([]uuid.UUID, len(logs))
1169+
for i, log := range logs {
1170+
logIDs[i] = log.AuditLog.ID
1171+
}
1172+
1173+
require.Contains(t, logIDs, oldLog.ID, "old audit log should NOT be deleted when retention is disabled")
1174+
})
1175+
1176+
t.Run("GlobalRetentionFallback", func(t *testing.T) {
1177+
t.Parallel()
1178+
1179+
ctx := testutil.Context(t, testutil.WaitShort)
1180+
1181+
clk := quartz.NewMock(t)
1182+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1183+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1184+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1185+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1186+
clk.Set(now).MustWait(ctx)
1187+
1188+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1189+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1190+
user := dbgen.User(t, db, database.User{})
1191+
org := dbgen.Organization(t, db, database.Organization{})
1192+
1193+
// Create old audit log (should be deleted)
1194+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1195+
UserID: user.ID,
1196+
OrganizationID: org.ID,
1197+
Time: afterThreshold,
1198+
Action: database.AuditActionCreate,
1199+
ResourceType: database.ResourceTypeWorkspace,
1200+
})
1201+
1202+
// Create recent audit log (should be kept)
1203+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1204+
UserID: user.ID,
1205+
OrganizationID: org.ID,
1206+
Time: beforeThreshold,
1207+
Action: database.AuditActionCreate,
1208+
ResourceType: database.ResourceTypeWorkspace,
1209+
})
1210+
1211+
// Run the purge with global retention (audit logs retention is 0, so it falls back)
1212+
done := awaitDoTick(ctx, t, clk)
1213+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1214+
Retention: codersdk.RetentionConfig{
1215+
Global: serpent.Duration(retentionPeriod), // Use global
1216+
AuditLogs: serpent.Duration(0), // Not set, should fall back to global
1217+
},
1218+
}, clk)
1219+
defer closer.Close()
1220+
testutil.TryReceive(ctx, t, done)
1221+
1222+
// Verify results
1223+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1224+
LimitOpt: 100,
1225+
})
1226+
require.NoError(t, err)
1227+
1228+
logIDs := make([]uuid.UUID, len(logs))
1229+
for i, log := range logs {
1230+
logIDs[i] = log.AuditLog.ID
1231+
}
1232+
1233+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted via global retention")
1234+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1235+
})
1236+
1237+
t.Run("ConnectionEventsNotDeleted", func(t *testing.T) {
1238+
t.Parallel()
1239+
1240+
ctx := testutil.Context(t, testutil.WaitShort)
1241+
1242+
clk := quartz.NewMock(t)
1243+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1244+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1245+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1246+
clk.Set(now).MustWait(ctx)
1247+
1248+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1249+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1250+
user := dbgen.User(t, db, database.User{})
1251+
org := dbgen.Organization(t, db, database.Organization{})
1252+
1253+
// Create old connection events (should NOT be deleted by audit logs retention)
1254+
oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1255+
UserID: user.ID,
1256+
OrganizationID: org.ID,
1257+
Time: afterThreshold,
1258+
Action: database.AuditActionConnect,
1259+
ResourceType: database.ResourceTypeWorkspace,
1260+
})
1261+
1262+
oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1263+
UserID: user.ID,
1264+
OrganizationID: org.ID,
1265+
Time: afterThreshold,
1266+
Action: database.AuditActionDisconnect,
1267+
ResourceType: database.ResourceTypeWorkspace,
1268+
})
1269+
1270+
oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{
1271+
UserID: user.ID,
1272+
OrganizationID: org.ID,
1273+
Time: afterThreshold,
1274+
Action: database.AuditActionOpen,
1275+
ResourceType: database.ResourceTypeWorkspace,
1276+
})
1277+
1278+
oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{
1279+
UserID: user.ID,
1280+
OrganizationID: org.ID,
1281+
Time: afterThreshold,
1282+
Action: database.AuditActionClose,
1283+
ResourceType: database.ResourceTypeWorkspace,
1284+
})
1285+
1286+
// Create old non-connection audit log (should be deleted)
1287+
oldCreateLog := dbgen.AuditLog(t, db, database.AuditLog{
1288+
UserID: user.ID,
1289+
OrganizationID: org.ID,
1290+
Time: afterThreshold,
1291+
Action: database.AuditActionCreate,
1292+
ResourceType: database.ResourceTypeWorkspace,
1293+
})
1294+
1295+
// Run the purge with audit logs retention enabled
1296+
done := awaitDoTick(ctx, t, clk)
1297+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1298+
Retention: codersdk.RetentionConfig{
1299+
AuditLogs: serpent.Duration(retentionPeriod),
1300+
},
1301+
}, clk)
1302+
defer closer.Close()
1303+
testutil.TryReceive(ctx, t, done)
1304+
1305+
// Verify results
1306+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1307+
LimitOpt: 100,
1308+
})
1309+
require.NoError(t, err)
1310+
1311+
logIDs := make([]uuid.UUID, len(logs))
1312+
for i, log := range logs {
1313+
logIDs[i] = log.AuditLog.ID
1314+
}
1315+
1316+
// Connection events should NOT be deleted by audit logs retention
1317+
require.Contains(t, logIDs, oldConnectLog.ID, "old connect log should NOT be deleted by audit logs retention")
1318+
require.Contains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should NOT be deleted by audit logs retention")
1319+
require.Contains(t, logIDs, oldOpenLog.ID, "old open log should NOT be deleted by audit logs retention")
1320+
require.Contains(t, logIDs, oldCloseLog.ID, "old close log should NOT be deleted by audit logs retention")
1321+
1322+
// Non-connection event should be deleted
1323+
require.NotContains(t, logIDs, oldCreateLog.ID, "old create log should be deleted by audit logs retention")
1324+
})
1325+
}

coderd/database/querier.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)