Skip to content

Commit aaa1d80

Browse files
committed
feat(coderd/database/dbpurge): add retention for audit logs
Add a configurable retention policy for audit logs. The DeleteOldAuditLogs query excludes deprecated connection events (connect, disconnect, open, close), which are handled separately by DeleteOldAuditLogConnectionEvents. Falls back to the global retention if the audit logs retention is unset. Disabled (0) by default. Depends on #21021. Updates #20743.
1 parent 53f613b commit aaa1d80

File tree

8 files changed

+369
-0
lines changed

8 files changed

+369
-0
lines changed

coderd/database/dbauthz/dbauthz.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,13 @@ func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, thresho
17491749
return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold)
17501750
}
17511751

1752+
func (q *querier) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
1753+
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
1754+
return 0, err
1755+
}
1756+
return q.db.DeleteOldAuditLogs(ctx, arg)
1757+
}
1758+
17521759
func (q *querier) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
17531760
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
17541761
return 0, err

coderd/database/dbmetrics/querymetrics.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbpurge/dbpurge.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ const (
2828
// Batch size for connection log deletion. Smaller batches prevent long-held
2929
// locks that could impact concurrent database operations.
3030
connectionLogsBatchSize = 1000
31+
// Batch size for audit log deletion.
32+
auditLogsBatchSize = 1000
3133
// Telemetry heartbeats are used to deduplicate events across replicas. We
3234
// don't need to persist heartbeat rows for longer than 24 hours, as they
3335
// are only used for deduplication across replicas. The time needs to be
@@ -127,10 +129,27 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
127129
}
128130
}
129131

132+
var purgedAuditLogs int64
133+
auditLogsRetention := vals.Retention.AuditLogs.Value()
134+
if auditLogsRetention == 0 {
135+
auditLogsRetention = vals.Retention.Global.Value()
136+
}
137+
if auditLogsRetention > 0 {
138+
deleteAuditLogsBefore := start.Add(-auditLogsRetention)
139+
purgedAuditLogs, err = tx.DeleteOldAuditLogs(ctx, database.DeleteOldAuditLogsParams{
140+
BeforeTime: deleteAuditLogsBefore,
141+
LimitCount: auditLogsBatchSize,
142+
})
143+
if err != nil {
144+
return xerrors.Errorf("failed to delete old audit logs: %w", err)
145+
}
146+
}
147+
130148
logger.Debug(ctx, "purged old database entries",
131149
slog.F("expired_api_keys", expiredAPIKeys),
132150
slog.F("aibridge_records", purgedAIBridgeRecords),
133151
slog.F("connection_logs", purgedConnectionLogs),
152+
slog.F("audit_logs", purgedAuditLogs),
134153
slog.F("duration", clk.Since(start)),
135154
)
136155

coderd/database/dbpurge/dbpurge_test.go

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,3 +1050,265 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
10501050
require.NoError(t, err)
10511051
require.Len(t, newToolUsages, 1, "near threshold tool usages should not be deleted")
10521052
}
1053+
1054+
func TestDeleteOldAuditLogs(t *testing.T) {
1055+
t.Parallel()
1056+
1057+
t.Run("RetentionEnabled", func(t *testing.T) {
1058+
t.Parallel()
1059+
1060+
ctx := testutil.Context(t, testutil.WaitShort)
1061+
1062+
clk := quartz.NewMock(t)
1063+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1064+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1065+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1066+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1067+
clk.Set(now).MustWait(ctx)
1068+
1069+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1070+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1071+
user := dbgen.User(t, db, database.User{})
1072+
org := dbgen.Organization(t, db, database.Organization{})
1073+
1074+
// Create old audit log (should be deleted)
1075+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1076+
UserID: user.ID,
1077+
OrganizationID: org.ID,
1078+
Time: afterThreshold,
1079+
Action: database.AuditActionCreate,
1080+
ResourceType: database.ResourceTypeWorkspace,
1081+
})
1082+
1083+
// Create recent audit log (should be kept)
1084+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1085+
UserID: user.ID,
1086+
OrganizationID: org.ID,
1087+
Time: beforeThreshold,
1088+
Action: database.AuditActionCreate,
1089+
ResourceType: database.ResourceTypeWorkspace,
1090+
})
1091+
1092+
// Run the purge with configured retention period
1093+
done := awaitDoTick(ctx, t, clk)
1094+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1095+
Retention: codersdk.RetentionConfig{
1096+
AuditLogs: serpent.Duration(retentionPeriod),
1097+
},
1098+
}, clk)
1099+
defer closer.Close()
1100+
testutil.TryReceive(ctx, t, done)
1101+
1102+
// Verify results by querying all audit logs
1103+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1104+
LimitOpt: 100,
1105+
})
1106+
require.NoError(t, err)
1107+
1108+
logIDs := make([]uuid.UUID, len(logs))
1109+
for i, log := range logs {
1110+
logIDs[i] = log.AuditLog.ID
1111+
}
1112+
1113+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted")
1114+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1115+
})
1116+
1117+
t.Run("RetentionDisabled", func(t *testing.T) {
1118+
t.Parallel()
1119+
1120+
ctx := testutil.Context(t, testutil.WaitShort)
1121+
1122+
clk := quartz.NewMock(t)
1123+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1124+
oldTime := now.Add(-365 * 24 * time.Hour) // 1 year ago
1125+
clk.Set(now).MustWait(ctx)
1126+
1127+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1128+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1129+
user := dbgen.User(t, db, database.User{})
1130+
org := dbgen.Organization(t, db, database.Organization{})
1131+
1132+
// Create old audit log (should NOT be deleted when retention is 0)
1133+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1134+
UserID: user.ID,
1135+
OrganizationID: org.ID,
1136+
Time: oldTime,
1137+
Action: database.AuditActionCreate,
1138+
ResourceType: database.ResourceTypeWorkspace,
1139+
})
1140+
1141+
// Run the purge with retention disabled (0)
1142+
done := awaitDoTick(ctx, t, clk)
1143+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1144+
Retention: codersdk.RetentionConfig{
1145+
AuditLogs: serpent.Duration(0), // disabled
1146+
},
1147+
}, clk)
1148+
defer closer.Close()
1149+
testutil.TryReceive(ctx, t, done)
1150+
1151+
// Verify old log is still present
1152+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1153+
LimitOpt: 100,
1154+
})
1155+
require.NoError(t, err)
1156+
1157+
logIDs := make([]uuid.UUID, len(logs))
1158+
for i, log := range logs {
1159+
logIDs[i] = log.AuditLog.ID
1160+
}
1161+
1162+
require.Contains(t, logIDs, oldLog.ID, "old audit log should NOT be deleted when retention is disabled")
1163+
})
1164+
1165+
t.Run("GlobalRetentionFallback", func(t *testing.T) {
1166+
t.Parallel()
1167+
1168+
ctx := testutil.Context(t, testutil.WaitShort)
1169+
1170+
clk := quartz.NewMock(t)
1171+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1172+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1173+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1174+
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
1175+
clk.Set(now).MustWait(ctx)
1176+
1177+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1178+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1179+
user := dbgen.User(t, db, database.User{})
1180+
org := dbgen.Organization(t, db, database.Organization{})
1181+
1182+
// Create old audit log (should be deleted)
1183+
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
1184+
UserID: user.ID,
1185+
OrganizationID: org.ID,
1186+
Time: afterThreshold,
1187+
Action: database.AuditActionCreate,
1188+
ResourceType: database.ResourceTypeWorkspace,
1189+
})
1190+
1191+
// Create recent audit log (should be kept)
1192+
recentLog := dbgen.AuditLog(t, db, database.AuditLog{
1193+
UserID: user.ID,
1194+
OrganizationID: org.ID,
1195+
Time: beforeThreshold,
1196+
Action: database.AuditActionCreate,
1197+
ResourceType: database.ResourceTypeWorkspace,
1198+
})
1199+
1200+
// Run the purge with global retention (audit logs retention is 0, so it falls back)
1201+
done := awaitDoTick(ctx, t, clk)
1202+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1203+
Retention: codersdk.RetentionConfig{
1204+
Global: serpent.Duration(retentionPeriod), // Use global
1205+
AuditLogs: serpent.Duration(0), // Not set, should fall back to global
1206+
},
1207+
}, clk)
1208+
defer closer.Close()
1209+
testutil.TryReceive(ctx, t, done)
1210+
1211+
// Verify results
1212+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1213+
LimitOpt: 100,
1214+
})
1215+
require.NoError(t, err)
1216+
1217+
logIDs := make([]uuid.UUID, len(logs))
1218+
for i, log := range logs {
1219+
logIDs[i] = log.AuditLog.ID
1220+
}
1221+
1222+
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted via global retention")
1223+
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
1224+
})
1225+
1226+
t.Run("ConnectionEventsNotDeleted", func(t *testing.T) {
1227+
t.Parallel()
1228+
1229+
ctx := testutil.Context(t, testutil.WaitShort)
1230+
1231+
clk := quartz.NewMock(t)
1232+
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
1233+
retentionPeriod := 30 * 24 * time.Hour // 30 days
1234+
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
1235+
clk.Set(now).MustWait(ctx)
1236+
1237+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
1238+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
1239+
user := dbgen.User(t, db, database.User{})
1240+
org := dbgen.Organization(t, db, database.Organization{})
1241+
1242+
// Create old connection events (should NOT be deleted by audit logs retention)
1243+
oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1244+
UserID: user.ID,
1245+
OrganizationID: org.ID,
1246+
Time: afterThreshold,
1247+
Action: database.AuditActionConnect,
1248+
ResourceType: database.ResourceTypeWorkspace,
1249+
})
1250+
1251+
oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{
1252+
UserID: user.ID,
1253+
OrganizationID: org.ID,
1254+
Time: afterThreshold,
1255+
Action: database.AuditActionDisconnect,
1256+
ResourceType: database.ResourceTypeWorkspace,
1257+
})
1258+
1259+
oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{
1260+
UserID: user.ID,
1261+
OrganizationID: org.ID,
1262+
Time: afterThreshold,
1263+
Action: database.AuditActionOpen,
1264+
ResourceType: database.ResourceTypeWorkspace,
1265+
})
1266+
1267+
oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{
1268+
UserID: user.ID,
1269+
OrganizationID: org.ID,
1270+
Time: afterThreshold,
1271+
Action: database.AuditActionClose,
1272+
ResourceType: database.ResourceTypeWorkspace,
1273+
})
1274+
1275+
// Create old non-connection audit log (should be deleted)
1276+
oldCreateLog := dbgen.AuditLog(t, db, database.AuditLog{
1277+
UserID: user.ID,
1278+
OrganizationID: org.ID,
1279+
Time: afterThreshold,
1280+
Action: database.AuditActionCreate,
1281+
ResourceType: database.ResourceTypeWorkspace,
1282+
})
1283+
1284+
// Run the purge with audit logs retention enabled
1285+
done := awaitDoTick(ctx, t, clk)
1286+
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
1287+
Retention: codersdk.RetentionConfig{
1288+
AuditLogs: serpent.Duration(retentionPeriod),
1289+
},
1290+
}, clk)
1291+
defer closer.Close()
1292+
testutil.TryReceive(ctx, t, done)
1293+
1294+
// Verify results
1295+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
1296+
LimitOpt: 100,
1297+
})
1298+
require.NoError(t, err)
1299+
1300+
logIDs := make([]uuid.UUID, len(logs))
1301+
for i, log := range logs {
1302+
logIDs[i] = log.AuditLog.ID
1303+
}
1304+
1305+
// Connection events should NOT be deleted by audit logs retention
1306+
require.Contains(t, logIDs, oldConnectLog.ID, "old connect log should NOT be deleted by audit logs retention")
1307+
require.Contains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should NOT be deleted by audit logs retention")
1308+
require.Contains(t, logIDs, oldOpenLog.ID, "old open log should NOT be deleted by audit logs retention")
1309+
require.Contains(t, logIDs, oldCloseLog.ID, "old close log should NOT be deleted by audit logs retention")
1310+
1311+
// Non-connection event should be deleted
1312+
require.NotContains(t, logIDs, oldCreateLog.ID, "old create log should be deleted by audit logs retention")
1313+
})
1314+
}

coderd/database/querier.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)