From 1bbe3bbb6bf7f43dc6de83525ce809bfdbd2be3d Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Mon, 28 Jul 2025 22:16:02 +0000 Subject: [PATCH 1/3] add test that fails since we still trigger autobuilds even if a provisioner is not available Signed-off-by: Callum Styan --- coderd/autobuild/lifecycle_executor_test.go | 44 +++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 0229a907cbb2e..ede428614a352 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -1583,3 +1583,47 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m, testutil.GoleakOptions...) } + +func TestExecutorAutostartSkipsWhenNoProvisionersAvailable(t *testing.T) { + t.Parallel() + + var ( + sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") + tickCh = make(chan time.Time) + statsCh = make(chan autobuild.Stats) + ) + + // Create client with provisioner daemon + client, provisionerCloser := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{ + AutobuildTicker: tickCh, + IncludeProvisionerDaemon: true, + AutobuildStats: statsCh, + }) + + // Create workspace with autostart enabled + workspace := mustProvisionWorkspace(t, client, func(cwr *codersdk.CreateWorkspaceRequest) { + cwr.AutostartSchedule = ptr.Ref(sched.String()) + }) + + // Stop the workspace while provisioner is available + workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, + codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) + + // Now shut down the provisioner daemon + err := provisionerCloser.Close() + require.NoError(t, err) + + // Trigger autobuild after scheduled time + go func() { + tickCh <- sched.Next(workspace.LatestBuild.CreatedAt) + close(tickCh) + }() + + // Wait for executor to run + stats := <-statsCh + require.Len(t, stats.Errors, 0, "should not have errors") + + // Verify workspace is still stopped + workspace = coderdtest.MustWorkspace(t, client, workspace.ID) + require.Equal(t, codersdk.WorkspaceTransitionStop, workspace.LatestBuild.Transition) +} From 858d44a15796b2fc7657963360b5dc2f9b1847a4 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Mon, 28 Jul 2025 22:18:18 +0000 Subject: [PATCH 2/3] add check for available provisioners before we try to do a workspace transition so that we don't accumulate autostart jobs that will eventually have to be cleaned up by the reaper routine Signed-off-by: Callum Styan --- coderd/autobuild/lifecycle_executor.go | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 234a72de04c50..2fb8fc70f33bd 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -26,6 +26,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/provisionerjobs" + "github.com/coder/coder/v2/coderd/provisionerdserver" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/notifications" "github.com/coder/coder/v2/coderd/schedule" @@ -131,6 +132,30 @@ func (e *Executor) Run() { }() } +// Add this function to check for available provisioners +func (e *Executor) hasAvailableProvisioners(ctx context.Context, tx database.Store, ws database.Workspace, templateVersionJob database.ProvisionerJob) (bool, error) { + // Get eligible provisioner daemons for this workspace's template + provisionerDaemons, err := tx.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{ + OrganizationID: ws.OrganizationID, + WantTags: templateVersionJob.Tags, + }) + if err != nil { + return false, xerrors.Errorf("get provisioner daemons: %w", err) + } + + // Check if any provisioners are active (not stale) + now := dbtime.Now() + for _, pd := range provisionerDaemons { + if pd.LastSeenAt.Valid { + age := now.Sub(pd.LastSeenAt.Time) + if age <= provisionerdserver.StaleInterval { + return true, nil + } + } + } + return false, nil +} + func (e *Executor) runOnce(t time.Time) Stats { stats := Stats{ Transitions: make(map[uuid.UUID]database.WorkspaceTransition), @@ -280,6 +305,22 @@ func (e *Executor) runOnce(t time.Time) Stats { return nil } + // Get the template version job to access tags + templateVersionJob, err := tx.GetProvisionerJobByID(e.ctx, activeTemplateVersion.JobID) + if err != nil { + return xerrors.Errorf("get template version job: %w", err) + } + + // Before creating the workspace build, check for available provisioners + hasProvisioners, err := e.hasAvailableProvisioners(e.ctx, tx, ws, templateVersionJob) + if err != nil { + return xerrors.Errorf("check provisioner availability: %w", err) + } + if !hasProvisioners { + log.Warn(e.ctx, "skipping autostart - no available provisioners") + return nil // Skip this workspace + } + if nextTransition != "" { builder := wsbuilder.New(ws, nextTransition, *e.buildUsageChecker.Load()). SetLastWorkspaceBuildInTx(&latestBuild). From 997179432abd12ad2b51fe0af10ca978c1e47a7e Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 29 Jul 2025 00:58:29 +0000 Subject: [PATCH 3/3] fix provisioner check for tests which don't fully set up/wait for the DB/provisioner to be available, so add a way to skip the check Signed-off-by: Callum Styan --- coderd/autobuild/lifecycle_executor.go | 59 ++++++---- coderd/autobuild/lifecycle_executor_test.go | 115 +++++++++++++------- coderd/coderdtest/coderdtest.go | 8 ++ 3 files changed, 119 insertions(+), 63 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 2fb8fc70f33bd..04f261afaf618 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -9,6 +9,7 @@ import ( "strings" "sync" "sync/atomic" + "testing" "time" "github.com/dustin/go-humanize" @@ -26,14 +27,16 @@ import ( "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/provisionerjobs" - "github.com/coder/coder/v2/coderd/provisionerdserver" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/coder/v2/coderd/provisionerdserver" "github.com/coder/coder/v2/coderd/schedule" "github.com/coder/coder/v2/coderd/wsbuilder" "github.com/coder/coder/v2/codersdk" ) +const TestingStaleInterval = time.Second * 5 + // Executor automatically starts or stops workspaces. type Executor struct { ctx context.Context @@ -53,6 +56,9 @@ type Executor struct { experiments codersdk.Experiments metrics executorMetrics + + // Skip provisioner availability check (should only be true for *some* tests) + SkipProvisionerCheck bool } type executorMetrics struct { @@ -132,28 +138,37 @@ func (e *Executor) Run() { }() } -// Add this function to check for available provisioners func (e *Executor) hasAvailableProvisioners(ctx context.Context, tx database.Store, ws database.Workspace, templateVersionJob database.ProvisionerJob) (bool, error) { - // Get eligible provisioner daemons for this workspace's template - provisionerDaemons, err := tx.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{ - OrganizationID: ws.OrganizationID, - WantTags: templateVersionJob.Tags, - }) - if err != nil { - return false, xerrors.Errorf("get provisioner daemons: %w", err) - } - - // Check if any provisioners are active (not stale) - now := dbtime.Now() - for _, pd := range provisionerDaemons { - if pd.LastSeenAt.Valid { - age := now.Sub(pd.LastSeenAt.Time) - if age <= provisionerdserver.StaleInterval { - return true, nil - } - } - } - return false, nil + if e.SkipProvisionerCheck { + return true, nil + } + + // Use a shorter stale interval for tests + staleInterval := provisionerdserver.StaleInterval + if testing.Testing() { + staleInterval = TestingStaleInterval + } + + // Get eligible provisioner daemons for this workspace's template + provisionerDaemons, err := tx.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{ + OrganizationID: ws.OrganizationID, + WantTags: templateVersionJob.Tags, + }) + if err != nil { + return false, xerrors.Errorf("get provisioner daemons: %w", err) + } + + // Check if any provisioners are active (not stale) + now := dbtime.Now() + for _, pd := range provisionerDaemons { + if pd.LastSeenAt.Valid { + age := now.Sub(pd.LastSeenAt.Time) + if age <= staleInterval { + return true, nil + } + } + } + return false, nil } func (e *Executor) runOnce(t time.Time) Stats { diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index ede428614a352..0ce0656e58cbf 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -1585,45 +1585,78 @@ func TestMain(m *testing.M) { } func TestExecutorAutostartSkipsWhenNoProvisionersAvailable(t *testing.T) { - t.Parallel() - - var ( - sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") - tickCh = make(chan time.Time) - statsCh = make(chan autobuild.Stats) - ) - - // Create client with provisioner daemon - client, provisionerCloser := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{ - AutobuildTicker: tickCh, - IncludeProvisionerDaemon: true, - AutobuildStats: statsCh, - }) - - // Create workspace with autostart enabled - workspace := mustProvisionWorkspace(t, client, func(cwr *codersdk.CreateWorkspaceRequest) { - cwr.AutostartSchedule = ptr.Ref(sched.String()) - }) - - // Stop the workspace while provisioner is available - workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, - codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) - - // Now shut down the provisioner daemon - err := provisionerCloser.Close() - require.NoError(t, err) - - // Trigger autobuild after scheduled time - go func() { - tickCh <- sched.Next(workspace.LatestBuild.CreatedAt) - close(tickCh) - }() - - // Wait for executor to run - stats := <-statsCh - require.Len(t, stats.Errors, 0, "should not have errors") - - // Verify workspace is still stopped - workspace = coderdtest.MustWorkspace(t, client, workspace.ID) - require.Equal(t, codersdk.WorkspaceTransitionStop, workspace.LatestBuild.Transition) + t.Parallel() + + db, ps := dbtestutil.NewDB(t) + var ( + sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") + tickCh = make(chan time.Time) + statsCh = make(chan autobuild.Stats) + ) + + // Create client with provisioner closer + client, provisionerCloser := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{ + AutobuildTicker: tickCh, + IncludeProvisionerDaemon: true, + AutobuildStats: statsCh, + Database: db, + Pubsub: ps, + SkipProvisionerCheck: ptr.Ref(false), + }) + + // Create workspace with autostart enabled + workspace := mustProvisionWorkspace(t, client, func(cwr *codersdk.CreateWorkspaceRequest) { + cwr.AutostartSchedule = ptr.Ref(sched.String()) + }) + + // Stop the workspace while provisioner is available + workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, + codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) + + // Wait for provisioner to be registered + ctx := testutil.Context(t, testutil.WaitShort) + require.Eventually(t, func() bool { + daemons, err := db.GetProvisionerDaemons(ctx) + return err == nil && len(daemons) > 0 + }, testutil.WaitShort, testutil.IntervalFast) + + // Now shut down the provisioner daemon + err := provisionerCloser.Close() + require.NoError(t, err) + + // Debug: check what's in the database + daemons, err := db.GetProvisionerDaemons(ctx) + require.NoError(t, err) + t.Logf("After close: found %d daemons", len(daemons)) + for i, daemon := range daemons { + t.Logf("Daemon %d: ID=%s, Name=%s, LastSeen=%v", i, daemon.ID, daemon.Name, daemon.LastSeenAt) + } + + // Wait for provisioner to become stale (LastSeenAt > StaleInterval ago) + require.Eventually(t, func() bool { + daemons, err := db.GetProvisionerDaemons(ctx) + if err != nil || len(daemons) == 0 { + return false + } + + now := time.Now() + for _, daemon := range daemons { + if daemon.LastSeenAt.Valid { + age := now.Sub(daemon.LastSeenAt.Time) + if age > autobuild.TestingStaleInterval { + return true // Provisioner is now stale + } + } + } + return false + }, testutil.WaitLong, testutil.IntervalMedium) // Use longer timeout since we need to wait for staleness + + // Trigger autobuild + go func() { + tickCh <- sched.Next(workspace.LatestBuild.CreatedAt) + close(tickCh) + }() + + stats := <-statsCh + assert.Len(t, stats.Transitions, 0, "should not create builds when no provisioners available") } diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 7085068e97ff4..b25618da5b978 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -183,6 +183,7 @@ type Options struct { OIDCConvertKeyCache cryptokeys.SigningKeycache Clock quartz.Clock TelemetryReporter telemetry.Reporter + SkipProvisionerCheck *bool // Pointer so we can detect if it's set } // New constructs a codersdk client connected to an in-memory API instance. @@ -386,6 +387,13 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can options.NotificationsEnqueuer, experiments, ).WithStatsChannel(options.AutobuildStats) + + skipProvisionerCheck := true + if options.SkipProvisionerCheck != nil { + skipProvisionerCheck = *options.SkipProvisionerCheck + } + lifecycleExecutor.SkipProvisionerCheck = skipProvisionerCheck + lifecycleExecutor.Run() jobReaperTicker := time.NewTicker(options.DeploymentValues.JobReaperDetectorInterval.Value())