diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 234a72de04c50..04f261afaf618 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -9,6 +9,7 @@ import ( "strings" "sync" "sync/atomic" + "testing" "time" "github.com/dustin/go-humanize" @@ -28,11 +29,14 @@ import ( "github.com/coder/coder/v2/coderd/database/provisionerjobs" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/coder/v2/coderd/provisionerdserver" "github.com/coder/coder/v2/coderd/schedule" "github.com/coder/coder/v2/coderd/wsbuilder" "github.com/coder/coder/v2/codersdk" ) +const TestingStaleInterval = time.Second * 5 + // Executor automatically starts or stops workspaces. type Executor struct { ctx context.Context @@ -52,6 +56,9 @@ type Executor struct { experiments codersdk.Experiments metrics executorMetrics + + // Skip provisioner availability check (should only be true for *some* tests) + SkipProvisionerCheck bool } type executorMetrics struct { @@ -131,6 +138,39 @@ func (e *Executor) Run() { }() } +func (e *Executor) hasAvailableProvisioners(ctx context.Context, tx database.Store, ws database.Workspace, templateVersionJob database.ProvisionerJob) (bool, error) { + if e.SkipProvisionerCheck { + return true, nil + } + + // Use a shorter stale interval for tests + staleInterval := provisionerdserver.StaleInterval + if testing.Testing() { + staleInterval = TestingStaleInterval + } + + // Get eligible provisioner daemons for this workspace's template + provisionerDaemons, err := tx.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{ + OrganizationID: ws.OrganizationID, + WantTags: templateVersionJob.Tags, + }) + if err != nil { + return false, xerrors.Errorf("get provisioner daemons: %w", err) + } + + // Check if any provisioners are active (not stale) + now := dbtime.Now() + for _, pd := range provisionerDaemons { + if pd.LastSeenAt.Valid { + age := now.Sub(pd.LastSeenAt.Time) + if age <= staleInterval { + return true, nil + } + } + } + return false, nil +} + func (e *Executor) runOnce(t time.Time) Stats { stats := Stats{ Transitions: make(map[uuid.UUID]database.WorkspaceTransition), @@ -280,6 +320,22 @@ func (e *Executor) runOnce(t time.Time) Stats { return nil } + // Get the template version job to access tags + templateVersionJob, err := tx.GetProvisionerJobByID(e.ctx, activeTemplateVersion.JobID) + if err != nil { + return xerrors.Errorf("get template version job: %w", err) + } + + // Before creating the workspace build, check for available provisioners + hasProvisioners, err := e.hasAvailableProvisioners(e.ctx, tx, ws, templateVersionJob) + if err != nil { + return xerrors.Errorf("check provisioner availability: %w", err) + } + if !hasProvisioners { + log.Warn(e.ctx, "skipping autostart - no available provisioners") + return nil // Skip this workspace + } + if nextTransition != "" { builder := wsbuilder.New(ws, nextTransition, *e.buildUsageChecker.Load()). SetLastWorkspaceBuildInTx(&latestBuild). diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 0229a907cbb2e..0ce0656e58cbf 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -1583,3 +1583,80 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m, testutil.GoleakOptions...) } + +func TestExecutorAutostartSkipsWhenNoProvisionersAvailable(t *testing.T) { + t.Parallel() + + db, ps := dbtestutil.NewDB(t) + var ( + sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") + tickCh = make(chan time.Time) + statsCh = make(chan autobuild.Stats) + ) + + // Create client with provisioner closer + client, provisionerCloser := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{ + AutobuildTicker: tickCh, + IncludeProvisionerDaemon: true, + AutobuildStats: statsCh, + Database: db, + Pubsub: ps, + SkipProvisionerCheck: ptr.Ref(false), + }) + + // Create workspace with autostart enabled + workspace := mustProvisionWorkspace(t, client, func(cwr *codersdk.CreateWorkspaceRequest) { + cwr.AutostartSchedule = ptr.Ref(sched.String()) + }) + + // Stop the workspace while provisioner is available + workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, + codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) + + // Wait for provisioner to be registered + ctx := testutil.Context(t, testutil.WaitShort) + require.Eventually(t, func() bool { + daemons, err := db.GetProvisionerDaemons(ctx) + return err == nil && len(daemons) > 0 + }, testutil.WaitShort, testutil.IntervalFast) + + // Now shut down the provisioner daemon + err := provisionerCloser.Close() + require.NoError(t, err) + + // Debug: check what's in the database + daemons, err := db.GetProvisionerDaemons(ctx) + require.NoError(t, err) + t.Logf("After close: found %d daemons", len(daemons)) + for i, daemon := range daemons { + t.Logf("Daemon %d: ID=%s, Name=%s, LastSeen=%v", i, daemon.ID, daemon.Name, daemon.LastSeenAt) + } + + // Wait for provisioner to become stale (LastSeenAt > StaleInterval ago) + require.Eventually(t, func() bool { + daemons, err := db.GetProvisionerDaemons(ctx) + if err != nil || len(daemons) == 0 { + return false + } + + now := time.Now() + for _, daemon := range daemons { + if daemon.LastSeenAt.Valid { + age := now.Sub(daemon.LastSeenAt.Time) + if age > autobuild.TestingStaleInterval { + return true // Provisioner is now stale + } + } + } + return false + }, testutil.WaitLong, testutil.IntervalMedium) // Use longer timeout since we need to wait for staleness + + // Trigger autobuild + go func() { + tickCh <- sched.Next(workspace.LatestBuild.CreatedAt) + close(tickCh) + }() + + stats := <-statsCh + assert.Len(t, stats.Transitions, 0, "should not create builds when no provisioners available") +} diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 7085068e97ff4..b25618da5b978 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -183,6 +183,7 @@ type Options struct { OIDCConvertKeyCache cryptokeys.SigningKeycache Clock quartz.Clock TelemetryReporter telemetry.Reporter + SkipProvisionerCheck *bool // Pointer so we can detect if it's set } // New constructs a codersdk client connected to an in-memory API instance. @@ -386,6 +387,13 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can options.NotificationsEnqueuer, experiments, ).WithStatsChannel(options.AutobuildStats) + + skipProvisionerCheck := true + if options.SkipProvisionerCheck != nil { + skipProvisionerCheck = *options.SkipProvisionerCheck + } + lifecycleExecutor.SkipProvisionerCheck = skipProvisionerCheck + lifecycleExecutor.Run() jobReaperTicker := time.NewTicker(options.DeploymentValues.JobReaperDetectorInterval.Value())