Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion provisioner/terraform/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,9 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l
<-doneErr
}()

endStage := e.timings.startStage(database.ProvisionerJobTimingStagePlan)
err := e.execWriteOutput(ctx, killCtx, args, env, outWriter, errWriter)
endStage(err)
if err != nil {
return nil, xerrors.Errorf("terraform plan: %w", err)
}
Expand Down Expand Up @@ -596,10 +598,15 @@ func (e *executor) apply(
<-doneErr
}()

// `terraform apply`
endStage := e.timings.startStage(database.ProvisionerJobTimingStageApply)
err := e.execWriteOutput(ctx, killCtx, args, env, outWriter, errWriter)
endStage(err)
if err != nil {
return nil, xerrors.Errorf("terraform apply: %w", err)
}

// `terraform show` & `terraform graph`
state, err := e.stateResources(ctx, killCtx)
if err != nil {
return nil, err
Expand All @@ -610,12 +617,13 @@ func (e *executor) apply(
return nil, xerrors.Errorf("read statefile %q: %w", statefilePath, err)
}

agg := e.timings.aggregate()
return &proto.ApplyComplete{
Parameters: state.Parameters,
Resources: state.Resources,
ExternalAuthProviders: state.ExternalAuthProviders,
State: stateContent,
Timings: e.timings.aggregate(),
Timings: agg,
AiTasks: state.AITasks,
}, nil
}
Expand Down
27 changes: 0 additions & 27 deletions provisioner/terraform/inittimings.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,30 +137,3 @@ func (t *timingAggregator) finishPrevious(ts time.Time, s *timingSpan) {

t.lookupMu.Unlock()
}

// mergeInitTimings merges manual init timings with existing timings that are
// sourced by the logs. This is done because prior to Terraform v1.9, init logs
// did not have a `-json` formatting option.
// So before v1.9, the init stage is manually timed outside the `terraform init`.
// After v1.9, the init stage is timed via logs.
func mergeInitTimings(manualInit []*proto.Timing, existing []*proto.Timing) []*proto.Timing {
initFailed := slices.ContainsFunc(existing, func(timing *proto.Timing) bool {
return timing.State == proto.TimingState_FAILED
})

if initFailed {
// The init logs do not provide enough information for failed init timings.
// So use the manual timings in this case.
return append(manualInit, existing...)
}

hasInitStage := slices.ContainsFunc(existing, func(timing *proto.Timing) bool {
return timing.Stage == string(database.ProvisionerJobTimingStageInit)
})

if hasInitStage {
return existing
}

return append(manualInit, existing...)
}
9 changes: 3 additions & 6 deletions provisioner/terraform/provision.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,11 @@ func (s *server) Plan(
// The JSON output of `terraform init` doesn't include discrete fields for capturing timings of each plugin,
// so we capture the whole init process.
initTimings := newTimingAggregator(database.ProvisionerJobTimingStageInit)
initTimings.ingest(createInitTimingsEvent(timingInitStart))
endStage := initTimings.startStage(database.ProvisionerJobTimingStageInit)

err = e.init(ctx, killCtx, sess)
endStage(err)
if err != nil {
initTimings.ingest(createInitTimingsEvent(timingInitErrored))

s.logger.Debug(ctx, "init failed", slog.Error(err))

// Special handling for "text file busy" c.f. https://github.com/coder/coder/issues/14726
Expand Down Expand Up @@ -148,8 +147,6 @@ func (s *server) Plan(
s.logger.Error(ctx, "failed to get modules from disk", slog.Error(err))
}

initTimings.ingest(createInitTimingsEvent(timingInitComplete))

s.logger.Debug(ctx, "ran initialization")

env, err := provisionEnv(sess.Config, request.Metadata, request.PreviousParameterValues, request.RichParameterValues, request.ExternalAuthProviders)
Expand All @@ -170,7 +167,7 @@ func (s *server) Plan(

// Prepend init timings since they occur prior to plan timings.
// Order is irrelevant; this is merely indicative.
resp.Timings = mergeInitTimings(initTimings.aggregate(), resp.Timings)
resp.Timings = append(initTimings.aggregate(), resp.Timings...) // mergeInitTimings(initTimings.aggregate(), resp.Timings)
resp.Modules = modules
return resp
}
Expand Down
59 changes: 40 additions & 19 deletions provisioner/terraform/timings.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ type timingKind string
// Copied from https://github.com/hashicorp/terraform/blob/01c0480e77263933b2b086dc8d600a69f80fad2d/internal/command/jsonformat/renderer.go
// We cannot reference these because they're in an internal package.
const (
// Stage markers are used to denote the beginning and end of stages. Without
// these, only discrete events within stages can be measured, which may omit
// setup/teardown time or other unmeasured overhead.
timingStageStart timingKind = "stage_start"
timingStageEnd timingKind = "stage_end"
timingStageError timingKind = "stage_error"

timingApplyStart timingKind = "apply_start"
timingApplyProgress timingKind = "apply_progress"
timingApplyComplete timingKind = "apply_complete"
Expand All @@ -37,9 +44,6 @@ const (
timingResourceDrift timingKind = "resource_drift"
timingVersion timingKind = "version"
// These are not part of message_types, but we want to track init/graph timings as well.
timingInitStart timingKind = "init_start"
timingInitComplete timingKind = "init_complete"
timingInitErrored timingKind = "init_errored"
timingGraphStart timingKind = "graph_start"
timingGraphComplete timingKind = "graph_complete"
timingGraphErrored timingKind = "graph_errored"
Expand Down Expand Up @@ -109,13 +113,13 @@ func (t *timingAggregator) ingest(ts time.Time, s *timingSpan) {
ts = dbtime.Time(ts.UTC())

switch s.kind {
case timingApplyStart, timingProvisionStart, timingRefreshStart, timingInitStart, timingGraphStart:
case timingApplyStart, timingProvisionStart, timingRefreshStart, timingGraphStart, timingStageStart:
s.start = ts
s.state = proto.TimingState_STARTED
case timingApplyComplete, timingProvisionComplete, timingRefreshComplete, timingInitComplete, timingGraphComplete:
case timingApplyComplete, timingProvisionComplete, timingRefreshComplete, timingGraphComplete, timingStageEnd:
s.end = ts
s.state = proto.TimingState_COMPLETED
case timingApplyErrored, timingProvisionErrored, timingInitErrored, timingGraphErrored:
case timingApplyErrored, timingProvisionErrored, timingGraphErrored, timingStageError:
s.end = ts
s.state = proto.TimingState_FAILED
case timingInitOutput:
Expand Down Expand Up @@ -176,8 +180,35 @@ func (t *timingAggregator) aggregate() []*proto.Timing {
return out
}

// startStage denotes the beginning of a stage and returns a function which
// should be called to mark the end of the stage. This is used to measure a
// stage's total duration across all it's discrete events and unmeasured
// overhead/events.
func (t *timingAggregator) startStage(stage database.ProvisionerJobTimingStage) (end func(err error)) {
ts := timingSpan{
kind: timingStageStart,
stage: stage,
resource: "coder_stage_" + string(stage),
action: "terraform",
provider: "coder",
}
endTs := ts
t.ingest(dbtime.Now(), &ts)

return func(err error) {
endTs.kind = timingStageEnd
if err != nil {
endTs.kind = timingStageError
}
t.ingest(dbtime.Now(), &endTs)
}
}

func (l timingKind) Valid() bool {
return slices.Contains([]timingKind{
timingStageStart,
timingStageEnd,
timingStageError,
timingApplyStart,
timingApplyProgress,
timingApplyComplete,
Expand All @@ -194,9 +225,6 @@ func (l timingKind) Valid() bool {
timingOutputs,
timingResourceDrift,
timingVersion,
timingInitStart,
timingInitComplete,
timingInitErrored,
timingGraphStart,
timingGraphComplete,
timingGraphErrored,
Expand All @@ -210,7 +238,9 @@ func (l timingKind) Valid() bool {
// if all other attributes are identical.
func (l timingKind) Category() string {
switch l {
case timingInitStart, timingInitComplete, timingInitErrored, timingInitOutput:
case timingStageStart, timingStageEnd, timingStageError:
return "stage"
case timingInitOutput:
return "init"
case timingGraphStart, timingGraphComplete, timingGraphErrored:
return "graph"
Expand Down Expand Up @@ -252,15 +282,6 @@ func (e *timingSpan) toProto() *proto.Timing {
}
}

func createInitTimingsEvent(event timingKind) (time.Time, *timingSpan) {
return dbtime.Now(), &timingSpan{
kind: event,
action: "initializing terraform",
provider: "terraform",
resource: "init",
}
}

func createGraphTimingsEvent(event timingKind) (time.Time, *timingSpan) {
return dbtime.Now(), &timingSpan{
kind: event,
Expand Down
15 changes: 15 additions & 0 deletions provisioner/terraform/timings_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,18 @@ func printTimings(t *testing.T, timings []*proto.Timing) {
terraform_internal.PrintTiming(t, a)
}
}

func TestTimingStages(t *testing.T) {
t.Parallel()

agg := &timingAggregator{
stage: database.ProvisionerJobTimingStageApply,
stateLookup: make(map[uint64]*timingSpan),
}

end := agg.startStage(database.ProvisionerJobTimingStageApply)
end(nil)

evts := agg.aggregate()
require.Len(t, evts, 1)
}
8 changes: 8 additions & 0 deletions provisioner/terraform/timings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import (
"context"
"os"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/require"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/util/slice"
terraform_internal "github.com/coder/coder/v2/provisioner/terraform/internal"
"github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/coder/v2/testutil"
Expand Down Expand Up @@ -95,6 +97,12 @@ func TestTimingsFromProvision(t *testing.T) {

// Sort the timings stably to keep reduce flakiness.
terraform_internal.StableSortTimings(t, timings)
// `coder_stage_` timings use `dbtime.Now()`, which makes them hard to compare to
// a static set of expected timings. Filter them out. This test is good for
// testing timings sourced from terraform logs, not internal coder timings.
timings = slice.Filter(timings, func(tim *proto.Timing) bool {
return !strings.HasPrefix(tim.Resource, "coder_stage_")
})

// Then: the received timings should match the expected values below.
// NOTE: These timings have been encoded to JSON format to make the tests more readable.
Expand Down
33 changes: 25 additions & 8 deletions site/src/modules/workspaces/WorkspaceTiming/ResourcesChart.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ export const ResourcesChart: FC<ResourcesChartProps> = ({
const [ticks, scale] = makeTicks(totalTime);
const [filter, setFilter] = useState("");
const visibleTimings = timings.filter(
(t) => !isCoderResource(t.name) && t.name.includes(filter),
// Stage boundaries are also included
(t) =>
(!isCoderResource(t.name) || isStageBoundary(t.name)) &&
t.name.includes(filter),
);
const theme = useTheme();
const legendsByAction = getLegendsByAction(theme);
Expand Down Expand Up @@ -86,20 +89,27 @@ export const ResourcesChart: FC<ResourcesChartProps> = ({
<YAxisSection>
<YAxisHeader>{stage.name} stage</YAxisHeader>
<YAxisLabels>
{visibleTimings.map((t) => (
<YAxisLabel key={t.name} id={encodeURIComponent(t.name)}>
{t.name}
</YAxisLabel>
))}
{visibleTimings.map((t) => {
const label = isStageBoundary(t.name)
? "total stage duration"
: t.name;
return (
<YAxisLabel key={label} id={encodeURIComponent(t.name)}>
{label}
</YAxisLabel>
);
})}
</YAxisLabels>
</YAxisSection>
</YAxis>

<XAxis ticks={ticks} scale={scale}>
<XAxisSection>
{visibleTimings.map((t) => {
const stageBoundary = isStageBoundary(t.name);
const duration = calcDuration(t.range);
const legend = legendsByAction[t.action] ?? { label: t.action };
const label = stageBoundary ? "total stage duration" : t.name;

return (
<XAxisRow
Expand All @@ -109,8 +119,11 @@ export const ResourcesChart: FC<ResourcesChartProps> = ({
<Tooltip
title={
<>
<TooltipTitle>{t.name}</TooltipTitle>
<TooltipLink to="">view template</TooltipLink>
<TooltipTitle>{label}</TooltipTitle>
{/* Stage boundaries should not have these links */}
{!stageBoundary && (
<TooltipLink to="">view template</TooltipLink>
)}
</>
}
>
Expand All @@ -132,6 +145,10 @@ export const ResourcesChart: FC<ResourcesChartProps> = ({
);
};

export const isStageBoundary = (resource: string) => {
return resource.startsWith("coder_stage_");
};

export const isCoderResource = (resource: string) => {
return (
resource.startsWith("data.coder") ||
Expand Down
13 changes: 12 additions & 1 deletion site/src/modules/workspaces/WorkspaceTiming/WorkspaceTimings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ import {
mergeTimeRanges,
type TimeRange,
} from "./Chart/utils";
import { isCoderResource, ResourcesChart } from "./ResourcesChart";
import {
isCoderResource,
isStageBoundary,
ResourcesChart,
} from "./ResourcesChart";
import { ScriptsChart } from "./ScriptsChart";
import {
agentStages,
Expand Down Expand Up @@ -140,6 +144,13 @@ export const WorkspaceTimings: FC<WorkspaceTimingsProps> = ({
// user and would add noise.
const visibleResources = stageTimings.filter((t) => {
const isProvisionerTiming = "resource" in t;

// StageBoundaries are being drawn on the total timeline.
// Do not show them as discrete resources inside the stage view.
if (isProvisionerTiming && isStageBoundary(t.resource)) {
return false;
}

return isProvisionerTiming
? !isCoderResource(t.resource)
: true;
Expand Down
24 changes: 21 additions & 3 deletions site/src/modules/workspaces/WorkspaceTiming/storybookData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,18 @@ export const WorkspaceTimingsResponse: WorkspaceBuildTimings = {
started_at: "2024-10-14T11:30:38.582305Z",
ended_at: "2024-10-14T11:30:47.707708Z",
stage: "init",
source: "terraform",
action: "initializing terraform",
resource: "state file",
source: "coder",
action: "terraform",
resource: "coder_stage_init",
},
{
job_id: "86fd4143-d95f-4602-b464-1149ede62269",
started_at: "2024-10-14T11:30:48.105148Z",
ended_at: "2024-10-14T11:30:49.911366Z",
stage: "plan",
source: "coder",
action: "terraform",
resource: "coder_stage_plan",
},
{
job_id: "86fd4143-d95f-4602-b464-1149ede62269",
Expand Down Expand Up @@ -310,6 +319,15 @@ export const WorkspaceTimingsResponse: WorkspaceBuildTimings = {
action: "building terraform dependency graph",
resource: "state file",
},
{
job_id: "86fd4143-d95f-4602-b464-1149ede62269",
started_at: "2024-10-14T11:30:50.161398Z",
ended_at: "2024-10-14T11:30:53.993767Z",
stage: "apply",
source: "coder",
action: "terraform",
resource: "coder_stage_apply",
},
{
job_id: "86fd4143-d95f-4602-b464-1149ede62269",
started_at: "2024-10-14T11:30:50.861398Z",
Expand Down
Loading