Skip to content

Commit acd9bed

Browse files
author
Callum Styan
committed
feat: add Prometheus metrics for render cache observability
Adds cache hit, miss, and size metrics to track render cache performance and to monitor how effectively the cache reduces expensive Terraform parsing operations.

Metrics added:
- coderd_prebuilds_render_cache_hits_total: Counter for cache hits
- coderd_prebuilds_render_cache_misses_total: Counter for cache misses
- coderd_prebuilds_render_cache_size_entries: Gauge for current cache size

The metrics are optional and are only created when a Prometheus registerer is provided to the reconciler.
1 parent 72d711d commit acd9bed

File tree

3 files changed

+177
-1
lines changed

3 files changed

+177
-1
lines changed

coderd/dynamicparameters/rendercache.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"sync"
88

99
"github.com/google/uuid"
10+
"github.com/prometheus/client_golang/prometheus"
1011

1112
"github.com/coder/preview"
1213
)
@@ -16,6 +17,11 @@ import (
1617
type RenderCache struct {
1718
mu sync.RWMutex
1819
entries map[cacheKey]*preview.Output // cached outputs, looked up by cacheKey
20+
21+
// Metrics (optional). Any of these may be nil; get and put check for nil before recording.
22+
cacheHits prometheus.Counter // incremented by get when the key is present
23+
cacheMisses prometheus.Counter // incremented by get when the key is absent
24+
cacheSize prometheus.Gauge // set by put to len(entries) after storing an entry
1925
}
2026

2127
type cacheKey struct {
@@ -31,6 +37,16 @@ func NewRenderCache() *RenderCache {
3137
}
3238
}
3339

40+
// NewRenderCacheWithMetrics creates a new, empty render cache that reports to
// the supplied Prometheus metrics.
//
// cacheHits and cacheMisses are incremented by get depending on whether the
// requested key is present; cacheSize is set by put to the number of entries
// in the cache. The metrics are stored as given and are not registered here.
// get and put skip any metric that is nil.
41+
func NewRenderCacheWithMetrics(cacheHits, cacheMisses prometheus.Counter, cacheSize prometheus.Gauge) *RenderCache {
42+
return &RenderCache{
43+
entries: make(map[cacheKey]*preview.Output),
44+
cacheHits: cacheHits,
45+
cacheMisses: cacheMisses,
46+
cacheSize: cacheSize,
47+
}
48+
}
49+
3450
// NewRenderCacheForTest creates a new render cache for testing purposes.
3551
func NewRenderCacheForTest() *RenderCache {
3652
return NewRenderCache()
@@ -42,6 +58,18 @@ func (c *RenderCache) get(templateVersionID, ownerID uuid.UUID, parameters map[s
4258
defer c.mu.RUnlock()
4359

4460
output, ok := c.entries[key]
61+
62+
// Record metrics
63+
if ok {
64+
if c.cacheHits != nil {
65+
c.cacheHits.Inc()
66+
}
67+
} else {
68+
if c.cacheMisses != nil {
69+
c.cacheMisses.Inc()
70+
}
71+
}
72+
4573
return output, ok
4674
}
4775

@@ -51,6 +79,11 @@ func (c *RenderCache) put(templateVersionID, ownerID uuid.UUID, parameters map[s
5179
defer c.mu.Unlock()
5280

5381
c.entries[key] = output
82+
83+
// Update cache size metric
84+
if c.cacheSize != nil {
85+
c.cacheSize.Set(float64(len(c.entries)))
86+
}
5487
}
5588

5689
func (c *RenderCache) makeKey(templateVersionID, ownerID uuid.UUID, parameters map[string]string) cacheKey {

coderd/dynamicparameters/rendercache_test.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"testing"
55

66
"github.com/google/uuid"
7+
"github.com/prometheus/client_golang/prometheus"
8+
dto "github.com/prometheus/client_model/go"
79
"github.com/stretchr/testify/require"
810

911
"github.com/coder/preview"
@@ -147,3 +149,121 @@ func TestRenderCache_PrebuildScenario(t *testing.T) {
147149

148150
// All three prebuilds shared the same cache entry
149151
}
152+
153+
// TestRenderCache_Metrics verifies that a cache built with
// NewRenderCacheWithMetrics records hits and misses on get and updates the
// size gauge on put. It uses the in-package testCounter and testGauge stand-ins
// so the recorded values can be read directly from their value fields.
func TestRenderCache_Metrics(t *testing.T) {
154+
t.Parallel()
155+
156+
// Create test metrics
157+
cacheHits := &testCounter{}
158+
cacheMisses := &testCounter{}
159+
cacheSize := &testGauge{}
160+
161+
cache := NewRenderCacheWithMetrics(cacheHits, cacheMisses, cacheSize)
162+
templateVersionID := uuid.New()
163+
ownerID := uuid.New()
164+
params := map[string]string{"region": "us-west-2"}
165+
166+
// Initially: 0 hits, 0 misses, 0 size
167+
require.Equal(t, float64(0), cacheHits.value, "initial hits should be 0")
168+
require.Equal(t, float64(0), cacheMisses.value, "initial misses should be 0")
169+
require.Equal(t, float64(0), cacheSize.value, "initial size should be 0")
170+
171+
// First get - should be a miss
172+
_, ok := cache.get(templateVersionID, ownerID, params)
173+
require.False(t, ok)
174+
require.Equal(t, float64(0), cacheHits.value, "hits should still be 0")
175+
require.Equal(t, float64(1), cacheMisses.value, "misses should be 1")
176+
require.Equal(t, float64(0), cacheSize.value, "size should still be 0")
177+
178+
// Put an entry
179+
output := &preview.Output{}
180+
cache.put(templateVersionID, ownerID, params, output)
181+
require.Equal(t, float64(1), cacheSize.value, "size should be 1 after put")
182+
183+
// Second get - should be a hit
184+
_, ok = cache.get(templateVersionID, ownerID, params)
185+
require.True(t, ok)
186+
require.Equal(t, float64(1), cacheHits.value, "hits should be 1")
187+
require.Equal(t, float64(1), cacheMisses.value, "misses should still be 1")
188+
require.Equal(t, float64(1), cacheSize.value, "size should still be 1")
189+
190+
// Third get - another hit
191+
_, ok = cache.get(templateVersionID, ownerID, params)
192+
require.True(t, ok)
193+
require.Equal(t, float64(2), cacheHits.value, "hits should be 2")
194+
require.Equal(t, float64(1), cacheMisses.value, "misses should still be 1")
195+
196+
// Put another entry with different params
197+
params2 := map[string]string{"region": "us-east-1"}
198+
cache.put(templateVersionID, ownerID, params2, output)
199+
require.Equal(t, float64(2), cacheSize.value, "size should be 2 after second put")
200+
201+
// Get with different params - should be a hit
202+
_, ok = cache.get(templateVersionID, ownerID, params2)
203+
require.True(t, ok)
204+
require.Equal(t, float64(3), cacheHits.value, "hits should be 3")
205+
require.Equal(t, float64(1), cacheMisses.value, "misses should still be 1")
206+
}
207+
208+
// Test implementations of prometheus interfaces

// testCounter is a minimal counter stand-in that TestRenderCache_Metrics
// passes to NewRenderCacheWithMetrics as a prometheus.Counter. It keeps its
// total in a plain float64 field with no synchronization, so the test can
// read it directly.
209+
type testCounter struct {
210+
value float64 // running total, read directly by test assertions
211+
}
212+
213+
// Inc adds one to the recorded value.
func (c *testCounter) Inc() {
214+
c.value++
215+
}
216+
217+
// Add adds v to the recorded value. No sign check is performed on v.
func (c *testCounter) Add(v float64) {
218+
c.value += v
219+
}
220+
221+
// Desc is a stub that always returns nil.
func (c *testCounter) Desc() *prometheus.Desc {
222+
return nil
223+
}
224+
225+
// Write is a stub; it ignores its argument and always returns nil.
func (c *testCounter) Write(*dto.Metric) error {
226+
return nil
227+
}
228+
229+
// Describe is a no-op stub; nothing is sent on the channel.
func (c *testCounter) Describe(chan<- *prometheus.Desc) {}
230+
231+
// Collect is a no-op stub; nothing is sent on the channel.
func (c *testCounter) Collect(chan<- prometheus.Metric) {}
232+
233+
// testGauge is a minimal gauge stand-in that TestRenderCache_Metrics passes to
// NewRenderCacheWithMetrics as a prometheus.Gauge. It keeps the current value
// in a plain float64 field with no synchronization, so the test can read it
// directly.
type testGauge struct {
234+
value float64 // current gauge value, read directly by test assertions
235+
}
236+
237+
// Set replaces the recorded value with v.
func (g *testGauge) Set(v float64) {
238+
g.value = v
239+
}
240+
241+
// Inc adds one to the recorded value.
func (g *testGauge) Inc() {
242+
g.value++
243+
}
244+
245+
// Dec subtracts one from the recorded value.
func (g *testGauge) Dec() {
246+
g.value--
247+
}
248+
249+
// Add adds v to the recorded value.
func (g *testGauge) Add(v float64) {
250+
g.value += v
251+
}
252+
253+
// Sub subtracts v from the recorded value.
func (g *testGauge) Sub(v float64) {
254+
g.value -= v
255+
}
256+
257+
// SetToCurrentTime is a no-op stub; the recorded value is left unchanged.
func (g *testGauge) SetToCurrentTime() {}
258+
259+
// Desc is a stub that always returns nil.
func (g *testGauge) Desc() *prometheus.Desc {
260+
return nil
261+
}
262+
263+
// Write is a stub; it ignores its argument and always returns nil.
func (g *testGauge) Write(*dto.Metric) error {
264+
return nil
265+
}
266+
267+
// Describe is a no-op stub; nothing is sent on the channel.
func (g *testGauge) Describe(chan<- *prometheus.Desc) {}
268+
269+
// Collect is a no-op stub; nothing is sent on the channel.
func (g *testGauge) Collect(chan<- prometheus.Metric) {}

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ func NewStoreReconciler(store database.Store,
106106
buildUsageChecker: buildUsageChecker,
107107
done: make(chan struct{}, 1),
108108
provisionNotifyCh: make(chan database.ProvisionerJob, 10),
109-
renderCache: dynamicparameters.NewRenderCache(),
110109
}
111110

112111
if registerer != nil {
@@ -124,6 +123,30 @@ func NewStoreReconciler(store database.Store,
124123
Help: "Duration of each prebuilds reconciliation cycle.",
125124
Buckets: prometheus.DefBuckets,
126125
})
126+
127+
// Create metrics for the render cache
128+
renderCacheHits := factory.NewCounter(prometheus.CounterOpts{
129+
Namespace: "coderd",
130+
Subsystem: "prebuilds",
131+
Name: "render_cache_hits_total",
132+
Help: "Total number of render cache hits.",
133+
})
134+
renderCacheMisses := factory.NewCounter(prometheus.CounterOpts{
135+
Namespace: "coderd",
136+
Subsystem: "prebuilds",
137+
Name: "render_cache_misses_total",
138+
Help: "Total number of render cache misses.",
139+
})
140+
renderCacheSize := factory.NewGauge(prometheus.GaugeOpts{
141+
Namespace: "coderd",
142+
Subsystem: "prebuilds",
143+
Name: "render_cache_size_entries",
144+
Help: "Current number of entries in the render cache.",
145+
})
146+
147+
reconciler.renderCache = dynamicparameters.NewRenderCacheWithMetrics(renderCacheHits, renderCacheMisses, renderCacheSize)
148+
} else {
149+
reconciler.renderCache = dynamicparameters.NewRenderCache()
127150
}
128151

129152
return reconciler

0 commit comments

Comments
 (0)