Skip to content

Commit df8eeec

Browse files
committed
feat(agent): add boundary log forwarding to coderd
Add a feature that transmits boundary audit logs from workspaces to coderd via the agent API; coderd then re-emits them to stderr in a structured format.

The implementation includes:
- BoundaryLog proto messages and a ReportBoundaryLogs RPC (API v2.7)
- A BoundaryLogProxy server that accepts connections from boundary processes on a Unix socket and forwards logs to coderd
- A server-side handler that formats logs to stderr
- Environment variables CODER_BOUNDARY_LOG_SOCKET and CODER_WORKSPACE_ID, automatically set for all commands in the workspace

Architecture:
- The boundary process connects to the Unix socket at $CODER_BOUNDARY_LOG_SOCKET
- It sends length-prefixed protobuf ReportBoundaryLogsRequest messages
- The agent proxies the messages to coderd via DRPC
- coderd re-emits them to stderr

Log format:
[API] 2025-12-08 20:58:46.093 [warn] boundary: workspace.id=... decision=deny http.method="GET" http.url="..." time="..."
1 parent 4379230 commit df8eeec

File tree

14 files changed

+1749
-2226
lines changed

14 files changed

+1749
-2226
lines changed

agent/agent.go

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/coder/coder/v2/agent/agentscripts"
4444
"github.com/coder/coder/v2/agent/agentsocket"
4545
"github.com/coder/coder/v2/agent/agentssh"
46+
"github.com/coder/coder/v2/agent/boundarylogproxy"
4647
"github.com/coder/coder/v2/agent/proto"
4748
"github.com/coder/coder/v2/agent/proto/resourcesmonitor"
4849
"github.com/coder/coder/v2/agent/reconnectingpty"
@@ -103,8 +104,8 @@ type Options struct {
103104
}
104105

105106
type Client interface {
106-
ConnectRPC26(ctx context.Context) (
107-
proto.DRPCAgentClient26, tailnetproto.DRPCTailnetClient26, error,
107+
ConnectRPC27(ctx context.Context) (
108+
proto.DRPCAgentClient27, tailnetproto.DRPCTailnetClient26, error,
108109
)
109110
tailnet.DERPMapRewriter
110111
agentsdk.RefreshableSessionTokenProvider
@@ -114,6 +115,9 @@ type Agent interface {
114115
HTTPDebug() http.Handler
115116
// TailnetConn may be nil.
116117
TailnetConn() *tailnet.Conn
118+
// BoundaryLogSocketPath returns the path to the Unix socket where boundary
119+
// processes can send audit logs. Returns empty string if not yet available.
120+
BoundaryLogSocketPath() string
117121
io.Closer
118122
}
119123

@@ -275,6 +279,11 @@ type agent struct {
275279

276280
logSender *agentsdk.LogSender
277281

282+
boundaryLogProxy *boundarylogproxy.Server
283+
boundaryLogProxyMu sync.Mutex
284+
boundaryLogAPIMu sync.RWMutex
285+
boundaryLogAPI proto.DRPCAgentClient27
286+
278287
prometheusRegistry *prometheus.Registry
279288
// metrics are prometheus registered metrics that will be collected and
280289
// labeled in Coder with the agent + workspace.
@@ -296,6 +305,17 @@ func (a *agent) TailnetConn() *tailnet.Conn {
296305
return a.network
297306
}
298307

308+
// BoundaryLogSocketPath returns the path to the Unix socket for boundary logs.
309+
// Boundary processes should connect to this socket to send audit logs.
// It returns an empty string while a.boundaryLogProxy is nil, i.e. until
// startBoundaryLogProxy has stored a successfully started proxy.
310+
func (a *agent) BoundaryLogSocketPath() string {
311+
// boundaryLogProxyMu guards a.boundaryLogProxy; startBoundaryLogProxy
// assigns the field while holding the same mutex.
a.boundaryLogProxyMu.Lock()
312+
defer a.boundaryLogProxyMu.Unlock()
313+
if a.boundaryLogProxy == nil {
314+
// No proxy has been stored yet, so there is no socket to advertise.
return ""
315+
}
316+
return a.boundaryLogProxy.SocketPath()
317+
}
318+
299319
func (a *agent) init() {
300320
// pass the "hard" context because we explicitly close the SSH server as part of graceful shutdown.
301321
sshSrv, err := agentssh.NewServer(a.hardCtx, a.logger.Named("ssh-server"), a.prometheusRegistry, a.filesystem, a.execer, &agentssh.Config{
@@ -393,6 +413,39 @@ func (a *agent) initSocketServer() {
393413
a.logger.Debug(a.hardCtx, "socket server started", slog.F("path", a.socketPath))
394414
}
395415

416+
// startBoundaryLogProxy starts the boundary log proxy server which accepts
417+
// connections from boundary processes and forwards audit logs to coderd.
//
// The call is a no-op when a proxy has already been stored on the agent. If
// the proxy fails to start, the error is logged as a warning and
// a.boundaryLogProxy stays nil, so BoundaryLogSocketPath keeps returning ""
// and a later call would attempt the start again.
//
// workspaceID is used only as a field on the "started" log line.
418+
func (a *agent) startBoundaryLogProxy(workspaceID uuid.UUID) {
419+
// Held for the whole function so the nil check, the start, and the
// assignment below happen as one step with respect to BoundaryLogSocketPath.
a.boundaryLogProxyMu.Lock()
420+
defer a.boundaryLogProxyMu.Unlock()
421+
422+
if a.boundaryLogProxy != nil {
423+
// Already started.
424+
return
425+
}
426+
427+
// Create socket path in temp directory.
428+
socketPath := filepath.Join(a.tempDir, "boundary-logs.sock")
429+
430+
// Create the sender function that returns the current API client.
// It reads a.boundaryLogAPI under the read lock on every call, so the proxy
// sees whatever client run() most recently stored rather than a value
// captured here.
// NOTE(review): presumably this yields nil until run() has assigned a
// client, and no visible hunk clears the field when a connection ends —
// confirm the proxy tolerates a nil or stale Sender.
431+
sender := func() boundarylogproxy.Sender {
432+
a.boundaryLogAPIMu.RLock()
433+
defer a.boundaryLogAPIMu.RUnlock()
434+
return a.boundaryLogAPI
435+
}
436+
437+
proxy := boundarylogproxy.NewServer(a.logger, socketPath, sender)
438+
// The proxy is started with the agent's hard context.
if err := proxy.Start(a.hardCtx); err != nil {
439+
// Best-effort: a failed start is logged and the agent carries on without
// the proxy; a.boundaryLogProxy is deliberately left unset.
a.logger.Warn(a.hardCtx, "failed to start boundary log proxy", slog.Error(err))
440+
return
441+
}
442+
443+
// Publish the proxy only after a successful start so BoundaryLogSocketPath
// never reports a socket for a proxy whose Start returned an error.
a.boundaryLogProxy = proxy
444+
a.logger.Info(a.hardCtx, "boundary log proxy started",
445+
slog.F("socket_path", socketPath),
446+
slog.F("workspace_id", workspaceID))
447+
}
448+
396449
// runLoop attempts to start the agent in a retry loop.
397450
// Coder may be offline temporarily, a connection issue
398451
// may be happening, but regardless after the intermittent
@@ -503,7 +556,7 @@ func (t *trySingleflight) Do(key string, fn func()) {
503556
fn()
504557
}
505558

506-
func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
559+
func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
507560
tickerDone := make(chan struct{})
508561
collectDone := make(chan struct{})
509562
ctx, cancel := context.WithCancel(ctx)
@@ -718,7 +771,7 @@ func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient26
718771

719772
// reportLifecycle reports the current lifecycle state once. All state
720773
// changes are reported in order.
721-
func (a *agent) reportLifecycle(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
774+
func (a *agent) reportLifecycle(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
722775
for {
723776
select {
724777
case <-a.lifecycleUpdate:
@@ -798,7 +851,7 @@ func (a *agent) setLifecycle(state codersdk.WorkspaceAgentLifecycle) {
798851
}
799852

800853
// reportConnectionsLoop reports connections to the agent for auditing.
801-
func (a *agent) reportConnectionsLoop(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
854+
func (a *agent) reportConnectionsLoop(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
802855
for {
803856
select {
804857
case <-a.reportConnectionsUpdate:
@@ -929,7 +982,7 @@ func (a *agent) reportConnection(id uuid.UUID, connectionType proto.Connection_T
929982
// fetchServiceBannerLoop fetches the service banner on an interval. It will
930983
// not be fetched immediately; the expectation is that it is primed elsewhere
931984
// (and must be done before the session actually starts).
932-
func (a *agent) fetchServiceBannerLoop(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
985+
func (a *agent) fetchServiceBannerLoop(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
933986
ticker := time.NewTicker(a.announcementBannersRefreshInterval)
934987
defer ticker.Stop()
935988
for {
@@ -964,7 +1017,7 @@ func (a *agent) run() (retErr error) {
9641017
}
9651018

9661019
// ConnectRPC returns the dRPC connection we use for the Agent and Tailnet v2+ APIs
967-
aAPI, tAPI, err := a.client.ConnectRPC26(a.hardCtx)
1020+
aAPI, tAPI, err := a.client.ConnectRPC27(a.hardCtx)
9681021
if err != nil {
9691022
return err
9701023
}
@@ -981,7 +1034,7 @@ func (a *agent) run() (retErr error) {
9811034
connMan := newAPIConnRoutineManager(a.gracefulCtx, a.hardCtx, a.logger, aAPI, tAPI)
9821035

9831036
connMan.startAgentAPI("init notification banners", gracefulShutdownBehaviorStop,
984-
func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1037+
func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
9851038
bannersProto, err := aAPI.GetAnnouncementBanners(ctx, &proto.GetAnnouncementBannersRequest{})
9861039
if err != nil {
9871040
return xerrors.Errorf("fetch service banner: %w", err)
@@ -998,7 +1051,7 @@ func (a *agent) run() (retErr error) {
9981051
// sending logs gets gracefulShutdownBehaviorRemain because we want to send logs generated by
9991052
// shutdown scripts.
10001053
connMan.startAgentAPI("send logs", gracefulShutdownBehaviorRemain,
1001-
func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1054+
func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
10021055
err := a.logSender.SendLoop(ctx, aAPI)
10031056
if xerrors.Is(err, agentsdk.ErrLogLimitExceeded) {
10041057
// we don't want this error to tear down the API connection and propagate to the
@@ -1009,6 +1062,12 @@ func (a *agent) run() (retErr error) {
10091062
return err
10101063
})
10111064

1065+
1066+
// Set the API client for the boundary log proxy so it can forward logs.
1067+
a.boundaryLogAPIMu.Lock()
1068+
a.boundaryLogAPI = aAPI
1069+
a.boundaryLogAPIMu.Unlock()
1070+
10121071
// part of graceful shut down is reporting the final lifecycle states, e.g "ShuttingDown" so the
10131072
// lifecycle reporting has to be via gracefulShutdownBehaviorRemain
10141073
connMan.startAgentAPI("report lifecycle", gracefulShutdownBehaviorRemain, a.reportLifecycle)
@@ -1017,7 +1076,7 @@ func (a *agent) run() (retErr error) {
10171076
connMan.startAgentAPI("report metadata", gracefulShutdownBehaviorStop, a.reportMetadata)
10181077

10191078
// resources monitor can cease as soon as we start gracefully shutting down.
1020-
connMan.startAgentAPI("resources monitor", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1079+
connMan.startAgentAPI("resources monitor", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
10211080
logger := a.logger.Named("resources_monitor")
10221081
clk := quartz.NewReal()
10231082
config, err := aAPI.GetResourcesMonitoringConfiguration(ctx, &proto.GetResourcesMonitoringConfigurationRequest{})
@@ -1064,7 +1123,7 @@ func (a *agent) run() (retErr error) {
10641123
connMan.startAgentAPI("handle manifest", gracefulShutdownBehaviorStop, a.handleManifest(manifestOK))
10651124

10661125
connMan.startAgentAPI("app health reporter", gracefulShutdownBehaviorStop,
1067-
func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1126+
func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
10681127
if err := manifestOK.wait(ctx); err != nil {
10691128
return xerrors.Errorf("no manifest: %w", err)
10701129
}
@@ -1097,7 +1156,7 @@ func (a *agent) run() (retErr error) {
10971156

10981157
connMan.startAgentAPI("fetch service banner loop", gracefulShutdownBehaviorStop, a.fetchServiceBannerLoop)
10991158

1100-
connMan.startAgentAPI("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1159+
connMan.startAgentAPI("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
11011160
if err := networkOK.wait(ctx); err != nil {
11021161
return xerrors.Errorf("no network: %w", err)
11031162
}
@@ -1112,8 +1171,8 @@ func (a *agent) run() (retErr error) {
11121171
}
11131172

11141173
// handleManifest returns a function that fetches and processes the manifest
1115-
func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1116-
return func(ctx context.Context, aAPI proto.DRPCAgentClient26) error {
1174+
func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
1175+
return func(ctx context.Context, aAPI proto.DRPCAgentClient27) error {
11171176
var (
11181177
sentResult = false
11191178
err error
@@ -1183,6 +1242,10 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11831242
if oldManifest == nil {
11841243
a.setLifecycle(codersdk.WorkspaceAgentLifecycleStarting)
11851244

1245+
// Start the boundary log proxy server. This accepts connections from
1246+
// boundary processes and forwards their audit logs to coderd.
1247+
a.startBoundaryLogProxy(manifest.WorkspaceID)
1248+
11861249
// Perform overrides early so that Git auth can work even if users
11871250
// connect to a workspace that is not yet ready. We don't run this
11881251
// concurrently with the startup script to avoid conflicts between
@@ -1276,7 +1339,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
12761339

12771340
func (a *agent) createDevcontainer(
12781341
ctx context.Context,
1279-
aAPI proto.DRPCAgentClient26,
1342+
aAPI proto.DRPCAgentClient27,
12801343
dc codersdk.WorkspaceAgentDevcontainer,
12811344
script codersdk.WorkspaceAgentScript,
12821345
) (err error) {
@@ -1308,8 +1371,8 @@ func (a *agent) createDevcontainer(
13081371

13091372
// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
13101373
// the tailnet using the information in the manifest
1311-
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error {
1312-
return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) {
1374+
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient27) error {
1375+
return func(ctx context.Context, aAPI proto.DRPCAgentClient27) (retErr error) {
13131376
if err := manifestOK.wait(ctx); err != nil {
13141377
return xerrors.Errorf("no manifest: %w", err)
13151378
}
@@ -1398,6 +1461,7 @@ func (a *agent) updateCommandEnv(current []string) (updated []string, err error)
13981461
"CODER_WORKSPACE_NAME": manifest.WorkspaceName,
13991462
"CODER_WORKSPACE_AGENT_NAME": manifest.AgentName,
14001463
"CODER_WORKSPACE_OWNER_NAME": manifest.OwnerName,
1464+
"CODER_WORKSPACE_ID": manifest.WorkspaceID.String(),
14011465

14021466
// Specific Coder subcommands require the agent token exposed!
14031467
"CODER_AGENT_TOKEN": a.client.GetSessionToken(),
@@ -1409,6 +1473,11 @@ func (a *agent) updateCommandEnv(current []string) (updated []string, err error)
14091473
"CS_DISABLE_GETTING_STARTED_OVERRIDE": "true",
14101474
}
14111475

1476+
// Add boundary log socket path if the proxy is running.
1477+
if socketPath := a.BoundaryLogSocketPath(); socketPath != "" {
1478+
envs["CODER_BOUNDARY_LOG_SOCKET"] = socketPath
1479+
}
1480+
14121481
// This adds the ports dialog to code-server that enables
14131482
// proxying a port dynamically.
14141483
// If this is empty string, do not set anything. Code-server auto defaults
@@ -2095,7 +2164,7 @@ const (
20952164

20962165
type apiConnRoutineManager struct {
20972166
logger slog.Logger
2098-
aAPI proto.DRPCAgentClient26
2167+
aAPI proto.DRPCAgentClient27
20992168
tAPI tailnetproto.DRPCTailnetClient24
21002169
eg *errgroup.Group
21012170
stopCtx context.Context
@@ -2104,7 +2173,7 @@ type apiConnRoutineManager struct {
21042173

21052174
func newAPIConnRoutineManager(
21062175
gracefulCtx, hardCtx context.Context, logger slog.Logger,
2107-
aAPI proto.DRPCAgentClient26, tAPI tailnetproto.DRPCTailnetClient24,
2176+
aAPI proto.DRPCAgentClient27, tAPI tailnetproto.DRPCTailnetClient24,
21082177
) *apiConnRoutineManager {
21092178
// routines that remain in operation during graceful shutdown use the remainCtx. They'll still
21102179
// exit if the errgroup hits an error, which usually means a problem with the conn.
@@ -2137,7 +2206,7 @@ func newAPIConnRoutineManager(
21372206
// but for Tailnet.
21382207
func (a *apiConnRoutineManager) startAgentAPI(
21392208
name string, behavior gracefulShutdownBehavior,
2140-
f func(context.Context, proto.DRPCAgentClient26) error,
2209+
f func(context.Context, proto.DRPCAgentClient27) error,
21412210
) {
21422211
logger := a.logger.With(slog.F("name", name))
21432212
var ctx context.Context

agent/agentcontainers/subagent.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ type SubAgentClient interface {
147147
// agent API client.
148148
type subAgentAPIClient struct {
149149
logger slog.Logger
150-
api agentproto.DRPCAgentClient26
150+
api agentproto.DRPCAgentClient27
151151
}
152152

153153
var _ SubAgentClient = (*subAgentAPIClient)(nil)
154154

155-
func NewSubAgentClientFromAPI(logger slog.Logger, agentAPI agentproto.DRPCAgentClient26) SubAgentClient {
155+
func NewSubAgentClientFromAPI(logger slog.Logger, agentAPI agentproto.DRPCAgentClient27) SubAgentClient {
156156
if agentAPI == nil {
157157
panic("developer error: agentAPI cannot be nil")
158158
}

agent/agentcontainers/subagent_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func TestSubAgentClient_CreateWithDisplayApps(t *testing.T) {
8181

8282
agentAPI := agenttest.NewClient(t, logger, uuid.New(), agentsdk.Manifest{}, statsCh, tailnet.NewCoordinator(logger))
8383

84-
agentClient, _, err := agentAPI.ConnectRPC26(ctx)
84+
agentClient, _, err := agentAPI.ConnectRPC27(ctx)
8585
require.NoError(t, err)
8686

8787
subAgentClient := agentcontainers.NewSubAgentClientFromAPI(logger, agentClient)
@@ -245,7 +245,7 @@ func TestSubAgentClient_CreateWithDisplayApps(t *testing.T) {
245245

246246
agentAPI := agenttest.NewClient(t, logger, uuid.New(), agentsdk.Manifest{}, statsCh, tailnet.NewCoordinator(logger))
247247

248-
agentClient, _, err := agentAPI.ConnectRPC26(ctx)
248+
agentClient, _, err := agentAPI.ConnectRPC27(ctx)
249249
require.NoError(t, err)
250250

251251
subAgentClient := agentcontainers.NewSubAgentClientFromAPI(logger, agentClient)

agent/agenttest/client.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ func (c *Client) Close() {
124124
c.derpMapOnce.Do(func() { close(c.derpMapUpdates) })
125125
}
126126

127-
func (c *Client) ConnectRPC26(ctx context.Context) (
128-
agentproto.DRPCAgentClient26, proto.DRPCTailnetClient26, error,
127+
func (c *Client) ConnectRPC27(ctx context.Context) (
128+
agentproto.DRPCAgentClient27, proto.DRPCTailnetClient26, error,
129129
) {
130130
conn, lis := drpcsdk.MemTransportPipe()
131131
c.LastWorkspaceAgent = func() {
@@ -405,6 +405,10 @@ func (f *FakeAgentAPI) ReportConnection(_ context.Context, req *agentproto.Repor
405405
return &emptypb.Empty{}, nil
406406
}
407407

408+
409+
// ReportBoundaryLogs is the fake's handler for the ReportBoundaryLogs RPC.
// It ignores both the context and the request, records nothing, and always
// returns an empty response with a nil error.
func (f *FakeAgentAPI) ReportBoundaryLogs(_ context.Context, _ *agentproto.ReportBoundaryLogsRequest) (*agentproto.ReportBoundaryLogsResponse, error) {
410+
return &agentproto.ReportBoundaryLogsResponse{}, nil
411+
}
408412
func (f *FakeAgentAPI) GetConnectionReports() []*agentproto.ReportConnectionRequest {
409413
f.Lock()
410414
defer f.Unlock()

0 commit comments

Comments
 (0)