Skip to content

Commit 6d41bfa

Browse files
authored
fix: improve http connection pooling for smtp notifications (#20605)
This change updates how SMTP notifications are polled during scale tests. Before, each of the ~2,000 pollers created its own http.Client, which opened thousands of short-lived TCP connections. Under heavy load, this ran out of available network ports and caused errors like `connect: cannot assign requested address` Now, all pollers share one HTTP connection pool. This prevents port exhaustion and makes polling faster and more stable. If a network error happens, the poller will now retry instead of stopping, so tests keep running until all notifications are received. The `SMTPRequestTimeout` is now applied per request using a context, instead of being set on the `http.Client`.
1 parent bc4838d commit 6d41bfa

File tree

4 files changed

+30
-6
lines changed

4 files changed

+30
-6
lines changed

cli/exp_scaletest_notifications.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,15 @@ func (r *RootCmd) scaletestNotifications() *serpent.Command {
142142
triggerTimes[id] = make(chan time.Time, 1)
143143
}
144144

145+
smtpHTTPTransport := &http.Transport{
146+
MaxConnsPerHost: 512,
147+
MaxIdleConnsPerHost: 512,
148+
IdleConnTimeout: 60 * time.Second,
149+
}
150+
smtpHTTPClient := &http.Client{
151+
Transport: smtpHTTPTransport,
152+
}
153+
145154
configs := make([]notifications.Config, 0, userCount)
146155
for range templateAdminCount {
147156
config := notifications.Config{
@@ -157,6 +166,7 @@ func (r *RootCmd) scaletestNotifications() *serpent.Command {
157166
Metrics: metrics,
158167
SMTPApiURL: smtpAPIURL,
159168
SMTPRequestTimeout: smtpRequestTimeout,
169+
SMTPHttpClient: smtpHTTPClient,
160170
}
161171
if err := config.Validate(); err != nil {
162172
return xerrors.Errorf("validate config: %w", err)

scaletest/notifications/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package notifications
22

33
import (
4+
"net/http"
45
"sync"
56
"time"
67

@@ -40,6 +41,9 @@ type Config struct {
4041

4142
// SMTPRequestTimeout is the timeout for SMTP requests.
4243
SMTPRequestTimeout time.Duration `json:"smtp_request_timeout"`
44+
45+
// SMTPHttpClient is the HTTP client for SMTP requests.
46+
SMTPHttpClient *http.Client `json:"-"`
4347
}
4448

4549
func (c Config) Validate() error {
@@ -68,6 +72,10 @@ func (c Config) Validate() error {
6872
return xerrors.New("smtp_request_timeout must be set if smtp_api_url is set")
6973
}
7074

75+
if c.SMTPApiURL != "" && c.SMTPHttpClient == nil {
76+
return xerrors.New("smtp_http_client must be set if smtp_api_url is set")
77+
}
78+
7179
if c.DialTimeout <= 0 {
7280
return xerrors.New("dial_timeout must be greater than 0")
7381
}

scaletest/notifications/run.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,15 +298,16 @@ func (r *Runner) watchNotificationsSMTP(ctx context.Context, user codersdk.User,
298298
receivedNotifications := make(map[uuid.UUID]struct{})
299299

300300
apiURL := fmt.Sprintf("%s/messages?email=%s", r.cfg.SMTPApiURL, user.Email)
301-
httpClient := &http.Client{
302-
Timeout: r.cfg.SMTPRequestTimeout,
303-
}
301+
httpClient := r.cfg.SMTPHttpClient
304302

305303
const smtpPollInterval = 2 * time.Second
306304
done := xerrors.New("done")
307305

308306
tkr := r.clock.TickerFunc(ctx, smtpPollInterval, func() error {
309-
req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
307+
reqCtx, cancel := context.WithTimeout(ctx, r.cfg.SMTPRequestTimeout)
308+
defer cancel()
309+
310+
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, apiURL, nil)
310311
if err != nil {
311312
logger.Error(ctx, "create SMTP API request", slog.Error(err))
312313
r.cfg.Metrics.AddError("smtp_create_request")
@@ -317,14 +318,16 @@ func (r *Runner) watchNotificationsSMTP(ctx context.Context, user codersdk.User,
317318
if err != nil {
318319
logger.Error(ctx, "poll smtp api for notifications", slog.Error(err))
319320
r.cfg.Metrics.AddError("smtp_poll")
320-
return xerrors.Errorf("poll smtp api: %w", err)
321+
return nil
321322
}
322323

323324
if resp.StatusCode != http.StatusOK {
325+
// discard the response to allow reusing of the connection
326+
_, _ = io.Copy(io.Discard, resp.Body)
324327
_ = resp.Body.Close()
325328
logger.Error(ctx, "smtp api returned non-200 status", slog.F("status", resp.StatusCode))
326329
r.cfg.Metrics.AddError("smtp_bad_status")
327-
return xerrors.Errorf("smtp api returned status %d", resp.StatusCode)
330+
return nil
328331
}
329332

330333
var summaries []smtpmock.EmailSummary

scaletest/notifications/run_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ func TestRunWithSMTP(t *testing.T) {
212212
smtpTrap := mClock.Trap().TickerFunc("smtp")
213213
defer smtpTrap.Close()
214214

215+
httpClient := &http.Client{}
216+
215217
// Start receiving runners who will receive notifications
216218
receivingRunners := make([]*notifications.Runner, 0, numReceivingUsers)
217219
for i := range numReceivingUsers {
@@ -229,6 +231,7 @@ func TestRunWithSMTP(t *testing.T) {
229231
ExpectedNotificationsIDs: expectedNotificationsIDs,
230232
SMTPApiURL: smtpAPIServer.URL,
231233
SMTPRequestTimeout: testutil.WaitLong,
234+
SMTPHttpClient: httpClient,
232235
}
233236
err := runnerCfg.Validate()
234237
require.NoError(t, err)

0 commit comments

Comments
 (0)