From e60da7821df52e3e975e80b698e6d71ee936504a Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 4 Dec 2025 10:53:29 +0200 Subject: [PATCH 01/10] chore: abstract pg test logic, increase runner sizes Signed-off-by: Danny Kopping --- .github/actions/test-go-pg/action.yaml | 79 +++++++++++ .github/workflows/ci.yaml | 182 ++++++++++++++----------- 2 files changed, 180 insertions(+), 81 deletions(-) create mode 100644 .github/actions/test-go-pg/action.yaml diff --git a/.github/actions/test-go-pg/action.yaml b/.github/actions/test-go-pg/action.yaml new file mode 100644 index 0000000000000..9fdd109c21f21 --- /dev/null +++ b/.github/actions/test-go-pg/action.yaml @@ -0,0 +1,79 @@ +name: "Test Go with PostgreSQL" +description: "Run Go tests with PostgreSQL database" + +inputs: + postgres-version: + description: "PostgreSQL version to use" + required: false + default: "13" + test-parallelism-packages: + description: "Number of packages to test in parallel (-p flag)" + required: false + default: "8" + test-parallelism-tests: + description: "Number of tests to run in parallel within each package (-parallel flag)" + required: false + default: "8" + race-detection: + description: "Enable race detection" + required: false + default: "false" + test-count: + description: "Number of times to run each test (empty for cached results)" + required: false + default: "" + test-packages: + description: "Packages to test (default: ./...)" + required: false + default: "./..." + embedded-pg-path: + description: "Path for embedded postgres data (Windows/macOS only)" + required: false + default: "" + embedded-pg-cache: + description: "Path for embedded postgres cache (Windows/macOS only)" + required: false + default: "" + +runs: + using: "composite" + steps: + - name: Start PostgreSQL Docker container (Linux) + if: runner.os == 'Linux' + shell: bash + env: + POSTGRES_VERSION: ${{ inputs.postgres-version }} + run: make test-postgres-docker + + - name: Setup Embedded Postgres (Windows/macOS) + if: runner.os != 'Linux' + shell: bash + env: + POSTGRES_VERSION: ${{ inputs.postgres-version }} + EMBEDDED_PG_PATH: ${{ inputs.embedded-pg-path }} + EMBEDDED_PG_CACHE_DIR: ${{ inputs.embedded-pg-cache }} + run: | + go run scripts/embedded-pg/main.go -path "${EMBEDDED_PG_PATH}" -cache "${EMBEDDED_PG_CACHE_DIR}" + + - name: Run tests + shell: bash + env: + TEST_NUM_PARALLEL_PACKAGES: ${{ inputs.test-parallelism-packages }} + TEST_NUM_PARALLEL_TESTS: ${{ inputs.test-parallelism-tests }} + TEST_COUNT: ${{ inputs.test-count }} + TEST_PACKAGES: ${{ inputs.test-packages }} + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" + run: | + set -o errexit + set -o pipefail + + if [ "${{ inputs.race-detection }}" == "true" ]; then + gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ + -race \ + -parallel "${TEST_NUM_PARALLEL_TESTS}" \ + -p "${TEST_NUM_PARALLEL_PACKAGES}" + else + make test + fi diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d61a214cdb4ff..82b86240202eb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -327,7 +327,7 @@ jobs: test-go-pg: # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in @@ -416,82 +416,79 @@ jobs: find . -type f ! -path ./.git/\*\* | mtimehash find . -type d ! -path ./.git/\*\* -exec touch -t 200601010000 {} + - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "13" - TS_DEBUG_DISCO: "true" - LC_CTYPE: "en_US.UTF-8" - LC_ALL: "en_US.UTF-8" + - name: Normalize Terraform Path for Caching shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. run: | - set -o errexit - set -o pipefail - - if [ "$RUNNER_OS" == "Windows" ]; then - # Create a temp dir on the R: ramdisk drive for Windows. The default - # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 - mkdir -p "R:/temp/embedded-pg" - go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "$RUNNER_OS" == "macOS" ]; then - # Postgres runs faster on a ramdisk on macOS too - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "$RUNNER_OS" == "Linux" ]; then - make test-postgres-docker - fi + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" - # if macOS, install google-chrome for scaletests - # As another concern, should we really have this kind of external dependency - # requirement on standard CI? - if [ "${RUNNER_OS}" == "macOS" ]; then - brew install google-chrome - fi + - name: Setup RAM disk for Embedded Postgres (Windows) + if: runner.os == 'Windows' + shell: bash + # The default C: drive is extremely slow: + # https://github.com/actions/runner-images/issues/8755 + run: mkdir -p "R:/temp/embedded-pg" - # macOS will output "The default interactive shell is now zsh" - # intermittently in CI... - if [ "${RUNNER_OS}" == "macOS" ]; then - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - fi + - name: Setup RAM disk for Embedded Postgres (macOS) + if: runner.os == 'macOS' + shell: bash + run: | + # Postgres runs faster on a ramdisk on macOS. + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - if [ "${RUNNER_OS}" == "Windows" ]; then - # Our Windows runners have 16 cores. - # On Windows Postgres chokes up when we have 16x16=256 tests - # running in parallel, and dbtestutil.NewDB starts to take more than - # 10s to complete sometimes causing test timeouts. With 16x8=128 tests - # Postgres tends not to choke. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=16 - # Only the CLI and Agent are officially supported on Windows and the rest are too flaky - export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." - elif [ "${RUNNER_OS}" == "macOS" ]; then - # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 - # because the tests complete faster and Postgres doesn't choke. It seems - # that macOS's tmpfs is faster than the one on Windows. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=16 - # Only the CLI and Agent are officially supported on macOS and the rest are too flaky - export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." - elif [ "${RUNNER_OS}" == "Linux" ]; then - # Our Linux runners have 8 cores. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=8 - fi + # Install google-chrome for scaletests. + # As another concern, should we really have this kind of external dependency + # requirement on standard CI? + brew install google-chrome - # by default, run tests with cache - if [ "${GITHUB_REF}" == "refs/heads/main" ]; then - # on main, run tests without cache - export TEST_COUNT="1" - fi + # macOS will output "The default interactive shell is now zsh" intermittently in CI. + touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - mkdir -p "$RUNNER_TEMP/sym" - source scripts/normalize_path.sh - # terraform gets installed in a random directory, so we need to normalize - # the path to the terraform binary or a bunch of cached tests will be - # invalidated. See scripts/normalize_path.sh for more details. - normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + - name: Test with PostgreSQL Database (Linux) + if: runner.os == 'Linux' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Linux runners have 16 cores. + test-parallelism-packages: "16" + test-parallelism-tests: "8" + # By default, run tests with cache. On main, run tests without cache. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - make test + - name: Test with PostgreSQL Database (macOS) + if: runner.os == 'macOS' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our macOS runners have 8 cores. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + # By default, run tests with cache. On main, run tests without cache. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + # Only the CLI and Agent are officially supported on macOS; the rest are too flaky. + test-packages: "./cli/... ./enterprise/cli/... ./agent/..." + embedded-pg-path: "/tmp/tmpfs/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + + - name: Test with PostgreSQL Database (Windows) + if: runner.os == 'Windows' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Windows runners have 16 cores. On Windows Postgres chokes when + # we have 16x16=256 tests running in parallel, so we use 8x16=128. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + # By default, run tests with cache. On main, run tests without cache. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + # Only the CLI and Agent are officially supported on Windows; the rest are too flaky. + test-packages: "./cli/... ./enterprise/cli/... ./agent/..." + embedded-pg-path: "R:/temp/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Upload failed test db dumps uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 @@ -521,7 +518,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-pg-17: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} needs: - changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' @@ -554,12 +551,24 @@ jobs: with: key-prefix: test-go-pg-17-${{ runner.os }}-${{ runner.arch }} - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "17" - TS_DEBUG_DISCO: "true" + - name: Normalize Terraform Path for Caching + shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. run: | - make test-postgres + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + + - name: Test with PostgreSQL Database + uses: ./.github/actions/test-go-pg + with: + postgres-version: "17" + # Our Linux runners have 16 cores. + test-parallelism-packages: "16" + test-parallelism-tests: "8" + # By default, run tests with cache. On main, run tests without cache. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Upload Test Cache uses: ./.github/actions/test-cache/upload @@ -575,7 +584,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-race-pg: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-32' || 'ubuntu-latest' }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' timeout-minutes: 25 @@ -603,16 +612,27 @@ jobs: with: key-prefix: test-go-race-pg-${{ runner.os }}-${{ runner.arch }} + - name: Normalize Terraform Path for Caching + shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. + run: | + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + # We run race tests with reduced parallelism because they use more CPU and we were finding # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. # c.f. discussion on https://github.com/coder/coder/pull/15106 + # Our Linux runners have 32 cores, but we use reduced parallelism for race detection. - name: Run Tests - env: - POSTGRES_VERSION: "17" - run: | - make test-postgres-docker - gotestsum --junitfile="gotests.xml" --packages="./..." -- -race -parallel 4 -p 4 + uses: ./.github/actions/test-go-pg + with: + postgres-version: "17" + test-parallelism-packages: "16" + test-parallelism-tests: "8" + race-detection: "true" - name: Upload Test Cache uses: ./.github/actions/test-cache/upload From 86246ba4064b50007a07aca56ce289ae6fb11244 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 4 Dec 2025 11:28:39 +0200 Subject: [PATCH 02/10] chore: make lint Signed-off-by: Danny Kopping --- .github/actions/test-go-pg/action.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/test-go-pg/action.yaml b/.github/actions/test-go-pg/action.yaml index 9fdd109c21f21..c36926204df0f 100644 --- a/.github/actions/test-go-pg/action.yaml +++ b/.github/actions/test-go-pg/action.yaml @@ -62,6 +62,7 @@ runs: TEST_NUM_PARALLEL_TESTS: ${{ inputs.test-parallelism-tests }} TEST_COUNT: ${{ inputs.test-count }} TEST_PACKAGES: ${{ inputs.test-packages }} + RACE_DETECTION: ${{ inputs.race-detection }} TS_DEBUG_DISCO: "true" LC_CTYPE: "en_US.UTF-8" LC_ALL: "en_US.UTF-8" @@ -69,7 +70,7 @@ runs: set -o errexit set -o pipefail - if [ "${{ inputs.race-detection }}" == "true" ]; then + if [ "${RACE_DETECTION}" == "true" ]; then gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ -race \ -parallel "${TEST_NUM_PARALLEL_TESTS}" \ From 211e7276b52d0e0d6a7d296e1f89c824b82f11d5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 4 Dec 2025 11:38:04 +0200 Subject: [PATCH 03/10] chore: bump windows Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 82b86240202eb..0abb799117e20 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -327,7 +327,7 @@ jobs: test-go-pg: # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in @@ -479,9 +479,10 @@ jobs: uses: ./.github/actions/test-go-pg with: postgres-version: "13" - # Our Windows runners have 16 cores. On Windows Postgres chokes when - # we have 16x16=256 tests running in parallel, so we use 8x16=128. - test-parallelism-packages: "8" + # Our Windows runners have 32 cores. On Windows Postgres chokes when + # we have 16x16=256 tests running in parallel, so we use 16x16=256 + # but may need to reduce if issues arise. + test-parallelism-packages: "16" test-parallelism-tests: "16" # By default, run tests with cache. On main, run tests without cache. test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} From 4af14305963e715682ba3f288b7302234c0ebbff Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 06:57:11 +0200 Subject: [PATCH 04/10] chore: fix nonsensical Windows comment & add more detail to other comments Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0abb799117e20..6b48551b17f99 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -456,7 +456,8 @@ jobs: # Our Linux runners have 16 cores. test-parallelism-packages: "16" test-parallelism-tests: "8" - # By default, run tests with cache. On main, run tests without cache. + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Test with PostgreSQL Database (macOS) @@ -467,7 +468,8 @@ jobs: # Our macOS runners have 8 cores. test-parallelism-packages: "8" test-parallelism-tests: "16" - # By default, run tests with cache. On main, run tests without cache. + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} # Only the CLI and Agent are officially supported on macOS; the rest are too flaky. test-packages: "./cli/... ./enterprise/cli/... ./agent/..." @@ -479,12 +481,11 @@ jobs: uses: ./.github/actions/test-go-pg with: postgres-version: "13" - # Our Windows runners have 32 cores. On Windows Postgres chokes when - # we have 16x16=256 tests running in parallel, so we use 16x16=256 - # but may need to reduce if issues arise. - test-parallelism-packages: "16" + # Our Windows runners have 32 cores. + test-parallelism-packages: "32" test-parallelism-tests: "16" - # By default, run tests with cache. On main, run tests without cache. + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} # Only the CLI and Agent are officially supported on Windows; the rest are too flaky. test-packages: "./cli/... ./enterprise/cli/... ./agent/..." @@ -568,7 +569,8 @@ jobs: # Our Linux runners have 16 cores. test-parallelism-packages: "16" test-parallelism-tests: "8" - # By default, run tests with cache. On main, run tests without cache. + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Upload Test Cache From 7040c95ba8059f2d805cc84131c9cf2d10ba6f8c Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 07:05:33 +0200 Subject: [PATCH 05/10] chore: max 1 test per core Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6b48551b17f99..ae7fc0feea24d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -628,13 +628,14 @@ jobs: # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. # c.f. discussion on https://github.com/coder/coder/pull/15106 - # Our Linux runners have 32 cores, but we use reduced parallelism for race detection. + # Our Linux runners have 32 cores, but we reduce parallelism since race detection adds a lot of overhead. + # We aim to have parallelism match CPU count (8*4=32) to avoid making flakes worse. - name: Run Tests uses: ./.github/actions/test-go-pg with: postgres-version: "17" - test-parallelism-packages: "16" - test-parallelism-tests: "8" + test-parallelism-packages: "8" + test-parallelism-tests: "4" race-detection: "true" - name: Upload Test Cache From 6cd78b779403ee5a2c8ac642ed548a8a39e46ef5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 07:07:17 +0200 Subject: [PATCH 06/10] chore: bash improvements Signed-off-by: Danny Kopping --- .github/actions/test-go-pg/action.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/actions/test-go-pg/action.yaml b/.github/actions/test-go-pg/action.yaml index c36926204df0f..5f19da6910822 100644 --- a/.github/actions/test-go-pg/action.yaml +++ b/.github/actions/test-go-pg/action.yaml @@ -67,10 +67,9 @@ runs: LC_CTYPE: "en_US.UTF-8" LC_ALL: "en_US.UTF-8" run: | - set -o errexit - set -o pipefail + set -euo pipefail - if [ "${RACE_DETECTION}" == "true" ]; then + if [[ ${RACE_DETECTION} == true ]]; then gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ -race \ -parallel "${TEST_NUM_PARALLEL_TESTS}" \ From 6c691c42ff4169496e532561135c4ba6833e007e Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 07:29:13 +0200 Subject: [PATCH 07/10] chore: align nightly-gauntlet.yaml with ci.yaml, only run mac/windows tests on main Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 4 +- .github/workflows/nightly-gauntlet.yaml | 104 +++++++++--------------- 2 files changed, 39 insertions(+), 69 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ae7fc0feea24d..88c8f70a0e559 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -461,7 +461,7 @@ jobs: test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Test with PostgreSQL Database (macOS) - if: runner.os == 'macOS' + if: runner.os == 'macOS' && github.ref == 'refs/heads/main' uses: ./.github/actions/test-go-pg with: postgres-version: "13" @@ -477,7 +477,7 @@ jobs: embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Test with PostgreSQL Database (Windows) - if: runner.os == 'Windows' + if: runner.os == 'Windows' && github.ref == 'refs/heads/main' uses: ./.github/actions/test-go-pg with: postgres-version: "13" diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index f02a0afcc0650..661aa708d6150 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -1,9 +1,9 @@ -# The nightly-gauntlet runs tests that are either too flaky or too slow to block -# every PR. +# The nightly-gauntlet runs the full test suite on macOS and Windows. +# This complements ci.yaml which only runs a subset of packages on these platforms. name: nightly-gauntlet on: schedule: - # Every day at 4AM + # Every day at 4AM UTC on weekdays - cron: "0 4 * * 1-5" workflow_dispatch: @@ -21,6 +21,7 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: + fail-fast: false matrix: os: - macos-latest @@ -80,75 +81,44 @@ jobs: key-prefix: embedded-pg-${{ runner.os }}-${{ runner.arch }} cache-path: ${{ steps.embedded-pg-cache.outputs.cached-dirs }} - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "13" - TS_DEBUG_DISCO: "true" - LC_CTYPE: "en_US.UTF-8" - LC_ALL: "en_US.UTF-8" + - name: Setup RAM disk for Embedded Postgres (Windows) + if: runner.os == 'Windows' shell: bash - run: | - set -o errexit - set -o pipefail - - if [ "${{ runner.os }}" == "Windows" ]; then - # Create a temp dir on the R: ramdisk drive for Windows. The default - # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 - mkdir -p "R:/temp/embedded-pg" - go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "${{ runner.os }}" == "macOS" ]; then - # Postgres runs faster on a ramdisk on macOS too - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "${{ runner.os }}" == "Linux" ]; then - make test-postgres-docker - fi + run: mkdir -p "R:/temp/embedded-pg" - # if macOS, install google-chrome for scaletests - # As another concern, should we really have this kind of external dependency - # requirement on standard CI? - if [ "${{ matrix.os }}" == "macos-latest" ]; then - brew install google-chrome - fi - - # macOS will output "The default interactive shell is now zsh" - # intermittently in CI... - if [ "${{ matrix.os }}" == "macos-latest" ]; then - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - fi - - if [ "${{ runner.os }}" == "Windows" ]; then - # Our Windows runners have 16 cores. - # On Windows Postgres chokes up when we have 16x16=256 tests - # running in parallel, and dbtestutil.NewDB starts to take more than - # 10s to complete sometimes causing test timeouts. With 16x8=128 tests - # Postgres tends not to choke. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=16 - elif [ "${{ runner.os }}" == "macOS" ]; then - # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 - # because the tests complete faster and Postgres doesn't choke. It seems - # that macOS's tmpfs is faster than the one on Windows. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=16 - elif [ "${{ runner.os }}" == "Linux" ]; then - # Our Linux runners have 8 cores. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=8 - fi - - # run tests without cache - TESTCOUNT="-count=1" + - name: Setup RAM disk for Embedded Postgres (macOS) + if: runner.os == 'macOS' + shell: bash + run: | + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - DB=ci gotestsum \ - --format standard-quiet --packages "./..." \ - -- -timeout=20m -v -p "$NUM_PARALLEL_PACKAGES" -parallel="$NUM_PARALLEL_TESTS" "$TESTCOUNT" + - name: Test with PostgreSQL Database (macOS) + if: runner.os == 'macOS' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our macOS runners have 8 cores. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + test-count: "1" + embedded-pg-path: "/tmp/tmpfs/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + + - name: Test with PostgreSQL Database (Windows) + if: runner.os == 'Windows' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Windows runners have 16 cores. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + test-count: "1" + embedded-pg-path: "R:/temp/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Upload Embedded Postgres Cache uses: ./.github/actions/embedded-pg-cache/upload - # We only use the embedded Postgres cache on macOS and Windows runners. - if: runner.OS == 'macOS' || runner.OS == 'Windows' with: cache-key: ${{ steps.download-embedded-pg-cache.outputs.cache-key }} cache-path: "${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }}" @@ -165,7 +135,7 @@ jobs: needs: - test-go-pg runs-on: ubuntu-latest - if: failure() && github.ref == 'refs/heads/main' + if: failure() steps: - name: Send Slack notification From f4d2a44e424a6f4a84771b63eb8b21e54ec07d6a Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 07:48:33 +0200 Subject: [PATCH 08/10] chore: only run mac/windows jobs on main Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 88c8f70a0e559..ebf28065fb382 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -325,6 +325,7 @@ jobs: run: ./scripts/check_unstaged.sh test-go-pg: + name: test-go-pg (${{ matrix.os }}) # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} @@ -341,6 +342,17 @@ jobs: - ubuntu-latest - macos-latest - windows-2022 + # Use the GitHub ref to determine if we should run on all OSes. + # Evaluates to true on main, false otherwise. + isMain: + - ${{ github.ref == 'refs/heads/main' }} + # macOS and Windows are tested on main only to improve CI speed, and we don't see much use on these platforms. + # See nightly-gauntlet.yaml for comprehensive macOS/Windows coverage. + exclude: + - isMain: false + os: macos-latest + - isMain: false + os: windows-2022 steps: - name: Harden Runner uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 # v2.13.2 @@ -461,7 +473,7 @@ jobs: test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Test with PostgreSQL Database (macOS) - if: runner.os == 'macOS' && github.ref == 'refs/heads/main' + if: runner.os == 'macOS' uses: ./.github/actions/test-go-pg with: postgres-version: "13" @@ -477,7 +489,7 @@ jobs: embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Test with PostgreSQL Database (Windows) - if: runner.os == 'Windows' && github.ref == 'refs/heads/main' + if: runner.os == 'Windows' uses: ./.github/actions/test-go-pg with: postgres-version: "13" From 9f29ad08382de070540e8cca64ae862750895c7d Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 11:33:08 +0200 Subject: [PATCH 09/10] chore: restore mac/windows steps on PRs Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ebf28065fb382..0b055c42ad49d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -325,7 +325,6 @@ jobs: run: ./scripts/check_unstaged.sh test-go-pg: - name: test-go-pg (${{ matrix.os }}) # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} @@ -337,22 +336,12 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: + fail-fast: false matrix: os: - ubuntu-latest - macos-latest - windows-2022 - # Use the GitHub ref to determine if we should run on all OSes. - # Evaluates to true on main, false otherwise. - isMain: - - ${{ github.ref == 'refs/heads/main' }} - # macOS and Windows are tested on main only to improve CI speed, and we don't see much use on these platforms. - # See nightly-gauntlet.yaml for comprehensive macOS/Windows coverage. - exclude: - - isMain: false - os: macos-latest - - isMain: false - os: windows-2022 steps: - name: Harden Runner uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 # v2.13.2 From 983515fac0a2e92ac5d39c0a1fd8b365662bbe29 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 11 Dec 2025 11:57:57 +0200 Subject: [PATCH 10/10] chore: document high macos parallelism Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0b055c42ad49d..68494f3d21cc1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -467,6 +467,8 @@ jobs: with: postgres-version: "13" # Our macOS runners have 8 cores. + # Even though this parallelism seems high, we've observed relatively low flakiness in the past. + # See https://github.com/coder/coder/pull/21091#discussion_r2609891540. test-parallelism-packages: "8" test-parallelism-tests: "16" # By default, run tests with cache for improved speed (possibly at the expense of correctness).