diff --git a/.github/actions/test-go-pg/action.yaml b/.github/actions/test-go-pg/action.yaml new file mode 100644 index 0000000000000..5f19da6910822 --- /dev/null +++ b/.github/actions/test-go-pg/action.yaml @@ -0,0 +1,79 @@ +name: "Test Go with PostgreSQL" +description: "Run Go tests with PostgreSQL database" + +inputs: + postgres-version: + description: "PostgreSQL version to use" + required: false + default: "13" + test-parallelism-packages: + description: "Number of packages to test in parallel (-p flag)" + required: false + default: "8" + test-parallelism-tests: + description: "Number of tests to run in parallel within each package (-parallel flag)" + required: false + default: "8" + race-detection: + description: "Enable race detection" + required: false + default: "false" + test-count: + description: "Number of times to run each test (empty for cached results)" + required: false + default: "" + test-packages: + description: "Packages to test (default: ./...)" + required: false + default: "./..." + embedded-pg-path: + description: "Path for embedded postgres data (Windows/macOS only)" + required: false + default: "" + embedded-pg-cache: + description: "Path for embedded postgres cache (Windows/macOS only)" + required: false + default: "" + +runs: + using: "composite" + steps: + - name: Start PostgreSQL Docker container (Linux) + if: runner.os == 'Linux' + shell: bash + env: + POSTGRES_VERSION: ${{ inputs.postgres-version }} + run: make test-postgres-docker + + - name: Setup Embedded Postgres (Windows/macOS) + if: runner.os != 'Linux' + shell: bash + env: + POSTGRES_VERSION: ${{ inputs.postgres-version }} + EMBEDDED_PG_PATH: ${{ inputs.embedded-pg-path }} + EMBEDDED_PG_CACHE_DIR: ${{ inputs.embedded-pg-cache }} + run: | + go run scripts/embedded-pg/main.go -path "${EMBEDDED_PG_PATH}" -cache "${EMBEDDED_PG_CACHE_DIR}" + + - name: Run tests + shell: bash + env: + TEST_NUM_PARALLEL_PACKAGES: ${{ inputs.test-parallelism-packages }} + TEST_NUM_PARALLEL_TESTS: ${{ inputs.test-parallelism-tests }} + TEST_COUNT: ${{ inputs.test-count }} + TEST_PACKAGES: ${{ inputs.test-packages }} + RACE_DETECTION: ${{ inputs.race-detection }} + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" + run: | + set -euo pipefail + + if [[ ${RACE_DETECTION} == true ]]; then + gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ + -race \ + -parallel "${TEST_NUM_PARALLEL_TESTS}" \ + -p "${TEST_NUM_PARALLEL_PACKAGES}" + else + make test + fi diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d61a214cdb4ff..68494f3d21cc1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -327,7 +327,7 @@ jobs: test-go-pg: # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in @@ -336,6 +336,7 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: + fail-fast: false matrix: os: - ubuntu-latest @@ -416,82 +417,83 @@ jobs: find . -type f ! -path ./.git/\*\* | mtimehash find . -type d ! -path ./.git/\*\* -exec touch -t 200601010000 {} + - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "13" - TS_DEBUG_DISCO: "true" - LC_CTYPE: "en_US.UTF-8" - LC_ALL: "en_US.UTF-8" + - name: Normalize Terraform Path for Caching shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. run: | - set -o errexit - set -o pipefail - - if [ "$RUNNER_OS" == "Windows" ]; then - # Create a temp dir on the R: ramdisk drive for Windows. The default - # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 - mkdir -p "R:/temp/embedded-pg" - go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "$RUNNER_OS" == "macOS" ]; then - # Postgres runs faster on a ramdisk on macOS too - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "$RUNNER_OS" == "Linux" ]; then - make test-postgres-docker - fi + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" - # if macOS, install google-chrome for scaletests - # As another concern, should we really have this kind of external dependency - # requirement on standard CI? - if [ "${RUNNER_OS}" == "macOS" ]; then - brew install google-chrome - fi + - name: Setup RAM disk for Embedded Postgres (Windows) + if: runner.os == 'Windows' + shell: bash + # The default C: drive is extremely slow: + # https://github.com/actions/runner-images/issues/8755 + run: mkdir -p "R:/temp/embedded-pg" - # macOS will output "The default interactive shell is now zsh" - # intermittently in CI... - if [ "${RUNNER_OS}" == "macOS" ]; then - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - fi + - name: Setup RAM disk for Embedded Postgres (macOS) + if: runner.os == 'macOS' + shell: bash + run: | + # Postgres runs faster on a ramdisk on macOS. + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - if [ "${RUNNER_OS}" == "Windows" ]; then - # Our Windows runners have 16 cores. - # On Windows Postgres chokes up when we have 16x16=256 tests - # running in parallel, and dbtestutil.NewDB starts to take more than - # 10s to complete sometimes causing test timeouts. With 16x8=128 tests - # Postgres tends not to choke. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=16 - # Only the CLI and Agent are officially supported on Windows and the rest are too flaky - export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." - elif [ "${RUNNER_OS}" == "macOS" ]; then - # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 - # because the tests complete faster and Postgres doesn't choke. It seems - # that macOS's tmpfs is faster than the one on Windows. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=16 - # Only the CLI and Agent are officially supported on macOS and the rest are too flaky - export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." - elif [ "${RUNNER_OS}" == "Linux" ]; then - # Our Linux runners have 8 cores. - export TEST_NUM_PARALLEL_PACKAGES=8 - export TEST_NUM_PARALLEL_TESTS=8 - fi + # Install google-chrome for scaletests. + # As another concern, should we really have this kind of external dependency + # requirement on standard CI? + brew install google-chrome - # by default, run tests with cache - if [ "${GITHUB_REF}" == "refs/heads/main" ]; then - # on main, run tests without cache - export TEST_COUNT="1" - fi + # macOS will output "The default interactive shell is now zsh" intermittently in CI. + touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - mkdir -p "$RUNNER_TEMP/sym" - source scripts/normalize_path.sh - # terraform gets installed in a random directory, so we need to normalize - # the path to the terraform binary or a bunch of cached tests will be - # invalidated. See scripts/normalize_path.sh for more details. - normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + - name: Test with PostgreSQL Database (Linux) + if: runner.os == 'Linux' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Linux runners have 16 cores. + test-parallelism-packages: "16" + test-parallelism-tests: "8" + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - make test + - name: Test with PostgreSQL Database (macOS) + if: runner.os == 'macOS' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our macOS runners have 8 cores. + # Even though this parallelism seems high, we've observed relatively low flakiness in the past. + # See https://github.com/coder/coder/pull/21091#discussion_r2609891540. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + # Only the CLI and Agent are officially supported on macOS; the rest are too flaky. + test-packages: "./cli/... ./enterprise/cli/... ./agent/..." + embedded-pg-path: "/tmp/tmpfs/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + + - name: Test with PostgreSQL Database (Windows) + if: runner.os == 'Windows' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Windows runners have 32 cores. + test-parallelism-packages: "32" + test-parallelism-tests: "16" + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + # Only the CLI and Agent are officially supported on Windows; the rest are too flaky. + test-packages: "./cli/... ./enterprise/cli/... ./agent/..." + embedded-pg-path: "R:/temp/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Upload failed test db dumps uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 @@ -521,7 +523,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-pg-17: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} needs: - changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' @@ -554,12 +556,25 @@ jobs: with: key-prefix: test-go-pg-17-${{ runner.os }}-${{ runner.arch }} - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "17" - TS_DEBUG_DISCO: "true" + - name: Normalize Terraform Path for Caching + shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. run: | - make test-postgres + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + + - name: Test with PostgreSQL Database + uses: ./.github/actions/test-go-pg + with: + postgres-version: "17" + # Our Linux runners have 16 cores. + test-parallelism-packages: "16" + test-parallelism-tests: "8" + # By default, run tests with cache for improved speed (possibly at the expense of correctness). + # On main, run tests without cache for the inverse. + test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - name: Upload Test Cache uses: ./.github/actions/test-cache/upload @@ -575,7 +590,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-race-pg: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-32' || 'ubuntu-latest' }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' timeout-minutes: 25 @@ -603,16 +618,28 @@ jobs: with: key-prefix: test-go-race-pg-${{ runner.os }}-${{ runner.arch }} + - name: Normalize Terraform Path for Caching + shell: bash + # Terraform gets installed in a random directory, so we need to normalize + # the path or many cached tests will be invalidated. + run: | + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + # We run race tests with reduced parallelism because they use more CPU and we were finding # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. # c.f. discussion on https://github.com/coder/coder/pull/15106 + # Our Linux runners have 32 cores, but we reduce parallelism since race detection adds a lot of overhead. + # We aim to have parallelism match CPU count (8*4=32) to avoid making flakes worse. - name: Run Tests - env: - POSTGRES_VERSION: "17" - run: | - make test-postgres-docker - gotestsum --junitfile="gotests.xml" --packages="./..." -- -race -parallel 4 -p 4 + uses: ./.github/actions/test-go-pg + with: + postgres-version: "17" + test-parallelism-packages: "8" + test-parallelism-tests: "4" + race-detection: "true" - name: Upload Test Cache uses: ./.github/actions/test-cache/upload diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index f02a0afcc0650..661aa708d6150 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -1,9 +1,9 @@ -# The nightly-gauntlet runs tests that are either too flaky or too slow to block -# every PR. +# The nightly-gauntlet runs the full test suite on macOS and Windows. +# This complements ci.yaml which only runs a subset of packages on these platforms. name: nightly-gauntlet on: schedule: - # Every day at 4AM + # Every day at 4AM UTC on weekdays - cron: "0 4 * * 1-5" workflow_dispatch: @@ -21,6 +21,7 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: + fail-fast: false matrix: os: - macos-latest @@ -80,75 +81,44 @@ jobs: key-prefix: embedded-pg-${{ runner.os }}-${{ runner.arch }} cache-path: ${{ steps.embedded-pg-cache.outputs.cached-dirs }} - - name: Test with PostgreSQL Database - env: - POSTGRES_VERSION: "13" - TS_DEBUG_DISCO: "true" - LC_CTYPE: "en_US.UTF-8" - LC_ALL: "en_US.UTF-8" + - name: Setup RAM disk for Embedded Postgres (Windows) + if: runner.os == 'Windows' shell: bash - run: | - set -o errexit - set -o pipefail - - if [ "${{ runner.os }}" == "Windows" ]; then - # Create a temp dir on the R: ramdisk drive for Windows. The default - # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 - mkdir -p "R:/temp/embedded-pg" - go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "${{ runner.os }}" == "macOS" ]; then - # Postgres runs faster on a ramdisk on macOS too - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" - elif [ "${{ runner.os }}" == "Linux" ]; then - make test-postgres-docker - fi + run: mkdir -p "R:/temp/embedded-pg" - # if macOS, install google-chrome for scaletests - # As another concern, should we really have this kind of external dependency - # requirement on standard CI? - if [ "${{ matrix.os }}" == "macos-latest" ]; then - brew install google-chrome - fi - - # macOS will output "The default interactive shell is now zsh" - # intermittently in CI... - if [ "${{ matrix.os }}" == "macos-latest" ]; then - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - fi - - if [ "${{ runner.os }}" == "Windows" ]; then - # Our Windows runners have 16 cores. - # On Windows Postgres chokes up when we have 16x16=256 tests - # running in parallel, and dbtestutil.NewDB starts to take more than - # 10s to complete sometimes causing test timeouts. With 16x8=128 tests - # Postgres tends not to choke. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=16 - elif [ "${{ runner.os }}" == "macOS" ]; then - # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 - # because the tests complete faster and Postgres doesn't choke. It seems - # that macOS's tmpfs is faster than the one on Windows. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=16 - elif [ "${{ runner.os }}" == "Linux" ]; then - # Our Linux runners have 8 cores. - NUM_PARALLEL_PACKAGES=8 - NUM_PARALLEL_TESTS=8 - fi - - # run tests without cache - TESTCOUNT="-count=1" + - name: Setup RAM disk for Embedded Postgres (macOS) + if: runner.os == 'macOS' + shell: bash + run: | + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs - DB=ci gotestsum \ - --format standard-quiet --packages "./..." \ - -- -timeout=20m -v -p "$NUM_PARALLEL_PACKAGES" -parallel="$NUM_PARALLEL_TESTS" "$TESTCOUNT" + - name: Test with PostgreSQL Database (macOS) + if: runner.os == 'macOS' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our macOS runners have 8 cores. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + test-count: "1" + embedded-pg-path: "/tmp/tmpfs/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + + - name: Test with PostgreSQL Database (Windows) + if: runner.os == 'Windows' + uses: ./.github/actions/test-go-pg + with: + postgres-version: "13" + # Our Windows runners have 16 cores. + test-parallelism-packages: "8" + test-parallelism-tests: "16" + test-count: "1" + embedded-pg-path: "R:/temp/embedded-pg" + embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - name: Upload Embedded Postgres Cache uses: ./.github/actions/embedded-pg-cache/upload - # We only use the embedded Postgres cache on macOS and Windows runners. - if: runner.OS == 'macOS' || runner.OS == 'Windows' with: cache-key: ${{ steps.download-embedded-pg-cache.outputs.cache-key }} cache-path: "${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }}" @@ -165,7 +135,7 @@ jobs: needs: - test-go-pg runs-on: ubuntu-latest - if: failure() && github.ref == 'refs/heads/main' + if: failure() steps: - name: Send Slack notification