diff --git a/.github/actions/test-go-pg/action.yaml b/.github/actions/test-go-pg/action.yaml deleted file mode 100644 index 5f19da6910822..0000000000000 --- a/.github/actions/test-go-pg/action.yaml +++ /dev/null @@ -1,79 +0,0 @@ -name: "Test Go with PostgreSQL" -description: "Run Go tests with PostgreSQL database" - -inputs: - postgres-version: - description: "PostgreSQL version to use" - required: false - default: "13" - test-parallelism-packages: - description: "Number of packages to test in parallel (-p flag)" - required: false - default: "8" - test-parallelism-tests: - description: "Number of tests to run in parallel within each package (-parallel flag)" - required: false - default: "8" - race-detection: - description: "Enable race detection" - required: false - default: "false" - test-count: - description: "Number of times to run each test (empty for cached results)" - required: false - default: "" - test-packages: - description: "Packages to test (default: ./...)" - required: false - default: "./..." - embedded-pg-path: - description: "Path for embedded postgres data (Windows/macOS only)" - required: false - default: "" - embedded-pg-cache: - description: "Path for embedded postgres cache (Windows/macOS only)" - required: false - default: "" - -runs: - using: "composite" - steps: - - name: Start PostgreSQL Docker container (Linux) - if: runner.os == 'Linux' - shell: bash - env: - POSTGRES_VERSION: ${{ inputs.postgres-version }} - run: make test-postgres-docker - - - name: Setup Embedded Postgres (Windows/macOS) - if: runner.os != 'Linux' - shell: bash - env: - POSTGRES_VERSION: ${{ inputs.postgres-version }} - EMBEDDED_PG_PATH: ${{ inputs.embedded-pg-path }} - EMBEDDED_PG_CACHE_DIR: ${{ inputs.embedded-pg-cache }} - run: | - go run scripts/embedded-pg/main.go -path "${EMBEDDED_PG_PATH}" -cache "${EMBEDDED_PG_CACHE_DIR}" - - - name: Run tests - shell: bash - env: - TEST_NUM_PARALLEL_PACKAGES: ${{ inputs.test-parallelism-packages }} - TEST_NUM_PARALLEL_TESTS: ${{ inputs.test-parallelism-tests }} - TEST_COUNT: ${{ inputs.test-count }} - TEST_PACKAGES: ${{ inputs.test-packages }} - RACE_DETECTION: ${{ inputs.race-detection }} - TS_DEBUG_DISCO: "true" - LC_CTYPE: "en_US.UTF-8" - LC_ALL: "en_US.UTF-8" - run: | - set -euo pipefail - - if [[ ${RACE_DETECTION} == true ]]; then - gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ - -race \ - -parallel "${TEST_NUM_PARALLEL_TESTS}" \ - -p "${TEST_NUM_PARALLEL_PACKAGES}" - else - make test - fi diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 68494f3d21cc1..d61a214cdb4ff 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -327,7 +327,7 @@ jobs: test-go-pg: # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below # when changing runner sizes - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in @@ -336,7 +336,6 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: - fail-fast: false matrix: os: - ubuntu-latest @@ -417,83 +416,82 @@ jobs: find . -type f ! -path ./.git/\*\* | mtimehash find . -type d ! -path ./.git/\*\* -exec touch -t 200601010000 {} + - - name: Normalize Terraform Path for Caching - shell: bash - # Terraform gets installed in a random directory, so we need to normalize - # the path or many cached tests will be invalidated. - run: | - mkdir -p "$RUNNER_TEMP/sym" - source scripts/normalize_path.sh - normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" - - - name: Setup RAM disk for Embedded Postgres (Windows) - if: runner.os == 'Windows' - shell: bash - # The default C: drive is extremely slow: - # https://github.com/actions/runner-images/issues/8755 - run: mkdir -p "R:/temp/embedded-pg" - - - name: Setup RAM disk for Embedded Postgres (macOS) - if: runner.os == 'macOS' + - name: Test with PostgreSQL Database + env: + POSTGRES_VERSION: "13" + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" shell: bash run: | - # Postgres runs faster on a ramdisk on macOS. - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs + set -o errexit + set -o pipefail + + if [ "$RUNNER_OS" == "Windows" ]; then + # Create a temp dir on the R: ramdisk drive for Windows. The default + # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 + mkdir -p "R:/temp/embedded-pg" + go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" + elif [ "$RUNNER_OS" == "macOS" ]; then + # Postgres runs faster on a ramdisk on macOS too + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs + go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" + elif [ "$RUNNER_OS" == "Linux" ]; then + make test-postgres-docker + fi - # Install google-chrome for scaletests. + # if macOS, install google-chrome for scaletests # As another concern, should we really have this kind of external dependency # requirement on standard CI? - brew install google-chrome + if [ "${RUNNER_OS}" == "macOS" ]; then + brew install google-chrome + fi - # macOS will output "The default interactive shell is now zsh" intermittently in CI. - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile + # macOS will output "The default interactive shell is now zsh" + # intermittently in CI... + if [ "${RUNNER_OS}" == "macOS" ]; then + touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile + fi - - name: Test with PostgreSQL Database (Linux) - if: runner.os == 'Linux' - uses: ./.github/actions/test-go-pg - with: - postgres-version: "13" - # Our Linux runners have 16 cores. - test-parallelism-packages: "16" - test-parallelism-tests: "8" - # By default, run tests with cache for improved speed (possibly at the expense of correctness). - # On main, run tests without cache for the inverse. - test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + if [ "${RUNNER_OS}" == "Windows" ]; then + # Our Windows runners have 16 cores. + # On Windows Postgres chokes up when we have 16x16=256 tests + # running in parallel, and dbtestutil.NewDB starts to take more than + # 10s to complete sometimes causing test timeouts. With 16x8=128 tests + # Postgres tends not to choke. + export TEST_NUM_PARALLEL_PACKAGES=8 + export TEST_NUM_PARALLEL_TESTS=16 + # Only the CLI and Agent are officially supported on Windows and the rest are too flaky + export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." + elif [ "${RUNNER_OS}" == "macOS" ]; then + # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 + # because the tests complete faster and Postgres doesn't choke. It seems + # that macOS's tmpfs is faster than the one on Windows. + export TEST_NUM_PARALLEL_PACKAGES=8 + export TEST_NUM_PARALLEL_TESTS=16 + # Only the CLI and Agent are officially supported on macOS and the rest are too flaky + export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." + elif [ "${RUNNER_OS}" == "Linux" ]; then + # Our Linux runners have 8 cores. + export TEST_NUM_PARALLEL_PACKAGES=8 + export TEST_NUM_PARALLEL_TESTS=8 + fi - - name: Test with PostgreSQL Database (macOS) - if: runner.os == 'macOS' - uses: ./.github/actions/test-go-pg - with: - postgres-version: "13" - # Our macOS runners have 8 cores. - # Even though this parallelism seems high, we've observed relatively low flakiness in the past. - # See https://github.com/coder/coder/pull/21091#discussion_r2609891540. - test-parallelism-packages: "8" - test-parallelism-tests: "16" - # By default, run tests with cache for improved speed (possibly at the expense of correctness). - # On main, run tests without cache for the inverse. - test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - # Only the CLI and Agent are officially supported on macOS; the rest are too flaky. - test-packages: "./cli/... ./enterprise/cli/... ./agent/..." - embedded-pg-path: "/tmp/tmpfs/embedded-pg" - embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - - - name: Test with PostgreSQL Database (Windows) - if: runner.os == 'Windows' - uses: ./.github/actions/test-go-pg - with: - postgres-version: "13" - # Our Windows runners have 32 cores. - test-parallelism-packages: "32" - test-parallelism-tests: "16" - # By default, run tests with cache for improved speed (possibly at the expense of correctness). - # On main, run tests without cache for the inverse. - test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} - # Only the CLI and Agent are officially supported on Windows; the rest are too flaky. - test-packages: "./cli/... ./enterprise/cli/... ./agent/..." - embedded-pg-path: "R:/temp/embedded-pg" - embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + # by default, run tests with cache + if [ "${GITHUB_REF}" == "refs/heads/main" ]; then + # on main, run tests without cache + export TEST_COUNT="1" + fi + + mkdir -p "$RUNNER_TEMP/sym" + source scripts/normalize_path.sh + # terraform gets installed in a random directory, so we need to normalize + # the path to the terraform binary or a bunch of cached tests will be + # invalidated. See scripts/normalize_path.sh for more details. + normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" + + make test - name: Upload failed test db dumps uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 @@ -523,7 +521,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-pg-17: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }} needs: - changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' @@ -556,25 +554,12 @@ jobs: with: key-prefix: test-go-pg-17-${{ runner.os }}-${{ runner.arch }} - - name: Normalize Terraform Path for Caching - shell: bash - # Terraform gets installed in a random directory, so we need to normalize - # the path or many cached tests will be invalidated. - run: | - mkdir -p "$RUNNER_TEMP/sym" - source scripts/normalize_path.sh - normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" - - name: Test with PostgreSQL Database - uses: ./.github/actions/test-go-pg - with: - postgres-version: "17" - # Our Linux runners have 16 cores. - test-parallelism-packages: "16" - test-parallelism-tests: "8" - # By default, run tests with cache for improved speed (possibly at the expense of correctness). - # On main, run tests without cache for the inverse. - test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} + env: + POSTGRES_VERSION: "17" + TS_DEBUG_DISCO: "true" + run: | + make test-postgres - name: Upload Test Cache uses: ./.github/actions/test-cache/upload @@ -590,7 +575,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-race-pg: - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-32' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' timeout-minutes: 25 @@ -618,28 +603,16 @@ jobs: with: key-prefix: test-go-race-pg-${{ runner.os }}-${{ runner.arch }} - - name: Normalize Terraform Path for Caching - shell: bash - # Terraform gets installed in a random directory, so we need to normalize - # the path or many cached tests will be invalidated. - run: | - mkdir -p "$RUNNER_TEMP/sym" - source scripts/normalize_path.sh - normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" - # We run race tests with reduced parallelism because they use more CPU and we were finding # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. # c.f. discussion on https://github.com/coder/coder/pull/15106 - # Our Linux runners have 32 cores, but we reduce parallelism since race detection adds a lot of overhead. - # We aim to have parallelism match CPU count (8*4=32) to avoid making flakes worse. - name: Run Tests - uses: ./.github/actions/test-go-pg - with: - postgres-version: "17" - test-parallelism-packages: "8" - test-parallelism-tests: "4" - race-detection: "true" + env: + POSTGRES_VERSION: "17" + run: | + make test-postgres-docker + gotestsum --junitfile="gotests.xml" --packages="./..." -- -race -parallel 4 -p 4 - name: Upload Test Cache uses: ./.github/actions/test-cache/upload diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index 661aa708d6150..f02a0afcc0650 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -1,9 +1,9 @@ -# The nightly-gauntlet runs the full test suite on macOS and Windows. -# This complements ci.yaml which only runs a subset of packages on these platforms. +# The nightly-gauntlet runs tests that are either too flaky or too slow to block +# every PR. name: nightly-gauntlet on: schedule: - # Every day at 4AM UTC on weekdays + # Every day at 4AM - cron: "0 4 * * 1-5" workflow_dispatch: @@ -21,7 +21,6 @@ jobs: # even if some of the preceding steps are slow. timeout-minutes: 25 strategy: - fail-fast: false matrix: os: - macos-latest @@ -81,44 +80,75 @@ jobs: key-prefix: embedded-pg-${{ runner.os }}-${{ runner.arch }} cache-path: ${{ steps.embedded-pg-cache.outputs.cached-dirs }} - - name: Setup RAM disk for Embedded Postgres (Windows) - if: runner.os == 'Windows' - shell: bash - run: mkdir -p "R:/temp/embedded-pg" - - - name: Setup RAM disk for Embedded Postgres (macOS) - if: runner.os == 'macOS' + - name: Test with PostgreSQL Database + env: + POSTGRES_VERSION: "13" + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" shell: bash run: | - mkdir -p /tmp/tmpfs - sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs + set -o errexit + set -o pipefail + + if [ "${{ runner.os }}" == "Windows" ]; then + # Create a temp dir on the R: ramdisk drive for Windows. The default + # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 + mkdir -p "R:/temp/embedded-pg" + go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" + elif [ "${{ runner.os }}" == "macOS" ]; then + # Postgres runs faster on a ramdisk on macOS too + mkdir -p /tmp/tmpfs + sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs + go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" + elif [ "${{ runner.os }}" == "Linux" ]; then + make test-postgres-docker + fi - - name: Test with PostgreSQL Database (macOS) - if: runner.os == 'macOS' - uses: ./.github/actions/test-go-pg - with: - postgres-version: "13" - # Our macOS runners have 8 cores. - test-parallelism-packages: "8" - test-parallelism-tests: "16" - test-count: "1" - embedded-pg-path: "/tmp/tmpfs/embedded-pg" - embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} - - - name: Test with PostgreSQL Database (Windows) - if: runner.os == 'Windows' - uses: ./.github/actions/test-go-pg - with: - postgres-version: "13" - # Our Windows runners have 16 cores. - test-parallelism-packages: "8" - test-parallelism-tests: "16" - test-count: "1" - embedded-pg-path: "R:/temp/embedded-pg" - embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} + # if macOS, install google-chrome for scaletests + # As another concern, should we really have this kind of external dependency + # requirement on standard CI? + if [ "${{ matrix.os }}" == "macos-latest" ]; then + brew install google-chrome + fi + + # macOS will output "The default interactive shell is now zsh" + # intermittently in CI... + if [ "${{ matrix.os }}" == "macos-latest" ]; then + touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile + fi + + if [ "${{ runner.os }}" == "Windows" ]; then + # Our Windows runners have 16 cores. + # On Windows Postgres chokes up when we have 16x16=256 tests + # running in parallel, and dbtestutil.NewDB starts to take more than + # 10s to complete sometimes causing test timeouts. With 16x8=128 tests + # Postgres tends not to choke. + NUM_PARALLEL_PACKAGES=8 + NUM_PARALLEL_TESTS=16 + elif [ "${{ runner.os }}" == "macOS" ]; then + # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 + # because the tests complete faster and Postgres doesn't choke. It seems + # that macOS's tmpfs is faster than the one on Windows. + NUM_PARALLEL_PACKAGES=8 + NUM_PARALLEL_TESTS=16 + elif [ "${{ runner.os }}" == "Linux" ]; then + # Our Linux runners have 8 cores. + NUM_PARALLEL_PACKAGES=8 + NUM_PARALLEL_TESTS=8 + fi + + # run tests without cache + TESTCOUNT="-count=1" + + DB=ci gotestsum \ + --format standard-quiet --packages "./..." \ + -- -timeout=20m -v -p "$NUM_PARALLEL_PACKAGES" -parallel="$NUM_PARALLEL_TESTS" "$TESTCOUNT" - name: Upload Embedded Postgres Cache uses: ./.github/actions/embedded-pg-cache/upload + # We only use the embedded Postgres cache on macOS and Windows runners. + if: runner.OS == 'macOS' || runner.OS == 'Windows' with: cache-key: ${{ steps.download-embedded-pg-cache.outputs.cache-key }} cache-path: "${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }}" @@ -135,7 +165,7 @@ jobs: needs: - test-go-pg runs-on: ubuntu-latest - if: failure() + if: failure() && github.ref == 'refs/heads/main' steps: - name: Send Slack notification