-
Notifications
You must be signed in to change notification settings - Fork 1.1k
chore: abstract pg test logic and double runner sizes #21091
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e60da78
86246ba
211e727
7e3702f
4af1430
7040c95
6cd78b7
6c691c4
f4d2a44
9f29ad0
983515f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| name: "Test Go with PostgreSQL" | ||
| description: "Run Go tests with PostgreSQL database" | ||
|
|
||
| inputs: | ||
| postgres-version: | ||
| description: "PostgreSQL version to use" | ||
| required: false | ||
| default: "13" | ||
| test-parallelism-packages: | ||
| description: "Number of packages to test in parallel (-p flag)" | ||
| required: false | ||
| default: "8" | ||
| test-parallelism-tests: | ||
| description: "Number of tests to run in parallel within each package (-parallel flag)" | ||
| required: false | ||
| default: "8" | ||
| race-detection: | ||
| description: "Enable race detection" | ||
| required: false | ||
| default: "false" | ||
| test-count: | ||
| description: "Number of times to run each test (empty for cached results)" | ||
| required: false | ||
| default: "" | ||
| test-packages: | ||
| description: "Packages to test (default: ./...)" | ||
| required: false | ||
| default: "./..." | ||
| embedded-pg-path: | ||
| description: "Path for embedded postgres data (Windows/macOS only)" | ||
| required: false | ||
| default: "" | ||
| embedded-pg-cache: | ||
| description: "Path for embedded postgres cache (Windows/macOS only)" | ||
| required: false | ||
| default: "" | ||
|
|
||
| runs: | ||
| using: "composite" | ||
| steps: | ||
| - name: Start PostgreSQL Docker container (Linux) | ||
| if: runner.os == 'Linux' | ||
| shell: bash | ||
| env: | ||
| POSTGRES_VERSION: ${{ inputs.postgres-version }} | ||
| run: make test-postgres-docker | ||
|
|
||
| - name: Setup Embedded Postgres (Windows/macOS) | ||
| if: runner.os != 'Linux' | ||
| shell: bash | ||
| env: | ||
| POSTGRES_VERSION: ${{ inputs.postgres-version }} | ||
| EMBEDDED_PG_PATH: ${{ inputs.embedded-pg-path }} | ||
| EMBEDDED_PG_CACHE_DIR: ${{ inputs.embedded-pg-cache }} | ||
| run: | | ||
| go run scripts/embedded-pg/main.go -path "${EMBEDDED_PG_PATH}" -cache "${EMBEDDED_PG_CACHE_DIR}" | ||
dannykopping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| - name: Run tests | ||
| shell: bash | ||
| env: | ||
| TEST_NUM_PARALLEL_PACKAGES: ${{ inputs.test-parallelism-packages }} | ||
| TEST_NUM_PARALLEL_TESTS: ${{ inputs.test-parallelism-tests }} | ||
| TEST_COUNT: ${{ inputs.test-count }} | ||
| TEST_PACKAGES: ${{ inputs.test-packages }} | ||
| RACE_DETECTION: ${{ inputs.race-detection }} | ||
| TS_DEBUG_DISCO: "true" | ||
| LC_CTYPE: "en_US.UTF-8" | ||
| LC_ALL: "en_US.UTF-8" | ||
| run: | | ||
| set -euo pipefail | ||
|
|
||
| if [[ ${RACE_DETECTION} == true ]]; then | ||
| gotestsum --junitfile="gotests.xml" --packages="${TEST_PACKAGES}" -- \ | ||
| -race \ | ||
| -parallel "${TEST_NUM_PARALLEL_TESTS}" \ | ||
| -p "${TEST_NUM_PARALLEL_PACKAGES}" | ||
| else | ||
| make test | ||
| fi | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -327,7 +327,7 @@ jobs: | |
| test-go-pg: | ||
| # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below | ||
| # when changing runner sizes | ||
| runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }} | ||
| runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || matrix.os && matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-32' || matrix.os }} | ||
dannykopping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| needs: changes | ||
| if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' | ||
| # This timeout must be greater than the timeout set by `go test` in | ||
|
|
@@ -336,6 +336,7 @@ jobs: | |
| # even if some of the preceding steps are slow. | ||
| timeout-minutes: 25 | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| os: | ||
| - ubuntu-latest | ||
|
|
@@ -416,82 +417,83 @@ jobs: | |
| find . -type f ! -path ./.git/\*\* | mtimehash | ||
| find . -type d ! -path ./.git/\*\* -exec touch -t 200601010000 {} + | ||
|
|
||
| - name: Test with PostgreSQL Database | ||
| env: | ||
| POSTGRES_VERSION: "13" | ||
| TS_DEBUG_DISCO: "true" | ||
| LC_CTYPE: "en_US.UTF-8" | ||
| LC_ALL: "en_US.UTF-8" | ||
| - name: Normalize Terraform Path for Caching | ||
| shell: bash | ||
| # Terraform gets installed in a random directory, so we need to normalize | ||
| # the path or many cached tests will be invalidated. | ||
| run: | | ||
| set -o errexit | ||
| set -o pipefail | ||
|
|
||
| if [ "$RUNNER_OS" == "Windows" ]; then | ||
| # Create a temp dir on the R: ramdisk drive for Windows. The default | ||
| # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 | ||
| mkdir -p "R:/temp/embedded-pg" | ||
| go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}" | ||
| elif [ "$RUNNER_OS" == "macOS" ]; then | ||
| # Postgres runs faster on a ramdisk on macOS too | ||
| mkdir -p /tmp/tmpfs | ||
| sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs | ||
| go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}" | ||
| elif [ "$RUNNER_OS" == "Linux" ]; then | ||
| make test-postgres-docker | ||
| fi | ||
| mkdir -p "$RUNNER_TEMP/sym" | ||
| source scripts/normalize_path.sh | ||
| normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" | ||
|
|
||
| # if macOS, install google-chrome for scaletests | ||
| # As another concern, should we really have this kind of external dependency | ||
| # requirement on standard CI? | ||
| if [ "${RUNNER_OS}" == "macOS" ]; then | ||
| brew install google-chrome | ||
| fi | ||
| - name: Setup RAM disk for Embedded Postgres (Windows) | ||
| if: runner.os == 'Windows' | ||
| shell: bash | ||
| # The default C: drive is extremely slow: | ||
| # https://github.com/actions/runner-images/issues/8755 | ||
| run: mkdir -p "R:/temp/embedded-pg" | ||
|
|
||
| # macOS will output "The default interactive shell is now zsh" | ||
| # intermittently in CI... | ||
| if [ "${RUNNER_OS}" == "macOS" ]; then | ||
| touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile | ||
| fi | ||
| - name: Setup RAM disk for Embedded Postgres (macOS) | ||
| if: runner.os == 'macOS' | ||
| shell: bash | ||
| run: | | ||
| # Postgres runs faster on a ramdisk on macOS. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have we verified this recently? I'd especially be interested in adjusting PostgreSQL settings to see if we can alleviate it rather than using ramdisk. We simply need to increase RAM retention for PG on macOS and it should be more efficient than placing both storage and cache in RAM. Guessing this applies to Windows as well.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not changing anything about this right now. We can follow this PR with some PG changes. |
||
| mkdir -p /tmp/tmpfs | ||
| sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs | ||
|
|
||
| if [ "${RUNNER_OS}" == "Windows" ]; then | ||
| # Our Windows runners have 16 cores. | ||
| # On Windows Postgres chokes up when we have 16x16=256 tests | ||
| # running in parallel, and dbtestutil.NewDB starts to take more than | ||
| # 10s to complete sometimes causing test timeouts. With 16x8=128 tests | ||
| # Postgres tends not to choke. | ||
| export TEST_NUM_PARALLEL_PACKAGES=8 | ||
| export TEST_NUM_PARALLEL_TESTS=16 | ||
| # Only the CLI and Agent are officially supported on Windows and the rest are too flaky | ||
| export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." | ||
| elif [ "${RUNNER_OS}" == "macOS" ]; then | ||
| # Our macOS runners have 8 cores. We set NUM_PARALLEL_TESTS to 16 | ||
| # because the tests complete faster and Postgres doesn't choke. It seems | ||
| # that macOS's tmpfs is faster than the one on Windows. | ||
| export TEST_NUM_PARALLEL_PACKAGES=8 | ||
| export TEST_NUM_PARALLEL_TESTS=16 | ||
| # Only the CLI and Agent are officially supported on macOS and the rest are too flaky | ||
| export TEST_PACKAGES="./cli/... ./enterprise/cli/... ./agent/..." | ||
| elif [ "${RUNNER_OS}" == "Linux" ]; then | ||
| # Our Linux runners have 8 cores. | ||
| export TEST_NUM_PARALLEL_PACKAGES=8 | ||
| export TEST_NUM_PARALLEL_TESTS=8 | ||
| fi | ||
| # Install google-chrome for scaletests. | ||
dannykopping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # As another concern, should we really have this kind of external dependency | ||
| # requirement on standard CI? | ||
| brew install google-chrome | ||
|
|
||
| # by default, run tests with cache | ||
| if [ "${GITHUB_REF}" == "refs/heads/main" ]; then | ||
| # on main, run tests without cache | ||
| export TEST_COUNT="1" | ||
| fi | ||
| # macOS will output "The default interactive shell is now zsh" intermittently in CI. | ||
| touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile | ||
|
|
||
| mkdir -p "$RUNNER_TEMP/sym" | ||
| source scripts/normalize_path.sh | ||
| # terraform gets installed in a random directory, so we need to normalize | ||
| # the path to the terraform binary or a bunch of cached tests will be | ||
| # invalidated. See scripts/normalize_path.sh for more details. | ||
| normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" | ||
| - name: Test with PostgreSQL Database (Linux) | ||
| if: runner.os == 'Linux' | ||
| uses: ./.github/actions/test-go-pg | ||
| with: | ||
| postgres-version: "13" | ||
| # Our Linux runners have 16 cores. | ||
| test-parallelism-packages: "16" | ||
| test-parallelism-tests: "8" | ||
| # By default, run tests with cache for improved speed (possibly at the expense of correctness). | ||
| # On main, run tests without cache for the inverse. | ||
| test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} | ||
|
|
||
| make test | ||
| - name: Test with PostgreSQL Database (macOS) | ||
| if: runner.os == 'macOS' | ||
| uses: ./.github/actions/test-go-pg | ||
| with: | ||
| postgres-version: "13" | ||
| # Our macOS runners have 8 cores. | ||
| # Even though this parallelism seems high, we've observed relatively low flakiness in the past. | ||
| # See https://github.com/coder/coder/pull/21091#discussion_r2609891540. | ||
| test-parallelism-packages: "8" | ||
| test-parallelism-tests: "16" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Linux has 16 cores and 16x8 parallelism, but macOS has 8 cores and 8x16 parallelism --- seems wrong, since in both cases you can have 128 tests running concurrently.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can't scale MacOS any further, and for Linux I just naïvely doubled the package parallelism since we now have double the CPUs. It was like this before: Are you suggesting I bump
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If anything I'd cut the macOS parallelism. If you leave it as is, then maybe a comment explaining that the numbers were kinda determined empirically where things don't break horribly.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/coder/coder/blob/main/.github/workflows/ci.yaml#L467-L472 I haven't change the parallelism (sorry, it hard to track changes because of the reorganisation); if you're aware that it's the same, why do you want to cut parallelism?
According to this, it seems kinda OK?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally we'd have some theoretical consistency --- a model of parallelism that maps to CPU cores. Cutting macOS parallelism would align with that consistent model and be easier to reason about. I guess upping Linux parallelism would also be consistent, but I'm gun shy about increasing things and potentially causing more flakes. In the absence of consistency, we can just document what we've observed and 🤷
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, documented in 983515f 👍
Let's link up next week to try reason through this and develop a heuristic which will set the parallelism automatically and consistently across all these different platforms & jobs? |
||
| # By default, run tests with cache for improved speed (possibly at the expense of correctness). | ||
| # On main, run tests without cache for the inverse. | ||
| test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} | ||
| # Only the CLI and Agent are officially supported on macOS; the rest are too flaky. | ||
| test-packages: "./cli/... ./enterprise/cli/... ./agent/..." | ||
| embedded-pg-path: "/tmp/tmpfs/embedded-pg" | ||
| embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} | ||
|
|
||
| - name: Test with PostgreSQL Database (Windows) | ||
| if: runner.os == 'Windows' | ||
| uses: ./.github/actions/test-go-pg | ||
| with: | ||
| postgres-version: "13" | ||
| # Our Windows runners have 32 cores. | ||
| test-parallelism-packages: "32" | ||
| test-parallelism-tests: "16" | ||
| # By default, run tests with cache for improved speed (possibly at the expense of correctness). | ||
| # On main, run tests without cache for the inverse. | ||
| test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} | ||
| # Only the CLI and Agent are officially supported on Windows; the rest are too flaky. | ||
| test-packages: "./cli/... ./enterprise/cli/... ./agent/..." | ||
| embedded-pg-path: "R:/temp/embedded-pg" | ||
| embedded-pg-cache: ${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }} | ||
|
|
||
| - name: Upload failed test db dumps | ||
| uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 | ||
|
|
@@ -521,7 +523,7 @@ jobs: | |
| api-key: ${{ secrets.DATADOG_API_KEY }} | ||
|
|
||
| test-go-pg-17: | ||
| runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }} | ||
| runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} | ||
| needs: | ||
| - changes | ||
| if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' | ||
|
|
@@ -554,12 +556,25 @@ jobs: | |
| with: | ||
| key-prefix: test-go-pg-17-${{ runner.os }}-${{ runner.arch }} | ||
|
|
||
| - name: Test with PostgreSQL Database | ||
| env: | ||
| POSTGRES_VERSION: "17" | ||
| TS_DEBUG_DISCO: "true" | ||
| - name: Normalize Terraform Path for Caching | ||
| shell: bash | ||
| # Terraform gets installed in a random directory, so we need to normalize | ||
| # the path or many cached tests will be invalidated. | ||
| run: | | ||
| make test-postgres | ||
| mkdir -p "$RUNNER_TEMP/sym" | ||
| source scripts/normalize_path.sh | ||
| normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" | ||
dannykopping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| - name: Test with PostgreSQL Database | ||
| uses: ./.github/actions/test-go-pg | ||
| with: | ||
| postgres-version: "17" | ||
| # Our Linux runners have 16 cores. | ||
| test-parallelism-packages: "16" | ||
| test-parallelism-tests: "8" | ||
| # By default, run tests with cache for improved speed (possibly at the expense of correctness). | ||
| # On main, run tests without cache for the inverse. | ||
| test-count: ${{ github.ref == 'refs/heads/main' && '1' || '' }} | ||
|
|
||
| - name: Upload Test Cache | ||
| uses: ./.github/actions/test-cache/upload | ||
|
|
@@ -575,7 +590,7 @@ jobs: | |
| api-key: ${{ secrets.DATADOG_API_KEY }} | ||
|
|
||
| test-go-race-pg: | ||
| runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} | ||
| runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-32' || 'ubuntu-latest' }} | ||
| needs: changes | ||
| if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' | ||
| timeout-minutes: 25 | ||
|
|
@@ -603,16 +618,28 @@ jobs: | |
| with: | ||
| key-prefix: test-go-race-pg-${{ runner.os }}-${{ runner.arch }} | ||
|
|
||
| - name: Normalize Terraform Path for Caching | ||
| shell: bash | ||
| # Terraform gets installed in a random directory, so we need to normalize | ||
| # the path or many cached tests will be invalidated. | ||
| run: | | ||
| mkdir -p "$RUNNER_TEMP/sym" | ||
| source scripts/normalize_path.sh | ||
| normalize_path_with_symlinks "$RUNNER_TEMP/sym" "$(dirname "$(which terraform)")" | ||
|
|
||
| # We run race tests with reduced parallelism because they use more CPU and we were finding | ||
| # instances where tests appear to hang for multiple seconds, resulting in flaky tests when | ||
| # short timeouts are used. | ||
| # c.f. discussion on https://github.com/coder/coder/pull/15106 | ||
| # Our Linux runners have 32 cores, but we reduce parallelism since race detection adds a lot of overhead. | ||
| # We aim to have parallelism match CPU count (8*4=32) to avoid making flakes worse. | ||
| - name: Run Tests | ||
| env: | ||
| POSTGRES_VERSION: "17" | ||
| run: | | ||
| make test-postgres-docker | ||
| gotestsum --junitfile="gotests.xml" --packages="./..." -- -race -parallel 4 -p 4 | ||
| uses: ./.github/actions/test-go-pg | ||
| with: | ||
| postgres-version: "17" | ||
| test-parallelism-packages: "8" | ||
| test-parallelism-tests: "4" | ||
| race-detection: "true" | ||
|
|
||
| - name: Upload Test Cache | ||
| uses: ./.github/actions/test-cache/upload | ||
|
|
||

Uh oh!
There was an error while loading. Please reload this page.