[Diffusion] Enable vLLM-Omni Plugin for Diffusion Model #6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: ATOM Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]  # Triggers on PRs targeting `main`
    types: [opened, synchronize, reopened, ready_for_review]
    # Documentation-only changes do not need a GPU test run.
    paths-ignore:
      - '**/*.md'
      - 'docs/**'
      - 'LICENSE'
      - '.gitignore'
  schedule:
    # Nightly at 00:00 Beijing time (16:00 UTC)
    - cron: '0 16 * * *'
  workflow_dispatch:
    inputs:
      aiter_branch:
        description: 'ROCm/aiter branch to build inside the CI image'
        required: false
        default: 'main'
        type: string

# One active run per ref; in-flight runs are cancelled except on main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  ATOM_BASE_IMAGE: rocm/atom-dev:latest
  # For PRs, clone the contributor's repo; otherwise the canonical repo.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }}
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
  # workflow_dispatch: inputs.aiter_branch; otherwise main (matches previous default-branch shallow clone)
  AITER_GIT_REF: ${{ github.event_name == 'workflow_dispatch' && inputs.aiter_branch || 'main' }}
jobs:
  # Gate job: verify the pre-checkin signal before any heavy GPU jobs start.
  check-signal:
    name: Check Pre Checkin Signal
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
    steps:
      # Manual dispatch runs bypass the signal check entirely.
      - name: Checkout ATOM repo
        if: ${{ github.event_name != 'workflow_dispatch' }}
        uses: actions/checkout@v6
      - name: Download and check pre-checkin signal
        if: ${{ github.event_name != 'workflow_dispatch' }}
        run: bash ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_SHA: ${{ github.sha }}
| download_aiter_wheel: | |
| if: ${{ needs.check-signal.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} | |
| needs: [check-signal] | |
| name: Download aiter wheel | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Find and download latest aiter wheel | |
| run: | | |
| set -euo pipefail | |
| echo "=== Finding latest aiter-whl-main artifact from ROCm/aiter ===" | |
| API_URL="https://api.github.com" | |
| AUTH_HEADER="Authorization: token ${{ secrets.GITHUB_TOKEN }}" | |
| AITER_TEST_WORKFLOW_ID=179476100 | |
| RUNS=$(curl -s -H "$AUTH_HEADER" \ | |
| "$API_URL/repos/ROCm/aiter/actions/workflows/$AITER_TEST_WORKFLOW_ID/runs?per_page=100&branch=main&event=push") | |
| ARTIFACT_ID="" | |
| ARTIFACT_NAME="" | |
| for RUN_ID in $(echo "$RUNS" | jq -r '.workflow_runs[].id'); do | |
| ARTIFACT_JSON=$(curl -s -H "$AUTH_HEADER" \ | |
| "$API_URL/repos/ROCm/aiter/actions/runs/$RUN_ID/artifacts" \ | |
| | jq '[.artifacts[] | select(.name | startswith("aiter-whl-main")) | select(.expired == false)] | first') | |
| if [ "$ARTIFACT_JSON" != "null" ] && [ -n "$ARTIFACT_JSON" ]; then | |
| ARTIFACT_ID=$(echo "$ARTIFACT_JSON" | jq -r '.id') | |
| ARTIFACT_NAME=$(echo "$ARTIFACT_JSON" | jq -r '.name') | |
| echo "Found artifact in run $RUN_ID: $ARTIFACT_NAME (ID: $ARTIFACT_ID)" | |
| break | |
| fi | |
| done | |
| if [ -z "$ARTIFACT_ID" ] || [ "$ARTIFACT_ID" = "null" ]; then | |
| echo "ERROR: No aiter-whl-main artifact found in recent Aiter Test runs" | |
| exit 1 | |
| fi | |
| echo "=== Downloading artifact ===" | |
| mkdir -p aiter-whl | |
| curl -s -L -H "$AUTH_HEADER" \ | |
| "$API_URL/repos/ROCm/aiter/actions/artifacts/$ARTIFACT_ID/zip" \ | |
| -o aiter-whl.zip | |
| unzip -o aiter-whl.zip -d aiter-whl | |
| rm -f aiter-whl.zip | |
| AITER_WHL=$(ls -t aiter-whl/amd_aiter*.whl 2>/dev/null | head -1) | |
| if [ -z "$AITER_WHL" ]; then | |
| echo "ERROR: No amd_aiter wheel found in artifact" | |
| ls -la aiter-whl/ | |
| exit 1 | |
| fi | |
| echo "Downloaded wheel: $AITER_WHL" | |
| - name: Upload aiter wheel | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: aiter-whl | |
| path: aiter-whl/amd_aiter*.whl | |
| retention-days: 1 | |
| load-test-models: | |
| name: Load test model configs | |
| runs-on: ubuntu-latest | |
| outputs: | |
| models_json: ${{ steps.load.outputs.models_json }} | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - id: load | |
| env: | |
| EVENT_NAME: ${{ github.event_name }} | |
| run: | | |
| python3 << 'PY' | |
| import json, os | |
| event = os.environ["EVENT_NAME"] | |
| # pr → pr models only; push to main → pr+main; schedule/dispatch → all | |
| level_map = {"schedule": "nightly", "workflow_dispatch": "nightly", "push": "main"} | |
| current = level_map.get(event, "pr") | |
| allowed = {"pr": {"pr"}, "main": {"pr", "main"}, "nightly": {"pr", "main", "nightly"}}[current] | |
| models = json.load(open(".github/benchmark/models_accuracy.json", encoding="utf-8")) | |
| filtered = [m for m in models if m.get("test_level", "nightly") in allowed] | |
| with open(os.environ["GITHUB_OUTPUT"], "a") as f: | |
| f.write(f"models_json={json.dumps(filtered)}\n") | |
| print(f"Event={event} level={current}: {len(filtered)}/{len(models)} models") | |
| print(f"{'Model':<45} {'Level':<10} {'Runner'}") | |
| print("-" * 80) | |
| for m in models: | |
| enabled = "✓" if m in filtered else "·" | |
| print(f" {enabled} {m['model_name']:<43} {m.get('test_level','?'):<10} {m['runner']}") | |
| PY | |
| atom-test: | |
| needs: [download_aiter_wheel, load-test-models] | |
| name: ATOM Test | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: ${{ fromJson(needs.load-test-models.outputs.models_json) }} | |
| if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }} | |
| runs-on: ${{ matrix.runner }} | |
| env: | |
| CONTAINER_NAME: atom_test_${{ strategy.job-index }} | |
| steps: | |
| - name: Set HF_TOKEN | |
| run: echo "HF_TOKEN=${HF_TOKEN:-${{ secrets.AMD_HF_TOKEN }}}" >> $GITHUB_ENV | |
| - name: Kill all Docker containers and clean up workspace | |
| if: matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: | | |
| echo "=== Cleaning up containers on $(hostname) ===" | |
| containers=$(docker ps -q) | |
| if [ -n "$containers" ]; then | |
| docker kill $containers || true | |
| fi | |
| docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && find /workspace -mindepth 1 -delete" || true | |
| - name: Show Docker containers | |
| if: matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: docker ps -a | |
| - name: Show ROCm memory usage | |
| if: matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: rocm-smi --showmemuse | |
| - name: Show ROCm GPU processes | |
| if: matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: rocm-smi --showpidgpus | |
| - name: Checkout ATOM repo | |
| uses: actions/checkout@v6 | |
| - name: Docker Login | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| run: | | |
| echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin | |
| - name: Generate Dockerfile for forked repo | |
| if: ${{ github.event.pull_request.head.repo.fork }} | |
| run: | | |
| cat <<EOF > Dockerfile.mod | |
| FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} | |
| RUN pip install -U lm-eval[api] | |
| RUN pip show lm-eval || true | |
| RUN pip install hf_transfer | |
| RUN pip show hf_transfer || true | |
| RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true | |
| RUN pip uninstall -y amd-aiter | |
| RUN pip install --upgrade "pybind11>=3.0.1" | |
| RUN pip show pybind11 | |
| RUN rm -rf /app/aiter-test | |
| RUN git clone --depth 1 -b ${{ env.AITER_GIT_REF }} https://github.com/ROCm/aiter.git /app/aiter-test && \\ | |
| cd /app/aiter-test && \\ | |
| git submodule sync && git submodule update --init --recursive && \\ | |
| MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop | |
| RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true | |
| RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true | |
| RUN pip uninstall -y atom | |
| RUN rm -rf /app/ATOM | |
| RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ | |
| cd /app/ATOM && \\ | |
| git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ | |
| pip install -e . | |
| RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true | |
| EOF | |
| - name: Download aiter wheel | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: aiter-whl | |
| path: /tmp/aiter-whl | |
| - name: Start CI container | |
| run: | | |
| echo "Clean up containers..." | |
| (docker ps -aq -f name="^${CONTAINER_NAME}$" | xargs -r docker stop) || true | |
| (docker ps -aq -f name="^${CONTAINER_NAME}$" | xargs -r docker rm) || true | |
| if [ -f "/etc/podinfo/gha-render-devices" ]; then | |
| DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) | |
| else | |
| DEVICE_FLAG="--device /dev/dri" | |
| fi | |
| if [ -d "/models" ]; then | |
| MODEL_MOUNT="-v /models:/models" | |
| else | |
| echo "Warning: /models directory not found on runner; skipping /models mount and disabling model pre-download optimization." | |
| MODEL_MOUNT="" | |
| fi | |
| # Write env_vars via env block (avoids expression injection) | |
| printenv MODEL_ENV_VARS | grep -v '^$' > /tmp/env_file.txt || true | |
| IMAGE_TAG=${{ env.ATOM_BASE_IMAGE }} | |
| echo "Starting container with image: $IMAGE_TAG" | |
| echo "Model-specific environment variables:" | |
| cat /tmp/env_file.txt | |
| docker run -dt --pull always --device=/dev/kfd $DEVICE_FLAG \ | |
| -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ | |
| $MODEL_MOUNT \ | |
| -w /workspace \ | |
| --ipc=host --group-add video \ | |
| --shm-size=16G \ | |
| --privileged \ | |
| --cap-add=SYS_PTRACE \ | |
| -e HF_TOKEN="${HF_TOKEN:-}" \ | |
| --env-file /tmp/env_file.txt \ | |
| --security-opt seccomp=unconfined \ | |
| --ulimit memlock=-1 \ | |
| --ulimit stack=67108864 \ | |
| -e ATOM_DISABLE_MMAP=true \ | |
| -v "${{ github.workspace }}:/workspace" \ | |
| -w /workspace \ | |
| --name "$CONTAINER_NAME" \ | |
| $IMAGE_TAG | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| MODEL_ENV_VARS: ${{ matrix.env_vars }} | |
| - name: Check shm size | |
| run: | | |
| docker exec "$CONTAINER_NAME" df -h /dev/shm | |
| - name: Install aiter from wheel | |
| run: | | |
| AITER_WHL=$(ls -t /tmp/aiter-whl/amd_aiter*.whl 2>/dev/null | head -1) | |
| if [ -z "$AITER_WHL" ]; then | |
| echo "ERROR: No amd_aiter wheel found" | |
| ls -la /tmp/aiter-whl/ | |
| exit 1 | |
| fi | |
| echo "=== Copying wheel into container ===" | |
| WHL_NAME=$(basename "$AITER_WHL") | |
| docker cp "$AITER_WHL" "$CONTAINER_NAME:/tmp/$WHL_NAME" | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| set -euo pipefail | |
| echo '=== Uninstalling existing amd-aiter ===' | |
| pip uninstall -y amd-aiter || true | |
| echo '=== Installing amd-aiter from wheel ===' | |
| pip install /tmp/$WHL_NAME | |
| echo '=== Installed amd-aiter version ===' | |
| pip show amd-aiter | |
| " | |
| - name: Install ATOM and dependencies | |
| run: | | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| set -euo pipefail | |
| pip install --timeout 60 --retries 10 -U 'lm-eval[api]' | |
| pip install --timeout 60 --retries 10 hf_transfer | |
| pip install --timeout 60 --retries 10 --upgrade 'pybind11>=3.0.1' | |
| echo '=== Installing ATOM ===' | |
| cd /workspace | |
| git config --global --add safe.directory /workspace | |
| pip install -e . | |
| echo '=== Installed package versions ===' | |
| pip show amd-aiter | grep -E '^(Name|Version):' | |
| pip show atom | grep -E '^(Name|Version):' | |
| pip show triton | grep -E '^(Name|Version):' | |
| pip show torch | grep -E '^(Name|Version):' | |
| " | |
| - name: Download models | |
| run: | | |
| if [ -d "/models" ]; then | |
| echo "/models directory found, downloading model to /models/${{ matrix.model_path }}" | |
| if ! docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}"; then | |
| echo "Model download failed for '${{ matrix.model_path }}'. Aborting." | |
| exit 1 | |
| fi | |
| else | |
| echo "/models directory not found, skipping model download" | |
| fi | |
| - name: Run ATOM simple inference | |
| # Skip simple inference; accuracy test already validates correctness | |
| if: false | |
| timeout-minutes: 30 | |
| run: | | |
| # Run the inference and capture output | |
| set -euo pipefail | |
| echo "" | |
| echo "========== Running test ==========" | |
| if [ -d "/models" ]; then | |
| model_path="/models/${{ matrix.model_path }}" | |
| else | |
| model_path="${{ matrix.model_path }}" | |
| fi | |
| echo "Model path: $model_path" | |
| ls -la $model_path || true | |
| # Print debug logs | |
| echo "========= Runner debug logs ===============" | |
| ps aux | |
| rocm-smi --showmemuse | |
| rocm-smi --showpids | |
| docker ps -a | |
| echo "========= End runner debug logs ===============" | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| set -euo pipefail | |
| python3 -m atom.examples.simple_inference \ | |
| --model \"$model_path\" \ | |
| ${{ matrix.extraArgs }} \ | |
| --temperature 0 \ | |
| | grep -E '^Prompt: |^Completion:' | |
| " > atom_test_output.txt | |
| echo "" | |
| echo "========== Showing test output below ==========" | |
| cat atom_test_output.txt | |
| - name: Compare output with golden outputs | |
| if: false | |
| timeout-minutes: 30 | |
| # TODO: skip for all test until it's fixed | |
| run: | | |
| echo "========== Comparing output with golden outputs ==========" | |
| if ! diff -u -B -w --strip-trailing-cr \ | |
| atom_test_output.txt \ | |
| ".github/workflows/golden_outputs/${{ matrix.model_name }}_golden_output.txt"; then | |
| echo "Failed: Output does not match golden outputs." | |
| exit 1 | |
| else | |
| echo "Success: Output matches golden outputs." | |
| fi | |
| - name: Run ATOM accuracy test | |
| timeout-minutes: 30 | |
| run: | | |
| set -euo pipefail | |
| echo "" | |
| echo "========== Launching ATOM server ==========" | |
| if [ -d "/models" ]; then | |
| model_path="/models/${{ matrix.model_path }}" | |
| else | |
| model_path="${{ matrix.model_path }}" | |
| fi | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| .github/scripts/atom_test.sh launch $model_path ${{ matrix.extraArgs }} | |
| " | |
| echo "" | |
| echo "========== Running accuracy test ==========" | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| .github/scripts/atom_test.sh accuracy $model_path | |
| " 2>&1 | tee atom_accuracy_output.txt | |
| - name: Check accuracy test results | |
| if: success() | |
| env: | |
| MODEL_NAME: ${{ matrix.model_name }} | |
| run: | | |
| result_file=$(ls -1t accuracy_test_results/*.json 2>/dev/null | head -n 1) | |
| if [ -z "$result_file" ] || [ ! -f "$result_file" ]; then | |
| echo "ERROR: No results JSON file found in accuracy_test_results/" | |
| exit 2 | |
| else | |
| echo "RESULT_FILE: $result_file" | |
| fi | |
| flexible_extract_value=$(jq '.results.gsm8k["exact_match,flexible-extract"]' "$result_file") | |
| echo "Flexible extract value: $flexible_extract_value" | |
| # Read threshold from models_accuracy.json (via env var to avoid shell injection) | |
| threshold=$(python3 -c " | |
| import json, os | |
| models = json.load(open('.github/benchmark/models_accuracy.json', encoding='utf-8')) | |
| name = os.environ['MODEL_NAME'] | |
| t = next((m.get('accuracy_threshold', 0) for m in models if m['model_name'] == name), 0) | |
| print(t) | |
| ") | |
| echo "Accuracy test threshold: $threshold" | |
| result=$(awk -v val="$flexible_extract_value" -v threshold="$threshold" 'BEGIN {print (val < threshold) ? 1 : 0}') | |
| if [ "$result" -eq 1 ]; then | |
| echo "Accuracy test failed: $flexible_extract_value < $threshold" | |
| exit 1 | |
| else | |
| echo "Accuracy test passed: $flexible_extract_value >= $threshold" | |
| fi | |
| - name: Collect Test Summary | |
| if: success() | |
| env: | |
| MODEL_NAME: ${{ matrix.model_name }} | |
| run: | | |
| # Read threshold and score for summary | |
| threshold=$(python3 -c " | |
| import json, os | |
| models = json.load(open('.github/benchmark/models_accuracy.json', encoding='utf-8')) | |
| name = os.environ['MODEL_NAME'] | |
| print(next((m.get('accuracy_threshold', 0) for m in models if m['model_name'] == name), 0)) | |
| ") | |
| result_file=$(ls -1t accuracy_test_results/*.json 2>/dev/null | head -n 1) | |
| score=$(jq '.results.gsm8k["exact_match,flexible-extract"]' "$result_file" 2>/dev/null || echo "N/A") | |
| echo "Accuracy Test Summary for ${{ matrix.model_name }} (threshold: ${threshold}, score: ${score}):" >> $GITHUB_STEP_SUMMARY | |
| awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' atom_accuracy_output.txt >> $GITHUB_STEP_SUMMARY | |
| - name: Upload output | |
| if: always() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: ${{ matrix.model_name }}_atom_test_output.txt | |
| path: atom_test_output.txt | |
| - name: Upload accuracy results | |
| if: always() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: accuracy-${{ matrix.model_name }} | |
| path: accuracy_test_results/*.json | |
| if-no-files-found: ignore | |
| - name: Clean Up | |
| if: always() | |
| run: | | |
| # TODO: run a separate container for cleanup of the workspace due to permission issue to remove some pyc files under __pycache__ whose owners are root. | |
| # We should use non-root user to run the test to avoid this issue. | |
| set -x | |
| echo "========== Cleaning up workspace ==========" | |
| if [[ ${{ matrix.runner }} == atom-mi355-8gpu.predownload ]]; then | |
| docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && find /workspace -mindepth 1 -delete" || true | |
| fi | |
| docker stop "$CONTAINER_NAME" || true | |
| docker rm "$CONTAINER_NAME" || true | |
| # Remove the pre-built image to free disk space on the runner | |
| docker rmi "rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" || true | |
| # ---------- Push accuracy data to benchmark dashboard ---------- | |
| accuracy-dashboard: | |
| name: Update accuracy dashboard | |
| needs: [atom-test] | |
| if: always() && github.ref == 'refs/heads/main' && (github.event_name == 'push' || github.event_name == 'schedule') | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - uses: actions/setup-python@v6 | |
| with: | |
| python-version: '3.12' | |
| - name: Download accuracy artifacts | |
| uses: actions/download-artifact@v8 | |
| with: | |
| path: /tmp/accuracy-results | |
| pattern: accuracy-* | |
| - name: List downloaded artifacts | |
| run: | | |
| echo "=== Downloaded accuracy artifacts ===" | |
| find /tmp/accuracy-results -type f -name '*.json' | head -20 || echo "No JSON files found" | |
| - name: Transform accuracy results for dashboard | |
| run: | | |
| python3 .github/scripts/accuracy_to_dashboard.py \ | |
| /tmp/accuracy-results \ | |
| --output accuracy-benchmark-input.json \ | |
| --models .github/benchmark/models_accuracy.json \ | |
| --run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| echo "=== Generated entries ===" | |
| cat accuracy-benchmark-input.json | |
| - name: Store accuracy result to dashboard | |
| if: hashFiles('accuracy-benchmark-input.json') != '' | |
| uses: benchmark-action/github-action-benchmark@v1 | |
| with: | |
| tool: customBiggerIsBetter | |
| output-file-path: accuracy-benchmark-input.json | |
| gh-pages-branch: gh-pages | |
| benchmark-data-dir-path: benchmark-dashboard | |
| auto-push: true | |
| max-items-in-chart: 90 | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |