Skip to content

GPU Tests

GPU Tests #3052

Workflow file for this run

# Runs the integration tests under tests/integration/ on the GPU runner.
# Triggers: every 8 hours, manual dispatch, merge queue, and pull requests
# that touch the integration tests.
name: "GPU Tests"
on:
  schedule:
    - cron: "0 */8 * * *"  # Every 8 hours
  workflow_dispatch:
  merge_group:
  pull_request:
    # Only pull requests that modify the integration tests run this workflow.
    paths:
      - "tests/integration/**"
env:
  SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OUMI: v0.0.1
  NCCL_DEBUG: INFO
jobs:
  gpu-tests:
    permissions:
      contents: "read"
    runs-on: linux-gpu-runner
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          # Checkout using commit hash to make "no-commit-to-branch" test pass.
          ref: ${{ github.sha }}
          # Need full history for setuptools-scm to detect version
          fetch-depth: 0
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4 wget software-properties-common
      - name: Install CUDA Toolkit
        run: |
          # Install CUDA toolkit from NVIDIA (Ubuntu 24.04)
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get install -y cuda-toolkit-12-6
          # Export the CUDA install location to the following steps.
          echo "CUDA_HOME=/usr/local/cuda-12.6" >> "$GITHUB_ENV"
          # GITHUB_PATH prepends the directory to PATH for later steps.
          echo "/usr/local/cuda-12.6/bin" >> "$GITHUB_PATH"
          # Append the previous value only when it is set: a trailing empty
          # entry would make the dynamic loader search the current directory.
          echo "LD_LIBRARY_PATH=/usr/local/cuda-12.6/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
      - name: "Set up Python"
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          cache-dependency-glob: "pyproject.toml"
      - name: Install Dependencies
        run: |
          # Install in system python as we're in a sandbox env
          uv pip install --system -e '.[ci_gpu]' hf_transfer
      - name: Cache HuggingFace Models
        uses: actions/cache@v5
        with:
          path: |
            ~/.cache/huggingface
          # The cache key changes whenever the predownload script changes;
          # restore-keys falls back to any earlier cache for the same OS.
          key: hf-models-${{ runner.os }}-${{ hashFiles('tests/scripts/predownload_for_github_gpu_tests.sh') }}
          restore-keys: |
            hf-models-${{ runner.os }}-
      - name: Download Test Data
        run: |
          ./tests/scripts/predownload_for_github_gpu_tests.sh
      - name: Verify CUDA environment
        run: |
          echo "=== CUDA Environment Check ==="
          nvidia-smi
          echo "CUDA_HOME: $CUDA_HOME"
          nvcc --version
      - name: Run GPU tests
        run: |
          cd ./tests/integration/
          # Deselect tests marked e2e, e2e_eternal or multi_gpu.
          pytest -s -m "not e2e and not e2e_eternal and not multi_gpu" --durations=50 --timeout=300