Skip to content

Commit

Permalink
2024-10-30 nightly release (23d590e)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed Oct 30, 2024
1 parent c787213 commit e47e879
Show file tree
Hide file tree
Showing 211 changed files with 3,544 additions and 1,198 deletions.
6 changes: 0 additions & 6 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,6 @@ case "$image" in
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
Expand All @@ -429,9 +426,6 @@ case "$image" in
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
Expand Down
51 changes: 44 additions & 7 deletions .ci/docker/common/install_cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ install_ubuntu() {
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh`
apt-get install -y cargo
echo "Checking out sccache repo"
git clone https://github.com/pytorch/sccache
if [ -n "$CUDA_VERSION" ]; then
# TODO: Remove this
git clone https://github.com/pytorch/sccache
else
git clone https://github.com/mozilla/sccache -b v0.8.2
fi
cd sccache
echo "Building sccache"
cargo build --release
Expand All @@ -19,6 +24,10 @@ install_ubuntu() {
rm -rf sccache
apt-get remove -y cargo rustc
apt-get autoclean && apt-get clean

echo "Downloading old sccache binary from S3 repo for PCH builds"
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a
chmod 755 /opt/cache/bin/sccache-0.2.14a
}

install_binary() {
Expand All @@ -36,18 +45,46 @@ if [ -n "$ROCM_VERSION" ]; then
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
else
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
# TODO: Install the pre-built binary from S3 as building from source
# https://github.com/pytorch/sccache has started failing mysteriously
# in which sccache server couldn't start with the following error:
# sccache: error: Invalid argument (os error 22)
install_binary
if [ -n "$CUDA_VERSION" ]; then
# TODO: Install the pre-built binary from S3 as building from source
# https://github.com/pytorch/sccache has started failing mysteriously
# in which sccache server couldn't start with the following error:
# sccache: error: Invalid argument (os error 22)
install_binary
else
install_ubuntu
fi
fi
chmod a+x /opt/cache/bin/sccache

#######################################
# Write an executable wrapper script for a compiler so that invocations are
# routed through sccache.
#
# The generated wrapper looks at the name of its parent process: when the
# parent is not sccache it re-executes the real tool through sccache,
# otherwise it executes the real tool directly so sccache is not invoked
# recursively. The wrapper generated for "gcc" additionally bypasses sccache
# when the first or second argument is -E (preprocessor dump).
#
# Arguments:
#   $1 - name of the tool to wrap (e.g. gcc); also the wrapper's file name
#   $2 - optional directory to write the wrapper into
#        (default: /opt/cache/bin)
# Outputs:
#   Creates/overwrites "<dir>/<tool>" and marks it executable.
# Returns:
#   Exit status of the final chmod.
#######################################
function write_sccache_stub() {
  local tool="$1"
  local stub_dir="${2:-/opt/cache/bin}"
  local stub_path="${stub_dir}/${tool}"
  local real_path

  # Resolve the real tool once instead of forking a lookup per generated line.
  # Best-effort on purpose: a tool that is not installed yields an empty path
  # and must not abort the caller (which may run under `set -e`).
  real_path="$(command -v "${tool}")" || true

  # NOTE: the text of the generated wrappers is intentionally kept as-is,
  # including the unquoted $(...) inside `[ ... ]`: if ps prints nothing the
  # test errors out and the wrapper falls through to the direct compiler call.
  if [[ "${tool}" == "gcc" ]]; then
    # Do not call sccache recursively when dumping preprocessor argument
    # For some reason it's very important for the first cached nvcc invocation
    cat > "${stub_path}" <<EOF
#!/bin/sh
# Unset LD_PRELOAD for ps because of asan + ps issues
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
  exec ${real_path} "\$@"
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache ${real_path} "\$@"
else
  exec ${real_path} "\$@"
fi
EOF
  else
    cat > "${stub_path}" <<EOF
#!/bin/sh
# Unset LD_PRELOAD for ps because of asan + ps issues
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache ${real_path} "\$@"
else
  exec ${real_path} "\$@"
fi
EOF
  fi
  chmod a+x "${stub_path}"
}

Expand Down
4 changes: 1 addition & 3 deletions .ci/pytorch/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,6 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
python tools/stats/export_test_times.py
fi

# snadampal: skipping it till sccache support added for aarch64
# https://github.com/pytorch/pytorch/issues/121559
if [[ "$BUILD_ENVIRONMENT" != *aarch64* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *s390x* ]]; then
print_sccache_stats
fi
6 changes: 6 additions & 0 deletions .ci/pytorch/common-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
# Save the absolute path in case later we chdir (as occurs in the gpu perf test)
script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"

if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
# Oddly, newer sccache produces a broken binary for PCH builds, so swap in
# the old sccache binary; see https://github.com/pytorch/pytorch/issues/139188
sudo mv /opt/cache/bin/sccache-0.2.14a /opt/cache/bin/sccache
fi

if which sccache > /dev/null; then
# Save sccache logs to file
sccache --stop-server > /dev/null 2>&1 || true
Expand Down
3 changes: 2 additions & 1 deletion .ci/pytorch/win-test-helpers/build_pytorch.bat
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ if not errorlevel 0 goto fail

if "%USE_XPU%"=="1" (
:: Activate xpu environment - VS env is required for xpu
call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
call "C:\Program Files (x86)\Intel\oneAPI\compiler\latest\env\vars.bat"
call "C:\Program Files (x86)\Intel\oneAPI\ocloc\latest\env\vars.bat"
if errorlevel 1 exit /b 1
:: Reduce build time. Only have MTL self-hosted runner now
SET TORCH_XPU_ARCH_LIST=xe-lpg
Expand Down
4 changes: 4 additions & 0 deletions .github/actions/build-android/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ runs:
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
DOCKER_IMAGE: ${{ inputs.docker-image }}
MATRIX_ARCH: ${{ inputs.arch }}
run: |
# detached container should get cleaned up by teardown_ec2_linux
set -exo pipefail
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
export container_name
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
Expand All @@ -56,6 +59,7 @@ runs:
-e SHA1 \
-e BRANCH \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
Expand Down
2 changes: 0 additions & 2 deletions .github/requirements/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ The list of support files are as follows:
conda environment
* conda-env-macOS-ARM64. This is used by MacOS (m1, arm64) build and
test jobs to setup the conda environment
* conda-env-macOS-X64. This is use by MacOS (x86-64) build and test
jobs to setup the conda environment
* conda-env-Linux-X64. This is used by Linux buck build and test jobs
to setup the conda environment
* Pip:
Expand Down
4 changes: 2 additions & 2 deletions .github/requirements/conda-env-Linux-X64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ mkl-include=2022.1.0
ninja=1.10.2
numpy=1.23.3
pyyaml=6.0
setuptools=68.2.2
typing-extensions=4.9.0
setuptools=72.1.0
typing-extensions=4.11.0
2 changes: 1 addition & 1 deletion .github/requirements/conda-env-iOS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ cmake=3.22.1
ninja=1.10.2
numpy=1.23.3
pyyaml=6.0
setuptools=68.2.2
setuptools=72.1.0
typing-extensions=4.11.0
4 changes: 2 additions & 2 deletions .github/requirements/conda-env-macOS-ARM64
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
numpy=1.22.3
pyyaml=6.0
setuptools=61.2.0
setuptools=72.1.0
cmake=3.22.*
typing-extensions=4.9.0
typing-extensions=4.11.0
dataclasses=0.8
pip=22.2.2
pillow=10.0.1
Expand Down
16 changes: 0 additions & 16 deletions .github/requirements/conda-env-macOS-X64

This file was deleted.

5 changes: 5 additions & 0 deletions .github/workflows/_android-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,13 @@ jobs:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
TORCH_CUDA_ARCH_LIST: 5.2
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
AWS_DEFAULT_REGION: us-east-1
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
run: |
set -e
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
# Unlike other gradle jobs, it's not worth building libtorch in a separate CI job and share via docker, because:
# 1) Not shareable: it's custom selective build, which is different from default libtorch mobile build;
# 2) Not parallelizable by architecture: it only builds libtorch for one architecture;
Expand All @@ -113,6 +117,7 @@ jobs:
id=$(docker run -e BUILD_ENVIRONMENT \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e BUILD_LITE_INTERPRETER \
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/_android-full-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,14 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
ID_X86_32: ${{ steps.build-x86_32.outputs.container_id }}
run: |
set -eux
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
# Putting everything together
# The ID_X86_32 container was created during the build-x86_32 step
docker cp "${GITHUB_WORKSPACE}/build_android_install_arm_v7a" "${ID_X86_32}:/var/lib/jenkins/workspace/build_android_install_arm_v7a"
Expand All @@ -165,6 +169,7 @@ jobs:
-e SHA1 \
-e BRANCH \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--user jenkins \
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/_bazel-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,15 @@ jobs:
AWS_DEFAULT_REGION: us-east-1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
TORCH_CUDA_ARCH_LIST: 5.2
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
CUDA_VERSION: ${{ inputs.cuda-version }}
run: |
python3 -m pip install boto3==1.19.12
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
export SHARD_NUMBER=0
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
Expand All @@ -163,6 +167,7 @@ jobs:
-e NUM_TEST_SHARDS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e REENABLED_ISSUES \
-e TORCH_CUDA_ARCH_LIST \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_buck-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: 3.8
python-version: 3.9
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}

- name: Install Buck
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/_linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
Expand All @@ -221,19 +222,24 @@ jobs:
else
JENKINS_USER="--user jenkins"
USED_IMAGE="${DOCKER_IMAGE}"
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
fi
# detached container should get cleaned up by teardown_ec2_linux
# Used for JENKINS_USER, which can be empty
# shellcheck disable=SC2086
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_SESSION_TOKEN \
-e PR_NUMBER \
-e SHA1 \
-e BRANCH \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SCCACHE_S3_KEY_PREFIX \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/_linux-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ jobs:
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
run: |
# Fetch AWS credentials from IMDS
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
set -x
if [[ $TEST_CONFIG == 'multigpu' ]]; then
Expand Down Expand Up @@ -264,6 +266,9 @@ jobs:
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_SESSION_TOKEN \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e TEST_CONFIG \
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_mac-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ on:
python-version:
required: false
type: string
default: "3.8"
default: "3.9"
description: |
The python version to be used. Will be 3.8 by default
The python version to be used. Will be 3.9 by default
environment-file:
required: false
type: string
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_mac-test-mps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ on:
python-version:
required: false
type: string
default: "3.8"
default: "3.9"
description: |
The python version to be used. Will be 3.8 by default
The python version to be used. Will be 3.9 by default
test-matrix:
required: true
type: string
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_mac-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ on:
python-version:
required: false
type: string
default: "3.8"
default: "3.9"
description: |
The python version to be used. Will be 3.8 by default
The python version to be used. Will be 3.9 by default
timeout-minutes:
required: false
type: number
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_run_android_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: 3.8
python-version: 3.9
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}.txt

- name: Install NDK
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_win-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ jobs:
BUILD_WHEEL: 1
MAX_JOBS: 8
CUDA_VERSION: ${{ inputs.cuda-version }}
PYTHON_VERSION: "3.8"
PYTHON_VERSION: "3.9"
SCCACHE_BUCKET: "ossci-compiler-cache"
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SCCACHE_REGION: us-east-1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_win-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jobs:
env:
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: 3.8
PYTHON_VERSION: 3.9
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
Expand Down
Loading

0 comments on commit e47e879

Please sign in to comment.