Skip to content

Commit

Permalink
Build pytorch 2.3.1
Browse files Browse the repository at this point in the history
This (properly) builds pytorch 2.3.1, including mkl support.
  • Loading branch information
sdake committed Jul 27, 2024
1 parent fbf4c0c commit 091699e
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 149 deletions.
209 changes: 209 additions & 0 deletions platform/Dockerfiles/pytorch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
FROM debian:bookworm AS bookworm_cuda

###
#
# Build stage: all relative paths below resolve against /workspace

WORKDIR /workspace


###
#
# Global environment: PyTorch/CUDA version strings, the CUDA tool directory
# appended to PATH, non-interactive apt, NVIDIA container settings, and the
# location of the Python virtual environment

ENV PYTORCH_VERSION="v2.3.1" \
    CUDA_VERSION="12.5.1_555.42.06" \
    PATH="$PATH:/usr/local/cuda/bin" \
    DEBIAN_FRONTEND="noninteractive" \
    NVIDIA_DRIVER_CAPABILITIES="compute,utility" \
    NVIDIA_VISIBLE_DEVICES="all" \
    VENV_PATH="/workspace/v"

# Separate instruction on purpose: a variable assigned by an ENV instruction
# is only substituted in *later* instructions, and both values expand VENV_PATH.
ENV PYTHON_VENV="${VENV_PATH}/bin/python" \
    PIP_BIN="${VENV_PATH}/bin/pip"

###
#
# Compiler flags working around the gcc-12 issue discussed in
# https://github.com/pytorch/pytorch/issues/77939#issuecomment-1526844015
# The same flag set is exported for both the C and the C++ compiler.

ENV CFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev' \
    CXXFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev'


###
#
# PyTorch-specific build environment variables, grouped by purpose

# Build type, parallelism and the version stamped into the build
ENV REL_WITH_DEB_INFO="ON" \
    MAX_JOBS="32" \
    PYTORCH_BUILD_VERSION="2.3.1" \
    PYTORCH_BUILD_NUMBER="1"

# Feature switches
ENV USE_CUDA="ON" \
    USE_CUDNN=1 \
    USE_CUSPARSELT=1 \
    USE_FBGEMM="ON" \
    USE_KINETO="ON" \
    USE_NUMPY="ON" \
    USE_NNPACK="ON" \
    USE_DISTRIBUTED="ON" \
    USE_TENSORPIPE="ON" \
    USE_GLOO="ON" \
    USE_MPI="ON" \
    USE_SYSTEM_NCCL="OFF" \
    USE_OPENMP="ON" \
    USE_FLASH_ATTENTION="ON" \
    USE_MEM_EFF_ATTENTION="ON" \
    USE_MIMALLOC="ON" \
    USE_NCCL="ON"

# Target GPU architectures and CUDA toolkit locations
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" \
    CUDA_PATH="/usr/local/cuda" \
    CUDA_HOME="/usr/local/cuda" \
    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    CUDA_NVCC_EXECUTABLE="/usr/local/cuda/bin/nvcc" \
    CUDA_INCLUDE_DIRS="/usr/local/cuda/include"

# cuSPARSELt and cuDNN library/header locations
ENV CUSPARSELT_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu" \
    CUSPARSE_INCLUDE_PATH="/usr/include/x86_64-linux-gnu" \
    CUDNN_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu" \
    CUDNN_INCLUDE_PATH="/usr/include/x86_64-linux-gnu"

# Currently disabled settings, kept for reference
#ENV USE_UCC="ON"
#ENV ATEN_THREADING="NATIVE"
#ENV USE_SYSTEM_LIBS ON

###
#
# Install toolchain and system dependencies
#
# `apt-get update` and `apt-get install` run in the same RUN instruction so
# that a cached package-index layer can never be paired with a later, edited
# install step. `apt-get` is used instead of `apt` because it is the
# interface intended for scripts. Packages are listed one per line, sorted
# alphabetically, to keep diffs small.
#
# The package index is intentionally left in place: this is a throw-away
# build stage and later steps in this stage install more packages.
#
# Currently disabled packages, kept for reference:
#   ccache
#   libmagma-dev
#
# NOTE(review): libjpeg-dev and libpng-dev were previously marked "not sure
# if or why these are needed" - confirm before removing them.

RUN apt-get update \
 && apt-get install -y \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      gpg \
      libfftw3-dev \
      libgmp3-dev \
      libjpeg-dev \
      libmpfr-dev \
      libnuma-dev \
      libpng-dev \
      libssl-dev \
      libucx-dev \
      libzstd-dev \
      ninja-build \
      python3 \
      python3-full \
      python3-pip \
      python3-venv \
      swig \
      zstd

###
#
# Create the cache/workspace directories and clone the PyTorch sources

# ccache itself is disabled for now; only its cache directory is created.
#RUN /usr/sbin/update-ccache-symlinks
#RUN ccache --set-config=cache_dir=/opt/ccache

RUN mkdir -p \
      /opt/ccache \
      /workspace/build \
      /workspace/${PYTORCH_VERSION} \
      /workspace/tmp \
      /workspace/added \
      /workspace/uncompressed \
      /workspace/target \
      /workspace/patches

# Shallow clone of the release tag (including shallow submodules) into
# ./build, which resolves to /workspace/build via WORKDIR.
RUN git clone \
      --depth 1 \
      --jobs ${MAX_JOBS} \
      "https://github.com/pytorch/pytorch" \
      --branch "${PYTORCH_VERSION}" \
      --recurse-submodules \
      --shallow-submodules \
      build

###
#
# Apply local patches to the PyTorch checkout
#
# COPY sources are resolved relative to the build context, not the image
# filesystem; the patch file sits next to this Dockerfile, so it is referenced
# by its bare name. The trailing slash on the destination makes it an
# explicit directory target.
#
# `patch` treats a single positional argument as the file to be patched and
# reads the diff from stdin, so the patch file must be passed with --input.
# The path is absolute because --directory changes the working directory
# before the patch file is opened.
#
# NOTE(review): the patch also carries a third_party/cutlass submodule
# (gitlink) hunk, which `patch` may refuse to apply - confirm, or switch to
# `git apply`.

COPY pytorch-compute-86-override.patch /workspace/patches/
RUN patch --directory build -p1 --input /workspace/patches/pytorch-compute-86-override.patch

###
#
# Install NVIDIA CUDA SDK
#
# Steps, all in one layer so every install uses a package index refreshed in
# the same step:
#   1. download and install the NVIDIA apt keyring package (curl --fail makes
#      an HTTP error abort the build instead of handing an error page to dpkg)
#   2. enable the Debian "contrib" component (--yes avoids any interactive
#      confirmation, since there is no terminal during a build)
#   3. install the CUDA 12.5 toolkit, cuDNN and the cuSPARSELt headers

RUN curl --fail --location --silent --show-error \
      --output /tmp/cuda-keyring.deb \
      https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb \
 && dpkg -i /tmp/cuda-keyring.deb \
 && rm /tmp/cuda-keyring.deb \
 && apt-get update \
 && apt-get install -y software-properties-common \
 && add-apt-repository --yes contrib \
 && apt-get update \
 && apt-get install -y \
      cuda-toolkit-12-5 \
      cudnn \
      libcusparselt-dev


###
#
# Install Intel MKL BLAS
#
# The signing key is downloaded to a file first and then de-armored, instead
# of piping curl into gpg: under the default /bin/sh a failed download in a
# pipeline would go unnoticed and leave an empty keyring behind.
# The repository is registered and the packages are installed in the same
# layer so the package index is always fresh for the install.

RUN curl --fail --location --silent --show-error \
      --output /tmp/intel-oneapi.pub \
      "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" \
 && gpg --output /usr/share/keyrings/oneapi-archive-keyring.gpg --dearmor /tmp/intel-oneapi.pub \
 && rm /tmp/intel-oneapi.pub \
 && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list \
 && apt-get update \
 && apt-get install -y \
      intel-oneapi-mkl \
      intel-oneapi-mkl-devel

# MKL_ROOT expands MKL_VERSION and MKL_LIBRARIES expands MKL_ROOT/MKL_MODEL,
# so each must be set in an instruction after the variables it references.
ENV MKL_VERSION="2024.2" \
    MKL_MODEL="ilp64"
ENV MKL_ROOT="/opt/intel/oneapi/mkl/${MKL_VERSION}/lib/intel64"
ENV MKL_LIBRARIES="-Wl,--start-group;${MKL_ROOT}/libmkl_intel_${MKL_MODEL}.a;${MKL_ROOT}/libmkl_gnu_thread.a;${MKL_ROOT}/libmkl_core.a;-Wl,--end-group"

# BLA_VENDOR / BLA_STATIC are the CMake FindBLAS hint variables.
# (BLA_STATIC was previously misspelled as BLA_STATIS and therefore ignored.)
ENV CUDA_ARCHS="80;86;89;90" \
    BLA_VENDOR=Intel10_64ilp \
    BLA_STATIC=True


###
#
# Install Python virtual environment
#
# The virtual environment is created at VENV_PATH and all build-time Python
# packages are installed in a single pip invocation, so pip resolves them
# (together with PyTorch's own requirements.txt from the checkout) as one
# consistent set. --no-cache-dir keeps pip's download cache out of the layer.

RUN python3 -m venv ${VENV_PATH} \
 && ${PIP_BIN} install --no-cache-dir \
      build \
      cmake \
      ninja \
      numpy \
      pyyaml \
      six \
      swig \
      wheel \
      -r /workspace/build/requirements.txt


###
#
# Hardcode the cuda library path for the system loader
#
# The directory must match the CUDA location used everywhere else in this
# file (CUDA_HOME / CUDA_PATH / CUDA_TOOLKIT_ROOT_DIR = /usr/local/cuda);
# the previous /opt/nvidia/cuda/lib64 entry did not match any of them.

RUN echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf \
 && ldconfig -v


###
#
# Build pytorch: produce both a source distribution and a wheel from the
# checkout, using the virtual environment's interpreter and with build
# isolation turned off (--no-isolation)

WORKDIR /workspace/build
RUN "${PYTHON_VENV}" -m build --no-isolation --sdist --wheel


###
#
# Produce a clean image of build results for output from buildx
#
# This final stage starts from the empty `scratch` image and contains only the
# contents of /workspace/build/dist from the bookworm_cuda build stage, placed
# at the image root.

FROM scratch
COPY --from=bookworm_cuda /workspace/build/dist /
6 changes: 6 additions & 0 deletions platform/Dockerfiles/pytorch/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh
###
#
# Build pytorch and output the build results to "${PWD}/target"
#
# Run this from the directory that contains the Dockerfile: the build context
# is "." and the output directory is created under the current directory.

# Stop at the first failing command or unset variable, so docker is not
# invoked when the output directory could not be created.
set -eu

mkdir -p "${PWD}/target"
docker buildx build \
    --progress plain \
    --output type=local,dest="${PWD}/target" \
    --tag pytorch:v2.3.1 \
    .
20 changes: 20 additions & 0 deletions platform/Dockerfiles/pytorch/pytorch-compute-86-override.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Allow _scaled_mm on every compute capability 8.x device while keeping
compute capability 9.x and newer allowed (the build targets 9.0 as well),
and move the third_party/cutlass submodule pointer.

diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 8d3b3dbea7..5f04c0cdd1 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -820,7 +820,7 @@ static bool _scaled_mm_allowed_device() {
}
return false;
#else
- return dprops->major >= 9 || (dprops->major == 8 && dprops->minor == 9);
+ return dprops->major >= 8;
#endif
}

diff --git a/third_party/cutlass b/third_party/cutlass
index bbe579a9e3..56b46e2d13 160000
--- a/third_party/cutlass
+++ b/third_party/cutlass
@@ -1 +1 @@
-Subproject commit bbe579a9e3beb6ea6626d9227ec32d0dae119a49
+Subproject commit 56b46e2d13875b46b8f6a03f9f5ac91e2bfdc01a
143 changes: 0 additions & 143 deletions platform/packaging/build/pytorch/Dockerfile

This file was deleted.

6 changes: 0 additions & 6 deletions platform/packaging/build/pytorch/build.sh

This file was deleted.

0 comments on commit 091699e

Please sign in to comment.