From 93023c1d3254033bbf92500ca418399dccbce644 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 15 Nov 2023 16:19:34 -0500 Subject: [PATCH 1/7] build cu11 wheel Signed-off-by: Jinzhe Zeng --- .github/workflows/build_wheel.yml | 9 +++++++++ backend/find_tensorflow.py | 23 +++++++++++++++++------ doc/install/easy-install.md | 6 ++++++ pyproject.toml | 17 +++++++++++++---- 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 84c8ac4b74..59df8a2cd0 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -33,6 +33,13 @@ jobs: python: 311 platform_id: manylinux_x86_64 dp_variant: cuda + cuda_version: 12.2 + - os: ubuntu-latest + python: 311 + platform_id: manylinux_x86_64 + dp_variant: cuda + cuda_version: 11.8 + dp_pkg_name: deepmd-kit-cu11 # macos-x86-64 - os: macos-latest python: 311 @@ -68,6 +75,8 @@ jobs: CIBW_ARCHS: all CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} DP_VARIANT: ${{ matrix.dp_variant }} + CUDA_VERSION: ${{ matrix.cuda_version }} + DP_PKG_NAME: ${{ matrix.dp_pkg_name }} - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index fbbe0e56c0..6f691239ee 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -88,12 +88,23 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): if os.environ.get("CIBUILDWHEEL", "0") == "1": - # CUDA 12.2 - requires.extend( - [ - "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", - ] - ) + cuda_version = os.environ.get("CUDA_VERSION", "12.2") + if cuda_version in SpecifierSet(">=12,<13"): + # CUDA 12.2 + requires.extend( + [ + "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", + ] + ) + elif cuda_version in SpecifierSet(">=11,<12"): + # CUDA 11.8 + requires.extend( + [ + "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'", + ] + ) + else: + raise RuntimeError("Unsupported CUDA version") requires.extend(get_tf_requirement()["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 7bd632694b..73fae4467a 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -92,6 +92,12 @@ pip install deepmd-kit[gpu,cu12] `cu12` is required only when CUDA Toolkit and cuDNN were not installed. +To install the package built against CUDA 11.8, use + +```bash +pip install deepmd-kit-cu11[gpu,cu11] +``` + Or install the CPU version without CUDA supported: ```bash pip install deepmd-kit[cpu] diff --git a/pyproject.toml b/pyproject.toml index e9ee563960..6a7461e62f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,8 +133,11 @@ test-command = [ test-extras = ["cpu", "test", "lmp", "ipi"] build = ["cp310-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] -manylinux-x86_64-image = "manylinux_2_28" -manylinux-aarch64-image = "manylinux_2_28" +# TODO: uncomment when CUDA 11 is deprecated +# manylinux-x86_64-image = "manylinux_2_28" +# manylinux-aarch64-image = "manylinux_2_28" +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" +manylinux-aarch64-image = "quay.io/pypa/manylinux_2_28_aarch64:2022-11-19-1b19e81" [tool.cibuildwheel.macos] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1" } @@ -148,10 +151,16 @@ repair-wheel-command = """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then rm -rf [tool.cibuildwheel.linux] repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}" -environment-pass = ["CIBW_BUILD", "DP_VARIANT"] +environment-pass = [ + "CIBW_BUILD", + "DP_VARIANT", + "CUDA_VERSION", + "DP_PKG_NAME", +] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ - """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-12-2 cuda-cudart-devel-12-2; fi }""", + """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \"deepmd-kit\"/name = \"${DP_PKG_NAME}\"/g" pyproject.toml; fi""", + """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""", "yum install -y mpich-devel", ] From d6941e85500950dd5e759efbf2b17ad6df278077 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 15 Nov 2023 16:26:30 -0500 Subject: [PATCH 2/7] fix invalid version Signed-off-by: Jinzhe Zeng --- backend/find_tensorflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 6f691239ee..08a73f7252 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -89,7 +89,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: except (AttributeError, TypeError, IndexError): if os.environ.get("CIBUILDWHEEL", "0") == "1": cuda_version = os.environ.get("CUDA_VERSION", "12.2") - if cuda_version in SpecifierSet(">=12,<13"): + if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): # CUDA 12.2 requires.extend( [ From a87a0cb707d83bb797fc1270339764e841f8dd7e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Nov 2023 18:05:31 -0500 Subject: [PATCH 3/7] build cu11 for docker and C interface library --- .github/workflows/build_wheel.yml | 15 +++++++++++++-- .github/workflows/package_c.yml | 17 +++++++++++++++-- doc/install/easy-install-dev.md | 4 +++- doc/install/install-from-c-library.md | 2 +- pyproject.toml | 3 +-- source/install/docker/Dockerfile | 8 +++++--- source/install/docker_package_c.sh | 4 ++-- 7 files changed, 40 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 59df8a2cd0..aa944f2684 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -118,6 +118,14 @@ jobs: # use the already built wheels to build docker needs: [build_wheels] runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - variant: "" + cuda_version: "12" + - variant: "_cu11" + cuda_version: "11" steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v3 @@ -142,8 +150,11 @@ jobs: with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }} - tags: ${{ steps.meta.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }}${{ matrix.variant }} labels: ${{ steps.meta.outputs.labels }} + build-args: | + VARIANT=${{ matrix.variant }} + CUDA_VERSION=${{ matrix.cuda_version }} build_pypi_index: needs: [build_wheels, build_sdist] @@ -182,7 +193,7 @@ jobs: pass: name: Pass testing build wheels - needs: [build_wheels, build_sdist] + needs: [build_wheels, build_sdist, build_docker, deploy_pypi_index] runs-on: ubuntu-latest if: always() steps: diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index ada205be00..91281ddaf2 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -8,23 +8,36 @@ jobs: build_c: name: Build C library runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - tensorflow_build_version: "2.15" + tensorflow_version: "" + filename: libdeepmd_c.tar.gz + - tensorflow_build_version: "2.14" + tensorflow_version: ">=2.5.0rc0,<2.15" + filename: libdeepmd_c_cu11.tar.gz steps: - uses: actions/checkout@v4 - name: Package C library run: ./source/install/docker_package_c.sh + env: + TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }} + TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }} + - run: “cp libdeepmd_c.tar.gz ${{ matrix.filename }} || :” # for download and debug - name: Upload artifact uses: actions/upload-artifact@v3 with: name: libdeepmd_c - path: ./libdeepmd_c.tar.gz + path: ${{ matrix.filename }} - name: Test C library run: ./source/install/docker_test_package_c.sh - name: Release uses: softprops/action-gh-release@v1 if: startsWith(github.ref, 'refs/tags/') with: - files: libdeepmd_c.tar.gz + files: ${{ matrix.filename }} test_c: name: Test building from C library needs: [build_c] diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index f3d4fa1a32..6fd9171730 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -6,12 +6,14 @@ The following is the way to install the pre-compiled packages without [building ## Install with docker -The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA support: +The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA 12.2 support: ```bash docker pull ghcr.io/deepmodeling/deepmd-kit:devel ``` +For CUDA 11.8 support, use the `devel_cu11` tag. + ## Install with pip Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md index 04b71234db..eb89538277 100644 --- a/doc/install/install-from-c-library.md +++ b/doc/install/install-from-c-library.md @@ -2,7 +2,7 @@ DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own. -The library is built in Linux (GLIBC 2.17) with CUDA 12.2. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. +The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or 11.8 (`libdeepmd_c_cu11.tar.gz`). It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. ## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch diff --git a/pyproject.toml b/pyproject.toml index 6a7461e62f..51ba8bcd1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,9 +135,8 @@ build = ["cp310-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] # TODO: uncomment when CUDA 11 is deprecated # manylinux-x86_64-image = "manylinux_2_28" -# manylinux-aarch64-image = "manylinux_2_28" manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" -manylinux-aarch64-image = "quay.io/pypa/manylinux_2_28_aarch64:2022-11-19-1b19e81" +manylinux-aarch64-image = "manylinux_2_28" [tool.cibuildwheel.macos] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1" } diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile index 9ac905dcd0..26b7be9f19 100644 --- a/source/install/docker/Dockerfile +++ b/source/install/docker/Dockerfile @@ -1,16 +1,18 @@ -FROM python:3.10 AS compile-image +FROM python:3.11 AS compile-image +ARG VARIANT="" +ARG CUDA_VERSION="12" RUN python -m venv /opt/deepmd-kit # Make sure we use the virtualenv ENV PATH="/opt/deepmd-kit/bin:$PATH" # Install package COPY dist /dist -RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu12,lmp,ipi]" \ +RUN pip install "$(ls /dist/deepmd_kit${VARIANT}-*manylinux*_x86_64.whl)[gpu,cu${CUDA_VERSION},lmp,ipi]" \ && dp -h \ && lmp -h \ && dp_ipi \ && python -m deepmd -h -FROM python:3.10 AS build-image +FROM python:3.11 AS build-image COPY --from=compile-image /opt/deepmd-kit /opt/deepmd-kit ENV PATH="/opt/deepmd-kit/bin:$PATH" CMD ["/bin/bash"] diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 75f2d1138b..9fa5646260 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -3,8 +3,8 @@ set -e SCRIPT_PATH=$(dirname $(realpath -s $0)) docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ - tensorflow/build:2.15-python3.11 \ - /bin/sh -c "pip install tensorflow cmake \ + tensorflow/build:${TENSORFLOW_BUILD_VERSION:-2.15}-python3.11 \ + /bin/sh -c "pip install tensorflow${TENSORFLOW_VERSION} cmake \ && cd /root/deepmd-kit/source/install \ && CC=/dt9/usr/bin/gcc \ CXX=/dt9/usr/bin/g++ \ From 603f5f2e4c69338a893867b63e73631a2b7ca76a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Nov 2023 18:20:12 -0500 Subject: [PATCH 4/7] fix pip install Signed-off-by: Jinzhe Zeng --- source/install/docker_package_c.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 9fa5646260..544c175a0a 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -4,7 +4,7 @@ SCRIPT_PATH=$(dirname $(realpath -s $0)) docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ tensorflow/build:${TENSORFLOW_BUILD_VERSION:-2.15}-python3.11 \ - /bin/sh -c "pip install tensorflow${TENSORFLOW_VERSION} cmake \ + /bin/sh -c "pip install \"tensorflow${TENSORFLOW_VERSION}\" cmake \ && cd /root/deepmd-kit/source/install \ && CC=/dt9/usr/bin/gcc \ CXX=/dt9/usr/bin/g++ \ From e2964fa4f679a7864795594c96d71ba6e1ed21ba Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Nov 2023 18:36:57 -0500 Subject: [PATCH 5/7] Update package_c.yml Signed-off-by: Jinzhe Zeng --- .github/workflows/package_c.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 91281ddaf2..c8b6d4e76c 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -24,7 +24,8 @@ jobs: env: TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }} TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }} - - run: “cp libdeepmd_c.tar.gz ${{ matrix.filename }} || :” + - run: cp libdeepmd_c.tar.gz ${{ matrix.filename }} + if: ${{ matrix.filename }} != "libdeepmd_c.tar.gz" # for download and debug - name: Upload artifact uses: actions/upload-artifact@v3 From 2babf0c72cddb5e20456b4c5e7142c62a946f52f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Nov 2023 18:56:06 -0500 Subject: [PATCH 6/7] fix --- .github/workflows/package_c.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index c8b6d4e76c..2b5f74b97d 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -25,7 +25,7 @@ jobs: TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }} TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }} - run: cp libdeepmd_c.tar.gz ${{ matrix.filename }} - if: ${{ matrix.filename }} != "libdeepmd_c.tar.gz" + if: matrix.filename != 'libdeepmd_c.tar.gz' # for download and debug - name: Upload artifact uses: actions/upload-artifact@v3 diff --git a/pyproject.toml b/pyproject.toml index 51ba8bcd1e..04bcc69f75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -158,7 +158,7 @@ environment-pass = [ ] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ - """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \"deepmd-kit\"/name = \"${DP_PKG_NAME}\"/g" pyproject.toml; fi""", + """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""", """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""", "yum install -y mpich-devel", ] From 88ff9bed55b1e60f80b3aafd27e4d93c73ccd8b2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Nov 2023 20:49:33 -0500 Subject: [PATCH 7/7] fix needs Signed-off-by: Jinzhe Zeng --- .github/workflows/build_wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index aa944f2684..06d960528f 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -193,7 +193,7 @@ jobs: pass: name: Pass testing build wheels - needs: [build_wheels, build_sdist, build_docker, deploy_pypi_index] + needs: [build_wheels, build_sdist, build_docker, build_pypi_index] runs-on: ubuntu-latest if: always() steps: