From 370ea168280ef431608e3326560812595eaa5f89 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 16:01:39 -0400 Subject: [PATCH 1/7] drop old GCC versions in test Signed-off-by: Jinzhe Zeng --- .github/workflows/test_python.yml | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 25dded26aa..3c79b31be5 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -5,40 +5,27 @@ name: Test Python jobs: testpython: name: Test Python - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: include: - python: 3.7 - gcc: 5 - tf: 1.14 - - python: 3.7 - gcc: 6 - tf: 1.14 - - python: 3.7 - gcc: 7 - tf: 1.14 - - python: 3.7 - gcc: 8 tf: 1.14 - python: 3.8 - gcc: 8 tf: - python: "3.11" - gcc: 8 tf: - container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py${{ matrix.python }}-gcc${{ matrix.gcc }}-tf${{ matrix.tf }} steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + cache: 'pip' # https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - run: pip install -e .[cpu,test] env: - CC: gcc-${{ matrix.gcc }} - CXX: g++-${{ matrix.gcc }} TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 - run: dp --version @@ -46,7 +33,6 @@ jobs: - uses: codecov/codecov-action@v3 with: gcov: true - gcov_executable: gcov-${{ matrix.gcc }} pass: name: Pass testing Python needs: [testpython] From fa58e69c92ad8ef0dccc40c77024b8f1e07031fc Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 16:19:49 -0400 Subject: [PATCH 2/7] pin protobuf < 3.20 for old TF versions Signed-off-by: Jinzhe Zeng --- backend/find_tensorflow.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 8fe3cedb63..9bfc472f1c 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -112,15 +112,21 @@ def get_tf_requirement(tf_version: str = "") -> dict: if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") + extra_requires = [] + if not (tf_version == "" or tf_version in SpecifierSet(">=2.12")): + extra_requires.append("protobuf<3.20") + if tf_version == "": return { "cpu": [ "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", + *extra_requires, ], "gpu": [ "tensorflow", "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", + *extra_requires, ], } elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet( @@ -129,10 +135,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: return { "cpu": [ f"tensorflow=={tf_version}", + *extra_requires, ], "gpu": [ f"tensorflow-gpu=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'", + *extra_requires, ], } else: @@ -140,10 +148,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: "cpu": [ f"tensorflow-cpu=={tf_version}; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", f"tensorflow=={tf_version}; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", + *extra_requires, ], "gpu": [ f"tensorflow=={tf_version}", "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", + *extra_requires, ], } From 9f0aac1a6695f2af1d6a18f885a7b33a1b3f1db3 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 16:42:28 -0400 Subject: [PATCH 3/7] install mpi --- .github/workflows/test_python.yml | 5 ++++- backend/find_tensorflow.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 3c79b31be5..1ef17d0f67 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -22,12 +22,15 @@ jobs: with: python-version: ${{ matrix.python }} cache: 'pip' + - uses: mpi4py/setup-mpi@v1 # https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -e .[cpu,test] + - run: pip install -e .[cpu,test,mpi] env: TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 + HOROVOD_WITH_TENSORFLOW: 1 + HOROVOD_WITHOUT_GLOO: 1 - run: dp --version - run: pytest --cov=deepmd --cov=deepmd_cli source/tests --durations=0 - uses: codecov/codecov-action@v3 diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 9bfc472f1c..aa75d5ecb4 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -113,8 +113,16 @@ def get_tf_requirement(tf_version: str = "") -> dict: tf_version = os.environ.get("TENSORFLOW_VERSION", "") extra_requires = [] + extra_select = {} if not (tf_version == "" or tf_version in SpecifierSet(">=2.12")): extra_requires.append("protobuf<3.20") + if tf_version == "" or tf_version in SpecifierSet(">=1.15"): + extra_select["mpi"] = [ + "horovod", + "mpi4py", + ] + else: + extra_select["mpi"] = [] if tf_version == "": return { @@ -128,6 +136,7 @@ def get_tf_requirement(tf_version: str = "") -> dict: "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", *extra_requires, ], + **extra_select, } elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet( ">=2.0,<2.1" @@ -142,6 +151,7 @@ def get_tf_requirement(tf_version: str = "") -> dict: f"tensorflow=={tf_version}; platform_machine=='aarch64'", *extra_requires, ], + **extra_select, } else: return { @@ -155,6 +165,7 @@ def get_tf_requirement(tf_version: str = "") -> dict: "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", *extra_requires, ], + **extra_select, } From 764f21aaadb86414ffc8465df04e5f48179e411d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 16:59:21 -0400 Subject: [PATCH 4/7] install seperately --- .github/workflows/test_python.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 1ef17d0f67..db1f8f5ba6 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -25,7 +25,9 @@ jobs: - uses: mpi4py/setup-mpi@v1 # https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -e .[cpu,test,mpi] + - run: | + pip install -e .[cpu,test] mpi4py + pip install horovod env: TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 From 839020f5118468f8fa461cdb6b39481e545694d3 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 17:11:54 -0400 Subject: [PATCH 5/7] do not install for tf 1.14 Signed-off-by: Jinzhe Zeng --- .github/workflows/test_python.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index db1f8f5ba6..09d013ec2c 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -25,12 +25,13 @@ jobs: - uses: mpi4py/setup-mpi@v1 # https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: | - pip install -e .[cpu,test] mpi4py - pip install horovod + - run: pip install -e .[cpu,test] env: TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 + - run: pip install horovod mpi4py + if: matrix.tf == "" + env: HOROVOD_WITH_TENSORFLOW: 1 HOROVOD_WITHOUT_GLOO: 1 - run: dp --version From 268dba19b0e8e44c34ef2bc2d2b8a529ff55ff8b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 17:15:04 -0400 Subject: [PATCH 6/7] fix if Signed-off-by: Jinzhe Zeng --- .github/workflows/test_python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 09d013ec2c..5e026a50d5 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -30,7 +30,7 @@ jobs: TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 - run: pip install horovod mpi4py - if: matrix.tf == "" + if: ${{ matrix.tf == '' }} env: HOROVOD_WITH_TENSORFLOW: 1 HOROVOD_WITHOUT_GLOO: 1 From 962378e1ef9af3d81ca73f0fd5f3c5d1d1bdcb15 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Sep 2023 17:47:27 -0400 Subject: [PATCH 7/7] try openmpi instead Signed-off-by: Jinzhe Zeng --- .github/workflows/test_python.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 5e026a50d5..0ac29a7d9b 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -23,6 +23,9 @@ jobs: python-version: ${{ matrix.python }} cache: 'pip' - uses: mpi4py/setup-mpi@v1 + if: ${{ matrix.tf == '' }} + with: + mpi: openmpi # https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - run: pip install -e .[cpu,test]