Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 68 additions & 14 deletions deepmd/env.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Module that sets tensorflow working environment and exports inportant constants."""

import os
from pathlib import Path
import logging
import os
import platform
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Any
import numpy as np
from imp import reload
from configparser import ConfigParser
from imp import reload
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import numpy as np

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -37,6 +38,7 @@

SHARED_LIB_MODULE = "op"


def set_env_if_empty(key: str, value: str, verbose: bool = True):
"""Set environment variable only if it is empty.

Expand Down Expand Up @@ -74,7 +76,8 @@ def set_mkl():
"""
if "mkl_rt" in np.__config__.get_info("blas_mkl_info").get("libraries", []):
set_env_if_empty("KMP_BLOCKTIME", "0")
set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
set_env_if_empty(
"KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
reload(np)


Expand Down Expand Up @@ -118,8 +121,10 @@ def get_tf_session_config() -> Any:
intra_op_parallelism_threads=intra, inter_op_parallelism_threads=inter
)


default_tf_session_config = get_tf_session_config()


def get_module(module_name: str) -> "ModuleType":
"""Load force module.

Expand Down Expand Up @@ -149,14 +154,59 @@ def get_module(module_name: str) -> "ModuleType":
if not module_file.is_file():
raise FileNotFoundError(f"module {module_name} does not exist")
else:
module = tf.load_op_library(str(module_file))
try:
module = tf.load_op_library(str(module_file))
except tf.errors.NotFoundError as e:
# check whether CXX11_ABI_FLAG is compatible
# see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
# ABI should be the same
if 'CXX11_ABI_FLAG' in tf.__dict__:
tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG
else:
tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG
if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag:
raise RuntimeError(
"This deepmd-kit package was compiled with "
"CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled "
"with CXX11_ABI_FLAG=%d. These two library ABIs are "
"incompatible and thus an error is raised when loading %s."
"You need to rebuild deepmd-kit against this TensorFlow "
"runtime." % (
TF_CXX11_ABI_FLAG,
tf_cxx11_abi_flag,
module_name,
)) from e

# different versions may cause incompatibility
# see #406, #447, #557, #774, and #796 for example
# throw a message if versions are different
if TF_VERSION != tf.version.VERSION:
raise RuntimeError(
"The version of TensorFlow used to compile this "
"deepmd-kit package is %s, but the version of TensorFlow "
"runtime you are using is %s. These two versions are "
"incompatible and thus an error is raised when loading %s. "
"You need to install TensorFlow %s, or rebuild deepmd-kit "
"against TensorFlow %s.\nIf you are using a wheel from "
"pypi, you may consider to install deepmd-kit execuating "
"`pip install deepmd-kit --no-binary deepmd-kit` "
"instead." % (
TF_VERSION,
tf.version.VERSION,
module_name,
TF_VERSION,
tf.version.VERSION,
)) from e
raise RuntimeError(
"This deepmd-kit package is inconsitent with TensorFlow"
"Runtime, thus an error is raised when loading %s."
"You need to rebuild deepmd-kit against this TensorFlow"
"runtime." % (
module_name,
)) from e
return module


op_module = get_module("libop_abi")
op_grads_module = get_module("libop_grads")


def _get_package_constants(
config_file: Path = Path(__file__).parent / "pkg_config/run_config.ini",
) -> Dict[str, str]:
Expand All @@ -165,7 +215,7 @@ def _get_package_constants(
Parameters
----------
config_file : str, optional
path to CONFIG file, by default "config/run_config.ini"
path to CONFIG file, by default "pkg_config/run_config.ini"

Returns
-------
Expand All @@ -176,8 +226,14 @@ def _get_package_constants(
config.read(config_file)
return dict(config.items("CONFIG"))


# Package-level constants read from the "CONFIG" section of the run config file.
GLOBAL_CONFIG = _get_package_constants()
MODEL_VERSION = GLOBAL_CONFIG["model_version"]
# TensorFlow version and C++11 ABI flag recorded in the config file; get_module()
# compares them with the TensorFlow runtime when loading an op library fails.
TF_VERSION = GLOBAL_CONFIG["tf_version"]
TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"])

# Load the op libraries only after TF_VERSION and TF_CXX11_ABI_FLAG are defined:
# get_module() reads both module-level names in its error-handling path.
op_module = get_module("libop_abi")
op_grads_module = get_module("libop_grads")

if GLOBAL_CONFIG["precision"] == "-DHIGH_PREC":
GLOBAL_TF_FLOAT_PRECISION = tf.float64
Expand Down Expand Up @@ -221,5 +277,3 @@ def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor:
output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION`
"""
return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)


19 changes: 18 additions & 1 deletion source/cmake/Findtensorflow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,27 @@ else (BUILD_CPP_IF)
endif ()
endif (BUILD_CPP_IF)

# Detect the TensorFlow version by compiling and running tf_version.cpp.
# The program's stdout is captured into TENSORFLOW_VERSION; the compiler
# output is kept in TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR for diagnostics.
try_run(
  TENSORFLOW_VERSION_RUN_RESULT_VAR TENSORFLOW_VERSION_COMPILE_RESULT_VAR
  ${CMAKE_CURRENT_BINARY_DIR}/tf_version
  "${CMAKE_CURRENT_LIST_DIR}/tf_version.cpp"
  LINK_LIBRARIES ${TensorFlowFramework_LIBRARY}
  CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
  RUN_OUTPUT_VARIABLE TENSORFLOW_VERSION
  COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR
)
# Abort configuration if the helper program could not be built.
if (NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR})
  message(FATAL_ERROR "Failed to compile: \n ${TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR}" )
endif()
# Abort configuration if the helper program did not exit with status 0.
# Report the actual return code (the run result variable), and include the
# captured program output separately to help with debugging.
if (NOT ${TENSORFLOW_VERSION_RUN_RESULT_VAR} EQUAL "0")
  message(FATAL_ERROR "Failed to run, return code: ${TENSORFLOW_VERSION_RUN_RESULT_VAR}, output: \n ${TENSORFLOW_VERSION}" )
endif()

# print message
if (NOT TensorFlow_FIND_QUIETLY)
message(STATUS "Found TensorFlow: ${TensorFlow_INCLUDE_DIRS}, ${TensorFlow_LIBRARY}, ${TensorFlowFramework_LIBRARY} "
" in ${TensorFlow_search_PATHS}")
" in ${TensorFlow_search_PATHS} (found version \"${TENSORFLOW_VERSION}\")")
endif ()

unset(TensorFlow_search_PATHS)
10 changes: 10 additions & 0 deletions source/cmake/tf_version.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include <iostream>
#include "tensorflow/core/public/version.h"

int main(int argc, char * argv[])
{
  // Emit the version string of the TensorFlow headers this program was built
  // against. The macro comes from tensorflow/core/public/version.h, see
  // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h
  // TF_VERSION_STRING has been available since TensorFlow v0.6.
  const char * const tf_version = TF_VERSION_STRING;

  // Only the version text is written to stdout, without a trailing newline.
  std::cout << tf_version;
  return 0;
}
2 changes: 2 additions & 0 deletions source/config/run_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ GIT_DATE = @GIT_DATE@
GIT_BRANCH = @GIT_BRANCH@
TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
TF_LIBS = @TensorFlow_LIBRARY@
TF_VERSION = @TENSORFLOW_VERSION@
TF_CXX11_ABI_FLAG = @OP_CXX_ABI@
PRECISION = @PREC_DEF@
MODEL_VERSION=@MODEL_VERSION@