diff --git a/detection/nlp_text_splitter/README.md b/detection/nlp_text_splitter/README.md index 06bbbfc..46956fa 100644 --- a/detection/nlp_text_splitter/README.md +++ b/detection/nlp_text_splitter/README.md @@ -24,7 +24,9 @@ this model lacks support handling for Chinese punctuation. # Installation To install this tool users will need to run `./install.sh`. By default this will set up a -CPU-only PyTorch installation. +CPU-only PyTorch installation. `./install.sh` requires a C++ compiler and the Python development +headers to be installed. If they are not already installed, they can be installed by running +`apt-get install g++ python3.8-dev`. Please note that several customizations are supported: diff --git a/detection/nlp_text_splitter/install.sh b/detection/nlp_text_splitter/install.sh index 45a4dbf..38d1f5c 100755 --- a/detection/nlp_text_splitter/install.sh +++ b/detection/nlp_text_splitter/install.sh @@ -94,10 +94,6 @@ install_py_torch() { pip3 install "$torch_package" else echo "Installing CPU only version of PyTorch." - # networkx is a dependency of PyTorch, but the version of networkx in the PyTorch package - # index requires Python 3.9. networkx needs to be installed in a separate command so that - # pip can get networkx from PyPi. - pip3 install 'networkx~=3.1' pip3 install "$torch_package" --index-url https://download.pytorch.org/whl/cpu fi } diff --git a/detection/nlp_text_splitter/pyproject.toml b/detection/nlp_text_splitter/pyproject.toml index ef65a73..992a847 100644 --- a/detection/nlp_text_splitter/pyproject.toml +++ b/detection/nlp_text_splitter/pyproject.toml @@ -33,9 +33,5 @@ name = "nlp_text_splitter" version = "9.0" dependencies = [ "spacy>=3.7.4,<3.7.6", - "wtpsplit>=1.3.0", - # Starting with version 8.2.5, thinc will try to compile C extensions during install. - # Restricting the version of thinc is easier than installing a C compiler in every component - # Dockerfile that uses the text splitter. - "thinc>=8.2.2,<8.2.5" + "wtpsplit>=1.3.0" ]