diff --git a/LICENSE b/LICENSE
index b7982b1..48665b7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -17,4 +17,62 @@
  This project contains content developed by The MITRE Corporation. If this code
  is used in a deployment or embedded within another project, it is requested
  that you send an email to opensource@mitre.org in order to let us know where
- this software is being used.
\ No newline at end of file
+ this software is being used.
+
+*****************************************************************************
+
+The nlp_text_splitter utility uses the following sentence detection libraries:
+
+*****************************************************************************
+
+The WtP, "Where's the Point", sentence segmentation library falls under the MIT License:
+
+https://github.com/bminixhofer/wtpsplit/blob/main/LICENSE
+
+MIT License
+
+Copyright (c) 2024 Benjamin Minixhofer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*****************************************************************************
+
+The spaCy Natural Language Processing library falls under the MIT License:
+
+The MIT License (MIT)
+
+Copyright (C) 2016-2024 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/detection/nlp_text_splitter/README.md b/detection/nlp_text_splitter/README.md
new file mode 100644
index 0000000..06bbbfc
--- /dev/null
+++ b/detection/nlp_text_splitter/README.md
@@ -0,0 +1,50 @@
+# Overview
+
+This directory contains the source code, test examples, and installation script
+for the OpenMPF NlpTextSplitter tool, which uses WtP and spaCy libraries
+to detect sentences in a given chunk of text.
+
+# Background
+
+Our primary motivation for creating this tool was to find a lightweight, accurate
+sentence detection capability to support a large variety of text processing tasks
+including translation and tagging.
+
+Through preliminary investigation, we identified the [WtP library ("Where's the
+Point")](https://github.com/bminixhofer/wtpsplit) and [spaCy's multilingual sentence
+detection model](https://spacy.io/models) for identifying sentence breaks
+in a large section of text.
+
+WtP models are trained to split up multilingual text by sentence without the need of an
+input language tag. The disadvantage is that the most accurate WtP models will need ~3.5
+GB of GPU memory. On the other hand, spaCy has a single multilingual sentence detection
+model that appears to work better for splitting up English text in certain cases.
+Unfortunately, this model lacks support for handling Chinese punctuation.
+
+# Installation
+
+To install this tool users will need to run `./install.sh`. By default this will set up a
+CPU-only PyTorch installation.
+
+Please note that several customizations are supported:
+
+- `--text-splitter-dir|-t <path_to_src>`: This parameter specifies where the
+  source code is located relative to the installation script. In general,
+  since the installation script and source code are both located here, it's not
+  necessary to update this parameter unless the user is running the `install.sh`
+  script from a different directory.
+
+- `--gpu`: Add this parameter to the installation command line above to
+  set up a PyTorch installation with CUDA (GPU) libraries.
+
+- `--wtp-models-dir|-m <wtp-models-dir>`: Add this parameter to
+  change the default WtP model installation directory
+  (default: `/opt/wtp/models`).
+
+- `--install-wtp-model|-w <model-name>`: Add this parameter to specify
+  additional WtP models for installation. This parameter can be provided
+  multiple times to install more than one model.
+
+- `--install-spacy-model|-s <model-name>`: Add this parameter to specify
+  additional spaCy models for installation. This parameter can be provided
+  multiple times to install more than one model.
diff --git a/detection/nlp_text_splitter/install.sh b/detection/nlp_text_splitter/install.sh
new file mode 100755
index 0000000..45a4dbf
--- /dev/null
+++ b/detection/nlp_text_splitter/install.sh
@@ -0,0 +1,168 @@
+#!/usr/bin/env bash
+
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+set -o errexit -o pipefail  # Stop at the first failing command or failing pipeline stage.
+
+main() {
+    if ! options=$(getopt --name "$0"  \
+            --options t:gm:w:s: \
+            --longoptions text-splitter-dir:,gpu,wtp-models-dir:,install-wtp-model:,install-spacy-model: \
+            -- "$@"); then
+        print_usage
+    fi
+    eval set -- "$options"
+    local wtp_models_dir=/opt/wtp/models
+    local wtp_models=("wtp-bert-mini")
+    local spacy_models=("xx_sent_ud_sm")
+    while true; do
+        case "$1" in
+        --text-splitter-dir | -t )
+            shift
+            local text_splitter_dir=$1
+            ;;
+        --gpu | -g )
+            local gpu_enabled=true
+            ;;
+        --wtp-models-dir  | -m )
+            shift
+            wtp_models_dir=$1;
+            ;;
+        --install-wtp-model | -w )
+            shift
+            wtp_models+=("$1")
+            ;;
+        --install-spacy-model | -s )
+            shift
+            spacy_models+=("$1")
+            ;;
+        -- )
+            shift
+            break
+            ;;
+        esac
+        shift
+    done
+
+    install_text_splitter "$text_splitter_dir"
+    install_py_torch "$gpu_enabled"
+    download_wtp_models "$wtp_models_dir" "${wtp_models[@]}"
+    download_spacy_models "${spacy_models[@]}"
+}
+
+
+# Install the text splitter with pip; defaults to the directory holding this script.
+install_text_splitter() {
+    local source_dir=$1
+    if [[ -z $source_dir ]]; then
+        source_dir=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
+    fi
+    echo "Installing text splitter from source directory: $source_dir"
+    pip3 install "$source_dir"
+}
+
+
+# Install PyTorch: the GPU enabled build when $1 is non-empty, else the CPU only build.
+install_py_torch() {
+    local use_gpu=$1 torch_spec='torch~=2.3'
+    if [[ -n $use_gpu ]]; then
+        echo "Installing GPU enabled PyTorch."
+        pip3 install "$torch_spec"
+        return
+    fi
+
+    echo "Installing CPU only version of PyTorch."
+    # networkx is a PyTorch dependency, but the networkx build in the PyTorch package index
+    # requires Python 3.9, so it is installed first in its own command to come from PyPI.
+    pip3 install 'networkx~=3.1'
+    pip3 install "$torch_spec" --index-url https://download.pytorch.org/whl/cpu
+}
+
+
+download_wtp_models() {
+    local wtp_models_dir=$1
+    shift
+    local model_names=("$@")
+    setup_wtp_models_dir "$wtp_models_dir"
+
+    for model_name in "${model_names[@]}"; do
+        echo "Downloading the $model_name model to $wtp_models_dir."
+        local wtp_model_dir="$wtp_models_dir/$model_name"
+        python3 -c \
+            "from huggingface_hub import snapshot_download; \
+            snapshot_download('benjamin/$model_name', local_dir='$wtp_model_dir')"
+    done
+}
+
+# Prepare the WtP models directory; exits 3 if it cannot be created, 4 if not writable.
+setup_wtp_models_dir() {
+    local models_dir=$1
+    # Keep a REQUESTS_CA_BUNDLE that is already set; otherwise use the path below.
+    if [[ -z $REQUESTS_CA_BUNDLE ]]; then
+        export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+    fi
+
+    mkdir --parents "$models_dir" || {
+        echo "ERROR: Failed to create the $models_dir directory."
+        exit 3
+    }
+    if [[ -w $models_dir ]]; then return; fi
+
+    echo -n "ERROR: The model directory, \"$models_dir\" is not writable by the current user. "
+    echo "The permissions on \"$models_dir\" must be modified."
+    exit 4
+}
+
+download_spacy_models() {  # Download every spaCy model named in the arguments.
+    for spacy_model; do
+        echo "Downloading the $spacy_model spaCy model."
+        python3 -m spacy download "$spacy_model"
+    done
+}
+
+
+print_usage() {  # Print the command line help and exit with status 1.
+    echo
+    echo "Usage:
+$0 [--text-splitter-dir|-t <path_to_src>] [--gpu|-g] [--wtp-models-dir|-m <wtp-models-dir>] [--install-wtp-model|-w <model-name>]* [--install-spacy-model|-s <model-name>]*
+Options
+    --text-splitter-dir, -t <path>:    Path to text splitter source code. (defaults to the
+                                       same directory as this script)
+    --gpu, -g:                         Install the GPU version of PyTorch
+    --wtp-models-dir, -m <path>:       Path where WTP models will be stored.
+                                       (defaults to /opt/wtp/models)
+    --install-wtp-model, -w <name>:    Name of a WTP model to install in addition to wtp-bert-mini.
+                                       This option can be provided more than once to specify
+                                       multiple models.
+    --install-spacy-model, -s <name>:  Name of a spaCy model to install in addition to
+                                       xx_sent_ud_sm. The option can be provided more than once
+                                       to specify multiple models.
+"
+    exit 1
+}
+
+main "$@"  # Script entry point: forward every command line argument to main.
diff --git a/detection/nlp_text_splitter/nlp_text_splitter/__init__.py b/detection/nlp_text_splitter/nlp_text_splitter/__init__.py
new file mode 100644
index 0000000..f083a5f
--- /dev/null
+++ b/detection/nlp_text_splitter/nlp_text_splitter/__init__.py
@@ -0,0 +1,265 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+import logging
+import os
+import pkg_resources
+
+import spacy
+from wtpsplit import WtP
+from typing import Callable, List, Optional, Tuple
+
+from .wtp_lang_settings import WtpLanguageSettings
+
+import torch
+
+
+DEFAULT_WTP_MODELS = "/opt/wtp/models"
+
+# If we want to package model installation with this utility in the future:
+WTP_MODELS_PATH = pkg_resources.resource_filename(
+   __name__, "models"
+)
+
+log = logging.getLogger(__name__)
+
+# These models must have an specified language during sentence splitting.
+WTP_MANDATORY_ADAPTOR = ['wtp-canine-s-1l',
+                         'wtp-canine-s-3l',
+                         'wtp-canine-s-6l',
+                         'wtp-canine-s-9l',
+                         'wtp-canine-s-12l']
+
+GPU_AVAILABLE = False
+if torch.cuda.is_available():
+    GPU_AVAILABLE = True
+
+
+class TextSplitterModel:
+    # To hold spaCy, WtP, and other potential sentence detection models in cache
+
+    def __init__(self, model_name: str, model_setting: str, default_lang: str = "en") -> None:
+        self._model_name = ""
+        self._model_setting = ""
+        self._default_lang = default_lang
+        self._mandatory_wtp_language = False
+        self.split = lambda t, **param: [t]
+        self.update_model(model_name, model_setting, default_lang)
+
+    def update_model(self, model_name: str, model_setting: str = "cpu", default_lang: str="en"):
+        if model_name:
+            if "wtp" in model_name:
+                self._update_wtp_model(model_name, model_setting, default_lang)
+                self.split = self._split_wtp
+                log.info(f"Setup WtP model: {model_name}")
+            else:
+                self._update_spacy_model(model_name)
+                self.split = self._split_spacy
+                log.info(f"Setup spaCy model: {model_name}")
+
+    def _update_wtp_model(self, wtp_model_name: str,
+                          model_setting: str,
+                          default_lang: str) -> None:
+
+        if model_setting == "gpu" or model_setting == "cuda":
+            if GPU_AVAILABLE:
+                model_setting = "cuda"
+            else:
+                log.warning("PyTorch determined that CUDA is not available. "
+                            "You may need to update the NVIDIA driver for the host system, "
+                            "or reinstall PyTorch with GPU support by setting "
+                            "ARGS BUILD_TYPE=gpu in the Dockerfile when building this component.")
+                model_setting = "cpu"
+        elif model_setting != "cpu":
+            log.warning("Invalid WtP model setting. Only `cpu` and `cuda` "
+                        "(or `gpu`) WtP model options available at this time. "
+                        "Defaulting to `cpu` mode.")
+            model_setting = "cpu"
+
+        if wtp_model_name in WTP_MANDATORY_ADAPTOR:
+            self._mandatory_wtp_language = True
+            self._default_lang = default_lang
+
+        if self._model_name == wtp_model_name and self._model_setting == model_setting:
+            log.info(f"Using cached model, running on {self._model_setting}: "
+                     f"{self._model_name}")
+        else:
+            self._model_setting = model_setting
+            self._model_name = wtp_model_name
+            # Check if model has been downloaded
+            if os.path.exists(os.path.join(WTP_MODELS_PATH, wtp_model_name)):
+                log.info(f"Using downloaded {wtp_model_name} model.")
+                wtp_model_name = os.path.join(WTP_MODELS_PATH, wtp_model_name)
+
+            elif os.path.exists(os.path.join(DEFAULT_WTP_MODELS,
+                                             wtp_model_name)):
+
+                log.info(f"Using downloaded {wtp_model_name} model.")
+                wtp_model_name = os.path.join(DEFAULT_WTP_MODELS,
+                                              wtp_model_name)
+
+            else:
+                log.warning(f"Model {wtp_model_name} not found, "
+                             "downloading from hugging face.")
+
+            self.wtp_model =  WtP(wtp_model_name)
+
+            if model_setting != "cpu" and model_setting != "cuda":
+                log.warning(f"Invalid setting for WtP runtime {model_setting}. "
+                             "Defaulting to CPU mode.")
+                model_setting = "cpu"
+            self.wtp_model.to(model_setting)
+
+    def _split_wtp(self, text: str, lang: Optional[str] = None) -> List[str]:
+        if lang:
+            iso_lang = WtpLanguageSettings.convert_to_iso(lang)
+            if iso_lang:
+                return self.wtp_model.split(text, lang_code=iso_lang)
+            else:
+                log.warning(f"Language {lang} was not used to train WtP model. "
+                            "If text splitting is not working well with WtP, "
+                            "consider trying spaCy's sentence detection model."
+                            )
+        if self._mandatory_wtp_language:
+            log.warning("WtP model requires a language. "
+                        f"Using default language : {self._default_lang}.")
+            iso_lang = WtpLanguageSettings.convert_to_iso(self._default_lang)
+            return self.wtp_model.split(text, lang_code=iso_lang)
+        return self.wtp_model.split(text)
+
+    def _update_spacy_model(self, spacy_model_name: str):
+        self.spacy_model = spacy.load(spacy_model_name, exclude=["parser"])
+        self.spacy_model.enable_pipe("senter")
+
+    def _split_spacy(self, text: str, lang: Optional[str] = None) -> List[str]:
+        # TODO: We may add an auto model selection for spaCy in the future.
+        # However, the drawback is we will also need to
+        # download a large number of spaCy models beforehand.
+        processed_text = self.spacy_model(text)
+        return [sent.text_with_ws for sent in processed_text.sents]
+
+class TextSplitter:
+
+    def __init__(
+        self, text: str, limit: int, num_boundary_chars: int,
+        get_text_size: Callable[[str], int],
+        sentence_model: TextSplitterModel,
+        in_lang: Optional[str] = None) -> None:
+        self._sentence_model = sentence_model
+        self._limit = limit
+        self._num_boundary_chars = num_boundary_chars
+        self._get_text_size = get_text_size
+        self._text = ""
+        self._text_full_size = 0
+        self._overhead_size = 0
+        self._soft_limit = self._limit
+        self._in_lang = in_lang
+
+        if text:
+            self.set_text(text)
+
+    def set_text(self, text: str):
+        self._text = text
+        self._text_full_size = self._get_text_size(text)
+        chars_per_size = len(text) / self._text_full_size
+        self._overhead_size = self._get_text_size('')
+
+        self._soft_limit = int(self._limit * chars_per_size) - self._overhead_size
+
+        if self._soft_limit <= 1:
+            # Caused by an unusually large overhead relative to text.
+            # This is unlikely to occur except during testing of small text limits.
+            # Recalculate soft limit by subtracting overhead from limit
+            # before applying chars_per_size weighting.
+            self._soft_limit = max(1,
+                                   int((self._limit - self._overhead_size) * chars_per_size))
+
+    def _isolate_largest_section(self, text:str) -> str:
+        # Using cached word splitting model, isolate largest section of text
+        string_length = len(text)
+
+        if self._num_boundary_chars <= 0:
+            num_chars_to_process = string_length
+        else:
+            num_chars_to_process = self._num_boundary_chars
+
+        start_indx = max(0, string_length - num_chars_to_process)
+        substring = text[start_indx: string_length]
+        substring_list = self._sentence_model.split(substring, lang = self._in_lang)
+        div_index = string_length - len(substring_list[-1])
+
+        if div_index==start_indx:
+            return text
+
+        return text[0:div_index]
+
+    @classmethod
+    def split(cls,
+              text: str, limit: int, num_boundary_chars: int, get_text_size: Callable[[str], int],
+              sentence_model: TextSplitterModel,
+              in_lang: Optional[str] = None
+             ):
+        return cls(text, limit, num_boundary_chars, get_text_size, sentence_model, in_lang)._split()
+
+
+    def _split(self):
+        if self._text_full_size <= self._limit:
+            yield self._text
+        else:
+            yield from self._split_internal(self._text)
+
+    def _split_internal(self, text):
+        right = text
+        while True:
+            left, right = self._divide(right)
+            yield left
+            if not right:
+                return
+
+    def _divide(self, text) -> Tuple[str, str]:
+        limit = self._soft_limit
+        while True:
+            left = text[:limit]
+            left_size = self._get_text_size(left)
+
+            if left_size <= self._limit:
+                if left != text:
+                    # If dividing into two parts
+                    # Determine soft boundary for left segment
+                    left = self._isolate_largest_section(left)
+                return left, text[len(left):]
+
+            char_per_size = len(left) / left_size
+
+
+            limit = int(self._limit * char_per_size) - self._overhead_size
+
+            if limit < 1:
+            # Caused by an unusually large overhead relative to text.
+            # This is unlikely to occur except during testing of small text limits.
+            # Recalculate soft limit by subtracting overhead from limit before
+            # applying chars_per_size weighting.
+                limit = max(1, int((self._limit - self._overhead_size) * char_per_size))
diff --git a/detection/nlp_text_splitter/nlp_text_splitter/wtp_lang_settings.py b/detection/nlp_text_splitter/nlp_text_splitter/wtp_lang_settings.py
new file mode 100644
index 0000000..c682fd3
--- /dev/null
+++ b/detection/nlp_text_splitter/nlp_text_splitter/wtp_lang_settings.py
@@ -0,0 +1,259 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+from typing import Optional
+
+class WtpLanguageSettings:
+    # Supported languages and ISO 639-1, 639-2 codes for WtP models.
+    # https://github.com/bminixhofer/wtpsplit?tab=readme-ov-file#supported-languages
+    # https://www.loc.gov/standards/iso639-2/php/code_list.php
+    _wtp_lang_map = {
+        'afrikaans': 'af',
+        'afr': 'af',
+        'amharic': 'am',
+        'amh': 'am',
+        'arabic': 'ar',
+        'ara': 'ar',
+        'azerbaijani': 'az',
+        'aze': 'az',
+        'belarusian': 'be',
+        'bel': 'be',
+        'bulgarian': 'bg',
+        'bul': 'bg',
+        'bengali': 'bn',
+        'ben': 'bn',
+        'catalan': 'ca',
+        'valencian': 'ca',
+        'cat': 'ca',
+        'cebuano': 'ceb', # In some cases, ISO-639-1 is not available, use ISO-639-2
+        'ceb': 'ceb',
+        'czech': 'cs',
+        'cze': 'cs',
+        'ces': 'cs',
+        'welsh': 'cy',
+        'wel': 'cy',
+        'cym': 'cy',
+        'danish': 'da',
+        'dan': 'da',
+        'german': 'de',
+        'ger': 'de',
+        'deu': 'de',
+        'greek': 'el',
+        'gre': 'el',
+        'ell': 'el',
+        'english': 'en',
+        'eng': 'en',
+        'esperanto': 'eo',
+        'epo': 'eo',
+        'spanish': 'es',
+        'castilian': 'es',
+        'spa': 'es',
+        'estonian': 'et',
+        'est': 'et',
+        'basque': 'eu',
+        'baq': 'eu',
+        'eus': 'eu',
+        'persian': 'fa',
+        'per': 'fa',
+        'fas': 'fa',
+        'finnish': 'fi',
+        'fin': 'fi',
+        'french': 'fr',
+        'fre': 'fr',
+        'fra': 'fr',
+        'western frisian': 'fy',
+        'fry': 'fy',
+        'irish': 'ga',
+        'gle': 'ga',
+        'gaelic': 'gd',
+        'scottish gaelic': 'gd',
+        'gla': 'gd',
+        'galician': 'gl',
+        'glg': 'gl',
+        'gujarati': 'gu',
+        'guj': 'gu',
+        'hausa': 'ha',
+        'hau': 'ha',
+        'hebrew': 'he',
+        'heb': 'he',
+        'hindi': 'hi',
+        'hin': 'hi',
+        'hungarian': 'hu',
+        'hun': 'hu',
+        'armenian': 'hy',
+        'arm': 'hy',
+        'hye': 'hy',
+        'indonesian': 'id',
+        'ind': 'id',
+        'igbo': 'ig',
+        'ibo': 'ig',
+        'icelandic': 'is',
+        'ice': 'is',
+        'isl': 'is',
+        'italian': 'it',
+        'ita': 'it',
+        'japanese': 'ja',
+        'jpn': 'ja',
+        'javanese': 'jv',
+        'jav': 'jv',
+        'georgian': 'ka',
+        'geo': 'ka',
+        'kat': 'ka',
+        'kazakh': 'kk',
+        'kaz': 'kk',
+        'central khmer': 'km',
+        'khm': 'km',
+        'kannada': 'kn',
+        'kan': 'kn',
+        'korean': 'ko',
+        'kor': 'ko',
+        'kurdish': 'ku',
+        'kur': 'ku',
+        'kirghiz': 'ky',
+        'kyrgyz': 'ky',
+        'kir': 'ky',
+        'latin': 'la',
+        'lat': 'la',
+        'lithuanian': 'lt',
+        'lit': 'lt',
+        'latvian': 'lv',
+        'lav': 'lv',
+        'malagasy': 'mg',
+        'mlg': 'mg',
+        'macedonian': 'mk',
+        'mac': 'mk',
+        'mkd': 'mk',
+        'malayalam': 'ml',
+        'mal': 'ml',
+        'mongolian': 'mn',
+        'mon': 'mn',
+        'marathi': 'mr',
+        'mar': 'mr',
+        'malay': 'ms',
+        'may': 'ms',
+        'msa': 'ms',
+        'maltese': 'mt',
+        'mlt': 'mt',
+        'burmese': 'my',
+        'bur': 'my',
+        'mya': 'my',
+        'nepali': 'ne',
+        'nep': 'ne',
+        'dutch': 'nl',
+        'flemish': 'nl',
+        'dut': 'nl',
+        'nld': 'nl',
+        'norwegian': 'no',
+        'nor': 'no',
+        'panjabi': 'pa',
+        'punjabi': 'pa',
+        'pan': 'pa',
+        'polish': 'pl',
+        'pol': 'pl',
+        'pushto': 'ps',
+        'pashto': 'ps',
+        'pus': 'ps',
+        'portuguese': 'pt',
+        'por': 'pt',
+        'romanian': 'ro',
+        'moldavian': 'ro',
+        'moldovan': 'ro',
+        'rum': 'ro',
+        'ron': 'ro',
+        'russian': 'ru',
+        'rus': 'ru',
+        'sinhala': 'si',
+        'sinhalese': 'si',
+        'sin': 'si',
+        'slovak': 'sk',
+        'slo': 'sk',
+        'slk': 'sk',
+        'slovenian': 'sl',
+        'slv': 'sl',
+        'albanian': 'sq',
+        'alb': 'sq',
+        'sqi': 'sq',
+        'serbian': 'sr',
+        'srp': 'sr',
+        'swedish': 'sv',
+        'swe': 'sv',
+        'tamil': 'ta',
+        'tam': 'ta',
+        'telugu': 'te',
+        'tel': 'te',
+        'tajik': 'tg',
+        'tgk': 'tg',
+        'thai': 'th',
+        'tha': 'th',
+        'turkish': 'tr',
+        'tur': 'tr',
+        'ukrainian': 'uk',
+        'ukr': 'uk',
+        'urdu': 'ur',
+        'urd': 'ur',
+        'uzbek': 'uz',
+        'uzb': 'uz',
+        'vietnamese': 'vi',
+        'vie': 'vi',
+        'xhosa': 'xh',
+        'xho': 'xh',
+        'yiddish': 'yi',
+        'yid': 'yi',
+        'yoruba': 'yo',
+        'yor': 'yo',
+        'chinese': 'zh',
+        'chi': 'zh',
+        'zho': 'zh',
+        'zulu': 'zu',
+        'zul': 'zu',
+        'hans':'zh', # Also check for chinese scripts
+        'hant': 'zh',
+        'cmn':'zh' # In some cases we use 'cmn' = 'Mandarin'
+    }
+
+    _wtp_iso_set = set(_wtp_lang_map.values())
+
+    @classmethod
+    def convert_to_iso(cls, lang: str) -> Optional[str]:
+        # ISO 639-2 (language) is sometimes paired with ISO 15924 (script).
+        # Extract the language portion and check if supported in WtP.
+        if not lang:
+            return None
+
+        if '-' in lang:
+            lang = lang.split('-')[0]
+        if '_' in lang:
+            lang = lang.split('_')[0]
+
+        lang = lang.strip().lower()
+
+        if lang in cls._wtp_iso_set:
+            return lang
+
+        if lang in cls._wtp_lang_map:
+            return cls._wtp_lang_map[lang]
+
+        return None
diff --git a/detection/nlp_text_splitter/pyproject.toml b/detection/nlp_text_splitter/pyproject.toml
new file mode 100644
index 0000000..31583a6
--- /dev/null
+++ b/detection/nlp_text_splitter/pyproject.toml
@@ -0,0 +1,37 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "nlp_text_splitter"
+version = "8.0"
+dependencies = [
+    "spacy>=3.7.4",
+    "wtpsplit>=1.3.0"
+]
diff --git a/detection/nlp_text_splitter/tests/test_data/NOTICE b/detection/nlp_text_splitter/tests/test_data/NOTICE
new file mode 100644
index 0000000..0e3ac4d
--- /dev/null
+++ b/detection/nlp_text_splitter/tests/test_data/NOTICE
@@ -0,0 +1,4 @@
+# art-of-war.txt
+Contains the beginning of "The Art of War" by Sunzi in Traditional Chinese.
+Public Domain
+https://www.gutenberg.org/ebooks/12407
\ No newline at end of file
diff --git a/detection/nlp_text_splitter/tests/test_data/art-of-war.txt b/detection/nlp_text_splitter/tests/test_data/art-of-war.txt
new file mode 100644
index 0000000..25e19f5
--- /dev/null
+++ b/detection/nlp_text_splitter/tests/test_data/art-of-war.txt
@@ -0,0 +1,8 @@
+兵者，國之大事，死生之地，存亡之道，不可不察也。
+故經之以五事，校之以計，而索其情：一曰道，二曰天，三曰地，四曰將，五曰法。道者，令民於上同意，可與之死，可與之生，
+而不危也；天者，陰陽、寒暑、時制也；地者，遠近、險易、廣狹、死生也；將者，智、信、仁、勇、嚴也；法者，曲制、官道、
+主用也。凡此五者，將莫不聞，知之者勝，不知之者不勝。故校之以計，而索其情，曰：主孰有道？將孰有能？天地孰得？法令孰行？
+兵眾孰強？士卒孰練？賞罰孰明？吾以此知勝負矣。將聽吾計，用之必勝，留之；將不聽吾計，用之必敗，去之。計利以聽，乃為之勢，
+以佐其外。勢者，因利而制權也。兵者，詭道也。故能而示之不能，用而示之不用，近而示之遠，遠而示之近。利而誘之，亂而取之，
+實而備之，強而避之，怒而撓之，卑而驕之，佚而勞之，親而離之，攻其無備，出其不意。此兵家之勝，不可先傳也。
+夫未戰而廟算勝者，得算多也；未戰而廟算不勝者，得算少也。多算勝少算，而況於無算乎！吾以此觀之，勝負見矣。
diff --git a/detection/nlp_text_splitter/tests/test_text_splitter.py b/detection/nlp_text_splitter/tests/test_text_splitter.py
new file mode 100644
index 0000000..9782870
--- /dev/null
+++ b/detection/nlp_text_splitter/tests/test_text_splitter.py
@@ -0,0 +1,213 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+import pathlib
+import unittest
+
+from nlp_text_splitter import TextSplitterModel, TextSplitter
+
+
+TEST_DATA = pathlib.Path(__file__).parent / 'test_data'  # beside this file
+
+class TestTextSplitter(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):  # runs once; the models are shared by every test
+        cls.wtp_model = TextSplitterModel("wtp-bert-mini", "cpu", "en")
+        cls.wtp_adv_model = TextSplitterModel("wtp-canine-s-1l", "cpu", "zh")
+        cls.spacy_model = TextSplitterModel("xx_sent_ud_sm", "cpu", "en")
+
+    def test_split_engine_difference(self):
+        # Only WtP's multilingual models detect some of the '。' sentence
+        # endings in this Chinese text; spaCy's model returns one sentence.
+        # Decode explicitly: the platform default encoding may not be UTF-8.
+        text = (TEST_DATA / 'art-of-war.txt').read_text(encoding='utf-8')
+        text_without_newlines = text.replace('\n', '')
+
+        actual = self.wtp_model._split_wtp(text_without_newlines)
+        self.assertEqual(3, len(actual))
+        for line in actual:
+            self.assertTrue(line.endswith('。'))
+
+        actual = self.spacy_model._split_spacy(text_without_newlines)
+        self.assertEqual(1, len(actual))
+
+        # However, WtP prefers newlines over the '。' character.
+        actual = self.wtp_model._split_wtp(text)
+        self.assertEqual(10, len(actual))
+
+    def test_guess_split_simple_sentence(self):
+        # Both engines should break the 42-character input between its two
+        # sentences, keeping the separating space on the first chunk.
+        # NOTE(review): 28/28 are presumably the chunk size limits -- confirm
+        # against TextSplitter.split.
+        input_text = 'Hello, what is your name? My name is John.'
+        first_sentence = 'Hello, what is your name? '
+        second_sentence = 'My name is John.'
+
+        # WtP model.
+        wtp_chunks = list(
+            TextSplitter.split(input_text, 28, 28, len, self.wtp_model))
+
+        # Joining the chunks must reproduce the input exactly: nothing is
+        # dropped or duplicated at the split point.
+        self.assertEqual(input_text, ''.join(wtp_chunks))
+        self.assertEqual(2, len(wtp_chunks))
+        self.assertEqual(first_sentence, wtp_chunks[0])
+        self.assertEqual(second_sentence, wtp_chunks[1])
+
+        # spaCy model: same input, same expected chunks.
+        spacy_chunks = list(
+            TextSplitter.split(input_text, 28, 28, len, self.spacy_model))
+
+        self.assertEqual(input_text, ''.join(spacy_chunks))
+        self.assertEqual(2, len(spacy_chunks))
+        self.assertEqual(first_sentence, spacy_chunks[0])
+        self.assertEqual(second_sentence, spacy_chunks[1])
+
+    def test_split_sentence_end_punctuation(self):
+        # The first chunk keeps both punctuated sentences together
+        # ('Hello. How are you? '); the unpunctuated remainder 'asdfasdf' is
+        # returned as the second chunk. Both engines must agree.
+        input_text = 'Hello. How are you? asdfasdf'
+
+        def split_with(model):
+            # Both engines get the same size arguments (20 and 10).
+            return list(TextSplitter.split(input_text, 20, 10, len, model))
+
+        # WtP model.
+        wtp_chunks = split_with(self.wtp_model)
+        self.assertEqual(input_text, ''.join(wtp_chunks))
+        self.assertEqual(2, len(wtp_chunks))
+
+        self.assertEqual('Hello. How are you? ', wtp_chunks[0])
+        self.assertEqual('asdfasdf', wtp_chunks[1])
+
+        # spaCy model.
+        spacy_chunks = split_with(self.spacy_model)
+        self.assertEqual(input_text, ''.join(spacy_chunks))
+        self.assertEqual(2, len(spacy_chunks))
+
+        self.assertEqual('Hello. How are you? ', spacy_chunks[0])
+        self.assertEqual('asdfasdf', spacy_chunks[1])
+
+
+    def test_guess_split_edge_cases(self):
+        # Abbreviation, ellipsis and newline cases: each engine should still
+        # yield exactly the four sentences, trailing whitespace included.
+        input_text = ("This is a sentence (Dr.Test). Is this,"
+                      " a sentence as well? Maybe...maybe not?"
+                      " \n All done, I think!")
+        expected = (
+            "This is a sentence (Dr.Test). ",
+            "Is this, a sentence as well? ",
+            "Maybe...maybe not? \n ",
+            "All done, I think!",
+        )
+
+        # Split using WtP model (size arguments 30, 30).
+        wtp_chunks = list(
+            TextSplitter.split(input_text, 30, 30, len, self.wtp_model))
+
+        self.assertEqual(input_text, ''.join(wtp_chunks))
+        self.assertEqual(4, len(wtp_chunks))
+
+        # WtP should detect and split out each sentence.
+        for position, sentence in enumerate(expected):
+            self.assertEqual(sentence, wtp_chunks[position])
+
+        # Split using spaCy model; note the larger size arguments (35, 35).
+        spacy_chunks = list(
+            TextSplitter.split(input_text, 35, 35, len, self.spacy_model))
+
+        self.assertEqual(input_text, ''.join(spacy_chunks))
+        self.assertEqual(4, len(spacy_chunks))
+
+        # spaCy is expected to produce the same four chunks.
+        for position, sentence in enumerate(expected):
+            self.assertEqual(sentence, spacy_chunks[position])
+
+
+    def test_split_wtp_basic(self):
+        # Pin the chunk boundaries the basic WtP model picks for the fixture.
+        # UTF-8 is given explicitly: the platform default encoding may differ.
+        text = (TEST_DATA / 'art-of-war.txt').read_text(encoding='utf-8')
+        text = text.replace('\n', '')
+        actual = list(
+            TextSplitter.split(text, 150, 150, len, self.wtp_model))
+
+        self.assertEqual(4, len(actual))
+
+        expected_chunk_lengths = [86, 116, 104, 114]
+        self.assertEqual(sum(expected_chunk_lengths), len(text))
+
+        self.assertTrue(actual[0].startswith('兵者，'))
+        self.assertTrue(actual[0].endswith('而不危也；'))
+        self.assertEqual(expected_chunk_lengths[0], len(actual[0]))
+
+        self.assertTrue(actual[1].startswith('天者，陰陽'))
+        self.assertTrue(actual[1].endswith('兵眾孰強？'))
+        self.assertEqual(expected_chunk_lengths[1], len(actual[1]))
+
+        self.assertTrue(actual[2].startswith('士卒孰練？'))
+        self.assertTrue(actual[2].endswith('遠而示之近。'))
+        self.assertEqual(expected_chunk_lengths[2], len(actual[2]))
+
+        self.assertTrue(actual[3].startswith('利而誘之，'))
+        self.assertTrue(actual[3].endswith('勝負見矣。'))
+        self.assertEqual(expected_chunk_lengths[3], len(actual[3]))
+
+    def test_split_wtp_advanced(self):
+        # Pin the chunk boundaries the advanced WtP model picks for the
+        # fixture. UTF-8 is explicit: the platform default may differ.
+        text = (TEST_DATA / 'art-of-war.txt').read_text(encoding='utf-8')
+        text = text.replace('\n', '')
+        actual = list(
+            TextSplitter.split(text, 150, 150, len, self.wtp_adv_model))
+
+        self.assertEqual(4, len(actual))
+
+        expected_chunk_lengths = [61, 150, 61, 148]
+        self.assertEqual(sum(expected_chunk_lengths), len(text))
+
+        self.assertTrue(actual[0].startswith('兵者，'))
+        self.assertTrue(actual[0].endswith('四曰將，五曰法。'))
+        self.assertEqual(expected_chunk_lengths[0], len(actual[0]))
+
+        self.assertTrue(actual[1].startswith('道者，令民於上同意'))
+        self.assertTrue(actual[1].endswith('賞罰孰明'))
+        self.assertEqual(expected_chunk_lengths[1], len(actual[1]))
+
+        self.assertTrue(actual[2].startswith('？吾以此知勝'))
+        self.assertTrue(actual[2].endswith('因利而制權也。'))
+        self.assertEqual(expected_chunk_lengths[2], len(actual[2]))
+
+        self.assertTrue(actual[3].startswith('兵者，詭道也。'))
+        self.assertTrue(actual[3].endswith('之，勝負見矣。'))
+        self.assertEqual(expected_chunk_lengths[3], len(actual[3]))
+
+
+if __name__ == '__main__':
+    unittest.main(verbosity=2)  # verbosity=2 reports each test by name