diff --git a/.gitignore b/.gitignore
index 23ad709..55162dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,8 @@
 dev.ipynb
+.ruff_cache
+.vscode
+
+tmp/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -22,7 +26,6 @@ parts/
 sdist/
 var/
 wheels/
-pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
@@ -52,6 +55,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+cover/
 
 # Translations
 *.mo
@@ -74,17 +78,20 @@ instance/
 docs/_build/
 
 # PyBuilder
+.pybuilder/
 target/
 
-# Notebook Checkpoints
-.ipynb_checkpoints/
+# Jupyter Notebook
+.ipynb_checkpoints
 
 # IPython
 profile_default/
 ipython_config.py
 
 # pyenv
-.python-version
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
 
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
@@ -93,7 +100,22 @@ ipython_config.py
 #   install all needed dependencies.
 #Pipfile.lock
 
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 
 # Celery stuff
@@ -128,4 +150,17 @@ venv.bak/
 dmypy.json
 
 # Pyre type checker
-.pyre/
\ No newline at end of file
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e97baae
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,22 @@
+default_language_version:
+  python: python3
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+        args: [--allow-multiple-documents]
+      - id: check-added-large-files
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.3
+    hooks:
+      - id: ruff
+        types_or: [python, pyi, jupyter]
+        args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff  # import sorting only (rule set "I"); the hook entry already runs "ruff check"
+        args: ["--select", "I", "--fix"]
+        types_or: [python, pyi, jupyter]
+      - id: ruff-format
+        types_or: [python, pyi, jupyter]
diff --git a/.vscode/settings.json b/.vscode/settings.json
index d99f2f3..8d5d176 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,9 @@
 {
-    "[python]": {
-        "editor.defaultFormatter": "ms-python.black-formatter"
-    },
-    "python.formatting.provider": "none"
-}
\ No newline at end of file
+    "recommendations": [
+        "ms-python.vscode-pylance",
+        "ms-python.black-formatter",
+        "ms-python.isort",
+        "charliermarsh.ruff",
+        "redhat.vscode-yaml"
+    ]
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4844829
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+.PHONY: check install
+check:
+	pre-commit run --all-files
+
+install:
+	pip install -r ./requirements-dev.txt
+	pre-commit install
diff --git a/README.md b/README.md
index 1febd07..5cea0de 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
 
 **yellowduck** is the data science toolbox for everyone. To be precise, for the lazy man like me!
 
-Actually, **yellowduck** is like a sandbox library for me. If I found something great I will surely add it in **yellowduck**. 
+Actually, **yellowduck** is like a sandbox library for me. If I find something great, I will surely add it to **yellowduck**.
 
 ## Main Features
 
@@ -57,4 +57,4 @@ Create New Issue [here](https://github.com/PCP55/yellowduck-dev/issues) and I wi
 
 ---------------------------------------
 
-> This library was inspired by [kora](https://github.com/airesearch-in-th/kora/tree/master/kora), A collection of tools to make programming on Google Colab easier.
\ No newline at end of file
+> This library was inspired by [kora](https://github.com/airesearch-in-th/kora/tree/master/kora), a collection of tools to make programming on Google Colab easier.
diff --git a/examples/etc/id_card_validator.ipynb b/examples/etc/id_card_validator.ipynb
index ab20cd7..dee235f 100644
--- a/examples/etc/id_card_validator.ipynb
+++ b/examples/etc/id_card_validator.ipynb
@@ -28,9 +28,9 @@
    "source": [
     "# Passed\n",
     "\n",
-    "id = '1234567890121'\n",
+    "id = \"1234567890121\"\n",
     "\n",
-    "IDValidator(id=id, id_type = NationalThaiIDCard()).validate()"
+    "IDValidator(id=id, id_type=NationalThaiIDCard()).validate()"
    ]
   },
   {
@@ -52,9 +52,9 @@
    "source": [
     "# Passed\n",
     "\n",
-    "id = '1-2345-67890-12-1'\n",
+    "id = \"1-2345-67890-12-1\"\n",
     "\n",
-    "IDValidator(id=id, id_type = NationalThaiIDCard()).validate()"
+    "IDValidator(id=id, id_type=NationalThaiIDCard()).validate()"
    ]
   },
   {
@@ -76,9 +76,9 @@
    "source": [
     "# Failed\n",
     "\n",
-    "id = '1-2345-67890-12-2'\n",
+    "id = \"1-2345-67890-12-2\"\n",
     "\n",
-    "IDValidator(id=id, id_type = NationalThaiIDCard()).validate()"
+    "IDValidator(id=id, id_type=NationalThaiIDCard()).validate()"
    ]
   },
   {
@@ -103,9 +103,9 @@
    "source": [
     "# Invalid ID\n",
     "\n",
-    "id = '123456789022'\n",
+    "id = \"123456789022\"\n",
     "\n",
-    "IDValidator(id=id, id_type = NationalThaiIDCard()).validate()"
+    "IDValidator(id=id, id_type=NationalThaiIDCard()).validate()"
    ]
   }
  ],
diff --git a/examples/images/duplicate_images.ipynb b/examples/images/duplicate_images.ipynb
index 979064c..bc84cfd 100644
--- a/examples/images/duplicate_images.ipynb
+++ b/examples/images/duplicate_images.ipynb
@@ -20,100 +20,121 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os\n",
-    "import PIL\n",
     "import hashlib\n",
+    "import os\n",
+    "\n",
     "import imagehash\n",
-    "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import PIL\n",
+    "\n",
     "\n",
-    "class ImageDuplicate():\n",
+    "class ImageDuplicate:\n",
     "    def __init__(self, image_folder_path: str):\n",
-    "        try: # For development phase only\n",
-    "            get_ipython\n",
-    "            self.current_path = os.getcwd()\n",
-    "        except: # For production\n",
-    "            self.current_path = os.path.dirname(os.path.realpath(__file__))\n",
+    "        self.current_path = os.path.dirname(os.path.realpath(__file__))\n",
     "        self.current_path = os.path.join(self.current_path, image_folder_path)\n",
     "\n",
-    "        self.image_in_folder_list = [file for file in os.listdir(self.current_path) if (file.endswith('.png')) | (file.endswith('.jpg'))]\n",
-    "        self.image_path_list = [os.path.join(self.current_path,image) for image in self.image_in_folder_list]\n",
+    "        self.image_in_folder_list = [\n",
+    "            file\n",
+    "            for file in os.listdir(self.current_path)\n",
+    "            if (file.endswith(\".png\")) | (file.endswith(\".jpg\"))\n",
+    "        ]\n",
+    "        self.image_path_list = [\n",
+    "            os.path.join(self.current_path, image)\n",
+    "            for image in self.image_in_folder_list\n",
+    "        ]\n",
     "        self.hash_value_list = []\n",
     "\n",
-    "        self.similar_group_dict = {} # Group of image separate by hash value\n",
-    "        self.non_duplicate_list = [] # Select 1 image in each group\n",
-    "        self.duplicate_list = [] # The rest that not be selected in non_duplicate_list\n",
+    "        self.similar_group_dict = {}  # Group of image separate by hash value\n",
+    "        self.non_duplicate_list = []  # Select 1 image in each group\n",
+    "        self.duplicate_list = []  # The rest that not be selected in non_duplicate_list\n",
     "\n",
     "    def find_exact(self):\n",
-    "        print(f'Using method: Exact Match (MD5)')\n",
+    "        print(\"Using method: Exact Match (MD5)\")\n",
     "\n",
     "        for image_file in self.image_in_folder_list:\n",
-    "            image_fullpath = os.path.join(self.current_path,image_file)\n",
-    "            with open(image_fullpath, 'rb') as f:\n",
+    "            image_fullpath = os.path.join(self.current_path, image_file)\n",
+    "            with open(image_fullpath, \"rb\") as f:\n",
     "                hash_value = hashlib.md5(f.read()).hexdigest()\n",
     "            self.hash_value_list.append(hash_value)\n",
     "\n",
-    "        fast_check_duplicate = len(set(self.hash_value_list)) - len(self.hash_value_list)\n",
+    "        fast_check_duplicate = len(set(self.hash_value_list)) - len(\n",
+    "            self.hash_value_list\n",
+    "        )\n",
     "        if fast_check_duplicate == 0:\n",
-    "            print('There is no duplicate image here.')\n",
+    "            print(\"There is no duplicate image here.\")\n",
     "            return\n",
     "        else:\n",
-    "            for image_name, hash_value in zip(self.image_in_folder_list,self.hash_value_list):\n",
+    "            for image_name, hash_value in zip(\n",
+    "                self.image_in_folder_list, self.hash_value_list\n",
+    "            ):\n",
     "                if hash_value not in self.similar_group_dict:\n",
     "                    self.similar_group_dict[hash_value] = [image_name]\n",
     "                    self.non_duplicate_list.append(image_name)\n",
     "                else:\n",
     "                    self.similar_group_dict[hash_value] += [image_name]\n",
     "                    self.duplicate_list.append(image_name)\n",
-    "                \n",
+    "\n",
     "            temp_similar_group_dict = self.similar_group_dict.copy()\n",
-    "            for (key,value) in temp_similar_group_dict.items():\n",
+    "            for key, value in temp_similar_group_dict.items():\n",
     "                if len(value) == 1:\n",
     "                    self.similar_group_dict.pop(key)\n",
     "\n",
     "        group_key = list(np.arange(len(self.similar_group_dict)))\n",
-    "        self.similar_group_dict = dict(zip(group_key,list(self.similar_group_dict.values())))\n",
+    "        self.similar_group_dict = dict(\n",
+    "            zip(group_key, list(self.similar_group_dict.values()))\n",
+    "        )\n",
     "\n",
     "        num_duplicate = len(self.duplicate_list)\n",
     "        num_all = len(self.image_in_folder_list)\n",
-    "        percentage = np.round(num_duplicate/num_all * 100, 2)\n",
-    "        print(f'There are {num_duplicate} duplicated images out of {num_all} which is around {percentage} %.')\n",
+    "        percentage = np.round(num_duplicate / num_all * 100, 2)\n",
+    "        print(\n",
+    "            f\"There are {num_duplicate} duplicated images out of {num_all} which is around {percentage} %.\"\n",
+    "        )\n",
     "\n",
     "        return self.similar_group_dict, self.duplicate_list, self.non_duplicate_list\n",
     "\n",
-    "    def find_similar(self, hash_method:str='phash', distance:int=2, hash_size:int=16):\n",
-    "        print(f'Using method: {hash_method}\\nAn accepted distance: {distance}\\nHashing size: {hash_size}')\n",
+    "    def find_similar(\n",
+    "        self, hash_method: str = \"phash\", distance: int = 2, hash_size: int = 16\n",
+    "    ):\n",
+    "        print(\n",
+    "            f\"Using method: {hash_method}\\nAn accepted distance: {distance}\\nHashing size: {hash_size}\"\n",
+    "        )\n",
     "\n",
     "        for image_file in self.image_in_folder_list:\n",
-    "            image_fullpath = os.path.join(self.current_path,image_file)\n",
+    "            image_fullpath = os.path.join(self.current_path, image_file)\n",
     "            image = PIL.Image.open(image_fullpath)\n",
-    "            if hash_method == 'phash':\n",
+    "            if hash_method == \"phash\":\n",
     "                hash_value = imagehash.phash(image, hash_size)\n",
-    "            elif hash_method == 'ahash':\n",
+    "            elif hash_method == \"ahash\":\n",
     "                hash_value = imagehash.average_hash(image, hash_size)\n",
-    "            elif hash_method == 'dhash':\n",
+    "            elif hash_method == \"dhash\":\n",
     "                hash_value = imagehash.dhash(image, hash_size)\n",
-    "            elif hash_method == 'whash':\n",
+    "            elif hash_method == \"whash\":\n",
     "                hash_value = imagehash.whash(image, hash_size)\n",
-    "            elif hash_method == 'crop_resistant_hash':\n",
+    "            elif hash_method == \"crop_resistant_hash\":\n",
     "                \"\"\"\n",
     "                - No hashing size\n",
     "                - Take too much time!! (as another hash algorithm use 250 ms but this one take 1 min for test dataset)\n",
     "                \"\"\"\n",
     "                hash_value = imagehash.crop_resistant_hash(image)\n",
     "            else:\n",
-    "                print('There are 4 methods here which is phash, ahash, dhash, whash')\n",
+    "                print(\"There are 4 methods here which is phash, ahash, dhash, whash\")\n",
     "            self.hash_value_list.append(hash_value)\n",
     "\n",
     "        # It is recommend to use distance = 0 for time reduction.\n",
     "\n",
     "        if distance == 0:\n",
-    "            fast_check_duplicate = len(set(self.hash_value_list)) - len(self.hash_value_list)\n",
+    "            fast_check_duplicate = len(set(self.hash_value_list)) - len(\n",
+    "                self.hash_value_list\n",
+    "            )\n",
     "            if fast_check_duplicate == 0:\n",
-    "                print('There is no duplicate image here.')\n",
+    "                print(\"There is no duplicate image here.\")\n",
     "                return\n",
     "            else:\n",
-    "                for image_name, hash_value in zip(self.image_in_folder_list,self.hash_value_list):\n",
+    "                for image_name, hash_value in zip(\n",
+    "                    self.image_in_folder_list, self.hash_value_list\n",
+    "                ):\n",
     "                    if hash_value not in self.similar_group_dict:\n",
     "                        self.similar_group_dict[hash_value] = [image_name]\n",
     "                        self.non_duplicate_list.append(image_name)\n",
@@ -123,25 +144,28 @@
     "        else:\n",
     "            temp_filename_list = []\n",
     "            num = 0\n",
-    "            filename_hash_dict = dict(zip(self.image_in_folder_list,self.hash_value_list))\n",
-    "            temp_filename_hash_dict = dict(zip(self.image_in_folder_list,self.hash_value_list))\n",
+    "            filename_hash_dict = dict(\n",
+    "                zip(self.image_in_folder_list, self.hash_value_list)\n",
+    "            )\n",
+    "            temp_filename_hash_dict = dict(\n",
+    "                zip(self.image_in_folder_list, self.hash_value_list)\n",
+    "            )\n",
     "            sort_filename_hash_dict = sorted(filename_hash_dict)\n",
-    "            \n",
+    "\n",
     "            for file_first in sort_filename_hash_dict:\n",
     "                if file_first in temp_filename_hash_dict:\n",
-    "                \n",
     "                    temp_similar_list = []\n",
     "                    temp_similar_list.append(file_first)\n",
     "                    temp_filename_list.append(file_first)\n",
     "                    temp_filename_hash_dict.pop(file_first)\n",
     "\n",
     "                    image_first = filename_hash_dict[file_first]\n",
-    "                \n",
+    "\n",
     "                for file_second in sort_filename_hash_dict:\n",
     "                    if file_second not in temp_filename_list:\n",
     "                        image_second = filename_hash_dict[file_second]\n",
     "                        hamming_distance = image_first - image_second\n",
-    "                        \n",
+    "\n",
     "                        if hamming_distance <= distance:\n",
     "                            temp_similar_list.append(file_second)\n",
     "                            temp_filename_list.append(file_second)\n",
@@ -154,15 +178,21 @@
     "\n",
     "                    num = num + 1\n",
     "\n",
-    "            self.non_duplicate_list = [image for image in self.image_in_folder_list if image not in self.duplicate_list]\n",
+    "            self.non_duplicate_list = [\n",
+    "                image\n",
+    "                for image in self.image_in_folder_list\n",
+    "                if image not in self.duplicate_list\n",
+    "            ]\n",
     "\n",
     "        return self.similar_group_dict, self.duplicate_list, self.non_duplicate_list\n",
     "\n",
     "        num_duplicate = len(self.duplicate_list)\n",
     "        num_all = len(self.image_in_folder_list)\n",
-    "        percentage = np.round(num_duplicate/num_all * 100, 2)\n",
+    "        percentage = np.round(num_duplicate / num_all * 100, 2)\n",
     "\n",
-    "        print(f'There are {num_duplicate} duplicated images out of {num_all} which is around {percentage} %.')"
+    "        print(\n",
+    "            f\"There are {num_duplicate} duplicated images out of {num_all} which is around {percentage} %.\"\n",
+    "        )"
    ]
   },
   {
@@ -171,13 +201,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "class ShowImageDuplicate():\n",
-    "    def __init__(self, image_folder_path, group_of_duplicate_dict:dict):\n",
+    "class ShowImageDuplicate:\n",
+    "    def __init__(self, image_folder_path, group_of_duplicate_dict: dict):\n",
     "        self.image_folder_path = image_folder_path\n",
     "        self.group_of_duplicate_dict = group_of_duplicate_dict\n",
     "\n",
     "        self.number_of_group = len(self.group_of_duplicate_dict)\n",
-    "        print(f'There are {self.number_of_group} of duplicate image.\\nUse .show_group(group_number) or .show_all() for all group.')\n",
+    "        print(\n",
+    "            f\"There are {self.number_of_group} of duplicate image.\\nUse .show_group(group_number) or .show_all() for all group.\"\n",
+    "        )\n",
+    "\n",
     "    def show_all(self):\n",
     "        \"\"\"\n",
     "        Show only first 5 images in each group\n",
@@ -190,9 +223,11 @@
     "            if len(image_list) > 5:\n",
     "                image_list = image_list[:5]\n",
     "            for image_number in np.arange(len(image_list)):\n",
-    "                image_path = os.path.join(self.image_folder_path,image_list[image_number])\n",
+    "                image_path = os.path.join(\n",
+    "                    self.image_folder_path, image_list[image_number]\n",
+    "                )\n",
     "                image = PIL.Image.open(image_path)\n",
-    "                axes[group_number,image_number].imshow(image)\n",
+    "                axes[group_number, image_number].imshow(image)\n",
     "        plt.tight_layout()\n",
     "\n",
     "    def show_group(self, group_number):\n",
@@ -201,15 +236,15 @@
     "            num_col = len(image_list)\n",
     "        else:\n",
     "            num_col = 5\n",
-    "        num_row = int(len(image_list)/num_col)\n",
-    "        mod = len(image_list)%num_col\n",
+    "        num_row = int(len(image_list) / num_col)\n",
+    "        mod = len(image_list) % num_col\n",
     "        if mod != 0:\n",
     "            num_row = num_row + 1\n",
     "        fig, axes = plt.subplots(nrows=num_row, ncols=num_col, figsize=(24, 10))\n",
     "        for axis in axes.ravel():\n",
     "            axis.set_axis_off()\n",
     "        for index, image_name in enumerate(image_list):\n",
-    "            image_path = os.path.join(self.image_folder_path,image_name)\n",
+    "            image_path = os.path.join(self.image_folder_path, image_name)\n",
     "            image = PIL.Image.open(image_path)\n",
     "            axes.ravel()[index].imshow(image)\n",
     "        plt.tight_layout()"
@@ -233,197 +268,191 @@
    },
    "outputs": [],
    "source": [
-    "import os\n",
-    "import hashlib\n",
-    "from PIL import Image\n",
-    "import imagehash\n",
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
+    "# from PIL import Image\n",
     "\n",
-    "class duplicate():\n",
-    "    #################################################################\n",
-    "    # init\n",
-    "    def __init__(self, image_folder_path):\n",
-    "        \n",
-    "        try:\n",
-    "            get_ipython\n",
-    "            self.current_path = os.getcwd() # For test function in .ipynb\n",
-    "        except:\n",
-    "            self.current_path = os.path.dirname(os.path.realpath(__file__)) # For .py\n",
-    "            \n",
-    "        self.current_path = os.path.join(self.current_path, image_folder_path)\n",
     "\n",
-    "        self.remove_filename_list = [] # List of similar image except original one\n",
-    "        self.similar_group_dict = {} # Group of similar image including original one\n",
+    "# class duplicate:\n",
+    "#     #################################################################\n",
+    "#     # init\n",
+    "#     def __init__(self, image_folder_path):\n",
+    "#         try:\n",
+    "#             get_ipython\n",
+    "#             self.current_path = os.getcwd()  # For test function in .ipynb\n",
+    "#         except:\n",
+    "#             self.current_path = os.path.dirname(os.path.realpath(__file__))  # For .py\n",
     "\n",
-    "    #################################################################\n",
-    "    # Find    \n",
-    "    def find(self, mode = 'exact', distance = 0, phash_size = 16):\n",
-    "        \n",
-    "        num = 0\n",
-    "        filename_hash = dict()\n",
-    "        image_list = os.listdir(self.current_path)\n",
-    "        \n",
-    "        ###########################\n",
+    "#         self.current_path = os.path.join(self.current_path, image_folder_path)\n",
     "\n",
-    "        if mode == 'exact':\n",
+    "#         self.remove_filename_list = []  # List of similar image except original one\n",
+    "#         self.similar_group_dict = {}  # Group of similar image including original one\n",
     "\n",
-    "            hash_keys = dict()\n",
-    "            duplicate_group = dict()\n",
-    "            self.remove_filename_list = []\n",
+    "#     #################################################################\n",
+    "#     # Find\n",
+    "#     def find(self, mode=\"exact\", distance=0, phash_size=16):\n",
+    "#         num = 0\n",
+    "#         filename_hash = dict()\n",
+    "#         image_list = os.listdir(self.current_path)\n",
     "\n",
-    "            for index, filename in enumerate(image_list):\n",
+    "#         ###########################\n",
     "\n",
-    "                file_path = os.path.join(self.current_path, filename)\n",
+    "#         if mode == \"exact\":\n",
+    "#             hash_keys = dict()\n",
+    "#             duplicate_group = dict()\n",
+    "#             self.remove_filename_list = []\n",
     "\n",
-    "                if os.path.isfile(file_path):\n",
-    "                    with open(file_path, 'rb') as f:\n",
-    "                        filehash = hashlib.md5(f.read()).hexdigest()\n",
+    "#             for index, filename in enumerate(image_list):\n",
+    "#                 file_path = os.path.join(self.current_path, filename)\n",
     "\n",
-    "                    filename_hash[filename] = filehash\n",
+    "#                 if os.path.isfile(file_path):\n",
+    "#                     with open(file_path, \"rb\") as f:\n",
+    "#                         filehash = hashlib.md5(f.read()).hexdigest()\n",
     "\n",
-    "                    if filehash not in hash_keys:\n",
-    "                        hash_keys[filehash] = index\n",
-    "                        \n",
-    "                    else:\n",
-    "                        self.remove_filename_list.append(filename)\n",
+    "#                     filename_hash[filename] = filehash\n",
     "\n",
-    "            set_hash = set(filename_hash.values())\n",
+    "#                     if filehash not in hash_keys:\n",
+    "#                         hash_keys[filehash] = index\n",
     "\n",
-    "            for h in set_hash:\n",
-    "                duplicate_group[h] = [k for k in filename_hash.keys() if filename_hash[k] == h]\n",
+    "#                     else:\n",
+    "#                         self.remove_filename_list.append(filename)\n",
     "\n",
-    "            for val in duplicate_group.values():\n",
-    "                if len(val) > 1:\n",
-    "                    self.similar_group_dict[num] = val\n",
-    "                    num = num + 1\n",
+    "#             set_hash = set(filename_hash.values())\n",
     "\n",
-    "            ############\n",
-    "            # print\n",
+    "#             for h in set_hash:\n",
+    "#                 duplicate_group[h] = [\n",
+    "#                     k for k in filename_hash.keys() if filename_hash[k] == h\n",
+    "#                 ]\n",
     "\n",
-    "            num_duplicate = len(self.remove_filename_list)\n",
-    "            num_all = len(filename_hash)\n",
-    "            percentage = np.round(num_duplicate/num_all * 100, 2)\n",
+    "#             for val in duplicate_group.values():\n",
+    "#                 if len(val) > 1:\n",
+    "#                     self.similar_group_dict[num] = val\n",
+    "#                     num = num + 1\n",
     "\n",
-    "            print('There are {} duplicated images from {} images which is around {} %.'.format(num_duplicate, num_all,percentage))\n",
+    "#             ############\n",
+    "#             # print\n",
     "\n",
-    "            return self.remove_filename_list, self.similar_group_dict\n",
+    "#             num_duplicate = len(self.remove_filename_list)\n",
+    "#             num_all = len(filename_hash)\n",
+    "#             percentage = np.round(num_duplicate / num_all * 100, 2)\n",
     "\n",
-    "        ###########################\n",
+    "#             print(\n",
+    "#                 \"There are {} duplicated images from {} images which is around {} %.\".format(\n",
+    "#                     num_duplicate, num_all, percentage\n",
+    "#                 )\n",
+    "#             )\n",
     "\n",
-    "        if mode == 'similar':\n",
-    "            \n",
-    "            temp_filename_hash = dict()\n",
-    "            temp_filename_list = []\n",
-    "            self.remove_filename_list = []\n",
-    "\n",
-    "            print('The accepted distance is {}'.format(distance))\n",
-    "            \n",
-    "            ############\n",
-    "            # Find phash\n",
-    "            for filename in image_list:\n",
-    "\n",
-    "                file_path = os.path.join(self.current_path, filename)\n",
-    "                \n",
-    "                if os.path.isfile(file_path):\n",
-    "                    image_file = Image.open(file_path)                        \n",
-    "                    phash = imagehash.phash(image_file, hash_size = phash_size)\n",
-    "                    filename_hash[filename] = phash\n",
-    "                    temp_filename_hash[filename] = phash\n",
-    "            \n",
-    "            ############        \n",
-    "            # Find similarity between image using hamming distance (of phash)\n",
-    "            \n",
-    "            sort_filename_hash = sorted(filename_hash)\n",
-    "            \n",
-    "            for file_first in sort_filename_hash:\n",
-    "                \n",
-    "                if file_first in temp_filename_hash:\n",
-    "                \n",
-    "                    temp_similar_list = []\n",
-    "                    temp_similar_list.append(file_first)\n",
-    "                    temp_filename_list.append(file_first)\n",
-    "                    temp_filename_hash.pop(file_first)\n",
+    "#             return self.remove_filename_list, self.similar_group_dict\n",
     "\n",
-    "                    image_first = filename_hash[file_first]\n",
-    "                \n",
-    "                for file_second in sort_filename_hash:\n",
-    "                    \n",
-    "                    if file_second not in temp_filename_list:\n",
-    "                        \n",
-    "                        image_second = filename_hash[file_second]\n",
-    "                        \n",
-    "                        hamming_distance = image_first - image_second\n",
-    "                        \n",
-    "                        if hamming_distance <= distance:\n",
-    "                            temp_similar_list.append(file_second)\n",
-    "                            temp_filename_list.append(file_second)\n",
+    "#         ###########################\n",
     "\n",
-    "                if len(temp_similar_list) > 1:\n",
-    "                    self.similar_group_dict[num] = temp_similar_list\n",
+    "#         if mode == \"similar\":\n",
+    "#             temp_filename_hash = dict()\n",
+    "#             temp_filename_list = []\n",
+    "#             self.remove_filename_list = []\n",
     "\n",
-    "                    for _item in temp_similar_list[1:]:\n",
-    "                        self.remove_filename_list.append(_item)\n",
+    "#             print(\"The accepted distance is {}\".format(distance))\n",
     "\n",
-    "                    num = num + 1\n",
-    "            \n",
-    "            ############\n",
-    "            # print\n",
-    "\n",
-    "            num_duplicate = len(self.remove_filename_list)\n",
-    "            num_all = len(filename_hash)\n",
-    "            percentage = np.round(num_duplicate/num_all * 100, 2)\n",
-    "\n",
-    "            print('There are {} similar images in distance from {} images which is around {} %.'.format(num_duplicate, num_all,percentage))\n",
-    "\n",
-    "            return self.remove_filename_list, self.similar_group_dict\n",
-    "\n",
-    "    #################################################################\n",
-    "    # Get           \n",
-    "    def get(self):\n",
-    "            \n",
-    "        return self.similar_group_dict, self.remove_filename_list\n",
-    "            \n",
-    "\n",
-    "    #################################################################\n",
-    "    # Show    \n",
-    "    def show(self, max_sample_case = 1, max_sample_each_case = 1, figsize = (20,20)):\n",
-    "        \n",
-    "        try:\n",
-    "            get_ipython\n",
-    "\n",
-    "            # nrow = 3\n",
-    "            # ncol = 3\n",
-    "\n",
-    "            # fig, axs = plt.subplots(5, 5, figsize = figsize)\n",
-    "\n",
-    "            for key_group in self.similar_group_dict:\n",
-    "                for filename in self.similar_group_dict[keygroup]:\n",
-    "                    file_path = os.path.join(self.current_path, filename)\n",
-    "                    image = Image.open(os.path.join(file_path))\n",
-    "        \n",
-    "                    # col = -1\n",
-    "\n",
-    "                    # row = index%5\n",
-    "                    # if row == 0:\n",
-    "                    #     col = col + 1\n",
-    "                    # axs[row,col].imshow(np.array(image))\n",
-    "                    # axs[row,col].set_title('Predict as {}, Actual {}'.format(wrong[0], right))\n",
-    "                    # axs[row,col].grid(False)\n",
-    "\n",
-    "            plt.show()\n",
-    "\n",
-    "        except:\n",
-    "            print('Please run it in notebook')\n",
-    "\n",
-    "    #################################################################\n",
-    "    # Remove    \n",
-    "    def remove_in_folder(self):\n",
-    "        for filename in self.remove_filename_list:\n",
-    "            file_path = os.path.join(self.current_path, filename)\n",
-    "            os.remove(file_path)\n",
-    "\n",
-    "# Credit: https://medium.com/@urvisoni/removing-duplicate-images-through-python-23c5fdc7479e"
+    "#             ############\n",
+    "#             # Find phash\n",
+    "#             for filename in image_list:\n",
+    "#                 file_path = os.path.join(self.current_path, filename)\n",
+    "\n",
+    "#                 if os.path.isfile(file_path):\n",
+    "#                     image_file = Image.open(file_path)\n",
+    "#                     phash = imagehash.phash(image_file, hash_size=phash_size)\n",
+    "#                     filename_hash[filename] = phash\n",
+    "#                     temp_filename_hash[filename] = phash\n",
+    "\n",
+    "#             ############\n",
+    "#             # Find similarity between image using hamming distance (of phash)\n",
+    "\n",
+    "#             sort_filename_hash = sorted(filename_hash)\n",
+    "\n",
+    "#             for file_first in sort_filename_hash:\n",
+    "#                 if file_first in temp_filename_hash:\n",
+    "#                     temp_similar_list = []\n",
+    "#                     temp_similar_list.append(file_first)\n",
+    "#                     temp_filename_list.append(file_first)\n",
+    "#                     temp_filename_hash.pop(file_first)\n",
+    "\n",
+    "#                     image_first = filename_hash[file_first]\n",
+    "\n",
+    "#                 for file_second in sort_filename_hash:\n",
+    "#                     if file_second not in temp_filename_list:\n",
+    "#                         image_second = filename_hash[file_second]\n",
+    "\n",
+    "#                         hamming_distance = image_first - image_second\n",
+    "\n",
+    "#                         if hamming_distance <= distance:\n",
+    "#                             temp_similar_list.append(file_second)\n",
+    "#                             temp_filename_list.append(file_second)\n",
+    "\n",
+    "#                 if len(temp_similar_list) > 1:\n",
+    "#                     self.similar_group_dict[num] = temp_similar_list\n",
+    "\n",
+    "#                     for _item in temp_similar_list[1:]:\n",
+    "#                         self.remove_filename_list.append(_item)\n",
+    "\n",
+    "#                     num = num + 1\n",
+    "\n",
+    "#             ############\n",
+    "#             # print\n",
+    "\n",
+    "#             num_duplicate = len(self.remove_filename_list)\n",
+    "#             num_all = len(filename_hash)\n",
+    "#             percentage = np.round(num_duplicate / num_all * 100, 2)\n",
+    "\n",
+    "#             print(\n",
+    "#                 \"There are {} similar images in distance from {} images which is around {} %.\".format(\n",
+    "#                     num_duplicate, num_all, percentage\n",
+    "#                 )\n",
+    "#             )\n",
+    "\n",
+    "#             return self.remove_filename_list, self.similar_group_dict\n",
+    "\n",
+    "#     #################################################################\n",
+    "#     # Get\n",
+    "#     def get(self):\n",
+    "#         return self.similar_group_dict, self.remove_filename_list\n",
+    "\n",
+    "#     #################################################################\n",
+    "#     # Show\n",
+    "#     def show(self, max_sample_case=1, max_sample_each_case=1, figsize=(20, 20)):\n",
+    "#         try:\n",
+    "#             get_ipython\n",
+    "\n",
+    "#             # nrow = 3\n",
+    "#             # ncol = 3\n",
+    "\n",
+    "#             # fig, axs = plt.subplots(5, 5, figsize = figsize)\n",
+    "\n",
+    "#             for key_group in self.similar_group_dict:\n",
+    "#                 for filename in self.similar_group_dict[keygroup]:\n",
+    "#                     file_path = os.path.join(self.current_path, filename)\n",
+    "#                     image = Image.open(os.path.join(file_path))\n",
+    "\n",
+    "#                     # col = -1\n",
+    "\n",
+    "#                     # row = index%5\n",
+    "#                     # if row == 0:\n",
+    "#                     #     col = col + 1\n",
+    "#                     # axs[row,col].imshow(np.array(image))\n",
+    "#                     # axs[row,col].set_title('Predict as {}, Actual {}'.format(wrong[0], right))\n",
+    "#                     # axs[row,col].grid(False)\n",
+    "\n",
+    "#             plt.show()\n",
+    "\n",
+    "#         except:\n",
+    "#             print(\"Please run it in notebook\")\n",
+    "\n",
+    "#     #################################################################\n",
+    "#     # Remove\n",
+    "#     def remove_in_folder(self):\n",
+    "#         for filename in self.remove_filename_list:\n",
+    "#             file_path = os.path.join(self.current_path, filename)\n",
+    "#             os.remove(file_path)\n",
+    "\n",
+    "\n",
+    "# # Credit: https://medium.com/@urvisoni/removing-duplicate-images-through-python-23c5fdc7479e"
    ]
   },
   {
@@ -444,7 +473,7 @@
    },
    "outputs": [],
    "source": [
-    "pic_path = './image_data'"
+    "pic_path = \"./image_data\""
    ]
   },
   {
@@ -498,7 +527,9 @@
     "%%time\n",
     "\n",
     "sim_dups = ImageDuplicate(pic_path)\n",
-    "similar_group_dict, duplicate_list, non_duplicate_list = sim_dups.find_similar(hash_method = 'phash', distance = 20, hash_size = 16)"
+    "similar_group_dict, duplicate_list, non_duplicate_list = sim_dups.find_similar(\n",
+    "    hash_method=\"phash\", distance=20, hash_size=16\n",
+    ")"
    ]
   },
   {
@@ -528,7 +559,7 @@
     }
    ],
    "source": [
-    "ShowImageDuplicate(pic_path,similar_group_dict).show_all()"
+    "ShowImageDuplicate(pic_path, similar_group_dict).show_all()"
    ]
   },
   {
@@ -558,7 +589,7 @@
     }
    ],
    "source": [
-    "ShowImageDuplicate(pic_path,similar_group_dict).show_group(2)"
+    "ShowImageDuplicate(pic_path, similar_group_dict).show_group(2)"
    ]
   },
   {
@@ -579,7 +610,7 @@
    },
    "outputs": [],
    "source": [
-    "my_dup = duplicate(pic_path)"
+    "# my_dup = duplicate(pic_path)"
    ]
   },
   {
@@ -603,9 +634,9 @@
     }
    ],
    "source": [
-    "%%time\n",
+    "# %%time\n",
     "\n",
-    "remove_list, similar_group = my_dup.find(mode = 'exact')"
+    "# remove_list, similar_group = my_dup.find(mode=\"exact\")"
    ]
   },
   {
@@ -630,7 +661,7 @@
     }
    ],
    "source": [
-    "remove_list"
+    "# remove_list"
    ]
   },
   {
@@ -657,7 +688,7 @@
     }
    ],
    "source": [
-    "similar_group"
+    "# similar_group"
    ]
   },
   {
@@ -682,9 +713,9 @@
     }
    ],
    "source": [
-    "%%time\n",
+    "# %%time\n",
     "\n",
-    "remove_list, similar_group = my_dup.find(mode = 'similar', distance = 0)"
+    "# remove_list, similar_group = my_dup.find(mode=\"similar\", distance=0)"
    ]
   },
   {
@@ -704,7 +735,7 @@
     }
    ],
    "source": [
-    "remove_list"
+    "# remove_list"
    ]
   },
   {
@@ -726,7 +757,7 @@
     }
    ],
    "source": [
-    "similar_group"
+    "# similar_group"
    ]
   },
   {
@@ -749,7 +780,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pic_path = './image_data'"
+    "pic_path = \"./image_data\""
    ]
   },
   {
@@ -819,7 +850,7 @@
     }
    ],
    "source": [
-    "ShowImageDuplicate(pic_path,similar_group_dict).show_all()"
+    "ShowImageDuplicate(pic_path, similar_group_dict).show_all()"
    ]
   }
  ],
diff --git a/examples/images/grouping.ipynb b/examples/images/grouping.ipynb
new file mode 100644
index 0000000..a610084
--- /dev/null
+++ b/examples/images/grouping.ipynb
@@ -0,0 +1,19 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/text/cleansing.ipynb b/examples/text/cleansing.ipynb
index 31280db..0c07cc3 100644
--- a/examples/text/cleansing.ipynb
+++ b/examples/text/cleansing.ipynb
@@ -36,7 +36,7 @@
     "    text: python string.\r\n",
     "Returns:\r\n",
     "    A python string.\r\n",
-    "\"\"\""
+    "\"\"\"\r"
    ]
   },
   {
@@ -47,67 +47,203 @@
    "source": [
     "import re\r\n",
     "\r\n",
-    "class TextCleansing():\r\n",
+    "\r\n",
+    "class TextCleansing:\r\n",
     "    def http_https(text: str) -> str:\r\n",
-    "        text = re.sub(r'https\\S+', '', str(text))\r\n",
-    "        text = re.sub(r'http\\S+', '', str(text))\r\n",
+    "        text = re.sub(r\"https\\S+\", \"\", str(text))\r\n",
+    "        text = re.sub(r\"http\\S+\", \"\", str(text))\r\n",
     "        return text\r\n",
     "\r\n",
     "    # Remove new line (\\n) and tab space (\\t)\r\n",
     "    def new_line(text: str) -> str:\r\n",
-    "        text = str(text).replace('\\n',' ')\r\n",
+    "        text = str(text).replace(\"\\n\", \" \")\r\n",
     "        return text\r\n",
     "\r\n",
     "    def tab_space(text: str) -> str:\r\n",
-    "        text = str(text).replace('\\t',' ')\r\n",
+    "        text = str(text).replace(\"\\t\", \" \")\r\n",
     "        return text\r\n",
     "\r\n",
     "    # Remove hashtag and line@ id\r\n",
     "    def hashtag(text: str) -> str:\r\n",
-    "        text = re.sub(r'#[A-Za-z0-9ก-๙]+', ' ', str(text))\r\n",
-    "        text = re.sub(r'@[A-Za-z0-9ก-๙]+', ' ', str(text))\r\n",
+    "        text = re.sub(r\"#[A-Za-z0-9ก-๙]+\", \" \", str(text))\r\n",
+    "        text = re.sub(r\"@[A-Za-z0-9ก-๙]+\", \" \", str(text))\r\n",
     "        return text\r\n",
     "\r\n",
     "    # Clean Symbol\r\n",
-    "    def punctuation(text:str, except_punct:list=[]) -> str:\r\n",
-    "        puncts = [',', '\"', ':', ')', '(', '-', '!', '?', '|', ';', \"'\", '$', '&', '[', ']', '>', '%', '=', '#', '*', '+', '\\\\', '•',  '~', '@', '£',\r\n",
-    "            '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', '\\xa0', '\\t',\r\n",
-    "            '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', '\\u3000', '\\u202f',\r\n",
-    "            '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '«',\r\n",
-    "            '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', '•', '!']\r\n",
+    "    def punctuation(text: str, except_punct: list = []) -> str:\r\n",
+    "        puncts = [\r\n",
+    "            \",\",\r\n",
+    "            '\"',\r\n",
+    "            \":\",\r\n",
+    "            \")\",\r\n",
+    "            \"(\",\r\n",
+    "            \"-\",\r\n",
+    "            \"!\",\r\n",
+    "            \"?\",\r\n",
+    "            \"|\",\r\n",
+    "            \";\",\r\n",
+    "            \"'\",\r\n",
+    "            \"$\",\r\n",
+    "            \"&\",\r\n",
+    "            \"[\",\r\n",
+    "            \"]\",\r\n",
+    "            \">\",\r\n",
+    "            \"%\",\r\n",
+    "            \"=\",\r\n",
+    "            \"#\",\r\n",
+    "            \"*\",\r\n",
+    "            \"+\",\r\n",
+    "            \"\\\\\",\r\n",
+    "            \"•\",\r\n",
+    "            \"~\",\r\n",
+    "            \"@\",\r\n",
+    "            \"£\",\r\n",
+    "            \"·\",\r\n",
+    "            \"_\",\r\n",
+    "            \"{\",\r\n",
+    "            \"}\",\r\n",
+    "            \"©\",\r\n",
+    "            \"^\",\r\n",
+    "            \"®\",\r\n",
+    "            \"`\",\r\n",
+    "            \"<\",\r\n",
+    "            \"→\",\r\n",
+    "            \"°\",\r\n",
+    "            \"€\",\r\n",
+    "            \"™\",\r\n",
+    "            \"›\",\r\n",
+    "            \"♥\",\r\n",
+    "            \"←\",\r\n",
+    "            \"×\",\r\n",
+    "            \"§\",\r\n",
+    "            \"″\",\r\n",
+    "            \"′\",\r\n",
+    "            \"Â\",\r\n",
+    "            \"█\",\r\n",
+    "            \"½\",\r\n",
+    "            \"à\",\r\n",
+    "            \"…\",\r\n",
+    "            \"\\xa0\",\r\n",
+    "            \"\\t\",\r\n",
+    "            \"“\",\r\n",
+    "            \"★\",\r\n",
+    "            \"”\",\r\n",
+    "            \"–\",\r\n",
+    "            \"●\",\r\n",
+    "            \"â\",\r\n",
+    "            \"►\",\r\n",
+    "            \"−\",\r\n",
+    "            \"¢\",\r\n",
+    "            \"²\",\r\n",
+    "            \"¬\",\r\n",
+    "            \"░\",\r\n",
+    "            \"¶\",\r\n",
+    "            \"↑\",\r\n",
+    "            \"±\",\r\n",
+    "            \"¿\",\r\n",
+    "            \"▾\",\r\n",
+    "            \"═\",\r\n",
+    "            \"¦\",\r\n",
+    "            \"║\",\r\n",
+    "            \"―\",\r\n",
+    "            \"¥\",\r\n",
+    "            \"▓\",\r\n",
+    "            \"—\",\r\n",
+    "            \"‹\",\r\n",
+    "            \"─\",\r\n",
+    "            \"\\u3000\",\r\n",
+    "            \"\\u202f\",\r\n",
+    "            \"▒\",\r\n",
+    "            \"：\",\r\n",
+    "            \"¼\",\r\n",
+    "            \"⊕\",\r\n",
+    "            \"▼\",\r\n",
+    "            \"▪\",\r\n",
+    "            \"†\",\r\n",
+    "            \"■\",\r\n",
+    "            \"’\",\r\n",
+    "            \"▀\",\r\n",
+    "            \"¨\",\r\n",
+    "            \"▄\",\r\n",
+    "            \"♫\",\r\n",
+    "            \"☆\",\r\n",
+    "            \"é\",\r\n",
+    "            \"¯\",\r\n",
+    "            \"♦\",\r\n",
+    "            \"¤\",\r\n",
+    "            \"▲\",\r\n",
+    "            \"è\",\r\n",
+    "            \"¸\",\r\n",
+    "            \"¾\",\r\n",
+    "            \"Ã\",\r\n",
+    "            \"⋅\",\r\n",
+    "            \"‘\",\r\n",
+    "            \"∞\",\r\n",
+    "            \"«\",\r\n",
+    "            \"∙\",\r\n",
+    "            \"）\",\r\n",
+    "            \"↓\",\r\n",
+    "            \"、\",\r\n",
+    "            \"│\",\r\n",
+    "            \"（\",\r\n",
+    "            \"»\",\r\n",
+    "            \"，\",\r\n",
+    "            \"♪\",\r\n",
+    "            \"╩\",\r\n",
+    "            \"╚\",\r\n",
+    "            \"³\",\r\n",
+    "            \"・\",\r\n",
+    "            \"╦\",\r\n",
+    "            \"╣\",\r\n",
+    "            \"╔\",\r\n",
+    "            \"╗\",\r\n",
+    "            \"▬\",\r\n",
+    "            \"❤\",\r\n",
+    "            \"ï\",\r\n",
+    "            \"Ø\",\r\n",
+    "            \"¹\",\r\n",
+    "            \"≤\",\r\n",
+    "            \"‡\",\r\n",
+    "            \"√\",\r\n",
+    "            \"•\",\r\n",
+    "            \"!\",\r\n",
+    "        ]\r\n",
     "\r\n",
     "        final_puncts = [ele for ele in puncts if ele not in except_punct]\r\n",
     "\r\n",
     "        for punct in final_puncts:\r\n",
-    "            text = text.replace(punct,' ')\r\n",
+    "            text = text.replace(punct, \" \")\r\n",
     "        return text\r\n",
     "\r\n",
     "    # Remove emoji\r\n",
     "    def emoji(text) -> str:\r\n",
-    "        emoj = re.compile(\"[\"\r\n",
-    "            u\"\\U0001F600-\\U0001F64F\"  # emoticons\r\n",
-    "            u\"\\U0001F300-\\U0001F5FF\"  # symbols & pictographs\r\n",
-    "            u\"\\U0001F680-\\U0001F6FF\"  # transport & map symbols\r\n",
-    "            u\"\\U0001F1E0-\\U0001F1FF\"  # flags (iOS)\r\n",
-    "            u\"\\U00002500-\\U00002BEF\"  # chinese char\r\n",
-    "            u\"\\U00002702-\\U000027B0\"\r\n",
-    "            u\"\\U00002702-\\U000027B0\"\r\n",
-    "            u\"\\U000024C2-\\U0001F251\"\r\n",
-    "            u\"\\U0001f926-\\U0001f937\"\r\n",
-    "            u\"\\U00010000-\\U0010ffff\"\r\n",
-    "            u\"\\u2640-\\u2642\" \r\n",
-    "            u\"\\u2600-\\u2B55\"\r\n",
-    "            u\"\\u200d\"\r\n",
-    "            u\"\\u23cf\"\r\n",
-    "            u\"\\u23e9\"\r\n",
-    "            u\"\\u231a\"\r\n",
-    "            u\"\\ufe0f\"  # dingbats\r\n",
-    "            u\"\\u3030\"\r\n",
-    "                        \"]+\", re.UNICODE)\r\n",
-    "        return re.sub(emoj, ' ', text)\r\n",
+    "        emoj = re.compile(\r\n",
+    "            \"[\"\r\n",
+    "            \"\\U0001f600-\\U0001f64f\"  # emoticons\r\n",
+    "            \"\\U0001f300-\\U0001f5ff\"  # symbols & pictographs\r\n",
+    "            \"\\U0001f680-\\U0001f6ff\"  # transport & map symbols\r\n",
+    "            \"\\U0001f1e0-\\U0001f1ff\"  # flags (iOS)\r\n",
+    "            \"\\U00002500-\\U00002bef\"  # chinese char\r\n",
+    "            \"\\U00002702-\\U000027b0\"\r\n",
+    "            \"\\U00002702-\\U000027b0\"\r\n",
+    "            \"\\U000024c2-\\U0001f251\"\r\n",
+    "            \"\\U0001f926-\\U0001f937\"\r\n",
+    "            \"\\U00010000-\\U0010ffff\"\r\n",
+    "            \"\\u2640-\\u2642\"\r\n",
+    "            \"\\u2600-\\u2b55\"\r\n",
+    "            \"\\u200d\"\r\n",
+    "            \"\\u23cf\"\r\n",
+    "            \"\\u23e9\"\r\n",
+    "            \"\\u231a\"\r\n",
+    "            \"\\ufe0f\"  # dingbats\r\n",
+    "            \"\\u3030\"\r\n",
+    "            \"]+\",\r\n",
+    "            re.UNICODE,\r\n",
+    "        )\r\n",
+    "        return re.sub(emoj, \" \", text)\r\n",
     "\r\n",
     "    def redundant_space(text) -> str:\r\n",
-    "        return ' '.join(text.split())"
+    "        return \" \".join(text.split())\r"
    ]
   },
   {
@@ -145,7 +281,7 @@
    "source": [
     "my_text = 'ร้าน\\n\\n\\n\\n\\n\\n   #ของมันต้องมี   \\t2.เราจะประกาศผลผู้โชคดีภายใน 30 กันยายน นี้ (โดยการ inbox กลับไป)\\n      3.ใช้ได้ทั้งมากินที่ร้านหรือให้ไปส่งที่บ้านก็ได้ (ไม่รวมค่าส่ง)      **ร้านเปิด 11.00-23.00 (ครัวปิด 22.00)**   \\n---------------------------------------------------------   \\nสำหรับ Delivery \\n\\t👇วิธีการสั่ง👇   📱สั่งผ่าน Lineman ได้เลยนะครับ หาคำว่า \"คนมันกุ้ง\" ง่ายๆอิ่มอร่อยสบายอยุ่บ้านได้เลยจ้า    หรือจะโทร ไลน์ ผ่านให้ทางร้านจัดการให้ก็ได้ครับ ** รับออเดอร์ 11.00 - 22.00 เท่านั้นนะครับ **   \\n---------------------------------------------------------   \\n🦐 Follow us 🦐   Line :     Facebook : konmunkung   โทร : 064 414 7844      แผนที่ร้าน : ร้านอยู่ในโครงการ Tree square ทาวน์ อิน ทาวน์   📌📌https://goo.gl/maps/DXTAh5Z4jds                 '\r\n",
     "\r\n",
-    "print(my_text)"
+    "print(my_text)\r"
    ]
   },
   {
@@ -163,7 +299,7 @@
     }
    ],
    "source": [
-    "my_text"
+    "my_text\r"
    ]
   },
   {
@@ -181,7 +317,7 @@
     }
    ],
    "source": [
-    "TextCleansing.http_https(my_text)"
+    "TextCleansing.http_https(my_text)\r"
    ]
   },
   {
@@ -199,7 +335,7 @@
     }
    ],
    "source": [
-    "TextCleansing.new_line(my_text)"
+    "TextCleansing.new_line(my_text)\r"
    ]
   },
   {
@@ -217,7 +353,7 @@
     }
    ],
    "source": [
-    "TextCleansing.new_line(my_text)"
+    "TextCleansing.new_line(my_text)\r"
    ]
   },
   {
@@ -235,7 +371,7 @@
     }
    ],
    "source": [
-    "TextCleansing.tab_space(my_text)"
+    "TextCleansing.tab_space(my_text)\r"
    ]
   },
   {
@@ -253,7 +389,7 @@
     }
    ],
    "source": [
-    "TextCleansing.hashtag(my_text)"
+    "TextCleansing.hashtag(my_text)\r"
    ]
   },
   {
@@ -271,7 +407,7 @@
     }
    ],
    "source": [
-    "TextCleansing.punctuation(my_text, except_punct=['('])"
+    "TextCleansing.punctuation(my_text, except_punct=[\"(\"])\r"
    ]
   },
   {
@@ -289,7 +425,7 @@
     }
    ],
    "source": [
-    "TextCleansing.emoji(my_text)"
+    "TextCleansing.emoji(my_text)\r"
    ]
   },
   {
@@ -307,7 +443,7 @@
     }
    ],
    "source": [
-    "TextCleansing.redundant_space(my_text)"
+    "TextCleansing.redundant_space(my_text)\r"
    ]
   },
   {
@@ -323,7 +459,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from yellowduck.preprocessing.text import TextCleansing"
+    "from yellowduck.preprocessing.text import TextCleansing\r"
    ]
   },
   {
@@ -354,7 +490,7 @@
    "source": [
     "text = 'ร้าน\\n\\n\\n\\n\\n\\n   #ของมันต้องมี   \\t2.เราจะประกาศผลผู้โชคดีภายใน 30 กันยายน นี้ (โดยการ inbox กลับไป)\\n      3.ใช้ได้ทั้งมากินที่ร้านหรือให้ไปส่งที่บ้านก็ได้ (ไม่รวมค่าส่ง)      **ร้านเปิด 11.00-23.00 (ครัวปิด 22.00)**   \\n---------------------------------------------------------   \\nสำหรับ Delivery \\n\\t👇วิธีการสั่ง👇   📱สั่งผ่าน Lineman ได้เลยนะครับ หาคำว่า \"คนมันกุ้ง\" ง่ายๆอิ่มอร่อยสบายอยุ่บ้านได้เลยจ้า    หรือจะโทร ไลน์ ผ่านให้ทางร้านจัดการให้ก็ได้ครับ ** รับออเดอร์ 11.00 - 22.00 เท่านั้นนะครับ **   \\n---------------------------------------------------------   \\n🦐 Follow us 🦐   Line :     Facebook : konmunkung   โทร : 064 414 7844      แผนที่ร้าน : ร้านอยู่ในโครงการ Tree square ทาวน์ อิน ทาวน์   📌📌https://goo.gl/maps/DXTAh5Z4jds                 '\r\n",
     "\r\n",
-    "print(text)"
+    "print(text)\r"
    ]
   },
   {
@@ -372,7 +508,7 @@
     }
    ],
    "source": [
-    "TextCleansing.http_https(text)\r\n"
+    "TextCleansing.http_https(text)\r"
    ]
   },
   {
@@ -390,7 +526,7 @@
     }
    ],
    "source": [
-    "TextCleansing.new_line(text)"
+    "TextCleansing.new_line(text)\r"
    ]
   },
   {
@@ -408,7 +544,7 @@
     }
    ],
    "source": [
-    "TextCleansing.tab_space(text)\r\n"
+    "TextCleansing.tab_space(text)\r"
    ]
   },
   {
@@ -426,7 +562,7 @@
     }
    ],
    "source": [
-    "TextCleansing.hashtag(text)\r\n"
+    "TextCleansing.hashtag(text)\r"
    ]
   },
   {
@@ -444,7 +580,7 @@
     }
    ],
    "source": [
-    "TextCleansing.punctuation(text)\r\n"
+    "TextCleansing.punctuation(text)\r"
    ]
   },
   {
@@ -462,7 +598,7 @@
     }
    ],
    "source": [
-    "TextCleansing.emoji(text)\r\n"
+    "TextCleansing.emoji(text)\r"
    ]
   },
   {
@@ -480,7 +616,7 @@
     }
    ],
    "source": [
-    "TextCleansing.redundant_space(text)"
+    "TextCleansing.redundant_space(text)\r"
    ]
   }
  ],
@@ -500,4 +636,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/examples/text/grouping.ipynb b/examples/text/grouping.ipynb
index 6b4f0c6..37d1110 100644
--- a/examples/text/grouping.ipynb
+++ b/examples/text/grouping.ipynb
@@ -42,13 +42,30 @@
     }
    ],
    "source": [
-    "text_data = ['กระเทย','กะเทย','อินเตอร์เน็ต','อินเทอร์เน็ต',\n",
-    "            'กระเพรา','กะเพรา','กระทันหัน','กะทันหัน',\n",
-    "            'แกงกระหรี่','แกงกะหรี่','ปะแป้ง','ประแป้ง',\n",
-    "            'ปลากระพง','ปลากะพง','ไอศครีม','ไอศกรีม',\n",
-    "            'ริดรอน','ลิดรอน','บุคคลากร','บุคลากร']\n",
+    "text_data = [\n",
+    "    \"กระเทย\",\n",
+    "    \"กะเทย\",\n",
+    "    \"อินเตอร์เน็ต\",\n",
+    "    \"อินเทอร์เน็ต\",\n",
+    "    \"กระเพรา\",\n",
+    "    \"กะเพรา\",\n",
+    "    \"กระทันหัน\",\n",
+    "    \"กะทันหัน\",\n",
+    "    \"แกงกระหรี่\",\n",
+    "    \"แกงกะหรี่\",\n",
+    "    \"ปะแป้ง\",\n",
+    "    \"ประแป้ง\",\n",
+    "    \"ปลากระพง\",\n",
+    "    \"ปลากะพง\",\n",
+    "    \"ไอศครีม\",\n",
+    "    \"ไอศกรีม\",\n",
+    "    \"ริดรอน\",\n",
+    "    \"ลิดรอน\",\n",
+    "    \"บุคคลากร\",\n",
+    "    \"บุคลากร\",\n",
+    "]\n",
     "\n",
-    "TextGrouping(text_data, distance = 2, minimum_members = 2).get_group()"
+    "TextGrouping(text_data, distance=2, minimum_members=2).get_group()"
    ]
   },
   {
@@ -83,13 +100,30 @@
     }
    ],
    "source": [
-    "text_data = ['กระเทย','กะเทย','อินเตอร์เน็ต','อินเทอร์เน็ต',\n",
-    "            'กระเพรา','กะเพรา','กระทันหัน','กะทันหัน',\n",
-    "            'แกงกระหรี่','แกงกะหรี่','ปะแป้ง','ประแป้ง',\n",
-    "            'ปลากระพง','ปลากะพง','ไอศครีม','ไอศกรีม',\n",
-    "            'ริดรอน','ลิดรอน','บุคคลากร','บุคลากร']\n",
+    "text_data = [\n",
+    "    \"กระเทย\",\n",
+    "    \"กะเทย\",\n",
+    "    \"อินเตอร์เน็ต\",\n",
+    "    \"อินเทอร์เน็ต\",\n",
+    "    \"กระเพรา\",\n",
+    "    \"กะเพรา\",\n",
+    "    \"กระทันหัน\",\n",
+    "    \"กะทันหัน\",\n",
+    "    \"แกงกระหรี่\",\n",
+    "    \"แกงกะหรี่\",\n",
+    "    \"ปะแป้ง\",\n",
+    "    \"ประแป้ง\",\n",
+    "    \"ปลากระพง\",\n",
+    "    \"ปลากะพง\",\n",
+    "    \"ไอศครีม\",\n",
+    "    \"ไอศกรีม\",\n",
+    "    \"ริดรอน\",\n",
+    "    \"ลิดรอน\",\n",
+    "    \"บุคคลากร\",\n",
+    "    \"บุคลากร\",\n",
+    "]\n",
     "\n",
-    "TextGrouping(text_data, distance = 4, minimum_members = 2).get_group()"
+    "TextGrouping(text_data, distance=4, minimum_members=2).get_group()"
    ]
   },
   {
@@ -124,18 +158,30 @@
     }
    ],
    "source": [
-    "text_data = ['กงกรรมกงเกวียน','กงเกวียนกำเกวียน',\n",
-    "            'เลือดกลบปาก','เลือดกบปาก',\n",
-    "            'ผีซ้ำด้ามพลอย','ผีซ้ำด้ำพลอย',\n",
-    "            'พิธีรีตรอง','พิธีรีตอง',\n",
-    "            'ต่าง ๆ นา ๆ','ต่าง ๆ นานา',\n",
-    "            'นานาพันธุ์','นานาพรรณ',\n",
-    "            'ผลัดวันประกันพรุ่ง','ผัดวันประกันพรุ่ง',\n",
-    "            'แก้ผ้าเอาหน้ารอด','ขายผ้าเอาหน้ารอด',\n",
-    "            'แปรพรรค','แปรพักตร์',\n",
-    "            'ลูกเด็กเล็กแดง','ลูกเล็กเด็กแดง',]\n",
+    "text_data = [\n",
+    "    \"กงกรรมกงเกวียน\",\n",
+    "    \"กงเกวียนกำเกวียน\",\n",
+    "    \"เลือดกลบปาก\",\n",
+    "    \"เลือดกบปาก\",\n",
+    "    \"ผีซ้ำด้ามพลอย\",\n",
+    "    \"ผีซ้ำด้ำพลอย\",\n",
+    "    \"พิธีรีตรอง\",\n",
+    "    \"พิธีรีตอง\",\n",
+    "    \"ต่าง ๆ นา ๆ\",\n",
+    "    \"ต่าง ๆ นานา\",\n",
+    "    \"นานาพันธุ์\",\n",
+    "    \"นานาพรรณ\",\n",
+    "    \"ผลัดวันประกันพรุ่ง\",\n",
+    "    \"ผัดวันประกันพรุ่ง\",\n",
+    "    \"แก้ผ้าเอาหน้ารอด\",\n",
+    "    \"ขายผ้าเอาหน้ารอด\",\n",
+    "    \"แปรพรรค\",\n",
+    "    \"แปรพักตร์\",\n",
+    "    \"ลูกเด็กเล็กแดง\",\n",
+    "    \"ลูกเล็กเด็กแดง\",\n",
+    "]\n",
     "\n",
-    "TextGrouping(text_data, distance = 5, minimum_members = 2).get_group()"
+    "TextGrouping(text_data, distance=5, minimum_members=2).get_group()"
    ]
   },
   {
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..6837283
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+pre-commit==3.7.0
diff --git a/setup.py b/setup.py
index 8e5b85d..1de6a2d 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
 setuptools.setup(
     name="yellowduck",
     version="1.1.0",
-    author="Chalat Phumphiraratthaya",
+    author="Chalat Ph.",
     author_email="chalat.phum@gmail.com",
     description="Data Science Toolbox for everyone",
     long_description=long_description,
@@ -36,11 +36,6 @@
     ],
     python_requires=">=3.6",
     install_requires=[
-        "black>=22.12.0",
-        "flake8>=6.0.0",
-        "pep8-naming>=0.13.3",
-        "isort>=5.11.5",
-        "pre-commit>=2.21.0",
         "scikit-learn>=1.0.0",
     ],
     extras_require=extras_require,
diff --git a/yellowduck/etc/id_card_validator.py b/yellowduck/etc/id_card_validator.py
index 54e206c..ff17c5c 100644
--- a/yellowduck/etc/id_card_validator.py
+++ b/yellowduck/etc/id_card_validator.py
@@ -1,5 +1,5 @@
-from abc import ABC, abstractmethod
 import re
+from abc import ABC, abstractmethod
 
 
 class IDCardStrategy(ABC):
diff --git a/yellowduck/etc/tomek.py b/yellowduck/etc/tomek.py
new file mode 100644
index 0000000..dc022eb
--- /dev/null
+++ b/yellowduck/etc/tomek.py
@@ -0,0 +1,89 @@
+# Adapted from https://github.com/scikit-learn-contrib/imbalanced-learn/blob/master/imblearn/under_sampling/_prototype_selection/_tomek_links.py
+# Modified so that _fit_resample also returns a third value, removed_indices.
+
+"""Class to perform under-sampling by removing Tomek's links."""
+
+# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
+#          Fernando Nogueira
+#          Christos Aridas
+# License: MIT
+
+import numbers
+from typing import Union
+
+import numpy as np
+from imblearn.under_sampling.base import BaseCleaningSampler
+from imblearn.utils import _safe_indexing
+from sklearn.neighbors import NearestNeighbors
+
+
+class TomekLinks(BaseCleaningSampler):
+    """Under-sampling by removing Tomek's links."""
+
+    _parameter_constraints: dict = {
+        **BaseCleaningSampler._parameter_constraints,
+        "n_jobs": [numbers.Integral, None],
+    }
+
+    def __init__(self, *, sampling_strategy="auto", n_jobs=None):
+        super().__init__(sampling_strategy=sampling_strategy)
+        self.n_jobs = n_jobs
+
+    @staticmethod
+    def is_tomek(
+        y: np.ndarray, nn_index: np.ndarray, class_type: Union[int, str]
+    ) -> np.ndarray:
+        """Detect if samples are Tomek's link using vectorized operations.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            Target vector of the data set.
+        nn_index : np.ndarray
+            Index of the closest nearest neighbour for each sample.
+        class_type : int or str
+            Label of the minority class.
+
+        Returns
+        -------
+        np.ndarray
+            Boolean array indicating Tomek links (True for Tomek link).
+        """
+        links = np.zeros(len(y), dtype=bool)
+
+        # Get mask for excluded classes (majority class)
+        excluded_mask = np.isin(y, class_type, invert=True)
+
+        # Find Tomek links: nearest neighbors of each other and different classes
+        different_class_mask = y[nn_index] != y
+        reverse_neighbor_mask = nn_index[nn_index] == np.arange(len(y))
+
+        # Combine conditions to identify Tomek links
+        links = np.logical_and(different_class_mask, reverse_neighbor_mask)
+        links[excluded_mask] = False  # Exclude classes not in the class_type
+
+        return links
+
+    def _fit_resample(self, X: np.ndarray, y: np.ndarray):
+        """Apply Tomek links under-sampling."""
+        # Find the nearest neighbour of every point
+        nn = NearestNeighbors(n_neighbors=2, n_jobs=self.n_jobs)
+        nn.fit(X)
+        nns = nn.kneighbors(X, return_distance=False)[:, 1]
+
+        # Identify Tomek links
+        links = self.is_tomek(y, nns, self.sampling_strategy_)
+
+        # Store indices of retained and removed samples
+        self.sample_indices_ = np.flatnonzero(~links)
+        removed_indices = np.flatnonzero(links)
+
+        # Return the resampled dataset
+        return (
+            _safe_indexing(X, self.sample_indices_),
+            _safe_indexing(y, self.sample_indices_),
+            removed_indices,
+        )
+
+    def _more_tags(self):
+        return {"sample_indices": True}
diff --git a/yellowduck/etc/torch_dbscan.py b/yellowduck/etc/torch_dbscan.py
new file mode 100644
index 0000000..41a64b1
--- /dev/null
+++ b/yellowduck/etc/torch_dbscan.py
@@ -0,0 +1,71 @@
+import torch
+from tqdm import tqdm
+
+
+def torch_dbscan(X, eps, min_samples):
+    """Cluster the rows of ``X`` with a DBSCAN-style density scan.
+
+    Adapted from
+    https://www.geeksforgeeks.org/pytorch-for-unsupervised-clustering/#dbscan-clustering
+
+    Args:
+        X: Tensor with one sample per row (expected to be 2-D, because the
+            distance helper reduces over ``dim=1``).
+        eps: Neighbourhood radius. A row is a neighbour when its Euclidean
+            distance is strictly below ``eps``.
+        min_samples: Minimum neighbourhood size for a point to seed a
+            cluster. The point itself is part of its own neighbourhood
+            whenever ``eps > 0``.
+
+    Returns:
+        Integer tensor of length ``X.shape[0]``. Cluster ids start at 0 and
+        ``-1`` marks noise.
+
+    Example:
+        labels = torch_dbscan(features, eps=0.1, min_samples=5)
+    """
+    n_samples = X.shape[0]
+    # Every point starts as noise (-1) and is relabelled only when it joins a
+    # cluster.
+    labels = torch.full((n_samples,), -1, dtype=torch.int)
+    visited = torch.zeros(n_samples, dtype=torch.bool)
+    cluster_label = -1
+
+    for i in tqdm(range(n_samples)):
+        if visited[i]:
+            continue
+        visited[i] = True
+
+        # nonzero() yields a (k, 1) index matrix. Squeezing only that column
+        # keeps the result 1-D for k == 0 and k == 1 as well, so no special
+        # case is needed for isolated points.
+        neighbors = torch.nonzero(euclidean_distance(X[i], X) < eps).squeeze(1)
+
+        if neighbors.shape[0] < min_samples:
+            # Not dense enough to seed a cluster: the point keeps its noise
+            # label unless a later cluster expansion reaches it.
+            continue
+
+        cluster_label += 1
+        labels[i] = cluster_label
+        expand_cluster(
+            X, labels, visited, neighbors, cluster_label, eps, min_samples
+        )
+
+    return labels
+
+
+def expand_cluster(X, labels, visited, neighbors, cluster_label, eps, min_samples):
+    """Grow one cluster outwards from a seed neighbourhood.
+
+    ``labels`` and ``visited`` are updated in place; nothing is returned.
+
+    Args:
+        X: Tensor with one sample per row.
+        labels: Per-sample cluster ids, ``-1`` meaning "no cluster yet".
+        visited: Per-sample boolean flags marking already scanned points.
+        neighbors: Index tensor of the seed point's neighbourhood.
+        cluster_label: Id written to every point that joins the cluster.
+        eps: Neighbourhood radius (strict ``<`` comparison).
+        min_samples: Minimum neighbourhood size for a point to keep
+            expanding the cluster.
+    """
+    # Accept a 0-d index tensor from callers that used a bare squeeze().
+    neighbors = neighbors.reshape(-1)
+
+    i = 0
+    # ``neighbors`` doubles as the work queue: it may grow while it is scanned,
+    # which is why this is an index loop and not a ``for``.
+    while i < neighbors.shape[0]:
+        neighbor_index = neighbors[i].item()
+        if not visited[neighbor_index]:
+            visited[neighbor_index] = True
+            # squeeze(1) keeps the index vector 1-D for any number of hits; a
+            # bare squeeze() turns a single hit into a 0-d tensor whose
+            # shape[0] raises IndexError.
+            neighbor_neighbors = torch.nonzero(
+                euclidean_distance(X[neighbor_index], X) < eps
+            ).squeeze(1)
+            if neighbor_neighbors.shape[0] >= min_samples:
+                neighbors = torch.cat((neighbors, neighbor_neighbors))
+        # Points still carrying -1 (unvisited, or earlier judged as noise) are
+        # claimed by this cluster.
+        if labels[neighbor_index] == -1:
+            labels[neighbor_index] = cluster_label
+        i += 1
+
+
+def euclidean_distance(x1, x2):
+    """Return the Euclidean norm of ``x1 - x2`` taken along ``dim=1``.
+
+    With a 1-D ``x1`` and a 2-D ``x2`` this is the distance from ``x1`` to
+    every row of ``x2``.
+    """
+    squared_diff = (x1 - x2).pow(2)
+    return squared_diff.sum(dim=1).sqrt()
diff --git a/yellowduck/image/grouping.py b/yellowduck/image/grouping.py
index d52b594..82191ff 100644
--- a/yellowduck/image/grouping.py
+++ b/yellowduck/image/grouping.py
@@ -1,10 +1,10 @@
-from abc import ABC, abstractmethod
 import hashlib
+import os
+from abc import ABC, abstractmethod
+
 import imagehash
 import numpy as np
-import os
 import PIL
-
 from utils.similarity import get_similar
 
 
@@ -19,7 +19,7 @@ def get_similar_images_index(self, **kwargs) -> list:
         """
         Find similar images using MD5 hashing method
         """
-        print(f"Using method: Exact Grouping")
+        print("Using method: Exact Grouping")
         images_index = get_similar(self.images_list)
         return images_index
 
@@ -29,7 +29,7 @@ def get_similar_images_index(self, **kwargs) -> list:
         """
         Find similar images using PHash hashing method
         """
-        print(f"Using method: Similar Grouping using PHash")
+        print("Using method: Similar Grouping using PHash")
         images_index = get_similar(self.images_list)
         return images_index
 
@@ -38,17 +38,20 @@ class ImageGrouping:
     def __init__(self):
         pass
 
+    def _hamming_distance(self, x, y):
+        """
+        Return the absolute difference between two stored image hashes.
+
+        ``x[0]`` and ``y[0]`` are cast to ``int`` and used as positions in
+        ``self.list_of_hash_images``. Presumably this is meant as a custom
+        metric for DBSCAN over arrays of indices, and the hash difference is
+        a Hamming distance -- TODO confirm.
+
+        NOTE(review): ``list_of_hash_images`` is not assigned in ``__init__``;
+        it has to be set on the instance before this method is called.
+        """
+        i, j = int(x[0]), int(y[0])
+        return abs(self.list_of_hash_images[i] - self.list_of_hash_images[j])
+
     def get_group(self):
         pass
 
 
 class ImageDuplicate:
     def __init__(self, image_folder_path: str):
-        try:  # For development phase only
-            get_ipython
-            self.current_path = os.getcwd()
-        except:  # For production
-            self.current_path = os.path.dirname(os.path.realpath(__file__))
+        self.current_path = os.path.dirname(os.path.realpath(__file__))
         self.current_path = os.path.join(self.current_path, image_folder_path)
 
         self.image_in_folder_list = [
@@ -67,7 +70,7 @@ def __init__(self, image_folder_path: str):
         self.duplicate_list = []  # The rest that not be selected in non_duplicate_list
 
     def find_exact(self):
-        print(f"Using method: Exact Match (MD5)")
+        print("Using method: Exact Match (MD5)")
 
         for image_file in self.image_in_folder_list:
             image_fullpath = os.path.join(self.current_path, image_file)
diff --git a/yellowduck/image/image.py b/yellowduck/image/image.py
index 7596337..56f3d93 100644
--- a/yellowduck/image/image.py
+++ b/yellowduck/image/image.py
@@ -2,26 +2,23 @@
 
 There are 2 approachs.
 1. exact match:     Using Cryptographic hashing algorithms in 'hashlib'
-2. similar match:   Using Perceptual hashing algorithms in 'imagehash' 
+2. similar match:   Using Perceptual hashing algorithms in 'imagehash'
                     and use Hamming distance for finding differrence.
 """
 
-import os
-import PIL
 import hashlib
+import os
+
 import imagehash
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+import PIL
 
 
 class ImageDuplicate:
     def __init__(self, image_folder_path: str):
         print("This is legacy function. It will be deprecated in the next version.")
-        try:  # For development phase only
-            get_ipython
-            self.current_path = os.getcwd()
-        except:  # For production
-            self.current_path = os.path.dirname(os.path.realpath(__file__))
+        self.current_path = os.path.dirname(os.path.realpath(__file__))
         self.current_path = os.path.join(self.current_path, image_folder_path)
 
         self.image_in_folder_list = [
@@ -40,7 +37,7 @@ def __init__(self, image_folder_path: str):
         self.duplicate_list = []  # The rest that not be selected in non_duplicate_list
 
     def find_exact(self):
-        print(f"Using method: Exact Match (MD5)")
+        print("Using method: Exact Match (MD5)")
 
         for image_file in self.image_in_folder_list:
             image_fullpath = os.path.join(self.current_path, image_file)
@@ -232,52 +229,3 @@ def show_group(self, group_number):
             image = PIL.Image.open(image_path)
             axes.ravel()[index].imshow(image)
         plt.tight_layout()
-
-
-class ShowImageDuplicate:
-    def __init__(self, image_folder_path, group_of_duplicate_dict: dict):
-        self.image_folder_path = image_folder_path
-        self.group_of_duplicate_dict = group_of_duplicate_dict
-
-        self.number_of_group = len(self.group_of_duplicate_dict)
-        print(
-            f"There are {self.number_of_group} of duplicate image.\nUse .show_group(group_number) or .show_all() for all group."
-        )
-
-    def show_all(self):
-        """
-        Show only first 5 images in each group
-        """
-        fig, axes = plt.subplots(nrows=self.number_of_group, ncols=5, figsize=(24, 24))
-        for axis in axes.ravel():
-            axis.set_axis_off()
-        for group_number in np.arange(self.number_of_group):
-            image_list = self.group_of_duplicate_dict[group_number]
-            if len(image_list) > 5:
-                image_list = image_list[:5]
-            for image_number in np.arange(len(image_list)):
-                image_path = os.path.join(
-                    self.image_folder_path, image_list[image_number]
-                )
-                image = PIL.Image.open(image_path)
-                axes[group_number, image_number].imshow(image)
-        plt.tight_layout()
-
-    def show_group(self, group_number):
-        image_list = self.group_of_duplicate_dict[group_number]
-        if len(image_list) < 5:
-            num_col = len(image_list)
-        else:
-            num_col = 5
-        num_row = int(len(image_list) / num_col)
-        mod = len(image_list) % num_col
-        if mod != 0:
-            num_row = num_row + 1
-        fig, axes = plt.subplots(nrows=num_row, ncols=num_col, figsize=(24, 10))
-        for axis in axes.ravel():
-            axis.set_axis_off()
-        for index, image_name in enumerate(image_list):
-            image_path = os.path.join(self.image_folder_path, image_name)
-            image = PIL.Image.open(image_path)
-            axes.ravel()[index].imshow(image)
-        plt.tight_layout()
diff --git a/yellowduck/image/utils.py b/yellowduck/image/utils.py
index 9adab1b..ac1c770 100644
--- a/yellowduck/image/utils.py
+++ b/yellowduck/image/utils.py
@@ -1,6 +1,7 @@
+import os
+
 import matplotlib.pyplot as plt
 import numpy as np
-import os
 import PIL
 
 
diff --git a/yellowduck/text/clean.py b/yellowduck/text/clean.py
new file mode 100644
index 0000000..b78509d
--- /dev/null
+++ b/yellowduck/text/clean.py
@@ -0,0 +1,311 @@
+import re
+import string
+
+import pandas as pd
+import pythainlp
+from preda.logger import logger
+from preda.nlp import utils as nutils
+from tqdm import tqdm
+
+
+def execute(item_names: pd.Series) -> pd.Series:
+    """Process item names by cleaning and normalizing text.
+
+    The input is cast to the pandas ``string`` dtype and then passed, in this
+    order, through: ``nutils.remove_new_line``, ``nutils.remove_tab_space``,
+    ``nutils.remove_http_https``, ``remove_phone_number``, lower-casing,
+    ``pythainlp.util.normalize``, ``nutils.remove_emoji``, ``remove_digit``,
+    ``nutils.replace_rep_after``, ``efficiently_remove_punctuation``,
+    ``nutils.remove_useless_spaces``, ``str.strip`` and
+    ``add_space_between_th_en``.
+
+    Args:
+        item_names (pd.Series): A pandas Series containing item names to be processed.
+
+    Returns:
+        pd.Series: A pandas Series containing the processed item names.
+    """
+    tqdm.pandas()
+
+    processed_item_names = item_names.astype("string").progress_apply(
+        nutils.remove_new_line
+    )
+    processed_item_names = processed_item_names.progress_apply(nutils.remove_tab_space)
+    processed_item_names = processed_item_names.progress_apply(nutils.remove_http_https)
+    # The helpers defined in this module are wrapped by ``with_progress`` and
+    # therefore take the whole Series, not a single string.
+    processed_item_names = remove_phone_number(processed_item_names)
+    processed_item_names = processed_item_names.str.lower()
+    processed_item_names = processed_item_names.progress_apply(pythainlp.util.normalize)
+    processed_item_names = processed_item_names.progress_apply(nutils.remove_emoji)
+    processed_item_names = remove_digit(processed_item_names)
+    processed_item_names = processed_item_names.progress_apply(nutils.replace_rep_after)
+    processed_item_names = efficiently_remove_punctuation(processed_item_names)
+    processed_item_names = processed_item_names.progress_apply(
+        nutils.remove_useless_spaces
+    )
+    processed_item_names = processed_item_names.str.strip()
+    processed_item_names = add_space_between_th_en(processed_item_names)
+
+    # Series.sum() skips missing comparison results; the builtin sum() would
+    # propagate <NA> into the logged count.
+    diff_text_cond = processed_item_names != item_names
+    logger.info(f"There are {diff_text_cond.sum()} processed item names.")
+    return processed_item_names
+
+
+def with_progress(func_name):
+    """Build a decorator that maps a per-element function over a pandas Series.
+
+    The decorated name no longer takes a single value: it takes a Series and
+    runs the original function on every element through ``progress_apply``,
+    with ``func_name`` shown as the tqdm description.
+
+    Args:
+        func_name: Text used as the progress-bar description.
+
+    Returns:
+        A decorator. The function it produces has the signature
+        ``wrapper(series, *args, **kwargs)`` and returns the result of
+        ``series.progress_apply(func, *args, **kwargs)``.
+    """
+    import functools  # local import so the block does not depend on file-level imports
+
+    def decorator(func):
+        @functools.wraps(func)  # keep the wrapped function's name and docstring
+        def wrapper(series, *args, **kwargs):
+            # Register progress_apply with this step's description
+            tqdm.pandas(desc=func_name)
+            try:
+                return series.progress_apply(func, *args, **kwargs)
+            finally:
+                # Clear the description even when func raised, so later
+                # progress bars are not mislabelled.
+                tqdm.pandas(desc=False)
+
+        return wrapper
+
+    return decorator
+
+
+###
+
+
+_PHONE_NUMBER_PATTERN = re.compile(r"\b(0[689]{1}[\d]{1}-?)+([\d]{3}-?)+([\d]{4})\b")
+
+
+@with_progress("remove_phone_number")
+def remove_phone_number(text: str) -> str:
+    """Strip phone numbers out of ``text``.
+
+    A match starts with ``06``, ``08`` or ``09`` plus one digit, followed by
+    a three-digit and a four-digit part, with optional hyphens in between
+    (for example ``0812345678`` or ``081-234-5678``). Matches are replaced
+    by an empty string.
+
+    Because of the ``with_progress`` decorator, the module-level name takes a
+    pandas Series and applies this per element.
+
+    Args:
+        text (str): The input text from which phone numbers will be removed.
+
+    Returns:
+        str: The text with phone numbers removed.
+    """
+    return _PHONE_NUMBER_PATTERN.sub("", text)
+
+
+###
+
+
+_DIGIT_PATTERN = re.compile(r"[๐-๙0-9]")
+
+
+@with_progress("remove_digit")
+def remove_digit(text: str) -> str:
+    """Delete every Thai (``๐``-``๙``) and ASCII (``0``-``9``) digit from ``text``.
+
+    Because of the ``with_progress`` decorator, the module-level name takes a
+    pandas Series and applies this per element.
+
+    Args:
+        text (str): The input text.
+
+    Returns:
+        str: The text with digits removed.
+    """
+    return _DIGIT_PATTERN.sub("", text)
+
+
+###
+
+
+@with_progress("efficiently_remove_punctuation")
+def efficiently_remove_punctuation(text: str) -> str:
+    """Replace punctuation in ``text`` with spaces.
+
+    The module-level ``my_punctuation_pattern`` is used; each match is
+    replaced by one space character.
+
+    Because of the ``with_progress`` decorator, the module-level name takes a
+    pandas Series and applies this per element.
+
+    Args:
+        text (str): The input text from which punctuation will be removed.
+
+    Returns:
+        str: The text with punctuation replaced by spaces.
+    """
+    single_space = " "
+    return re.sub(my_punctuation_pattern, single_space, text)
+
+
+# Escaped form of every ASCII punctuation character, built once at import time
+_base_punctuation_list = {re.escape(p) for p in string.punctuation}
+
+
+def process_punctuation_pattern(
+    punctuation_list: "list[str] | None" = None,
+    exceptional_punc_list: "list[str] | None" = None,
+    overwrite: bool = False,
+) -> re.Pattern:
+    """Create a regex pattern for punctuation characters, considering exceptional cases.
+
+    Every entry is escaped and placed inside one character class, so entries
+    are expected to be single characters. The pattern matches runs of one or
+    more of those characters.
+
+    Args:
+        punctuation_list: Punctuation characters to include. ``None`` (the
+            default) means no extra characters.
+        exceptional_punc_list: Punctuation characters to exclude from the
+            final set. ``None`` (the default) means no exclusions.
+        overwrite: When True, only ``punctuation_list`` is used; otherwise it
+            is merged with ``string.punctuation``.
+
+    Returns:
+        re.Pattern: Compiled regex pattern for punctuation characters. When
+        no character is left, a pattern that never matches is returned.
+    """
+    custom = {re.escape(p) for p in punctuation_list or ()}
+    excluded = {re.escape(p) for p in exceptional_punc_list or ()}
+
+    # Build new sets throughout so the shared base set is never modified
+    punctuations = custom if overwrite else _base_punctuation_list | custom
+    punctuations = punctuations - excluded
+
+    if not punctuations:
+        # "[]+" is not a valid regex; fall back to a pattern that cannot match
+        return re.compile(r"(?!)")
+
+    # Sort so the generated pattern text does not depend on set iteration order
+    return re.compile(f"[{''.join(sorted(punctuations))}]+")
+
+
+# Extra characters treated as punctuation by this module, on top of
+# string.punctuation (process_punctuation_pattern merges the two because
+# overwrite defaults to False).
+# - "#", "@", "•" and "!" are listed twice; this is harmless because
+#   process_punctuation_pattern collects the entries into a set.
+# - A few entries are letters ("Â", "à", "â", "é", "è", "Ã", "ï", "Ø"), so
+#   those letters are matched by the pattern as well.
+my_punctuation_list = [
+    "#",
+    "@",
+    "/",
+    ".",
+    ",",
+    '"',
+    ":",
+    ")",
+    "(",
+    "-",
+    "!",
+    "?",
+    "|",
+    ";",
+    "'",
+    "$",
+    "&",
+    "[",
+    "]",
+    ">",
+    "=",
+    "#",
+    "*",
+    "+",
+    "\\",
+    "•",
+    "~",
+    "@",
+    "£",
+    "·",
+    "_",
+    "{",
+    "}",
+    "©",
+    "^",
+    "®",
+    "`",
+    "<",
+    "→",
+    "°",
+    "€",
+    "™",
+    "›",
+    "♥",
+    "←",
+    "×",
+    "§",
+    "″",
+    "′",
+    "Â",
+    "█",
+    "½",
+    "à",
+    "…",
+    "\xa0",
+    "\t",
+    "“",
+    "★",
+    "”",
+    "–",
+    "●",
+    "â",
+    "►",
+    "%",
+    "−",
+    "¢",
+    "²",
+    "¬",
+    "░",
+    "¶",
+    "↑",
+    "±",
+    "¿",
+    "▾",
+    "═",
+    "¦",
+    "║",
+    "―",
+    "¥",
+    "▓",
+    "—",
+    "‹",
+    "─",
+    "▒",
+    "：",
+    "¼",
+    "⊕",
+    "▼",
+    "▪",
+    "†",
+    "■",
+    "’",
+    "▀",
+    "¨",
+    "▄",
+    "♫",
+    "☆",
+    "é",
+    "¯",
+    "♦",
+    "¤",
+    "▲",
+    "è",
+    "¸",
+    "¾",
+    "Ã",
+    "⋅",
+    "‘",
+    "∞",
+    "«",
+    "∙",
+    "）",
+    "↓",
+    "、",
+    "│",
+    "（",
+    "»",
+    "，",
+    "♪",
+    "╩",
+    "╚",
+    "³",
+    "・",
+    "╦",
+    "╣",
+    "╔",
+    "╗",
+    "▬",
+    "❤",
+    "ï",
+    "Ø",
+    "¹",
+    "≤",
+    "‡",
+    "√",
+    "•",
+    "!",
+]
+
+# Compiled once at import time; efficiently_remove_punctuation reads this
+# module-level pattern on every call.
+my_punctuation_pattern = process_punctuation_pattern(my_punctuation_list)
+
+
+###
+
+
+@with_progress("add_space_between_th_en")
+def add_space_between_th_en(text: str) -> str:
+    """Insert a space at every boundary between a Thai and a Latin character.
+
+    "Thai" means any character in the range ``ก``-``๙`` and "Latin" means
+    ``a``-``z`` / ``A``-``Z``. Both directions are handled, e.g.
+    ``"abcกขค"`` -> ``"abc กขค"`` and ``"กขคabc"`` -> ``"กขค abc"``.
+
+    Because of the ``with_progress`` decorator, the module-level name takes a
+    pandas Series and applies this per element.
+
+    Args:
+        text (str): The input text.
+
+    Returns:
+        str: The text with a space added between adjacent Thai and Latin
+        characters.
+    """
+    # Zero-width lookarounds only locate the boundary, so no character is
+    # consumed. A capturing alternation with a "\1 \2" replacement drops both
+    # characters whenever the Latin->Thai branch matches (its groups 1 and 2
+    # are empty) and misses the second boundary in a Thai-Latin-Thai run.
+    boundary_pattern = re.compile(
+        r"(?<=[ก-๙])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[ก-๙])"
+    )
+    return boundary_pattern.sub(" ", text)
diff --git a/yellowduck/text/cleansing.py b/yellowduck/text/cleansing.py
index 4045814..a514feb 100644
--- a/yellowduck/text/cleansing.py
+++ b/yellowduck/text/cleansing.py
@@ -213,18 +213,18 @@ def remove_emoji(text) -> str:
     """
     emoj = re.compile(
         "["
-        "\U0001F600-\U0001F64F"  # emoticons
-        "\U0001F300-\U0001F5FF"  # symbols & pictographs
-        "\U0001F680-\U0001F6FF"  # transport & map symbols
-        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
-        "\U00002500-\U00002BEF"  # chinese char
-        "\U00002702-\U000027B0"
-        "\U00002702-\U000027B0"
-        "\U000024C2-\U0001F251"
+        "\U0001f600-\U0001f64f"  # emoticons
+        "\U0001f300-\U0001f5ff"  # symbols & pictographs
+        "\U0001f680-\U0001f6ff"  # transport & map symbols
+        "\U0001f1e0-\U0001f1ff"  # flags (iOS)
+        "\U00002500-\U00002bef"  # chinese char
+        "\U00002702-\U000027b0"
+        "\U00002702-\U000027b0"
+        "\U000024c2-\U0001f251"
         "\U0001f926-\U0001f937"
         "\U00010000-\U0010ffff"
         "\u2640-\u2642"
-        "\u2600-\u2B55"
+        "\u2600-\u2b55"
         "\u200d"
         "\u23cf"
         "\u23e9"
diff --git a/yellowduck/utils.py b/yellowduck/utils.py
new file mode 100644
index 0000000..e3bce8c
--- /dev/null
+++ b/yellowduck/utils.py
@@ -0,0 +1,118 @@
+import collections
+import json
+from enum import Enum
+
+import numpy as np
+import pandas as pd
+from preda.deployment.airflow.utils import kube_pod_xcom_push
+from preda.logger import logger
+from sklearn.metrics import classification_report
+from snorkel_lab.config import config as package_config
+
+
+def logger_info_dataframe(dataframe: pd.DataFrame):
+    """Log ``dataframe`` at INFO level as a Markdown table on its own lines."""
+    markdown_table = dataframe.to_markdown()
+    logger.info(f"\n{markdown_table}")
+
+
+def logger_info_classification_report(y_true, y_pred, target_names):
+    """Log a classification report as a Markdown table and return it as a dict.
+
+    The logged table is rounded to two decimals and transposed; the returned
+    dict is the unrounded output of ``classification_report``.
+    """
+    report_dict = classification_report(
+        y_true,
+        y_pred,
+        target_names=target_names,
+        output_dict=True,
+        digits=2,
+    )
+    rounded_table = pd.DataFrame(report_dict).round(2).transpose()
+    logger.info(f"\n{rounded_table.to_markdown()}")
+    return report_dict
+
+
+def pass_information_to_the_next_task(informations: dict) -> None:
+    """Serialise ``informations`` to JSON and push it under the ``xcom_return`` key."""
+    logger.info("----- pass_information_to_the_next_task -----")
+    kube_pod_xcom_push({"xcom_return": json.dumps(informations)})
+
+
+def get_xcom_from_the_previous_task(xcom_return: str) -> dict:
+    """Decode the JSON string ``xcom_return``, log the result and return it."""
+    logger.info("----- get_xcom_from_the_previous_task -----")
+    decoded = json.loads(xcom_return)
+    logger.info(f"xcom_return: {decoded}")
+    return decoded
+
+
+###
+
+
+# TODO: Fix and add this into training and prediction pipeline
+class PredictionStats:
+    """Summary statistics over an array of predicted labels.
+
+    Entries equal to ``package_config.snorkel.abstain_value`` count as
+    unlabeled; every other entry counts as labeled.
+    """
+
+    def __init__(self, prediction_array: np.ndarray):
+        self.prediction_array = prediction_array
+
+    @property
+    def value(self) -> np.ndarray:
+        """The prediction array exactly as it was passed in."""
+        return self.prediction_array
+
+    @property
+    def abstain_value(self) -> int:
+        """Label that marks an abstained (unlabeled) prediction."""
+        return package_config.snorkel.abstain_value
+
+    @property
+    def total_labeled_datapoint(self) -> int:
+        """Number of predictions that differ from the abstain value."""
+        return np.count_nonzero(self.prediction_array != self.abstain_value)
+
+    @property
+    def total_unlabeled_datapoint(self) -> int:
+        """Number of predictions equal to the abstain value."""
+        return np.count_nonzero(self.prediction_array == self.abstain_value)
+
+    @property
+    def total_datapoint(self) -> int:
+        """Total number of predictions."""
+        return len(self.prediction_array)
+
+    @property
+    def total_coverage_percent(self) -> float:
+        """Share of labeled predictions in percent, rounded to two decimals.
+
+        Returns 0.0 for an empty prediction array.
+        """
+        total = self.total_datapoint
+        if total == 0:
+            return 0.0
+        # Round after scaling so the result carries no float artefacts such
+        # as 28.570000000000004.
+        return round(100 * self.total_labeled_datapoint / total, 2)
+
+    @property
+    def individual_coverage(self) -> dict:
+        """Occurrence count per predicted label (abstain included)."""
+        return collections.Counter(self.prediction_array)
+
+    @property
+    def unique_class(self) -> list:
+        """Sorted distinct labels (abstain included)."""
+        return list(np.unique(self.prediction_array))
+
+    @property
+    def unique_class_without_abstain(self) -> list:
+        """Sorted distinct labels with the abstain value left out."""
+        unique_class = self.unique_class
+        if self.abstain_value in unique_class:
+            unique_class.remove(self.abstain_value)
+        return unique_class
+
+    def get_least_support_class(self, Label: Enum) -> dict:
+        """Return ``{name: count}`` for the labeled class with the fewest predictions.
+
+        Args:
+            Label: Enum class whose values are the label ids; used to turn
+                the id into a name.
+
+        Returns:
+            A one-entry dict, or an empty dict when no prediction is labeled.
+        """
+        class_counter = self.individual_coverage.copy()
+        class_counter.pop(self.abstain_value, None)
+        if not class_counter:
+            return {}
+        least_key = min(class_counter, key=class_counter.get)
+        return {Label(least_key).name: class_counter[least_key]}
+
+    def get_class_balance(self) -> float:
+        """
+        Use normalised Shannon entropy to measure class balance.
+        0 for an unbalanced data set
+        1 for a balanced data set
+        Returns 0.0 when fewer than two labeled classes are present, because
+        the normalisation divides by log(number of classes).
+        Ref: https://stats.stackexchange.com/questions/239973/a-general-measure-of-data-set-imbalance
+        """
+        abstain = self.abstain_value
+        counts = np.array(
+            [
+                count_label
+                for label, count_label in self.individual_coverage.items()
+                if label != abstain
+            ]
+        )
+        if len(counts) < 2:
+            return 0.0
+        # counts.sum() is the number of labeled predictions
+        probabilities = counts / counts.sum()
+        shannon_entropy = -(probabilities * np.log(probabilities)).sum()
+        return round(shannon_entropy / np.log(len(counts)), 2)