diff --git a/models/README.md b/models/README.md index d1423efa9..12a4c4af1 100644 --- a/models/README.md +++ b/models/README.md @@ -1 +1,11 @@ Directory contains models tested , the best performing model will be used finally. + +Directory structure + . + + ├── ... + ├── face_detection # Contains models which detect faces + ├── object_detection # Contains models for object detection + ├── merged # Merge of the above two, or some model which can simply do both. + └── ... + diff --git a/models/face_detection/README.md b/models/face_detection/README.md new file mode 100644 index 000000000..34ac2af46 --- /dev/null +++ b/models/face_detection/README.md @@ -0,0 +1,2 @@ +Directory contains the following models +- YOLO_huggingface_v1 # This directory has a face detection YOLOv8 model, imported from Hugging Face; the source is linked in the notebook \ No newline at end of file diff --git a/models/face_detection/YOLO_huggingface_v1/YOLO_huggingface_v1.ipynb b/models/face_detection/YOLO_huggingface_v1/YOLO_huggingface_v1.ipynb new file mode 100644 index 000000000..cff626028 --- /dev/null +++ b/models/face_detection/YOLO_huggingface_v1/YOLO_huggingface_v1.ipynb @@ -0,0 +1,591 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0b9b1f701cd244c2ab1f93e0172ecd2f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fd7ab5c65255486d999a741bc65865c9", 
"IPY_MODEL_4471adc0c6a445d2acfcbc5c597eeb1e", + "IPY_MODEL_cd8c367fcd244a11b2e0ec72f807728e" + ], + "layout": "IPY_MODEL_7ac699d1fa204c33913f501b8c052d7e" + } + }, + "fd7ab5c65255486d999a741bc65865c9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6a37c7408337431f8e2837c6f6857eba", + "placeholder": "​", + "style": "IPY_MODEL_f7960e72e84840d489a3490f46bde60d", + "value": "model.pt: 100%" + } + }, + "4471adc0c6a445d2acfcbc5c597eeb1e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f0dd9a258c5b4713933cd7e78406a13e", + "max": 6247065, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b05c2e348ba34a699dd477ac6887b11e", + "value": 6247065 + } + }, + "cd8c367fcd244a11b2e0ec72f807728e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9d7b66f7c6034b78b6328f7ec3e48480", + "placeholder": "​", + "style": "IPY_MODEL_5f7f69170e60465885dbec7c72202fa3", + "value": " 6.25M/6.25M [00:00<00:00, 51.9MB/s]" + } + }, + "7ac699d1fa204c33913f501b8c052d7e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6a37c7408337431f8e2837c6f6857eba": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7960e72e84840d489a3490f46bde60d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f0dd9a258c5b4713933cd7e78406a13e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": 
null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b05c2e348ba34a699dd477ac6887b11e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9d7b66f7c6034b78b6328f7ec3e48480": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": 
null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f7f69170e60465885dbec7c72202fa3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Hugging Face model\n", + "This notebook tests the Hugging Face model (source: https://huggingface.co/arnabdhar/YOLOv8-Face-Detection). It is built on the YOLOv8 architecture, which is what we used to build our first object detection model as well (indicating their speeds will be similar; *note*: we test this in the merged notebook).\n", + "\n", + "**Note**: The reason we have this notebook in the first place is modularity: later on, future contributors can treat it as a standalone notebook testing a face detection model and decide to merge or combine it with another object detection model." 
+ ], + "metadata": { + "id": "3R_0IRoJWqAn" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install ultralytics\n", + "!pip install supervision" + ], + "metadata": { + "id": "Dw7ZzFnvmQDE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ygRL3jfjhwdR" + }, + "outputs": [], + "source": [ + "# load libraries\n", + "from huggingface_hub import hf_hub_download\n", + "from ultralytics import YOLO\n", + "from supervision import Detections\n", + "from PIL import Image\n", + "import cv2\n", + "from google.colab.patches import cv2_imshow\n", + "import os" + ] + }, + { + "cell_type": "code", + "source": [ + "# download model\n", + "model_path = hf_hub_download(repo_id=\"arnabdhar/YOLOv8-Face-Detection\", filename=\"model.pt\")\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 173, + "referenced_widgets": [ + "0b9b1f701cd244c2ab1f93e0172ecd2f", + "fd7ab5c65255486d999a741bc65865c9", + "4471adc0c6a445d2acfcbc5c597eeb1e", + "cd8c367fcd244a11b2e0ec72f807728e", + "7ac699d1fa204c33913f501b8c052d7e", + "6a37c7408337431f8e2837c6f6857eba", + "f7960e72e84840d489a3490f46bde60d", + "f0dd9a258c5b4713933cd7e78406a13e", + "b05c2e348ba34a699dd477ac6887b11e", + "9d7b66f7c6034b78b6328f7ec3e48480", + "5f7f69170e60465885dbec7c72202fa3" + ] + }, + "id": "kOYf-yV3mGAk", + "outputId": "20fb74dc-ed25-4bb8-e495-0472ead821d6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note 
that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "model.pt: 0%| | 0.00/6.25M [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mcurr_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mface_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mcurr_result\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mimage_results_2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurr_result\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/ultralytics/engine/results.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;34m\"\"\"Return a Results object for the specified index.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 126\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"__getitem__\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 127\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/ultralytics/engine/results.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 164\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/ultralytics/engine/results.py\u001b[0m in 
\u001b[0;36m__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;34m\"\"\"Return a BaseTensor with the specified index of the data tensor.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0morig_shape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: index 0 is out of bounds for dimension 0 with size 0" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "face_paths" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O95uaut8M4yb", + "outputId": "cc719293-1d55-4893-b4e1-3c526c28c16a" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['001_result_face_result.png',\n", + " '002_result_face_result.jpg',\n", + " '003_result_face_result.jpeg']" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "image_data = []\n", + "\n", + "for result in image_results:\n", + " speed = result.speed\n", + " orig_shape = result.orig_shape\n", + " image_data.append((orig_shape, speed['preprocess'], speed['inference'], speed['postprocess']))\n", + "\n", + 
"print(tabulate(image_data, headers=[ \"Original Shape\",\"Preprocess\", \"Inference\", \"Postprocess\",]))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oS7aXenYM6M3", + "outputId": "a10ddaf9-a0ee-4b7f-f184-b276130c7852" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Original Shape Preprocess Inference Postprocess\n", + "---------------- ------------ ----------- -------------\n", + "(168, 300) 2.71463 172.394 1.44792\n", + "(480, 768) 3.41344 162.386 1.98889\n", + "(225, 225) 3.90077 251.972 1.68943\n", + "(514, 875) 2.9695 162.759 1.84631\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "image_data_2 = []\n", + "\n", + "for result in image_results_2:\n", + " speed = result.speed\n", + " orig_shape = result.orig_shape\n", + " image_data_2.append((orig_shape, speed['preprocess'], speed['inference'], speed['postprocess']))\n", + "\n", + "print(tabulate(image_data_2, headers=[ \"Original Shape\",\"Preprocess\", \"Inference\", \"Postprocess\",]))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ElJkyPKFN4RD", + "outputId": "5f74d512-8275-48b6-da74-f89e9e6e26c9" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Original Shape Preprocess Inference Postprocess\n", + "---------------- ------------ ----------- -------------\n", + "(168, 300) 3.63016 199.303 2.563\n", + "(480, 768) 3.582 160.225 1.28984\n", + "(225, 225) 4.67849 254.233 1.26791\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Results\n", + "\n", + "- The first model was properly able to detect all 24 persons in the second image, funfact the image is actually 24 AI generated faces ! 
It categorized all of them as Persons.\n", + "- The second model was able to detect three faces, including the person who is barely visible behind one of the people present in the scene.\n", + "- The second model was able to detect 24 faces in the second image as well.\n", + "- The second model's cell raised an `IndexError` on the last image, which had no faces (the result held zero detections, so indexing it failed), indicating it detects the presence of no face, which is exactly what we need.\n", + "\n", + "- The time taken for each image is roughly the same for both models." + ], + "metadata": { + "id": "hDevjwprOEkV" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Future Work\n", + "We now have models which can\n", + "- Detect faces\n", + "- Detect objects in the scene\n", + "\n", + "Keep in mind that these models, especially the object detection one, are trained on the coco8 dataset only; the future scope is to train on a much larger dataset, as discussed in the first YOLOv8_coco8.ipynb notebook. The coco8 dataset consists of only 4 images.\n", + "\n", + "**Note**: another method, which passes the input picture first through the object detection model and then through the face detection model, was tested, but those results were not saved as the bounding boxes get cluttered.\n", + "\n", + "1) Now we aim to see how we can use the attributes of the detected objects to enhance the passing of photos to the second model. This can be done by first detecting the presence of a `person`; if a person is present, we pass the photo to the second model to detect the presence of a `face`.\n", + "\n", + "2) Training the first model (object detection) on a larger dataset is still pending; it was left undone due to time constraints.\n", + "\n", + "3) Unfortunately, the code that generated the Hugging Face model we are using does not seem to be open source; I could not find it. Fortunately, the datasets it was trained on are given, so we could reverse engineer and build a model which combines both of them in a transfer learning fashion." 
+ ], + "metadata": { + "id": "dvxjJLX-QFwQ" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "mkUsQ1x_N_kS" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/models/merged/YOLO_merged_v1/images/images/001_face_result.png b/models/merged/YOLO_merged_v1/images/images/001_face_result.png new file mode 100644 index 000000000..e8266657b Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/001_face_result.png differ diff --git a/models/merged/YOLO_merged_v1/images/images/001_result.png b/models/merged/YOLO_merged_v1/images/images/001_result.png new file mode 100644 index 000000000..9dea97ffb Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/001_result.png differ diff --git a/models/merged/YOLO_merged_v1/images/images/002_face_result.jpg b/models/merged/YOLO_merged_v1/images/images/002_face_result.jpg new file mode 100644 index 000000000..63035c076 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/002_face_result.jpg differ diff --git a/models/merged/YOLO_merged_v1/images/images/002_result.jpg b/models/merged/YOLO_merged_v1/images/images/002_result.jpg new file mode 100644 index 000000000..55862a7cc Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/002_result.jpg differ diff --git a/models/merged/YOLO_merged_v1/images/images/002_result_face_result.jpg b/models/merged/YOLO_merged_v1/images/images/002_result_face_result.jpg new file mode 100644 index 000000000..98bfd1354 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/002_result_face_result.jpg differ diff --git a/models/merged/YOLO_merged_v1/images/images/003_result.jpeg b/models/merged/YOLO_merged_v1/images/images/003_result.jpeg new file mode 100644 index 000000000..059da20e7 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/003_result.jpeg differ diff --git 
a/models/merged/YOLO_merged_v1/images/images/003_result_face_result.jpeg b/models/merged/YOLO_merged_v1/images/images/003_result_face_result.jpeg new file mode 100644 index 000000000..497883919 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/003_result_face_result.jpeg differ diff --git a/models/merged/YOLO_merged_v1/images/images/004_result.png b/models/merged/YOLO_merged_v1/images/images/004_result.png new file mode 100644 index 000000000..b1e4f248e Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/images/004_result.png differ diff --git a/models/merged/YOLO_merged_v1/images/original/001.png b/models/merged/YOLO_merged_v1/images/original/001.png new file mode 100644 index 000000000..fcb3f7aef Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/original/001.png differ diff --git a/models/merged/YOLO_merged_v1/images/original/002.jpg b/models/merged/YOLO_merged_v1/images/original/002.jpg new file mode 100644 index 000000000..01e0d865b Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/original/002.jpg differ diff --git a/models/merged/YOLO_merged_v1/images/original/003.jpeg b/models/merged/YOLO_merged_v1/images/original/003.jpeg new file mode 100644 index 000000000..5b1023d94 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/original/003.jpeg differ diff --git a/models/merged/YOLO_merged_v1/images/original/004.png b/models/merged/YOLO_merged_v1/images/original/004.png new file mode 100644 index 000000000..fc78917d5 Binary files /dev/null and b/models/merged/YOLO_merged_v1/images/original/004.png differ diff --git a/models/object_detection/README.md b/models/object_detection/README.md new file mode 100644 index 000000000..16dabee6e --- /dev/null +++ b/models/object_detection/README.md @@ -0,0 +1,2 @@ +Directory contains the following models +- YOLOv8_coco8 # This directory has an object detection YOLOv8 model, imported from ultralytics \ No newline at end of file diff --git 
a/models/YOLOv8_coco8.ipynb b/models/object_detection/YOLOv8_coco8/YOLOv8_coco8.ipynb similarity index 100% rename from models/YOLOv8_coco8.ipynb rename to models/object_detection/YOLOv8_coco8/YOLOv8_coco8.ipynb diff --git a/models/images/000000000009.jpg b/models/object_detection/YOLOv8_coco8/images/000000000009.jpg similarity index 100% rename from models/images/000000000009.jpg rename to models/object_detection/YOLOv8_coco8/images/000000000009.jpg diff --git a/models/images/000000000025.jpg b/models/object_detection/YOLOv8_coco8/images/000000000025.jpg similarity index 100% rename from models/images/000000000025.jpg rename to models/object_detection/YOLOv8_coco8/images/000000000025.jpg diff --git a/models/images/000000000030.jpg b/models/object_detection/YOLOv8_coco8/images/000000000030.jpg similarity index 100% rename from models/images/000000000030.jpg rename to models/object_detection/YOLOv8_coco8/images/000000000030.jpg diff --git a/models/images/000000000034.jpg b/models/object_detection/YOLOv8_coco8/images/000000000034.jpg similarity index 100% rename from models/images/000000000034.jpg rename to models/object_detection/YOLOv8_coco8/images/000000000034.jpg diff --git a/models/images/cat.png b/models/object_detection/YOLOv8_coco8/images/cat.png similarity index 100% rename from models/images/cat.png rename to models/object_detection/YOLOv8_coco8/images/cat.png diff --git a/models/results/000000000009_result.jpg b/models/object_detection/YOLOv8_coco8/results/000000000009_result.jpg similarity index 100% rename from models/results/000000000009_result.jpg rename to models/object_detection/YOLOv8_coco8/results/000000000009_result.jpg diff --git a/models/results/000000000025_result.jpg b/models/object_detection/YOLOv8_coco8/results/000000000025_result.jpg similarity index 100% rename from models/results/000000000025_result.jpg rename to models/object_detection/YOLOv8_coco8/results/000000000025_result.jpg diff --git a/models/results/000000000030_result.jpg 
b/models/object_detection/YOLOv8_coco8/results/000000000030_result.jpg similarity index 100% rename from models/results/000000000030_result.jpg rename to models/object_detection/YOLOv8_coco8/results/000000000030_result.jpg diff --git a/models/results/000000000034_result.jpg b/models/object_detection/YOLOv8_coco8/results/000000000034_result.jpg similarity index 100% rename from models/results/000000000034_result.jpg rename to models/object_detection/YOLOv8_coco8/results/000000000034_result.jpg