diff --git a/BOWpractice.ipynb b/BOWpractice.ipynb
new file mode 100644
index 0000000..4c5f9d3
--- /dev/null
+++ b/BOWpractice.ipynb
@@ -0,0 +1,1808 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyOqk0a1WDXiVZJhkncj4kXi",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Lokendra-parmar/python-programming-questions/blob/main/BOWpractice.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "669Bj7r8lcn1"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "import pandas as pd\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df = pd.read_csv(\"email_text.csv\")  # relative to the working directory (/content on Colab), so the notebook also runs outside Colab"
+      ],
+      "metadata": {
+        "id": "eb6ElgqmRjSV"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.head(n=10)  # preview the first ten rows"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 363
+        },
+        "id": "oJkvZkyCSQ_D",
+        "outputId": "aa439ac5-03e3-4552-c486-59fb2a55257c"
+      },
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "   label                                               text\n",
+              "0      0  user id enrondlr pw bnawebescapenumber origina...\n",
+              "1      0  hi chris tonight we are rolling out a new repo...\n",
+              "2      0  rika r these new original message from thomas ...\n",
+              "3      0  john gerald we are currently trading under gtc...\n",
+              "4      0  gerald and stacy attached is a worksheet for a...\n",
+              "5      0  fyi below is a copy of my communication with m...\n",
+              "6      0  pg e gt nw plans lateral across washington sta...\n",
+              "7      0  mark i am working with the east power desk to ...\n",
+              "8      0  oops here it is kal original message from shah...\n",
+              "9      0  mark and charlie fmpa is ready to bill us for ..."
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-90c3ad98-74de-4947-a682-ef914f58667a\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>label</th>\n",
+              "      <th>text</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>0</td>\n",
+              "      <td>user id enrondlr pw bnawebescapenumber origina...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>0</td>\n",
+              "      <td>hi chris tonight we are rolling out a new repo...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>0</td>\n",
+              "      <td>rika r these new original message from thomas ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>0</td>\n",
+              "      <td>john gerald we are currently trading under gtc...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>0</td>\n",
+              "      <td>gerald and stacy attached is a worksheet for a...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>0</td>\n",
+              "      <td>fyi below is a copy of my communication with m...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>0</td>\n",
+              "      <td>pg e gt nw plans lateral across washington sta...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>7</th>\n",
+              "      <td>0</td>\n",
+              "      <td>mark i am working with the east power desk to ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>0</td>\n",
+              "      <td>oops here it is kal original message from shah...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>0</td>\n",
+              "      <td>mark and charlie fmpa is ready to bill us for ...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-90c3ad98-74de-4947-a682-ef914f58667a')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-90c3ad98-74de-4947-a682-ef914f58667a button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-90c3ad98-74de-4947-a682-ef914f58667a');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "df",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 17415,\n  \"fields\": [\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 17415,\n        \"samples\": [\n          \"greetings i would like to take this opportunity to introduce our service please examine the information below and let me know if you have any inquiry we are accepting your m ortgage requirement there is no problem if you have bad cr edit you can get a escapenumber escapenumber loan for a escapenumber monthly payment approval procedure will only take escapenumber minute just visit the link below and fill out the quick and easy form http www masfre info aqwcwdpep thank you for your time best regards forrest rich general manager\",\n          \"http www synetix com ammonia original appointment from taylor michael e sent thursday august escapenumber escapenumber escapenumber escapenumber pm to massey ii john woods trevor salhotra rajneesh subject nox model when friday august escapenumber escapenumber escapenumber escapenumber pm escapenumber escapenumber pm gmt escapenumber escapenumber central time us canada where escapenumber feedback suggestions on nox model\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 178
+        },
+        "id": "16a94e53",
+        "outputId": "06fcc323-589b-4abb-93ee-a1f7d87241b2"
+      },
+      "source": [
+        "label_distribution = df.loc[:, 'label'].value_counts()  # rows per label value, largest count first\n",
+        "display(label_distribution)  # rich rendering of the resulting counts"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "label\n",
+              "0    9840\n",
+              "1    7575\n",
+              "Name: count, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>label</th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>9840</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>7575</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cda0fd8f"
+      },
+      "source": [
+        "The output above shows how many rows carry each value of the `label` column: 9,840 rows have label 0 and 7,575 have label 1. Plotting these counts as a bar chart makes the relative size of the two classes easier to compare at a glance."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 444
+        },
+        "id": "5cdd3939",
+        "outputId": "328e4335-843c-4731-edbf-d7ef61b974e9"
+      },
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "# Bar chart of rows per label, drawn on an explicitly created 8x6-inch Axes.\n",
+        "fig, ax = plt.subplots(figsize=(8, 6))\n",
+        "sns.barplot(x=label_distribution.index, y=label_distribution.values, ax=ax)\n",
+        "ax.set(title='Distribution of the Label Column', xlabel='Label', ylabel='Count')\n",
+        "# Keep the x tick labels horizontal.\n",
+        "ax.tick_params(axis='x', labelrotation=0)\n",
+        "plt.show()"
+      ],
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 800x600 with 1 Axes>"
+            ],
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAskAAAIjCAYAAADx6oYJAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAOkdJREFUeJzt3XtUVXX+//EXyFUU8JIgRoiXTM201JC8JyOlVkw6RTl5GS/VoKWWFpWmllGaeJ/MmSnvk+YvzdEyEbxMSmo0XkdNGy3LASyFI5YgsH9/9GUvzwe8gMhBez7WOmt1Pp/32fu9YXt8td3nc9wsy7IEAAAAwObu6gYAAACAyoaQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAygzMaPHy83N7cK2VeXLl3UpUsX+/mmTZvk5uamFStWVMj+BwwYoPr161fIvsoqJydHgwcPVnBwsNzc3DRixIhSb6Pod/rjjz+Wf4PXyLFjx+Tm5qa333673LZZdH5t2rSp3LZZGub5DqDiEZIBSJLmz58vNzc3++Hj46OQkBBFR0dr5syZOnPmTLns58SJExo/frx27dpVLtsrT5W5tyvxxhtvaP78+Xr66ae1aNEiPfHEE5esXbVqVcU1Zyg637788kuX9VCevvnmGz355JNq0KCBfHx85O/vr/bt22vGjBn65ZdfXN0egDLwcHUDACqXiRMnKjw8XOfPn1d6ero2bdqkESNGKDExUatXr9Ydd9xh177yyit68cUXS7X9EydOaMKECapfv75atWp1xa9bv359qfZTFpfq7a9//asKCwuveQ9XIyUlRe3atdOrr7562do33nhDffr0UUxMzLVv7Aa3du1a/eEPf5C3t7f69eun22+/XXl5efr88881evRo7d+/X/PmzXN1mwBKiZAMwMn999+vNm3a2M/j4+OVkpKiXr166cEHH9SBAwfk6+srSfLw8JCHx7V9G/n5559VtWpVeXl5XdP9XI6np6dL938lMjMz1axZM1e38Zty9OhRxcbGKiwsTCkpKapbt649FxcXpyNHjmjt2rUu7BBAWXG7BYDLuvfeezV27Fh9++23Wrx4sT1e0j3JSUlJ6tChgwIDA1WtWjU1adJEL730kqRf7/Ns27atJGngwIH2rR3z58+X9Ot9mLfffrvS0tLUqVMnVa1a1X7txe7RLCgo0EsvvaTg4GD5+fnpwQcf1PHjx51q6tevrwEDBhR77YXbvFxvJd2TfPbsWT333HMKDQ2Vt7e3mjRporfffluWZTnVubm5adiwYVq1apVuv/12eXt7q3nz5lq3bl3JP3BDZmamBg0apKCgIPn4+Khly5ZasGCBPV90/+zRo0e1du1au/djx46VuD03NzedPXtWCxYssGvNn09WVpYGDBigwMBABQQEaODAgfr555+LbWvx4sVq3bq1fH19VbNmTcXGxhb7+ZdVXl6exo0bp9atWysgIEB+fn7q2LGjNm7ceNHXTJs2TWFhYfL19VXnzp21b9++YjUHDx5Unz59VLNmTfn4+KhNmzZavXp1mXqcPHmycnJy9Pe//90pIBdp1KiRnn32Wft5fn6+XnvtNTVs2FDe3t6qX7++XnrpJeXm5l5yP0W3p5i/05LunS76c7Rnzx517txZVatWVaNGjez79zdv3qyIiAj5+vqqSZMm2rBhg9M2i/5cHzly5IrOAeBGRUgGcEWK7m+91G0P+/fvV69evZSbm6uJEydq6tSpevDBB7V161ZJUtOmTTVx4kRJ0tChQ7Vo0SItWrRInTp1srfx008/6f7771erVq00ffp0de3a9ZJ9TZo0SWvXrtULL7ygZ555RklJSYqKiir1faBX0tuFLMvSgw8+qGnTpum+++5TYmKimjRpotGjR2vUqFHF6j///HP9+c9/VmxsrCZPnqxz586pd+/e+umnn
y7Z1y+//KIuXbpo0aJF6tu3r6ZMmaKAgAANGDBAM2bMsHtftGiRateurVatWtm933TTTSVuc9GiRfL29lbHjh3t2ieffNKp5pFHHtGZM2eUkJCgRx55RPPnz9eECROcaiZNmqR+/fqpcePGSkxM1IgRI5ScnKxOnTopKyvrksd1JRwOh/72t7+pS5cueuuttzR+/HidPHlS0dHRJd43vnDhQs2cOVNxcXGKj4/Xvn37dO+99yojI8Ou2b9/v9q1a6cDBw7oxRdf1NSpU+Xn56eYmBitXLmy1D3+85//VIMGDXTPPfdcUf3gwYM1btw43XXXXZo2bZo6d+6shIQExcbGlnrfl3L69Gn16tVLERERmjx5sry9vRUbG6tly5YpNjZWPXr00JtvvqmzZ8+qT58+JX7m4ErOAeCGZgGAZVnvv/++JcnauXPnRWsCAgKsO++8037+6quvWhe+jUybNs2SZJ08efKi29i5c6clyXr//feLzXXu3NmSZM2dO7fEuc6dO9vPN27caEmy6tWrZzkcDnt8+fLlliRrxowZ9lhYWJjVv3//y27zUr3179/fCgsLs5+vWrXKkmS9/vrrTnV9+vSx3NzcrCNHjthjkiwvLy+nsd27d1uSrFmzZhXb14WmT59uSbIWL15sj+Xl5VmRkZFWtWrVnI49LCzM6tmz5yW3V8TPz6/En0nR7/RPf/qT0/jvf/97q1atWvbzY8eOWVWqVLEmTZrkVLd3717Lw8Oj2LjpSs63/Px8Kzc312ns9OnTVlBQkFN/R48etSRZvr6+1vfff2+Pb9++3ZJkjRw50h7r1q2b1aJFC+vcuXP2WGFhoXXPPfdYjRs3tseKzq+NGzdetL/s7GxLkvXQQw9d8liL7Nq1y5JkDR482Gn8+eeftyRZKSkp9ph5bhb9vI4ePer02pL6LPpztHTpUnvs4MGDliTL3d3d+uKLL+zxzz77rNg5f6XnAHCj40oygCtWrVq1S65yERgYKEn6+OOPy/whN29vbw0cOPCK6/v166fq1avbz/v06aO6devqk08+KdP+r9Qnn3yiKlWq6JlnnnEaf+6552RZlj799FOn8aioKDVs2NB+fscdd8jf31///e9/L7uf4OBgPfbYY/aYp6ennnnmGeXk5Gjz5s3lcDTFPfXUU07PO3bsqJ9++kkOh0OS9NFHH6mwsFCPPPKIfvzxR/sRHBysxo0bX/KWiCtVpUoV+170wsJCnTp1Svn5+WrTpo2++uqrYvUxMTGqV6+e/fzuu+9WRESEfS6cOnVKKSkp9hXSop5/+uknRUdH6/Dhw/rhhx+uuL+in8WF59+lFPVh/kvDc889J0nleu9ytWrVnK5ON2nSRIGBgWratKkiIiLs8aL/Luk8vNw5ANzoCMkArlhOTs4lA8Gjjz6q9u3ba/DgwQoKClJsbKyWL19eqsBcr169Un1Ir3Hjxk7P3dzc1KhRo4vej1tevv32W4WEhBT7eTRt2tSev9Att9xSbBs1atTQ6dOnL7ufxo0by93d+e36YvspL2a/NWrUkCS738OHD8uyLDVu3Fg33XST0+PAgQPKzMwslz4WLFigO+64Qz4+PqpVq5ZuuukmrV27VtnZ2cVqzXNBkm699Vb7XDhy5Igsy9LYsWOL9Vy0Ikhp+vb395ekK14e8dtvv5W7u7saNWrkNB4cHKzAwMBy/V3efPPNxT4vEBAQoNDQ0GJjkko8Dy93DgA3Ola3AHBFvv/+e2VnZxf7C/5Cvr6+2rJlizZu3Ki1a9dq3bp1WrZsme69916tX79eVapUuex+ilbOKE8X+8KTgoKCK+qpPFxsP5bxIb/K4nL9FhYWys3NTZ9++mmJtdWqVbvqHhYvXqwBAwYoJiZGo0ePVp06dVSlShUlJCTom2++KfX2iv5n7fnnn1d0dHSJNZc6v03+/v4KCQkp8cOBl1KWL+C51Dlckov9/kpzHl5v5yxQ3gjJAK7IokWLJOmi4aKIu7u7unXrp
m7duikxMVFvvPGGXn75ZW3cuFFRUVHl/g19hw8fdnpuWZaOHDnitJ5zjRo1Svwg2bfffqsGDRrYz0vTW1hYmDZs2KAzZ844XU0+ePCgPV8ewsLCtGfPHhUWFjpdTb7a/Vzt76Fhw4ayLEvh4eG69dZbr2pbF7NixQo1aNBAH330kVO/F1sH2jwXJOnrr7+2VyUp+l17enoqKiqqXHrs1auX5s2bp9TUVEVGRl6yNiwsTIWFhTp8+LD9LwGSlJGRoaysrEv+Louu4prn8bX6lwQA3G4B4AqkpKTotddeU3h4uPr27XvRulOnThUbK/pSjqIlrvz8/CQV/8u+rBYuXOj0z90rVqzQ//73P91///32WMOGDfXFF18oLy/PHluzZk2xpcpK01uPHj1UUFCg2bNnO41PmzZNbm5uTvu/Gj169FB6erqWLVtmj+Xn52vWrFmqVq2aOnfuXKbt+vn5XdXv4OGHH1aVKlU0YcKEYlcWLcu67KodV6LoSuaF29++fbtSU1NLrF+1apXTPcU7duzQ9u3b7d9FnTp11KVLF7377rv63//+V+z1J0+eLHWPY8aMkZ+fnwYPHuy0ikaRb775xl6FpEePHpKk6dOnO9UkJiZKknr27HnR/RTdz75lyxZ7rKCggC8pAa4hriQDcPLpp5/q4MGDys/PV0ZGhlJSUpSUlKSwsDCtXr1aPj4+F33txIkTtWXLFvXs2VNhYWHKzMzUX/7yF918883q0KGDpF//sg8MDNTcuXNVvXp1+fn5KSIiQuHh4WXqt2bNmurQoYMGDhyojIwMTZ8+XY0aNdKQIUPsmsGDB2vFihW677779Mgjj+ibb77R4sWLnT5IV9reHnjgAXXt2lUvv/yyjh07ppYtW2r9+vX6+OOPNWLEiGLbLquhQ4fq3Xff1YABA5SWlqb69etrxYoV2rp1q6ZPn37FHxoztW7dWhs2bFBiYqJCQkIUHh7u9IGuy2nYsKFef/11xcfH69ixY4qJiVH16tV19OhRrVy5UkOHDtXzzz9/2e289957Ja4X/eyzz6pXr1766KOP9Pvf/149e/bU0aNHNXfuXDVr1kw5OTnFXtOoUSN16NBBTz/9tHJzczV9+nTVqlVLY8aMsWvmzJmjDh06qEWLFhoyZIgaNGigjIwMpaam6vvvv9fu3buv+GdQ9HNYunSpHn30UTVt2tTpG/e2bdumDz/80F6DumXLlurfv7/mzZunrKwsde7cWTt27NCCBQsUExNzyeUOmzdvrnbt2ik+Pl6nTp1SzZo19cEHHyg/P79U/QIoBdcsqgGgsilaYqro4eXlZQUHB1u/+93vrBkzZjgtNVbEXAIuOTnZeuihh6yQkBDLy8vLCgkJsR577DHr66+/dnrdxx9/bDVr1szy8PBwWn6qc+fOVvPmzUvs72JLwP3jH/+w4uPjrTp16li+vr5Wz549rW+//bbY66dOnWrVq1fP8vb2ttq3b299+eWXxbZ5qd7MJeAsy7LOnDljjRw50goJCbE8PT2txo0bW1OmTLEKCwud6iRZcXFxxXq62NJ0poyMDGvgwIFW7dq1LS8vL6tFixYlLlNXmiXgDh48aHXq1Mny9fW1JNl9FP1OzWX8LrYE2f/7f//P6tChg+Xn52f5+flZt912mxUXF2cdOnTokvs3zzfzcfz4cauwsNB64403rLCwMMvb29u68847rTVr1hT7XRQtATdlyhRr6tSpVmhoqOXt7W117NjR2r17d7F9f/PNN1a/fv2s4OBgy9PT06pXr57Vq1cva8WKFXbNlSwBd6Gvv/7aGjJkiFW/fn3Ly8vLql69utW+fXtr1qxZTsvNnT9/3powYYIVHh5ueXp6WqGhoVZ8fLxTjWUVP9+L+o6KirK8vb2toKAg66WXXrKSkpJKXAKupD9HFzs/zPOztOcAcKNysyzuwAcAAAAuxD3JAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGPgyk
XJSWFioEydOqHr16uX+tbsAAAC4epZl6cyZMwoJCZG7+6WvFROSy8mJEycUGhrq6jYAAABwGcePH9fNN998yRpCcjkp+mrY48ePy9/f38XdAAAAwORwOBQaGmrntkshJJeTolss/P39CckAAACV2JXcGssH9wAAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwuDQkb9myRQ888IBCQkLk5uamVatWOc1blqVx48apbt268vX1VVRUlA4fPuxUc+rUKfXt21f+/v4KDAzUoEGDlJOT41SzZ88edezYUT4+PgoNDdXkyZOL9fLhhx/qtttuk4+Pj1q0aKFPPvmk3I8XAAAA1weXhuSzZ8+qZcuWmjNnTonzkydP1syZMzV37lxt375dfn5+io6O1rlz5+yavn37av/+/UpKStKaNWu0ZcsWDR061J53OBzq3r27wsLClJaWpilTpmj8+PGaN2+eXbNt2zY99thjGjRokP79738rJiZGMTEx2rdv37U7eAAAAFRabpZlWa5uQvr16wFXrlypmJgYSb9eRQ4JCdFzzz2n559/XpKUnZ2toKAgzZ8/X7GxsTpw4ICaNWumnTt3qk2bNpKkdevWqUePHvr+++8VEhKid955Ry+//LLS09Pl5eUlSXrxxRe1atUqHTx4UJL06KOP6uzZs1qzZo3dT7t27dSqVSvNnTv3ivp3OBwKCAhQdnY2X0sNAABQCZUmr1Xae5KPHj2q9PR0RUVF2WMBAQGKiIhQamqqJCk1NVWBgYF2QJakqKgoubu7a/v27XZNp06d7IAsSdHR0Tp06JBOnz5t11y4n6Kaov2UJDc3Vw6Hw+kBAACAG0OlDcnp6emSpKCgIKfxoKAgey49PV116tRxmvfw8FDNmjWdakraxoX7uFhN0XxJEhISFBAQYD9CQ0NLe4gAAACopCptSK7s4uPjlZ2dbT+OHz/u6pYAAABQTiptSA4ODpYkZWRkOI1nZGTYc8HBwcrMzHSaz8/P16lTp5xqStrGhfu4WE3RfEm8vb3l7+/v9AAAAMCNodKG5PDwcAUHBys5Odkeczgc2r59uyIjIyVJkZGRysrKUlpaml2TkpKiwsJCRURE2DVbtmzR+fPn7ZqkpCQ1adJENWrUsGsu3E9RTdF+AAAA8Nvi0pCck5OjXbt2adeuXZJ+/bDerl279N1338nNzU0jRozQ66+/rtWrV2vv3r3q16+fQkJC7BUwmjZtqvvuu09DhgzRjh07tHXrVg0bNkyxsbEKCQmRJD3++OPy8vLSoEGDtH//fi1btkwzZszQqFGj7D6effZZrVu3TlOnTtXBgwc1fvx4ffnllxo2bFhF/0gAAABQGVgutHHjRktSsUf//v0ty7KswsJCa+zYsVZQUJDl7e1tdevWzTp06JDTNn766Sfrscces6pVq2b5+/tbAwcOtM6cOeNUs3v3bqtDhw6Wt7e3Va9ePevNN98s1svy5cutW2+91fLy8rKaN29urV27tlTHkp2dbUmysrOzS/dDAAAAQIUoTV6rNOskX+9YJxkAAKByK01e86ignnCNtR690NUtALhG0qb0c3ULAPCbU2k/uAcAAAC4CiEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAM
BCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAABDpQ7JBQUFGjt2rMLDw+Xr66uGDRvqtddek2VZdo1lWRo3bpzq1q0rX19fRUVF6fDhw07bOXXqlPr27St/f38FBgZq0KBBysnJcarZs2ePOnbsKB8fH4WGhmry5MkVcowAAACofCp1SH7rrbf0zjvvaPbs2Tpw4IDeeustTZ48WbNmzbJrJk+erJkzZ2ru3Lnavn27/Pz8FB0drXPnztk1ffv21f79+5WUlKQ1a9Zoy5YtGjp0qD3vcDjUvXt3hYWFKS0tTVOmTNH48eM1b968Cj1eAAAAVA4erm7gUrZt26aHHnpIPXv2lCTVr19f//jHP7Rjxw5Jv15Fnj59ul555RU99NBDkqSFCxcqKChIq1atUmxsrA4cOKB169Zp586datOmjSRp1qxZ6tGjh95++22FhIRoyZIlysvL03vvvScvLy81b95cu3btUmJiolOYBgAAwG9Dpb6SfM899yg5OVlff/21JGn37t36/PPPdf/990uSjh49qvT0dEVFRdmvCQgIUEREhFJTUyVJqampCgwMtAOyJEVFRcnd3V3bt2+3azp16iQvLy+7Jjo6WocOHdLp06dL7C03N1cOh8PpAQAAgBtDpb6S/OKLL8rhcOi2225TlSpVVFBQoEmTJqlv376SpPT0dElSUFCQ0+uCgoLsufT0dNWpU8dp3sPDQzVr1nSqCQ8PL7aNorkaNWoU6y0hIUETJkwoh6MEAABAZVOpryQvX75cS5Ys0dKlS/XVV19pwYIFevvtt7VgwQJXt6b4+HhlZ2fbj+PHj7u6JQAAAJSTSn0lefTo0XrxxRcVGxsrSWrRooW+/fZbJSQkqH///goODpYkZWRkqG7duvbrMjIy1KpVK0lScHCwMjMznbabn5+vU6dO2a8PDg5WRkaGU03R86Iak7e3t7y9va/+IAEAAFDpVOoryT///LPc3Z1brFKligoLCyVJ4eHhCg4OVnJysj3vcDi0fft2RUZGSpIiIyOVlZWltLQ0uyYlJUWFhYWKiIiwa7Zs2aLz58/bNUlJSWrSpEmJt1oAAADgxlapQ/IDDzygSZMmae3atTp27JhWrlypxMRE/f73v5ckubm5acSIEXr99de1evVq7d27V/369VNISIhiYmIkSU2bNtV9992nIUOGaMeOHdq6dauGDRum2NhYhYSESJIef/xxeXl5adCgQdq/f7+WLVumGTNmaNSoUa46dAAAALhQpb7dYtasWRo7dqz+/Oc/KzMzUyEhIXryySc1btw4u2bMmDE6e/ashg4dqqysLHXo0EHr1q2Tj4+PXbNkyRINGzZM3bp1k7u7u3r37q2ZM2fa8wEBAVq/fr3i4uLUunVr1a5dW+PGjWP5NwAAgN8oN+vCr69DmTkcDgUEBCg7O1v+/v4Vvv/WoxdW+D4BVIy0Kf1c3QIA3BBKk9cq9e0WAAAAgCsQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAIOHqxsAAKAkrUcvdHULAK6RtCn9XN3CZXElGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAM
BCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkgEAAAADIRkAAAAwEJIBAAAAAyEZAAAAMBCSAQAAAEOlD8k//PCD/vjHP6pWrVry9fVVixYt9OWXX9rzlmVp3Lhxqlu3rnx9fRUVFaXDhw87bePUqVPq27ev/P39FRgYqEGDBiknJ8epZs+ePerYsaN8fHwUGhqqyZMnV8jxAQAAoPKp1CH59OnTat++vTw9PfXpp5/qP//5j6ZOnaoaNWrYNZMnT9bMmTM1d+5cbd++XX5+foqOjta5c+fsmr59+2r//v1KSkrSmjVrtGXLFg0dOtSedzgc6t69u8LCwpSWlqYpU6Zo/PjxmjdvXoUeLwAAACoHD1c3cClvvfWWQkND9f7779tj4eHh9n9blqXp06frlVde0UMPPSRJWrhwoYKCgrRq1SrFxsbqwIEDWrdunXbu3Kk2bdpIkmbNmqUePXro7bffVkhIiJYsWaK8vDy999578vLyUvPmzbVr1y4lJiY6hekL5ebmKjc3137ucDiuxY8AAAAALlCprySvXr1abdq00R/+8AfVqVNHd955p/7617/a80ePHlV6erqioqLssYCAAEVERCg1NVWSlJqaqsDAQDsgS1JUVJTc3d21fft2u6ZTp07y8vKya6Kjo3Xo0CGdPn26xN4SEhIUEBBgP0JDQ8v12AEAAOA6lTok//e//9U777yjxo0b67PPPtPTTz+tZ555RgsWLJAkpaenS5KCgoKcXhcUFGTPpaenq06dOk7zHh4eqlmzplNNSdu4cB+m+Ph4ZWdn24/jx49f5dECAACgsqjUt1sUFhaqTZs2euONNyRJd955p/bt26e5c+eqf//+Lu3N29tb3t7eLu0BAAAA10alvpJct25dNWvWzGmsadOm+u677yRJwcHBkqSMjAynmoyMDHsuODhYmZmZTvP5+fk6deqUU01J27hwHwAAAPjtqNQhuX379jp06JDT2Ndff62wsDBJv36ILzg4WMnJyfa8w+HQ9u3bFRkZKUmKjIxUVlaW0tLS7JqUlBQVFhYqIiLCrtmyZYvOnz9v1yQlJalJkyZOK2kAAADgt6FSh+SRI0fqiy++0BtvvKEjR45o6dKlmjdvnuLi4iRJbm5uGjFihF5//XWtXr1ae/fuVb9+/RQSEqKYmBhJv155vu+++zRkyBDt2LFDW7du1bBhwxQbG6uQkBBJ0uOPPy4vLy8NGjRI+/fv17JlyzRjxgyNGjXKVYcOAAAAF6rU9yS3bdtWK1euVHx8vCZOnKjw8HBNnz5dffv2tWvGjBmjs2fPaujQocrKylKHDh20bt06+fj42DVLlizRsGHD1K1bN7m7u6t3796aOXOmPR8QEKD169crLi5OrVu3Vu3atTVu3LiLLv8GAACAG5ubZVmWq5u4ETgcDgUEBCg7O1v+/v4Vvv/WoxdW+D4BVIy0Kf1c3YJL8L4G3Lhc9b5WmrxWqW+3AAAAAFyBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABjKFJIbNGign376qdh4VlaWGjRocNVNAQAAAK5UppB87NgxFRQUFBvPzc3VDz/8cNVNAQAAA
K7kUZri1atX2//92WefKSAgwH5eUFCg5ORk1a9fv9yaAwAAAFyhVCE5JiZGkuTm5qb+/fs7zXl6eqp+/fqaOnVquTUHAAAAuEKpQnJhYaEkKTw8XDt37lTt2rWvSVMAAACAK5UqJBc5evRoefcBAAAAVBplCsmSlJycrOTkZGVmZtpXmIu89957V90YAAAA4CplCskTJkzQxIkT1aZNG9WtW1dubm7l3RcAAADgMmUKyXPnztX8+fP1xBNPlHc/AAAAgMuVaZ3kvLw83XPPPeXdCwAAAFAplCkkDx48WEuXLi3vXgAAAIBKoUy3W5w7d07z5s3Thg0bdMcdd8jT09NpPjExsVyaAwAAAFyhTCF5z549atWqlSRp3759TnN8iA8AAADXuzKF5I0bN5Z3HwAAAEClUaZ7kgEAAIAbWZmuJHft2vWSt1WkpKSUuSEAAADA1coUkovuRy5y/vx57dq1S/v27VP//v3Loy8AAADAZcoUkqdNm1bi+Pjx45WTk3NVDQEAAACuVq73JP/xj3/Ue++9V56bBAAAACpcuYbk1NRU+fj4lOcmAQAAgApXptstHn74YafnlmXpf//7n7788kuNHTu2XBoDAAAAXKVMITkgIMDpubu7u5o0aaKJEyeqe/fu5dIYAAAA4CplCsnvv/9+efcBAAAAVBplCslF0tLSdODAAUlS8+bNdeedd5ZLUwAAAIArlSkkZ2ZmKjY2Vps2bVJgYKAkKSsrS127dtUHH3ygm266qTx7BAAAACpUmVa3GD58uM6cOaP9+/fr1KlTOnXqlPbt2yeHw6FnnnmmvHsEAAAAKlSZriSvW7dOGzZsUNOmTe2xZs2aac6cOXxwDwAAANe9Ml1JLiwslKenZ7FxT09PFRYWXnVTAAAAgCuVKSTfe++9evbZZ3XixAl77IcfftDIkSPVrVu3cmsOAAAAcIUyheTZs2fL4XCofv36atiwoRo2bKjw8HA5HA7NmjWrvHsEAAAAKlSZ7kkODQ3VV199pQ0bNujgwYOSpKZNmyoqKqpcmwMAAABcoVRXklNSUtSsWTM5HA65ubnpd7/7nYYPH67hw4erbdu2at68uf71r39dq14BAACAClGqkDx9+nQNGTJE/v7+xeYCAgL05JNPKjExsdyaAwAAAFyhVCF59+7duu+++y463717d6WlpV11UwAAAIArlSokZ2RklLj0WxEPDw+dPHnyqpsCAAAAXKlUIblevXrat2/fRef37NmjunXrXnVTAAAAgCuVKiT36NFDY8eO1blz54rN/fLLL3r11VfVq1evcmsOAAAAcIVSLQH3yiuv6KOPPtKtt96qYcOGqUmTJpKkgwcPas6cOSooKNDLL798TRoFAAAAKkqpQnJQUJC2bdump59+WvHx8bIsS5Lk5uam6OhozZkzR0FBQdekUQAAAKCilPrLRMLCwvTJJ5/o9OnTOnLkiCzLUuPGjVWjRo1r0R8AAABQ4cr0jXuSVKNGDbVt27Y8ewEAAAAqhVJ9cA8AAAD4LSAkAwAAAAZCMgAAAGAgJAMAAAAGQjIAAABgICQDAAAABkIyAAAAYCAkAwAAAAZCMgAAAGAgJAMAAAAGQjIAAABgICQDAAAABkIyAAAAYCAkAwAAAAZCMgAAAGAgJAMAAAAGQjIAAABgICQDAAAABkIyAAAAYCAkAwAAAAZCMgAAAGAgJAMAAAAGQjIAAABgICQDAAAABkIyAAAAYLiuQvKbb74pNzc3jRgxwh47d+6c4uLiVKtWLVWrVk29e/dWRkaG0+u+++479ezZU1WrVlWdOnU0evRo5efnO9Vs2rRJd911l7y9vdWoUSPNnz+/Ao4IAAAAldF1E5J37typd999V3fccYfT+MiRI/XPf/5TH374oTZv3qwTJ07o4YcftucLCgrUs2dP5eXladu2bVqwYIHmz5+vcePG2TVHjx5Vz5491bVrV+3atUsjRozQ4MGD9dlnn1XY8QEAAKDyuC5Cck5Oj
vr27au//vWvqlGjhj2enZ2tv//970pMTNS9996r1q1b6/3339e2bdv0xRdfSJLWr1+v//znP1q8eLFatWql+++/X6+99prmzJmjvLw8SdLcuXMVHh6uqVOnqmnTpho2bJj69OmjadOmueR4AQAA4FrXRUiOi4tTz549FRUV5TSelpam8+fPO43fdtttuuWWW5SamipJSk1NVYsWLRQUFGTXREdHy+FwaP/+/XaNue3o6Gh7GyXJzc2Vw+FwegAAAODG4OHqBi7ngw8+0FdffaWdO3cWm0tPT5eXl5cCAwOdxoOCgpSenm7XXBiQi+aL5i5V43A49Msvv8jX17fYvhMSEjRhwoQyHxcAAAAqr0p9Jfn48eN69tlntWTJEvn4+Li6HSfx8fHKzs62H8ePH3d1SwAAACgnlTokp6WlKTMzU3fddZc8PDzk4eGhzZs3a+bMmfLw8FBQUJDy8vKUlZXl9LqMjAwFBwdLkoKDg4utdlH0/HI1/v7+JV5FliRvb2/5+/s7PQAAAHBjqNQhuVu3btq7d6927dplP9q0aaO+ffva/+3p6ank5GT7NYcOHdJ3332nyMhISVJkZKT27t2rzMxMuyYpKUn+/v5q1qyZXXPhNopqirYBAACA35ZKfU9y9erVdfvttzuN+fn5qVatWvb4oEGDNGrUKNWsWVP+/v4aPny4IiMj1a5dO0lS9+7d1axZMz3xxBOaPHmy0tPT9corryguLk7e3t6SpKeeekqzZ8/WmDFj9Kc//UkpKSlavny51q5dW7EHDAAAgEqhUofkKzFt2jS5u7urd+/eys3NVXR0tP7yl7/Y81WqVNGaNWv09NNPKzIyUn5+furfv78mTpxo14SHh2vt2rUaOXKkZsyYoZtvvll/+9vfFB0d7YpDAgAAgIu5WZZlubqJG4HD4VBAQICys7Ndcn9y69ELK3yfACpG2pR+rm7BJXhfA25crnpfK01eq9T3JAMAAACuQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMlTokJyQkqG3btqpevbrq1KmjmJgYHTp0yKnm3LlziouLU61atVStWjX17t1bGRkZTjXfffedevbsqapVq6pOnToaPXq08vPznWo2bdqku+66S97e3mrUqJHmz59/rQ8PAAAAlVSlDsmbN29WXFycvvjiCyUlJen8+fPq3r27zp49a9eMHDlS//znP/Xhhx9q8+bNOnHihB5++GF7vqCgQD179lReXp62bdumBQsWaP78+Ro3bpxdc/ToUfXs2VNdu3bVrl27NGLECA0ePFifffZZhR4vAAAAKgc3y7IsVzdxpU6ePKk6depo8+bN6tSpk7Kzs3XTTTdp6dKl6tOnjyTp4
MGDatq0qVJTU9WuXTt9+umn6tWrl06cOKGgoCBJ0ty5c/XCCy/o5MmT8vLy0gsvvKC1a9dq37599r5iY2OVlZWldevWXVFvDodDAQEBys7Olr+/f/kf/GW0Hr2wwvcJoGKkTenn6hZcgvc14Mblqve10uS1Sn0l2ZSdnS1JqlmzpiQpLS1N58+fV1RUlF1z22236ZZbblFqaqokKTU1VS1atLADsiRFR0fL4XBo//79ds2F2yiqKdpGSXJzc+VwOJweAAAAuDFcNyG5sLBQI0aMUPv27XX77bdLktLT0+Xl5aXAwECn2qCgIKWnp9s1FwbkovmiuUvVOBwO/fLLLyX2k5CQoICAAPsRGhp61ccIAACAyuG6CclxcXHat2+fPvjgA1e3IkmKj49Xdna2/Th+/LirWwIAAEA58XB1A1di2LBhWrNmjbZs2aKbb77ZHg8ODlZeXp6ysrKcriZnZGQoODjYrtmxY4fT9opWv7iwxlwRIyMjQ/7+/vL19S2xJ29vb3l7e1/1sQEAAKDyqdRXki3L0rBhw7Ry5UqlpKQoPDzcab5169by9PRUcnKyPXbo0CF99913ioyMlCRFRkZq7969yszMtGuSkpLk7++vZs2a2TUXbqOopmgbAAAA+G2p1FeS4+LitHTpUn388ceqXr26fQ9xQECAfH19FRAQoEGDBmnUqFGqWbOm/P39NXz4cEVGRqpdu3aSpO7du6tZs2Z64oknNHnyZKWnp+uVV15RXFycfSX4qaee0uzZszVmzBj96U9/UkpKipYvX661a9e67NgBAADgOpX6SvI777yj7OxsdenSRXXr1rUfy5Yts2umTZumXr16qXfv3urUqZOCg4P10Ucf2fNVqlTRmjVrVKVKFUVGRuqPf/yj+vXrp4kTJ9o14eHhWrt2rZKSktSyZUtNnTpVf/vb3xQdHV2hxwsAAIDKoVJfSb6SJZx9fHw0Z84czZkz56I1YWFh+uSTTy65nS5duujf//53qXsEAADAjadSX0kGAAAAXIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQbJgzZ47q168vHx8fRUREaMeOHa5uCQAAABWMkHyBZcuWadSoUXr11Vf11VdfqWXLloqOjlZmZqarWwMAAEAFIiRfIDExUUOGDNHAgQPVrFkzzZ07V1WrVtV7773n6tYAAABQgTxc3UBlkZeXp7S0NMXHx9tj7u7uioqKUmpqarH63Nxc5ebm2s+zs7MlSQ6H49o3W4KC3F9csl8A156r3ldcjfc14Mblqve1ov1alnXZWkLy//nxxx9VUFCgoKAgp/GgoCAdPHiwWH1CQoImTJhQbDw0NPSa9Qjgtylg1
lOubgEAypWr39fOnDmjgICAS9YQkssoPj5eo0aNsp8XFhbq1KlTqlWrltzc3FzYGW50DodDoaGhOn78uPz9/V3dDgBcNd7XUFEsy9KZM2cUEhJy2VpC8v+pXbu2qlSpooyMDKfxjIwMBQcHF6v39vaWt7e301hgYOC1bBFw4u/vz18mAG4ovK+hIlzuCnIRPrj3f7y8vNS6dWslJyfbY4WFhUpOTlZkZKQLOwMAAEBF40ryBUaNGqX+/furTZs2uvvuuzV9+nSdPXtWAwcOdHVrAAAAqECE5As8+uijOnnypMaNG6f09HS1atVK69atK/ZhPsCVvL299eqrrxa73QcArle8r6EycrOuZA0MAAAA4DeEe5IBAAAAAyEZAAAAMBCSAQAAAAMhGQAAADAQkoHrzJw5c1S/fn35+PgoIiJCO3bscHVLAFAmW7Zs0QMPPKCQkBC5ublp1apVrm4JsBGSgevIsmXLNGrUKL366qv66quv1LJlS0VHRyszM9PVrQFAqZ09e1YtW7bUnDlzXN0KUAxLwAHXkYiICLVt21azZ8+W9Ou3QoaGhmr48OF68cUXXdwdAJSdm5ubVq5cqZiYGFe3AkjiSjJw3cjLy1NaWpqioqLsMXd3d0VFRSk1NdWFnQEAcOMhJAPXiR9//FEFBQXFvgEyKChI6enpLuoKAIAbEyEZAAAAMBCSgetE7dq1VaVKFWVkZDiNZ2RkKDg42EVdAQBwYyIkA9cJLy8vtW7dWsnJyfZYYWGhkpOTFRkZ6cLOAAC48Xi4ugEAV27UqFHq37+/2rRpo7vvvlvTp0/X2bNnNXDgQFe3BgCllpOToyNHjtjPjx49ql27dqlmzZq65ZZbXNgZwBJwwHVn9uzZmjJlitLT09WqVSvNnDlTERERrm4LAEpt06ZN6tq1a7Hx/v37a/78+RXfEHABQjIAAABg4J5kAAAAwEBIBgAAAAyEZAAAAMBASAYAAAAMhGQAAADAQEgGAAAADIRkAAAAwEBIBgAAAAyEZABAMfPnz1dgYOBVb8fNzU2rVq266u0AQEUjJAPADWrAgAGKiYlxdRsAcF0iJAMAAAAGQjIA/AYlJiaqRYsW8vPzU2hoqP785z8rJyenWN2qVavUuHFj+fj4KDo6WsePH3ea//jjj3XXXXfJx8dHDRo00IQJE5Sfn19RhwEA1wwhGQB+g9zd3TVz5kzt379fCxYsUEpKisaMGeNU8/PPP2vSpElauHChtm7dqqysLMXGxtrz//rXv9SvXz89++yz+s9//qN3331X8+fP16RJkyr6cACg3LlZlmW5ugkAQPkbMGCAsrKyruiDcytWrNBTTz2lH3/8UdKvH9wbOHCgvvjiC0VEREiSDh48qKZNm2r79u26++67FRUVpW7duik+Pt7ezuLFizVmzBidOHFC0q8f3Fu5ciX3RgO47ni4ugEAQMXbsGGDEhISdPDgQTkcDuXn5+vcuXP6+eefVbVqVUmSh4eH2rZta7/mtttuU2BgoA4cOKC7775bu3fv1tatW52uHBcUFBTbDgBcjwjJAPAbc+zYMfXq1UtPP/20Jk2apJo1a+rzzz/XoEGDlJeXd8XhNicnRxMmTNDDDz9cbM7Hx6e82waACkVIBoDfmLS0NBUWFmrq1Klyd//1oynLly8vVpefn68vv/xSd999tyTp0KFDysrKUtOmTSVJd911lw4dOqRGjRpVXPMAUEEIyQBwA8vOztauXbucxmrXrq3z589r1qxZeuCBB7R161bNnTu32Gs9PT01fPhwzZw5Ux4eHho2bJjatWtnh+Zx48apV69euuWWW9SnTx+5u7tr9+7d2rdvn15//fWKODwAuGZY3QIAbmCbNm3SnXfe6fRYtGiREhMT9dZbb+n222/XkiVLlJCQUOy1VatW1QsvvKDHH39c7du3V7Vq1bRs2TJ7Pjo6WmvWrNH69evVtm1btWvXTtOmTVNYWFhFHiIAXBOsbgEAAAAYuJIMAAAAG
AjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAYCMkAAACAgZAMAAAAGAjJAAAAgIGQDAAAABgIyQAAAICBkAwAAAAY/j8efSUp2XpE+gAAAABJRU5ErkJggg==\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import re\n",
+        "import nltk\n",
+        "nltk.download('stopwords')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TC5g2Y6zTdkG",
+        "outputId": "6692619c-1712-4c36-b6b5-95541a11665c"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from nltk.corpus import stopwords\n",
+        "from nltk.stem import PorterStemmer\n",
+        "ps = PorterStemmer()"
+      ],
+      "metadata": {
+        "id": "qhClmm5EUCCK"
+      },
+      "execution_count": 7,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Clean every email into one lower-cased, stop-word-free, stemmed string.\n",
+        "# The stop-word list is loaded once into a set: calling\n",
+        "# stopwords.words('english') inside the comprehension rebuilds the whole\n",
+        "# list for every token and then scans it linearly.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "corpus = []\n",
+        "# Iterate over the column values directly so the loop does not depend on\n",
+        "# the frame having a 0..n-1 index.\n",
+        "for text in df['text']:\n",
+        "  # Replace everything except ASCII letters with a space. The upper bound\n",
+        "  # must be 'Z': the range 'A-z' also spans the punctuation that sits\n",
+        "  # between 'Z' and 'a' in ASCII (such as [ ] ^ _), so those leaked through.\n",
+        "  review = re.sub('[^a-zA-Z]', ' ', text)\n",
+        "  words = review.lower().split()\n",
+        "  # Drop stop words first, then reduce each remaining word to its stem.\n",
+        "  review = ' '.join(ps.stem(word) for word in words if word not in stop_words)\n",
+        "  corpus.append(review)"
+      ],
+      "metadata": {
+        "id": "aIVAQXzhVDOs"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# **Create Bag of Words**"
+      ],
+      "metadata": {
+        "id": "ipsL5hyDWl3X"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.feature_extraction.text import CountVectorizer\n",
+        "\n",
+        "# Count unigrams and bigrams, keeping at most 2500 features, and\n",
+        "# materialise the document-term counts as a dense array.\n",
+        "cv = CountVectorizer(ngram_range=(1, 2), max_features=2500)\n",
+        "X = cv.fit_transform(corpus).toarray()\n"
+      ],
+      "metadata": {
+        "id": "esUtMy6CWYdi"
+      },
+      "execution_count": 9,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "cv.vocabulary_"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yPEaAou0aO3o",
+        "outputId": "07300953-4837-4413-9cb7-e170bd9ab405"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'user': np.int64(2369),\n",
+              " 'id': np.int64(1130),\n",
+              " 'origin': np.int64(1610),\n",
+              " 'messag': np.int64(1442),\n",
+              " 'bna': np.int64(257),\n",
+              " 'highlight': np.int64(1096),\n",
+              " 'sent': np.int64(2023),\n",
+              " 'thursday': np.int64(2262),\n",
+              " 'june': np.int64(1260),\n",
+              " 'escapenumb': np.int64(746),\n",
+              " 'pm': np.int64(1716),\n",
+              " 'subject': np.int64(2179),\n",
+              " 'inc': np.int64(1152),\n",
+              " 'daili': np.int64(528),\n",
+              " 'labor': np.int64(1286),\n",
+              " 'report': np.int64(1900),\n",
+              " 'tabl': np.int64(2211),\n",
+              " 'content': np.int64(465),\n",
+              " 'regist': np.int64(1874),\n",
+              " 'web': np.int64(2413),\n",
+              " 'subscrib': np.int64(2183),\n",
+              " 'access': np.int64(8),\n",
+              " 'full': np.int64(1019),\n",
+              " 'text': np.int64(2246),\n",
+              " 'articl': np.int64(140),\n",
+              " 'use': np.int64(2365),\n",
+              " 'url': np.int64(2359),\n",
+              " 'link': np.int64(1333),\n",
+              " 'suppli': np.int64(2200),\n",
+              " 'inform': np.int64(1169),\n",
+              " 'becom': np.int64(208),\n",
+              " 'sign': np.int64(2068),\n",
+              " 'free': np.int64(1009),\n",
+              " 'trial': np.int64(2313),\n",
+              " 'avail': np.int64(173),\n",
+              " 'http': np.int64(1123),\n",
+              " 'com': np.int64(400),\n",
+              " 'call': np.int64(300),\n",
+              " 'custom': np.int64(522),\n",
+              " 'relat': np.int64(1881),\n",
+              " 'mon': np.int64(1481),\n",
+              " 'fri': np.int64(1011),\n",
+              " 'et': np.int64(870),\n",
+              " 'decis': np.int64(561),\n",
+              " 'conduct': np.int64(437),\n",
+              " 'polici': np.int64(1719),\n",
+              " 'california': np.int64(299),\n",
+              " 'firm': np.int64(976),\n",
+              " 'two': np.int64(2327),\n",
+              " 'bar': np.int64(188),\n",
+              " 'languag': np.int64(1288),\n",
+              " 'restrict': np.int64(1920),\n",
+              " 'solicit': np.int64(2092),\n",
+              " 'distribut': np.int64(617),\n",
+              " 'constitut': np.int64(454),\n",
+              " 'practic': np.int64(1733),\n",
+              " 'district': np.int64(619),\n",
+              " 'rule': np.int64(1959),\n",
+              " 'nation': np.int64(1511),\n",
+              " 'board': np.int64(260),\n",
+              " 'held': np.int64(1085),\n",
+              " 'transport': np.int64(2305),\n",
+              " 'employe': np.int64(694),\n",
+              " 'right': np.int64(1937),\n",
+              " 'act': np.int64(16),\n",
+              " 'order': np.int64(1607),\n",
+              " 'second': np.int64(1990),\n",
+              " 'elect': np.int64(680),\n",
+              " 'determin': np.int64(594),\n",
+              " 'whether': np.int64(2432),\n",
+              " 'calif': np.int64(298),\n",
+              " 'want': np.int64(2403),\n",
+              " 'repres': np.int64(1903),\n",
+              " 'trade': np.int64(2294),\n",
+              " 'northern': np.int64(1551),\n",
+              " 'court': np.int64(506),\n",
+              " 'find': np.int64(973),\n",
+              " 'consid': np.int64(450),\n",
+              " 'new': np.int64(1528),\n",
+              " 'final': np.int64(968),\n",
+              " 'view': np.int64(2390),\n",
+              " 'actual': np.int64(21),\n",
+              " 'judg': np.int64(1255),\n",
+              " 'legal': np.int64(1314),\n",
+              " 'appli': np.int64(119),\n",
+              " 'work': np.int64(2463),\n",
+              " 'time': np.int64(2271),\n",
+              " 'place': np.int64(1696),\n",
+              " 'across': np.int64(15),\n",
+              " 'cannot': np.int64(310),\n",
+              " 'said': np.int64(1965),\n",
+              " 'union': np.int64(2348),\n",
+              " 'effort': np.int64(675),\n",
+              " 'protect': np.int64(1795),\n",
+              " 'activ': np.int64(20),\n",
+              " 'page': np.int64(1628),\n",
+              " 'aa': np.int64(0),\n",
+              " 'pub': np.int64(1803),\n",
+              " 'ip': np.int64(1219),\n",
+              " 'dlr': np.int64(621),\n",
+              " 'nsf': np.int64(1563),\n",
+              " 'escapelong': np.int64(742),\n",
+              " 'attack': np.int64(159),\n",
+              " 'former': np.int64(998),\n",
+              " 'servic': np.int64(2041),\n",
+              " 'worker': np.int64(2464),\n",
+              " 'power': np.int64(1729),\n",
+              " 'compani': np.int64(422),\n",
+              " 'fire': np.int64(975),\n",
+              " 'like': np.int64(1329),\n",
+              " 'stem': np.int64(2152),\n",
+              " 'head': np.int64(1079),\n",
+              " 'injuri': np.int64(1178),\n",
+              " 'unabl': np.int64(2340),\n",
+              " 'establish': np.int64(864),\n",
+              " 'american': np.int64(94),\n",
+              " 'state': np.int64(2132),\n",
+              " 'co': np.int64(395),\n",
+              " 'escapenumberth': np.int64(861),\n",
+              " 'fail': np.int64(927),\n",
+              " 'show': np.int64(2066),\n",
+              " 'qualifi': np.int64(1821),\n",
+              " 'individu': np.int64(1166),\n",
+              " 'given': np.int64(1046),\n",
+              " 'could': np.int64(497),\n",
+              " 'abil': np.int64(3),\n",
+              " 'answer': np.int64(109),\n",
+              " 'phone': np.int64(1684),\n",
+              " 'ga': np.int64(1028),\n",
+              " 'electr': np.int64(681),\n",
+              " 'emerg': np.int64(691),\n",
+              " 'summari': np.int64(2195),\n",
+              " 'also': np.int64(88),\n",
+              " 'direct': np.int64(604),\n",
+              " 'employ': np.int64(693),\n",
+              " 'job': np.int64(1241),\n",
+              " 'requir': np.int64(1907),\n",
+              " 'handl': np.int64(1074),\n",
+              " 'line': np.int64(1332),\n",
+              " 'signific': np.int64(2069),\n",
+              " 'public': np.int64(1806),\n",
+              " 'vote': np.int64(2397),\n",
+              " 'affili': np.int64(58),\n",
+              " 'assembl': np.int64(144),\n",
+              " 'unit': np.int64(2350),\n",
+              " 'arm': np.int64(133),\n",
+              " 'associ': np.int64(149),\n",
+              " 'goe': np.int64(1052),\n",
+              " 'plan': np.int64(1697),\n",
+              " 'member': np.int64(1435),\n",
+              " 'effect': np.int64(673),\n",
+              " 'juli': np.int64(1257),\n",
+              " 'howev': np.int64(1118),\n",
+              " 'appear': np.int64(118),\n",
+              " 'one': np.int64(1597),\n",
+              " 'may': np.int64(1412),\n",
+              " 'need': np.int64(1518),\n",
+              " 'clear': np.int64(388),\n",
+              " 'year': np.int64(2486),\n",
+              " 'hous': np.int64(1114),\n",
+              " 'schedul': np.int64(1980),\n",
+              " 'chang': np.int64(368),\n",
+              " 'would': np.int64(2468),\n",
+              " 'collect': np.int64(399),\n",
+              " 'bargain': np.int64(189),\n",
+              " 'within': np.int64(2452),\n",
+              " 'four': np.int64(1006),\n",
+              " 'condit': np.int64(436),\n",
+              " 'grant': np.int64(1058),\n",
+              " 'pass': np.int64(1648),\n",
+              " 'happen': np.int64(1075),\n",
+              " 'agreement': np.int64(71),\n",
+              " 'york': np.int64(2491),\n",
+              " 'citi': np.int64(383),\n",
+              " 'law': np.int64(1301),\n",
+              " 'case': np.int64(321),\n",
+              " 'first': np.int64(977),\n",
+              " 'feder': np.int64(946),\n",
+              " 'million': np.int64(1462),\n",
+              " 'damag': np.int64(530),\n",
+              " 'award': np.int64(178),\n",
+              " 'human': np.int64(1127),\n",
+              " 'post': np.int64(1727),\n",
+              " 'southern': np.int64(2105),\n",
+              " 'defens': np.int64(564),\n",
+              " 'take': np.int64(2213),\n",
+              " 'advantag': np.int64(36),\n",
+              " 'depart': np.int64(575),\n",
+              " 'process': np.int64(1771),\n",
+              " 'pressur': np.int64(1748),\n",
+              " 'late': np.int64(1295),\n",
+              " 'rais': np.int64(1830),\n",
+              " 'challeng': np.int64(366),\n",
+              " 'nomin': np.int64(1544),\n",
+              " 'candid': np.int64(309),\n",
+              " 'presid': np.int64(1745),\n",
+              " 'session': np.int64(2044),\n",
+              " 'intern': np.int64(1200),\n",
+              " 'jame': np.int64(1226),\n",
+              " 'tom': np.int64(2282),\n",
+              " 'top': np.int64(2286),\n",
+              " 'offic': np.int64(1587),\n",
+              " 'current': np.int64(519),\n",
+              " 'secretari': np.int64(1991),\n",
+              " 'run': np.int64(1960),\n",
+              " 'local': np.int64(1348),\n",
+              " 'occur': np.int64(1576),\n",
+              " 'receiv': np.int64(1848),\n",
+              " 'percent': np.int64(1665),\n",
+              " 'support': np.int64(2202),\n",
+              " 'name': np.int64(1509),\n",
+              " 'fall': np.int64(930),\n",
+              " 'among': np.int64(96),\n",
+              " 'voic': np.int64(2394),\n",
+              " 'variou': np.int64(2376),\n",
+              " 'amend': np.int64(92),\n",
+              " 'hundr': np.int64(1128),\n",
+              " 'campaign': np.int64(305),\n",
+              " 'confid': np.int64(440),\n",
+              " 'get': np.int64(1042),\n",
+              " 'necessari': np.int64(1517),\n",
+              " 'earlier': np.int64(650),\n",
+              " 'readi': np.int64(1841),\n",
+              " 'survey': np.int64(2205),\n",
+              " 'week': np.int64(2421),\n",
+              " 'launch': np.int64(1298),\n",
+              " 'safeti': np.int64(1964),\n",
+              " 'health': np.int64(1080),\n",
+              " 'administr': np.int64(29),\n",
+              " 'annual': np.int64(107),\n",
+              " 'gather': np.int64(1034),\n",
+              " 'data': np.int64(535),\n",
+              " 'better': np.int64(234),\n",
+              " 'target': np.int64(2217),\n",
+              " 'high': np.int64(1093),\n",
+              " 'agenc': np.int64(64),\n",
+              " 'includ': np.int64(1156),\n",
+              " 'construct': np.int64(457),\n",
+              " 'program': np.int64(1780),\n",
+              " 'improv': np.int64(1151),\n",
+              " 'identifi': np.int64(1136),\n",
+              " 'davi': np.int64(540),\n",
+              " 'say': np.int64(1976),\n",
+              " 'request': np.int64(1906),\n",
+              " 'calendar': np.int64(297),\n",
+              " 'base': np.int64(191),\n",
+              " 'januari': np.int64(1229),\n",
+              " 'februari': np.int64(945),\n",
+              " 'spokesman': np.int64(2116),\n",
+              " 'ask': np.int64(142),\n",
+              " 'figur': np.int64(965),\n",
+              " 'averag': np.int64(174),\n",
+              " 'hour': np.int64(1112),\n",
+              " 'site': np.int64(2078),\n",
+              " 'must': np.int64(1501),\n",
+              " 'return': np.int64(1928),\n",
+              " 'even': np.int64(877),\n",
+              " 'record': np.int64(1858),\n",
+              " 'today': np.int64(2278),\n",
+              " 'event': np.int64(879),\n",
+              " 'revis': np.int64(1934),\n",
+              " 'domest': np.int64(626),\n",
+              " 'product': np.int64(1773),\n",
+              " 'quarter': np.int64(1823),\n",
+              " 'releas': np.int64(1883),\n",
+              " 'commerc': np.int64(411),\n",
+              " 'news': np.int64(1530),\n",
+              " 'reflect': np.int64(1866),\n",
+              " 'market': np.int64(1400),\n",
+              " 'number': np.int64(1565),\n",
+              " 'involv': np.int64(1215),\n",
+              " 'higher': np.int64(1094),\n",
+              " 'five': np.int64(979),\n",
+              " 'month': np.int64(1488),\n",
+              " 'period': np.int64(1671),\n",
+              " 'total': np.int64(2288),\n",
+              " 'insur': np.int64(1185),\n",
+              " 'claim': np.int64(384),\n",
+              " 'file': np.int64(966),\n",
+              " 'fell': np.int64(951),\n",
+              " 'third': np.int64(2252),\n",
+              " 'season': np.int64(1988),\n",
+              " 'adjust': np.int64(28),\n",
+              " 'end': np.int64(699),\n",
+              " 'care': np.int64(318),\n",
+              " 'sever': np.int64(2049),\n",
+              " 'factor': np.int64(926),\n",
+              " 'combin': np.int64(408),\n",
+              " 'key': np.int64(1270),\n",
+              " 'accord': np.int64(9),\n",
+              " 'gener': np.int64(1038),\n",
+              " 'account': np.int64(10),\n",
+              " 'entertain': np.int64(725),\n",
+              " 'six': np.int64(2080),\n",
+              " 'world': np.int64(2465),\n",
+              " 'three': np.int64(2259),\n",
+              " 'contract': np.int64(470),\n",
+              " 'lead': np.int64(1304),\n",
+              " 'committe': np.int64(416),\n",
+              " 'expect': np.int64(900),\n",
+              " 'bill': np.int64(245),\n",
+              " 'allow': np.int64(84),\n",
+              " 'organ': np.int64(1609),\n",
+              " 'propos': np.int64(1792),\n",
+              " 'panel': np.int64(1633),\n",
+              " 'chairman': np.int64(365),\n",
+              " 'econom': np.int64(657),\n",
+              " 'outlook': np.int64(1618),\n",
+              " 'anderson': np.int64(101),\n",
+              " 'forecast': np.int64(992),\n",
+              " 'face': np.int64(922),\n",
+              " 'sinc': np.int64(2074),\n",
+              " 'earli': np.int64(649),\n",
+              " 'predict': np.int64(1735),\n",
+              " 'lower': np.int64(1368),\n",
+              " 'growth': np.int64(1067),\n",
+              " 'least': np.int64(1309),\n",
+              " 'senat': np.int64(2015),\n",
+              " 'democrat': np.int64(573),\n",
+              " 'legisl': np.int64(1315),\n",
+              " 'prospect': np.int64(1794),\n",
+              " 'close': np.int64(392),\n",
+              " 'busi': np.int64(287),\n",
+              " 'night': np.int64(1542),\n",
+              " 'help': np.int64(1087),\n",
+              " 'ad': np.int64(23),\n",
+              " 'confer': np.int64(438),\n",
+              " 'demand': np.int64(572),\n",
+              " 'declin': np.int64(562),\n",
+              " 'advertis': np.int64(37),\n",
+              " 'index': np.int64(1161),\n",
+              " 'point': np.int64(1718),\n",
+              " 'previou': np.int64(1751),\n",
+              " 'offici': np.int64(1590),\n",
+              " 'go': np.int64(1048),\n",
+              " 'forward': np.int64(1002),\n",
+              " 'way': np.int64(2411),\n",
+              " 'announc': np.int64(106),\n",
+              " 'studi': np.int64(2177),\n",
+              " 'feel': np.int64(949),\n",
+              " 'affect': np.int64(57),\n",
+              " 'perform': np.int64(1667),\n",
+              " 'manag': np.int64(1392),\n",
+              " 'promot': np.int64(1789),\n",
+              " 'transfer': np.int64(2302),\n",
+              " 'anoth': np.int64(108),\n",
+              " 'facil': np.int64(923),\n",
+              " 'situat': np.int64(2079),\n",
+              " 'treat': np.int64(2309),\n",
+              " 'favor': np.int64(936),\n",
+              " 'steel': np.int64(2149),\n",
+              " 'reach': np.int64(1837),\n",
+              " 'cover': np.int64(507),\n",
+              " 'letter': np.int64(1323),\n",
+              " 'bush': np.int64(286),\n",
+              " 'leadership': np.int64(1306),\n",
+              " 'reduct': np.int64(1863),\n",
+              " 'john': np.int64(1243),\n",
+              " 'cut': np.int64(523),\n",
+              " 'special': np.int64(2109),\n",
+              " 'last': np.int64(1292),\n",
+              " 'train': np.int64(2297),\n",
+              " 'publish': np.int64(1808),\n",
+              " 'affair': np.int64(56),\n",
+              " 'st': np.int64(2124),\n",
+              " 'washington': np.int64(2406),\n",
+              " 'dc': np.int64(545),\n",
+              " 'contact': np.int64(460),\n",
+              " 'copi': np.int64(481),\n",
+              " 'mail': np.int64(1380),\n",
+              " 'copyright': np.int64(483),\n",
+              " 'www': np.int64(2475),\n",
+              " 'corp': np.int64(487),\n",
+              " 'term': np.int64(2238),\n",
+              " 'licens': np.int64(1326),\n",
+              " 'prohibit': np.int64(1782),\n",
+              " 'origin messag': np.int64(1611),\n",
+              " 'sent thursday': np.int64(2027),\n",
+              " 'june escapenumb': np.int64(1261),\n",
+              " 'escapenumb escapenumb': np.int64(772),\n",
+              " 'escapenumb pm': np.int64(808),\n",
+              " 'bna com': np.int64(258),\n",
+              " 'aa escapenumb': np.int64(1),\n",
+              " 'escapenumb http': np.int64(787),\n",
+              " 'http pub': np.int64(1124),\n",
+              " 'pub bna': np.int64(1804),\n",
+              " 'com ip': np.int64(405),\n",
+              " 'ip bna': np.int64(1220),\n",
+              " 'bna dlr': np.int64(259),\n",
+              " 'dlr nsf': np.int64(622),\n",
+              " 'nsf id': np.int64(1564),\n",
+              " 'id escapelong': np.int64(1133),\n",
+              " 'ga electr': np.int64(1029),\n",
+              " 'page escapenumb': np.int64(1629),\n",
+              " 'juli escapenumb': np.int64(1258),\n",
+              " 'new york': np.int64(1529),\n",
+              " 'escapenumb million': np.int64(794),\n",
+              " 'escapenumb percent': np.int64(805),\n",
+              " 'year escapenumb': np.int64(2488),\n",
+              " 'total escapenumb': np.int64(2289),\n",
+              " 'week escapenumb': np.int64(2422),\n",
+              " 'escapenumb week': np.int64(824),\n",
+              " 'unit state': np.int64(2351),\n",
+              " 'escapenumb new': np.int64(800),\n",
+              " 'last year': np.int64(1294),\n",
+              " 'inc escapenumb': np.int64(1153),\n",
+              " 'dc escapenumb': np.int64(546),\n",
+              " 'escapenumb mail': np.int64(790),\n",
+              " 'go http': np.int64(1049),\n",
+              " 'http www': np.int64(1125),\n",
+              " 'copyright escapenumb': np.int64(484),\n",
+              " 'hi': np.int64(1092),\n",
+              " 'chri': np.int64(379),\n",
+              " 'roll': np.int64(1951),\n",
+              " 'jon': np.int64(1249),\n",
+              " 'select': np.int64(2010),\n",
+              " 'main': np.int64(1386),\n",
+              " 'download': np.int64(634),\n",
+              " 'screen': np.int64(1985),\n",
+              " 'bottom': np.int64(266),\n",
+              " 'export': np.int64(911),\n",
+              " 'click': np.int64(390),\n",
+              " 'give': np.int64(1045),\n",
+              " 'choos': np.int64(378),\n",
+              " 'save': np.int64(1974),\n",
+              " 'let': np.int64(1321),\n",
+              " 'know': np.int64(1281),\n",
+              " 'anyon': np.int64(113),\n",
+              " 'els': np.int64(684),\n",
+              " 'thank': np.int64(2247),\n",
+              " 'jay': np.int64(1233),\n",
+              " 'let know': np.int64(1322),\n",
+              " 'thoma': np.int64(2255),\n",
+              " 'paul': np.int64(1656),\n",
+              " 'friday': np.int64(1012),\n",
+              " 'mark': np.int64(1399),\n",
+              " 'east': np.int64(654),\n",
+              " 'follow': np.int64(988),\n",
+              " 'pjm': np.int64(1695),\n",
+              " 'price': np.int64(1753),\n",
+              " 'enron': np.int64(716),\n",
+              " 'asp': np.int64(143),\n",
+              " 'iso': np.int64(1223),\n",
+              " 'cooper': np.int64(479),\n",
+              " 'mid': np.int64(1453),\n",
+              " 'add': np.int64(25),\n",
+              " 'question': np.int64(1824),\n",
+              " 'sent friday': np.int64(2025),\n",
+              " 'web site': np.int64(2414),\n",
+              " 'corp enron': np.int64(489),\n",
+              " 'enron com': np.int64(717),\n",
+              " 'know question': np.int64(1282),\n",
+              " 'gerald': np.int64(1041),\n",
+              " 'spot': np.int64(2121),\n",
+              " 'el': np.int64(678),\n",
+              " 'paso': np.int64(1647),\n",
+              " 'ena': np.int64(695),\n",
+              " 'prefer': np.int64(1736),\n",
+              " 'review': np.int64(1932),\n",
+              " 'master': np.int64(1402),\n",
+              " 'send': np.int64(2016),\n",
+              " 'judi': np.int64(1256),\n",
+              " 'jan': np.int64(1227),\n",
+              " 'tuesday': np.int64(2319),\n",
+              " 'cc': np.int64(330),\n",
+              " 'harri': np.int64(1078),\n",
+              " 'quick': np.int64(1826),\n",
+              " 'respons': np.int64(1918),\n",
+              " 'attach': np.int64(156),\n",
+              " 'pleas': np.int64(1703),\n",
+              " 'pro': np.int64(1766),\n",
+              " 'fill': np.int64(967),\n",
+              " 'analyst': np.int64(100),\n",
+              " 'vacat': np.int64(2372),\n",
+              " 'turn': np.int64(2324),\n",
+              " 'back': np.int64(182),\n",
+              " 'comment': np.int64(410),\n",
+              " 'recommend': np.int64(1857),\n",
+              " 'merchant': np.int64(1441),\n",
+              " 'energi': np.int64(701),\n",
+              " 'mailto': np.int64(1385),\n",
+              " 'anyth': np.int64(115),\n",
+              " 'llc': np.int64(1342),\n",
+              " 'email': np.int64(686),\n",
+              " 'corpor': np.int64(492),\n",
+              " 'confidenti': np.int64(441),\n",
+              " 'intend': np.int64(1188),\n",
+              " 'sole': np.int64(2090),\n",
+              " 'entiti': np.int64(727),\n",
+              " 'address': np.int64(27),\n",
+              " 'error': np.int64(739),\n",
+              " 'notifi': np.int64(1558),\n",
+              " 'sender': np.int64(2019),\n",
+              " 'doc': np.int64(623),\n",
+              " 'el paso': np.int64(679),\n",
+              " 'sent tuesday': np.int64(2028),\n",
+              " 'pleas find': np.int64(1708),\n",
+              " 'call question': np.int64(302),\n",
+              " 'escapenumb origin': np.int64(803),\n",
+              " 'com sent': np.int64(406),\n",
+              " 'com subject': np.int64(407),\n",
+              " 'pleas let': np.int64(1709),\n",
+              " 'escapenumb email': np.int64(768),\n",
+              " 'sole use': np.int64(2091),\n",
+              " 'error pleas': np.int64(740),\n",
+              " 'physic': np.int64(1689),\n",
+              " 'prepar': np.int64(1741),\n",
+              " 'draft': np.int64(637),\n",
+              " 'conveni': np.int64(476),\n",
+              " 'person': np.int64(1673),\n",
+              " 'email address': np.int64(687),\n",
+              " 'fyi': np.int64(1027),\n",
+              " 'commun': np.int64(419),\n",
+              " 'america': np.int64(93),\n",
+              " 'regard': np.int64(1871),\n",
+              " 'someon': np.int64(2095),\n",
+              " 'stephani': np.int64(2155),\n",
+              " 'sorri': np.int64(2100),\n",
+              " 'tri': np.int64(2312),\n",
+              " 'per': np.int64(1662),\n",
+              " 'trader': np.int64(2296),\n",
+              " 'transact': np.int64(2301),\n",
+              " 'energi market': np.int64(702),\n",
+              " 'pg': np.int64(1679),\n",
+              " 'later': np.int64(1296),\n",
+              " 'transmiss': np.int64(2304),\n",
+              " 'northwest': np.int64(1552),\n",
+              " 'capac': np.int64(313),\n",
+              " 'cross': np.int64(514),\n",
+              " 'pipelin': np.int64(1693),\n",
+              " 'expand': np.int64(898),\n",
+              " 'western': np.int64(2430),\n",
+              " 'half': np.int64(1071),\n",
+              " 'along': np.int64(86),\n",
+              " 'open': np.int64(1601),\n",
+              " 'project': np.int64(1784),\n",
+              " 'expans': np.int64(899),\n",
+              " 'region': np.int64(1873),\n",
+              " 'level': np.int64(1324),\n",
+              " 'interest': np.int64(1197),\n",
+              " 'increas': np.int64(1159),\n",
+              " 'peter': np.int64(1678),\n",
+              " 'vice': np.int64(2387),\n",
+              " 'group': np.int64(1065),\n",
+              " 'capabl': np.int64(312),\n",
+              " 'express': np.int64(913),\n",
+              " 'begin': np.int64(209),\n",
+              " 'south': np.int64(2104),\n",
+              " 'west': np.int64(2429),\n",
+              " 'area': np.int64(131),\n",
+              " 'central': np.int64(341),\n",
+              " 'serv': np.int64(2039),\n",
+              " 'rout': np.int64(1957),\n",
+              " 'deliveri': np.int64(571),\n",
+              " 'look': np.int64(1355),\n",
+              " 'park': np.int64(1636),\n",
+              " 'basic': np.int64(196),\n",
+              " 'pacif': np.int64(1625),\n",
+              " 'larg': np.int64(1289),\n",
+              " 'william': np.int64(2440),\n",
+              " 'crisi': np.int64(512),\n",
+              " 'primari': np.int64(1755),\n",
+              " 'reli': np.int64(1886),\n",
+              " 'wood': np.int64(2460),\n",
+              " 'natur': np.int64(1512),\n",
+              " 'escapenumb third': np.int64(819),\n",
+              " 'vice presid': np.int64(2388),\n",
+              " 'natur ga': np.int64(1513),\n",
+              " 'desk': np.int64(591),\n",
+              " 'purchas': np.int64(1812),\n",
+              " 'space': np.int64(2107),\n",
+              " 'enrononlin': np.int64(721),\n",
+              " 'websit': np.int64(2415),\n",
+              " 'buy': np.int64(289),\n",
+              " 'offer': np.int64(1584),\n",
+              " 'us': np.int64(2360),\n",
+              " 'section': np.int64(1992),\n",
+              " 'refus': np.int64(1870),\n",
+              " 'keep': np.int64(1264),\n",
+              " 'extend': np.int64(916),\n",
+              " 'escapenumb month': np.int64(798),\n",
+              " 'escapenumb total': np.int64(822),\n",
+              " 'offer escapenumb': np.int64(1586),\n",
+              " 'would like': np.int64(2469),\n",
+              " 'section escapenumb': np.int64(1993),\n",
+              " 'taylor': np.int64(2222),\n",
+              " 'internet': np.int64(1201),\n",
+              " 'oil': np.int64(1593),\n",
+              " 'coupl': np.int64(504),\n",
+              " 'invoic': np.int64(1214),\n",
+              " 'refer': np.int64(1864),\n",
+              " 'steve': np.int64(2157),\n",
+              " 'tell': np.int64(2236),\n",
+              " 'cell': np.int64(338),\n",
+              " 'check': np.int64(372),\n",
+              " 'miss': np.int64(1471),\n",
+              " 'great': np.int64(1059),\n",
+              " 'escapenumb cell': np.int64(756),\n",
+              " 'note': np.int64(1553),\n",
+              " 'cash': np.int64(322),\n",
+              " 'balanc': np.int64(184),\n",
+              " 'smith': np.int64(2084),\n",
+              " 'asset': np.int64(146),\n",
+              " 'fix': np.int64(980),\n",
+              " 'incom': np.int64(1158),\n",
+              " 'portfolio': np.int64(1722),\n",
+              " 'fund': np.int64(1023),\n",
+              " 'discuss': np.int64(613),\n",
+              " 'pleas note': np.int64(1710),\n",
+              " 'eric': np.int64(738),\n",
+              " 'structur': np.int64(2175),\n",
+              " 'north': np.int64(1549),\n",
+              " 'enron north': np.int64(720),\n",
+              " 'north america': np.int64(1550),\n",
+              " 'com escapenumb': np.int64(404),\n",
+              " 'rod': np.int64(1948),\n",
+              " 'mani': np.int64(1393),\n",
+              " 'host': np.int64(1107),\n",
+              " 'rather': np.int64(1834),\n",
+              " 'guy': np.int64(1069),\n",
+              " 'feedback': np.int64(948),\n",
+              " 'step': np.int64(2154),\n",
+              " 'land': np.int64(1287),\n",
+              " 'partner': np.int64(1645),\n",
+              " 'terri': np.int64(2241),\n",
+              " 'donna': np.int64(628),\n",
+              " 'edison': np.int64(664),\n",
+              " 'meet': np.int64(1433),\n",
+              " 'minut': np.int64(1470),\n",
+              " 'execut': np.int64(896),\n",
+              " 'ceo': np.int64(342),\n",
+              " 'suit': np.int64(2192),\n",
+              " 'houston': np.int64(1115),\n",
+              " 'tx': np.int64(2328),\n",
+              " 'fax': np.int64(937),\n",
+              " 'xl': np.int64(2479),\n",
+              " 'suit escapenumb': np.int64(2193),\n",
+              " 'escapenumb houston': np.int64(786),\n",
+              " 'houston tx': np.int64(1117),\n",
+              " 'tx escapenumb': np.int64(2329),\n",
+              " 'escapenumb phone': np.int64(806),\n",
+              " 'phone escapenumb': np.int64(1685),\n",
+              " 'escapenumb fax': np.int64(782),\n",
+              " 'path': np.int64(1655),\n",
+              " 'monday': np.int64(1482),\n",
+              " 'deal': np.int64(550),\n",
+              " 'texa': np.int64(2244),\n",
+              " 'side': np.int64(2067),\n",
+              " 'much': np.int64(1497),\n",
+              " 'everyth': np.int64(885),\n",
+              " 'issu': np.int64(1224),\n",
+              " 'deliv': np.int64(570),\n",
+              " 'possibl': np.int64(1726),\n",
+              " 'idea': np.int64(1135),\n",
+              " 'look like': np.int64(1357),\n",
+              " 'sent monday': np.int64(2026),\n",
+              " 'deal escapenumb': np.int64(551),\n",
+              " 'januari escapenumb': np.int64(1230),\n",
+              " 'escapenumb deal': np.int64(764),\n",
+              " 'escapenumb receiv': np.int64(812),\n",
+              " 'posit': np.int64(1724),\n",
+              " 'network': np.int64(1526),\n",
+              " 'info': np.int64(1168),\n",
+              " 'locat': np.int64(1349),\n",
+              " 'xescapenumb': np.int64(2478),\n",
+              " 'mba': np.int64(1422),\n",
+              " 'home': np.int64(1105),\n",
+              " 'johnson': np.int64(1245),\n",
+              " 'wednesday': np.int64(2417),\n",
+              " 'allen': np.int64(82),\n",
+              " 'dave': np.int64(539),\n",
+              " 'fw': np.int64(1026),\n",
+              " 'surpris': np.int64(2204),\n",
+              " 'extens': np.int64(917),\n",
+              " 'sent wednesday': np.int64(2029),\n",
+              " 'subject fw': np.int64(2181),\n",
+              " 'pleas send': np.int64(1714),\n",
+              " 'aol': np.int64(116),\n",
+              " 'date': np.int64(537),\n",
+              " 'mime': np.int64(1464),\n",
+              " 'version': np.int64(2381),\n",
+              " 'type': np.int64(2330),\n",
+              " 'multipart': np.int64(1498),\n",
+              " 'mix': np.int64(1472),\n",
+              " 'boundari': np.int64(267),\n",
+              " 'mailer': np.int64(1384),\n",
+              " 'window': np.int64(2443),\n",
+              " 'escapenumberd': np.int64(840),\n",
+              " 'mx': np.int64(1506),\n",
+              " 'air': np.int64(74),\n",
+              " 'vescapenumb': np.int64(2383),\n",
+              " 'rescapenumb': np.int64(1908),\n",
+              " 'net': np.int64(1524),\n",
+              " 'unknown': np.int64(2353),\n",
+              " 'default': np.int64(563),\n",
+              " 'georg': np.int64(1039),\n",
+              " 'hotmail': np.int64(1110),\n",
+              " 'edu': np.int64(666),\n",
+              " 'msn': np.int64(1496),\n",
+              " 'cs': np.int64(517),\n",
+              " 'went': np.int64(2428),\n",
+              " 'long': np.int64(1352),\n",
+              " 'eye': np.int64(920),\n",
+              " 'bad': np.int64(183),\n",
+              " 'best': np.int64(232),\n",
+              " 'put': np.int64(1816),\n",
+              " 'solid': np.int64(2093),\n",
+              " 'wait': np.int64(2400),\n",
+              " 'room': np.int64(1953),\n",
+              " 'well': np.int64(2426),\n",
+              " 'thing': np.int64(2250),\n",
+              " 'good': np.int64(1053),\n",
+              " 'littl': np.int64(1340),\n",
+              " 'less': np.int64(1319),\n",
+              " 'approach': np.int64(123),\n",
+              " 'old': np.int64(1596),\n",
+              " 'friend': np.int64(1016),\n",
+              " 'told': np.int64(2281),\n",
+              " 'aid': np.int64(73),\n",
+              " 'gave': np.int64(1035),\n",
+              " 'left': np.int64(1313),\n",
+              " 'thought': np.int64(2257),\n",
+              " 'die': np.int64(597),\n",
+              " 'true': np.int64(2316),\n",
+              " 'god': np.int64(1051),\n",
+              " 'never': np.int64(1527),\n",
+              " 'argu': np.int64(132),\n",
+              " 'robert': np.int64(1946),\n",
+              " 'richard': np.int64(1935),\n",
+              " 'road': np.int64(1944),\n",
+              " 'win': np.int64(2441),\n",
+              " 'draw': np.int64(638),\n",
+              " 'simpli': np.int64(2073),\n",
+              " 'entir': np.int64(726),\n",
+              " 'past': np.int64(1651),\n",
+              " 'altern': np.int64(89),\n",
+              " 'directli': np.int64(606),\n",
+              " 'aol com': np.int64(117),\n",
+              " 'com escapelong': np.int64(403),\n",
+              " 'mime version': np.int64(1465),\n",
+              " 'version escapenumb': np.int64(2382),\n",
+              " 'escapenumb content': np.int64(760),\n",
+              " 'content type': np.int64(467),\n",
+              " 'type multipart': np.int64(2333),\n",
+              " 'escapenumb escapelong': np.int64(771),\n",
+              " 'escapelong escapelong': np.int64(744),\n",
+              " 'escapenumb window': np.int64(825),\n",
+              " 'escapelong escapenumb': np.int64(745),\n",
+              " 'id escapenumb': np.int64(1134),\n",
+              " 'escapenumb date': np.int64(762),\n",
+              " 'hotmail com': np.int64(1111),\n",
+              " 'type text': np.int64(2334),\n",
+              " 'fax escapenumb': np.int64(938),\n",
+              " 'pleas click': np.int64(1706),\n",
+              " 'investig': np.int64(1210),\n",
+              " 'submit': np.int64(2182),\n",
+              " 'pleas review': np.int64(1712),\n",
+              " 'pat': np.int64(1653),\n",
+              " 'suggest': np.int64(2191),\n",
+              " 'matter': np.int64(1411),\n",
+              " 'david': np.int64(541),\n",
+              " 'mr': np.int64(1494),\n",
+              " 'see': np.int64(2004),\n",
+              " 'statu': np.int64(2147),\n",
+              " 'concern': np.int64(435),\n",
+              " 'agent': np.int64(66),\n",
+              " 'hello': np.int64(1086),\n",
+              " 'everyon': np.int64(884),\n",
+              " 'ann': np.int64(105),\n",
+              " 'escapenumb thank': np.int64(818),\n",
+              " 'street': np.int64(2171),\n",
+              " 'roger': np.int64(1949),\n",
+              " 'enjoy': np.int64(713),\n",
+              " 'talk': np.int64(2215),\n",
+              " 'document': np.int64(624),\n",
+              " 'green': np.int64(1061),\n",
+              " 'light': np.int64(1328),\n",
+              " 'format': np.int64(997),\n",
+              " 'doug': np.int64(631),\n",
+              " 'jone': np.int64(1250),\n",
+              " 'na': np.int64(1508),\n",
+              " 'escapenumb smith': np.int64(816),\n",
+              " 'smith street': np.int64(2085),\n",
+              " 'houston texa': np.int64(1116),\n",
+              " 'texa escapenumb': np.int64(2245),\n",
+              " 'pleas see': np.int64(1713),\n",
+              " 'see attach': np.int64(2005),\n",
+              " 'sunday': np.int64(2198),\n",
+              " 'lee': np.int64(1312),\n",
+              " 'build': np.int64(284),\n",
+              " 'materi': np.int64(1405),\n",
+              " 'reason': np.int64(1846),\n",
+              " 'decid': np.int64(559),\n",
+              " 'recogn': np.int64(1856),\n",
+              " 'major': np.int64(1388),\n",
+              " 'depend': np.int64(576),\n",
+              " 'analysi': np.int64(99),\n",
+              " 'parti': np.int64(1639),\n",
+              " 'initi': np.int64(1177),\n",
+              " 'negoti': np.int64(1520),\n",
+              " 'procedur': np.int64(1769),\n",
+              " 'amount': np.int64(97),\n",
+              " 'progress': np.int64(1781),\n",
+              " 'made': np.int64(1374),\n",
+              " 'near': np.int64(1515),\n",
+              " 'liabil': np.int64(1325),\n",
+              " 'evid': np.int64(886),\n",
+              " 'prove': np.int64(1797),\n",
+              " 'respond': np.int64(1917),\n",
+              " 'tel': np.int64(2231),\n",
+              " 'join': np.int64(1246),\n",
+              " 'relev': np.int64(1884),\n",
+              " 'brought': np.int64(281),\n",
+              " 'titl': np.int64(2276),\n",
+              " 'differ': np.int64(599),\n",
+              " 'arbitr': np.int64(130),\n",
+              " 'part': np.int64(1637),\n",
+              " 'critic': np.int64(513),\n",
+              " 'congress': np.int64(447),\n",
+              " 'next': np.int64(1536),\n",
+              " 'secur': np.int64(1997),\n",
+              " 'adopt': np.int64(34),\n",
+              " 'independ': np.int64(1160),\n",
+              " 'medic': np.int64(1432),\n",
+              " 'rush': np.int64(1961),\n",
+              " 'regul': np.int64(1878),\n",
+              " 'reserv': np.int64(1910),\n",
+              " 'recov': np.int64(1859),\n",
+              " 'cost': np.int64(495),\n",
+              " 'paid': np.int64(1630),\n",
+              " 'deni': np.int64(574),\n",
+              " 'disput': np.int64(616),\n",
+              " 'brief': np.int64(275),\n",
+              " 'accept': np.int64(6),\n",
+              " 'ground': np.int64(1064),\n",
+              " 'economi': np.int64(658),\n",
+              " 'still': np.int64(2159),\n",
+              " 'yet': np.int64(2490),\n",
+              " 'remain': np.int64(1892),\n",
+              " 'particip': np.int64(1642),\n",
+              " 'believ': np.int64(214),\n",
+              " 'rate': np.int64(1833),\n",
+              " 'prior': np.int64(1758),\n",
+              " 'fulli': np.int64(1020),\n",
+              " 'countri': np.int64(503),\n",
+              " 'chicago': np.int64(373),\n",
+              " 'util': np.int64(2370),\n",
+              " 'day': np.int64(542),\n",
+              " 'resum': np.int64(1926),\n",
+              " 'money': np.int64(1486),\n",
+              " 'attempt': np.int64(160),\n",
+              " 'outsid': np.int64(1619),\n",
+              " 'contractor': np.int64(473),\n",
+              " 'normal': np.int64(1548),\n",
+              " 'plant': np.int64(1699),\n",
+              " 'oper': np.int64(1602),\n",
+              " 'maintain': np.int64(1387),\n",
+              " 'limit': np.int64(1330),\n",
+              " 'faith': np.int64(929),\n",
+              " 'longer': np.int64(1354),\n",
+              " 'environ': np.int64(729),\n",
+              " 'competit': np.int64(430),\n",
+              " 'stand': np.int64(2129),\n",
+              " 'enterpris': np.int64(724),\n",
+              " 'substanti': np.int64(2187),\n",
+              " 'found': np.int64(1005),\n",
+              " 'commit': np.int64(415),\n",
+              " 'continu': np.int64(469),\n",
+              " 'violat': np.int64(2391),\n",
+              " 'especi': np.int64(863),\n",
+              " 'specif': np.int64(2110),\n",
+              " 'stop': np.int64(2163),\n",
+              " 'relief': np.int64(1891),\n",
+              " 'compens': np.int64(429),\n",
+              " 'privat': np.int64(1761),\n",
+              " 'industri': np.int64(1167),\n",
+              " 'march': np.int64(1396),\n",
+              " 'delay': np.int64(567),\n",
+              " 'provis': np.int64(1802),\n",
+              " 'hear': np.int64(1081),\n",
+              " 'loss': np.int64(1360),\n",
+              " 'fear': np.int64(942),\n",
+              " 'uncertainti': np.int64(2341),\n",
+              " 'log': np.int64(1350),\n",
+              " 'alway': np.int64(91),\n",
+              " 'agre': np.int64(70),\n",
+              " 'commiss': np.int64(413),\n",
+              " 'cancel': np.int64(308),\n",
+              " 'staff': np.int64(2126),\n",
+              " 'interpret': np.int64(1202),\n",
+              " 'pertain': np.int64(1676),\n",
+              " 'assess': np.int64(145),\n",
+              " 'approv': np.int64(125),\n",
+              " 'set': np.int64(2045),\n",
+              " 'hotel': np.int64(1109),\n",
+              " 'defin': np.int64(565),\n",
+              " 'contribut': np.int64(474),\n",
+              " 'benefit': np.int64(217),\n",
+              " 'sponsor': np.int64(2117),\n",
+              " 'model': np.int64(1479),\n",
+              " 'tax': np.int64(2221),\n",
+              " 'relianc': np.int64(1889),\n",
+              " 'counti': np.int64(502),\n",
+              " 'govern': np.int64(1056),\n",
+              " 'minimum': np.int64(1468),\n",
+              " 'credit': np.int64(511),\n",
+              " 'without': np.int64(2457),\n",
+              " 'provid': np.int64(1798),\n",
+              " 'caus': np.int64(326),\n",
+              " 'polit': np.int64(1720),\n",
+              " 'financ': np.int64(971),\n",
+              " 'reform': np.int64(1867),\n",
+              " 'floor': np.int64(983),\n",
+              " 'start': np.int64(2131),\n",
+              " 'charg': np.int64(370),\n",
+              " 'make': np.int64(1389),\n",
+              " 'statement': np.int64(2133),\n",
+              " 'connect': np.int64(448),\n",
+              " 'novemb': np.int64(1561),\n",
+              " 'implement': np.int64(1148),\n",
+              " 'forc': np.int64(991),\n",
+              " 'sue': np.int64(2189),\n",
+              " 'despit': np.int64(592),\n",
+              " 'correct': np.int64(493),\n",
+              " 'stori': np.int64(2167),\n",
+              " 'describ': np.int64(587),\n",
+              " 'paragraph': np.int64(1635),\n",
+              " 'highli': np.int64(1095),\n",
+              " 'rose': np.int64(1954),\n",
+              " 'preliminari': np.int64(1738),\n",
+              " 'estim': np.int64(865),\n",
+              " 'educ': np.int64(667),\n",
+              " 'jim': np.int64(1239),\n",
+              " 'bd': np.int64(204),\n",
+              " 'brown': np.int64(282),\n",
+              " 'wi': np.int64(2436),\n",
+              " 'moor': np.int64(1490),\n",
+              " 'gov': np.int64(1055),\n",
+              " 'microsoft': np.int64(1452),\n",
+              " 'corp escapenumb': np.int64(490),\n",
+              " 'secur act': np.int64(1998),\n",
+              " 'third quarter': np.int64(2254),\n",
+              " 'power plant': np.int64(1731),\n",
+              " 'escapenumb hour': np.int64(785),\n",
+              " 'march escapenumb': np.int64(1397),\n",
+              " 'novemb escapenumb': np.int64(1562),\n",
+              " 'escapenumb per': np.int64(804),\n",
+              " 'cera': np.int64(343),\n",
+              " 'monthli': np.int64(1489),\n",
+              " 'rest': np.int64(1919),\n",
+              " 'summer': np.int64(2196),\n",
+              " 'winter': np.int64(2446),\n",
+              " 'print': np.int64(1757),\n",
+              " 'regulatori': np.int64(1879),\n",
+              " 'reduc': np.int64(1862),\n",
+              " 'reliabl': np.int64(1888),\n",
+              " 'peak': np.int64(1660),\n",
+              " 'ferc': np.int64(952),\n",
+              " 'strong': np.int64(2174),\n",
+              " 'consider': np.int64(451),\n",
+              " 'basi': np.int64(194),\n",
+              " 'eas': np.int64(652),\n",
+              " 'load': np.int64(1346),\n",
+              " 'bring': np.int64(276),\n",
+              " 'complet': np.int64(431),\n",
+              " 'categori': np.int64(324),\n",
+              " 'knowledg': np.int64(1283),\n",
+              " 'profil': np.int64(1778),\n",
+              " 'client': np.int64(391),\n",
+              " 'password': np.int64(1650),\n",
+              " 'electron': np.int64(682),\n",
+              " 'contain': np.int64(462),\n",
+              " 'research': np.int64(1909),\n",
+              " 'privileg': np.int64(1763),\n",
+              " 'disclosur': np.int64(609),\n",
+              " 'whole': np.int64(2434),\n",
+              " 'strictli': np.int64(2172),\n",
+              " 'last week': np.int64(1293),\n",
+              " 'ga price': np.int64(1030),\n",
+              " 'strictli prohibit': np.int64(2173),\n",
+              " 'brad': np.int64(269),\n",
+              " 'escapenumbera': np.int64(831),\n",
+              " 'descript': np.int64(588),\n",
+              " 'elizabeth': np.int64(683),\n",
+              " 'alreadi': np.int64(87),\n",
+              " 'standard': np.int64(2130),\n",
+              " 'wish': np.int64(2449),\n",
+              " 'own': np.int64(1623),\n",
+              " 'properti': np.int64(1790),\n",
+              " 'perhap': np.int64(1670),\n",
+              " 'hereto': np.int64(1089),\n",
+              " 'assumpt': np.int64(151),\n",
+              " 'watson': np.int64(2410),\n",
+              " 'design': np.int64(589),\n",
+              " 'escapenumber': np.int64(830),\n",
+              " 'speak': np.int64(2108),\n",
+              " 'pursu': np.int64(1814),\n",
+              " 'acquir': np.int64(13),\n",
+              " 'subsidiari': np.int64(2186),\n",
+              " 'except': np.int64(890),\n",
+              " ...}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 10
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Pull the 'label' column out of df as the Series y (presumably the target paired with X - confirm).\n",
+        "y = df['label']"
+      ],
+      "metadata": {
+        "id": "lPyOikpYamj9"
+      },
+      "execution_count": 11,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Display y as the cell result; pandas abbreviates the middle rows of a long Series.\n",
+        "y"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 458
+        },
+        "id": "e1bQoUy7awqT",
+        "outputId": "a4e8f3c4-5e3d-49f5-dbda-292751cb4043"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0        0\n",
+              "1        0\n",
+              "2        0\n",
+              "3        0\n",
+              "4        0\n",
+              "        ..\n",
+              "17410    0\n",
+              "17411    0\n",
+              "17412    0\n",
+              "17413    0\n",
+              "17414    0\n",
+              "Name: label, Length: 17415, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17410</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17411</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17412</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17413</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17414</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>17415 rows × 1 columns</p>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Widen NumPy's array repr before showing X: 30 edge items per axis, an effectively\n",
+        "# unlimited line width, and float elements rendered with 3 significant digits.\n",
+        "# NOTE: set_printoptions changes NumPy's global print state, so these settings\n",
+        "# persist for the rest of the kernel session.\n",
+        "np.set_printoptions(\n",
+        "    edgeitems=30,\n",
+        "    linewidth=100000,\n",
+        "    formatter={\"float\": lambda value: \"%.3g\" % value},\n",
+        ")\n",
+        "X"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XQ94xcP9c7c4",
+        "outputId": "99ca082f-30b7-4c15-de6a-de8a18200e39"
+      },
+      "execution_count": 17,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([[ 3,  3,  0,  3,  0,  0,  0,  0,  2,  1,  2,  0,  0,  0,  0,  1,  4,  0,  0,  0,  1,  1,  0,  1,  0,  0,  0,  0,  2,  5, ...,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  0,  1,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0, ...,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 3,  3,  0,  1,  0,  0,  1,  0,  2,  2,  1,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  3, ...,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  0,  0,  0,  2,  1,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  2,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       ...,\n",
+              "       [ 0,  0,  6,  2,  3,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  6,  0, 16,  1,  0,  0,  0, ...,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  6,  0,  0,  0,  1,  5,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0, ...,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  2,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  1,  0,  0,  0,  0,  2,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1, ...,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  1,  2,  0,  1,  0,  1, 14,  2,  0,  0,  3,  0,  0,  3,  1, 11,  0,  5,  0,  0,  7,  0,  5,  2,  1,  0,  1, ...,  0, 10,  1,  0,  0,  6,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0, 18,  0,  0, 13,  3, 21,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  2,  1,  1,  0,  0,  0,  0,  0,  0,  0,  3,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1, ...,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  1,  2,  1,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  2,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, ...,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],\n",
+              "       [ 2,  2,  0,  2,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  6,  0,  1,  0,  1,  0,  0,  0,  0,  1,  3,  0,  0,  2, ...,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0,  0,  0]])"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 17
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.model_selection import train_test_split\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)  # fixed seed: the 80/20 split and the metrics computed from it are reproducible"
+      ],
+      "metadata": {
+        "id": "b3BPaHvRdgp1"
+      },
+      "execution_count": 18,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.naive_bayes import MultinomialNB"
+      ],
+      "metadata": {
+        "id": "gU6d2QB_ePWK"
+      },
+      "execution_count": 19,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "spam_detect_model = MultinomialNB().fit(X=X_train, y=y_train)  # fit() returns the fitted estimator itself"
+      ],
+      "metadata": {
+        "id": "218ZZYjReTpV"
+      },
+      "execution_count": 20,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "y_pred = spam_detect_model.predict(X=X_test)  # predicted labels for the held-out rows"
+      ],
+      "metadata": {
+        "id": "sdzxoiszejR7"
+      },
+      "execution_count": 21,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.metrics import accuracy_score,classification_report,confusion_matrix"
+      ],
+      "metadata": {
+        "id": "o2HQTtdwezNV"
+      },
+      "execution_count": 22,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "accuracy_score(y_true=y_test, y_pred=y_pred)  # share of held-out rows whose predicted label matches"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "pZcH5t41fL38",
+        "outputId": "93f559d1-366a-44ab-d337-9fa4c90c9132"
+      },
+      "execution_count": 23,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0.8380706287683032"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 23
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(classification_report(y_true=y_test, y_pred=y_pred))  # per-class precision / recall / f1"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "i1teODLnfVzD",
+        "outputId": "87f7827d-abb0-4e95-bb7b-dcd1216a65c9"
+      },
+      "execution_count": 24,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "           0       0.78      0.99      0.88      2002\n",
+            "           1       0.98      0.63      0.77      1481\n",
+            "\n",
+            "    accuracy                           0.84      3483\n",
+            "   macro avg       0.88      0.81      0.82      3483\n",
+            "weighted avg       0.87      0.84      0.83      3483\n",
+            "\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/MLPproject.ipynb b/MLPproject.ipynb
new file mode 100644
index 0000000..8a7b0f2
--- /dev/null
+++ b/MLPproject.ipynb
@@ -0,0 +1,388 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyM/fQMHaYzh1HwHzpLLGyA9",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Lokendra-parmar/python-programming-questions/blob/main/MLPproject.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wvsuiNVuMWa5"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "# Load every input CSV up front so a missing file fails before any processing starts\n",
+        "try:\n",
+        "    booknow_booking_df = pd.read_csv('booknow_booking.csv')\n",
+        "    cinepos_booking_df = pd.read_csv('cinePOS_booking.csv')\n",
+        "    id_relation_df = pd.read_csv('movie_theater_id_relation.csv')\n",
+        "    booknow_visits_df = pd.read_csv('booknow_visits.csv')\n",
+        "    date_info_df = pd.read_csv('date_info.csv')\n",
+        "    booknow_theaters_df = pd.read_csv('booknow_theaters.csv')\n",
+        "\n",
+        "    print(\"All files loaded successfully.\")\n",
+        "except Exception as e:\n",
+        "    print(f\"Error loading files: {e}\")\n",
+        "    # Stop execution if files can't be loaded (bare raise re-raises the active exception)\n",
+        "    raise\n",
+        "\n",
+        "print(\"Starting data consolidation...\")\n",
+        "\n",
+        "# --- 1a: Clean booknow_theaters ---\n",
+        "# Drop rows where book_theater_id is null, as they cannot be linked\n",
+        "booknow_theaters_df = booknow_theaters_df.dropna(subset=['book_theater_id'])\n",
+        "# We'll ignore lat/lon and sparse theater_type/area for this model\n",
+        "booknow_theaters_df = booknow_theaters_df[['book_theater_id']]\n",
+        "\n",
+        "# --- 1b: Process booknow_booking (Online) ---\n",
+        "# Convert to datetime and keep the date part as 'YYYY-MM-DD' text\n",
+        "booknow_booking_df['show_datetime'] = pd.to_datetime(booknow_booking_df['show_datetime'])\n",
+        "booknow_booking_df['show_date'] = booknow_booking_df['show_datetime'].dt.strftime('%Y-%m-%d')\n",
+        "# Aggregate: sum tickets by theater and date\n",
+        "booknow_agg_df = booknow_booking_df.groupby(['book_theater_id', 'show_date'])['tickets_booked'].sum().reset_index()\n",
+        "booknow_agg_df = booknow_agg_df.rename(columns={'tickets_booked': 'total_booknow_tickets'})\n",
+        "\n",
+        "# --- 1c: Process cinePOS_booking (On-site) ---\n",
+        "# Convert to datetime and keep the date part as 'YYYY-MM-DD' text\n",
+        "cinepos_booking_df['show_datetime'] = pd.to_datetime(cinepos_booking_df['show_datetime'])\n",
+        "cinepos_booking_df['show_date'] = cinepos_booking_df['show_datetime'].dt.strftime('%Y-%m-%d')\n",
+        "# Aggregate: sum tickets by theater and date\n",
+        "cinepos_agg_df = cinepos_booking_df.groupby(['cine_theater_id', 'show_date'])['tickets_sold'].sum().reset_index()\n",
+        "cinepos_agg_df = cinepos_agg_df.rename(columns={'tickets_sold': 'total_cinepos_tickets'})\n",
+        "\n",
+        "# --- 1d: Link cinePOS to booknow IDs ---\n",
+        "cinepos_linked_df = pd.merge(cinepos_agg_df, id_relation_df, on='cine_theater_id', how='inner')\n",
+        "# Re-aggregate in case multiple cinePOS IDs map to a single book_theater_id\n",
+        "cinepos_linked_agg_df = cinepos_linked_df.groupby(['book_theater_id', 'show_date'])['total_cinepos_tickets'].sum().reset_index()\n",
+        "\n",
+        "# --- 1e: Create Master DataFrame ---\n",
+        "# Start with the base visits data (our target)\n",
+        "master_df = booknow_visits_df.copy()\n",
+        "\n",
+        "# Merge calendar info (assumes both CSVs store show_date in the same text format - TODO confirm)\n",
+        "master_df = pd.merge(master_df, date_info_df, on='show_date', how='left')\n",
+        "\n",
+        "# Merge aggregated BookNow bookings (left join: visit rows without bookings get NaN)\n",
+        "master_df = pd.merge(master_df, booknow_agg_df, on=['book_theater_id', 'show_date'], how='left')\n",
+        "\n",
+        "# Merge aggregated and linked CinePOS bookings (left join: visit rows without sales get NaN)\n",
+        "master_df = pd.merge(master_df, cinepos_linked_agg_df, on=['book_theater_id', 'show_date'], how='left')\n",
+        "\n",
+        "# --- 1f: Final Cleanup ---\n",
+        "# Fill booking NaNs with 0 (days with visits but no recorded online/POS bookings); assign back instead of a chained inplace fillna\n",
+        "master_df['total_booknow_tickets'] = master_df['total_booknow_tickets'].fillna(0)\n",
+        "master_df['total_cinepos_tickets'] = master_df['total_cinepos_tickets'].fillna(0)\n",
+        "\n",
+        "# Convert show_date to datetime object for sorting and feature engineering\n",
+        "master_df['show_date'] = pd.to_datetime(master_df['show_date'])\n",
+        "\n",
+        "print(\"--- Master DataFrame Created ---\")\n",
+        "print(master_df.head())\n",
+        "print(f\"\\nShape of master_df: {master_df.shape}\")\n",
+        "master_df.info()  # info() prints its own report and returns None, so it is not wrapped in print()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Step 2: Feature Engineering & Model Validation"
+      ],
+      "metadata": {
+        "id": "UvSxegRaNEPO"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from sklearn.preprocessing import LabelEncoder\n",
+        "from sklearn.ensemble import RandomForestRegressor\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "\n",
+        "print(\"\\n--- Starting Step 2: Feature Engineering & Validation ---\")\n",
+        "\n",
+        "# --- 2a: Create Features ---\n",
+        "# Create 'total_tickets' feature (online + on-site tickets per theater and day)\n",
+        "master_df['total_tickets'] = master_df['total_booknow_tickets'] + master_df['total_cinepos_tickets']\n",
+        "\n",
+        "# Date Features\n",
+        "master_df['day_of_month'] = master_df['show_date'].dt.day\n",
+        "master_df['month'] = master_df['show_date'].dt.month\n",
+        "master_df['year'] = master_df['show_date'].dt.year\n",
+        "master_df['day_of_year'] = master_df['show_date'].dt.dayofyear\n",
+        "master_df['is_weekend'] = master_df['day_of_week'].isin(['Saturday', 'Sunday']).astype(int)\n",
+        "\n",
+        "# CRITICAL: Sort by theater and date so that shift()/rolling() below walk each theater in time order\n",
+        "master_df = master_df.sort_values(by=['book_theater_id', 'show_date'])\n",
+        "\n",
+        "# Lag & Rolling Features (shift() counts rows, so lag_7 is 7 days back only if a theater has one row per day)\n",
+        "print(\"Creating lag and rolling features...\")\n",
+        "gb = master_df.groupby('book_theater_id')['audience_count']\n",
+        "master_df['audience_lag_7'] = gb.shift(7)\n",
+        "master_df['audience_lag_14'] = gb.shift(14)\n",
+        "master_df['audience_roll_mean_7'] = gb.transform(lambda s: s.shift(1).rolling(7, min_periods=1).mean())  # roll inside each theater; rolling on gb.shift(1) would span theater boundaries\n",
+        "\n",
+        "# --- 2b: Categorical Encoding ---\n",
+        "# We will use LabelEncoder for IDs and One-Hot Encoding for 'day_of_week'\n",
+        "le = LabelEncoder()\n",
+        "master_df['book_theater_id_encoded'] = le.fit_transform(master_df['book_theater_id'])\n",
+        "master_df = pd.get_dummies(master_df, columns=['day_of_week'], prefix='dow')  # NOTE: replaces 'day_of_week'; re-run Step 1 before re-running this cell\n",
+        "\n",
+        "# --- 2c: Clean Data ---\n",
+        "# Drop rows where lag features are NaN (at the start of each series); same subset as the final training in Step 4\n",
+        "master_df_cleaned = master_df.dropna(subset=['audience_lag_7', 'audience_lag_14', 'audience_roll_mean_7'])\n",
+        "print(f\"Data shape after feature engineering and cleaning: {master_df_cleaned.shape}\")\n",
+        "\n",
+        "# --- 2d: Time-Series Split for Validation ---\n",
+        "target_col = 'audience_count'\n",
+        "# Exclude the target, the raw date and the raw (un-encoded) theater ID\n",
+        "features = [col for col in master_df_cleaned.columns if col not in [\n",
+        "    'audience_count', 'show_date', 'book_theater_id'\n",
+        "]]\n",
+        "\n",
+        "X = master_df_cleaned[features]\n",
+        "y = master_df_cleaned[target_col]\n",
+        "\n",
+        "# We will use the last 4 weeks (28 days) for validation: dates strictly after max_date - 28 days\n",
+        "max_date = master_df_cleaned['show_date'].max()\n",
+        "split_date = max_date - pd.to_timedelta('28 days')\n",
+        "\n",
+        "train_mask = (master_df_cleaned['show_date'] <= split_date)\n",
+        "valid_mask = (master_df_cleaned['show_date'] > split_date)\n",
+        "\n",
+        "X_train, y_train = X[train_mask], y[train_mask]\n",
+        "X_valid, y_valid = X[valid_mask], y[valid_mask]\n",
+        "\n",
+        "print(f\"Training data shape: {X_train.shape}\")\n",
+        "print(f\"Validation data shape: {X_valid.shape}\")\n",
+        "\n",
+        "# --- 2e: Train and Validate Model ---\n",
+        "print(\"\\nTraining RandomForestRegressor for validation...\")\n",
+        "# Same hyper-parameters as the final model in Step 4, so this RMSE describes that configuration\n",
+        "rf = RandomForestRegressor(\n",
+        "    n_estimators=100,\n",
+        "    random_state=42,\n",
+        "    n_jobs=-1,\n",
+        "    min_samples_leaf=5,\n",
+        "    max_features=0.7\n",
+        ")\n",
+        "\n",
+        "rf.fit(X_train, y_train)\n",
+        "\n",
+        "# Evaluate on the held-out most recent dates\n",
+        "y_pred = rf.predict(X_valid)\n",
+        "rmse = np.sqrt(mean_squared_error(y_valid, y_pred))\n",
+        "print(\"\\n--- Validation Complete ---\")\n",
+        "print(f\"Validation RMSE: {rmse:.4f}\")\n",
+        "print(\"This shows our model is predictive. Now we will build the final submission.\")"
+      ],
+      "metadata": {
+        "id": "XdhawvAYM_qs"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Step 3: Create Full Dataset for Submission"
+      ],
+      "metadata": {
+        "id": "aedKBYJ5NInq"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "from itertools import product\n",
+        "from sklearn.preprocessing import LabelEncoder\n",
+        "\n",
+        "print(\"\\n--- Starting Step 3: Creating Full Train+Test Dataset ---\")\n",
+        "\n",
+        "# --- 3a: Reload original data ---\n",
+        "# We need the original files to build the full train+test set\n",
+        "booknow_visits_df = pd.read_csv('booknow_visits.csv')\n",
+        "date_info_df = pd.read_csv('date_info.csv')\n",
+        "\n",
+        "# --- 3b: Identify Test Period (calendar dates after the last date that has visit data) ---\n",
+        "booknow_visits_df['show_date'] = pd.to_datetime(booknow_visits_df['show_date'])\n",
+        "date_info_df['show_date'] = pd.to_datetime(date_info_df['show_date'])\n",
+        "max_train_date = booknow_visits_df['show_date'].max()\n",
+        "test_dates_df = date_info_df[date_info_df['show_date'] > max_train_date]\n",
+        "print(f\"Test period identified: {test_dates_df['show_date'].min().date()} to {test_dates_df['show_date'].max().date()}\")\n",
+        "\n",
+        "# --- 3c: Create Test Scaffolding (one row per theater x test date) ---\n",
+        "all_theater_ids = booknow_visits_df['book_theater_id'].unique()\n",
+        "test_scaffold_df = pd.DataFrame(product(all_theater_ids, test_dates_df['show_date']),\n",
+        "                                columns=['book_theater_id', 'show_date'])\n",
+        "print(f\"Test scaffold created with shape: {test_scaffold_df.shape}\")\n",
+        "\n",
+        "# --- 3d: Combine Train and Test ---\n",
+        "# `audience_count` will be NaN for the test pairs\n",
+        "full_data_df = pd.concat([booknow_visits_df, test_scaffold_df], sort=True)\n",
+        "full_data_df = full_data_df.sort_values(by=['book_theater_id', 'show_date']).reset_index(drop=True)\n",
+        "\n",
+        "# --- 3e: Re-run Feature Engineering on Full Dataset ---\n",
+        "# We re-use the aggregated DataFrames from Step 1; their show_date is text (strftime), so it is parsed to datetime to match the merge key\n",
+        "print(\"Merging all features into full dataset...\")\n",
+        "full_master_df = pd.merge(full_data_df, date_info_df, on='show_date', how='left')\n",
+        "full_master_df = pd.merge(full_master_df, booknow_agg_df.assign(show_date=pd.to_datetime(booknow_agg_df['show_date'])), on=['book_theater_id', 'show_date'], how='left')\n",
+        "full_master_df = pd.merge(full_master_df, cinepos_linked_agg_df.assign(show_date=pd.to_datetime(cinepos_linked_agg_df['show_date'])), on=['book_theater_id', 'show_date'], how='left')\n",
+        "\n",
+        "# Cleanup NaNs (assign back instead of a chained inplace fillna)\n",
+        "full_master_df['total_booknow_tickets'] = full_master_df['total_booknow_tickets'].fillna(0)\n",
+        "full_master_df['total_cinepos_tickets'] = full_master_df['total_cinepos_tickets'].fillna(0)\n",
+        "full_master_df['total_tickets'] = full_master_df['total_booknow_tickets'] + full_master_df['total_cinepos_tickets']\n",
+        "\n",
+        "# Date Features\n",
+        "full_master_df['day_of_month'] = full_master_df['show_date'].dt.day\n",
+        "full_master_df['month'] = full_master_df['show_date'].dt.month\n",
+        "full_master_df['year'] = full_master_df['show_date'].dt.year\n",
+        "full_master_df['day_of_year'] = full_master_df['show_date'].dt.dayofyear\n",
+        "full_master_df['is_weekend'] = full_master_df['day_of_week'].isin(['Saturday', 'Sunday']).astype(int)\n",
+        "\n",
+        "# Lag & Rolling Features (rows are already sorted by theater and date in 3d)\n",
+        "# Test rows have NaN audience_count, so a lag that points at another test row is NaN too (Step 4 fills those with 0)\n",
+        "print(\"Creating lags on full dataset...\")\n",
+        "gb_full = full_master_df.groupby('book_theater_id')['audience_count']\n",
+        "full_master_df['audience_lag_7'] = gb_full.shift(7)\n",
+        "full_master_df['audience_lag_14'] = gb_full.shift(14)\n",
+        "full_master_df['audience_roll_mean_7'] = gb_full.transform(lambda s: s.shift(1).rolling(7, min_periods=1).mean())  # roll inside each theater only\n",
+        "\n",
+        "# Categorical Encoding\n",
+        "full_master_df['book_theater_id_encoded'] = le.transform(full_master_df['book_theater_id']) # Use the LE from Step 2\n",
+        "full_master_df = pd.get_dummies(full_master_df, columns=['day_of_week'], prefix='dow')\n",
+        "\n",
+        "print(\"--- Full Train+Test Dataset is Ready ---\")\n",
+        "full_master_df.info()  # info() prints its own report and returns None, so it is not wrapped in print()"
+      ],
+      "metadata": {
+        "id": "s0H0ImjrNNRI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Step 4: Final Model Training & Submission"
+      ],
+      "metadata": {
+        "id": "85nmUKffNP_e"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from sklearn.ensemble import RandomForestRegressor\n",
+        "\n",
+        "print(\"\\n--- Starting Step 4: Final Training & Submission ---\")\n",
+        "\n",
+        "# --- 4a: Split into Final Train and Test ---\n",
+        "# Training data is where 'audience_count' is known\n",
+        "train_final_df = full_master_df[full_master_df['audience_count'].notnull()]\n",
+        "# Test data is where 'audience_count' is unknown\n",
+        "test_final_df = full_master_df[full_master_df['audience_count'].isnull()]\n",
+        "\n",
+        "# Clean the final training data (drop the rows at the start of each series that have no lag values)\n",
+        "train_final_df = train_final_df.dropna(subset=['audience_lag_7', 'audience_lag_14', 'audience_roll_mean_7'])\n",
+        "\n",
+        "print(f\"Final training data shape: {train_final_df.shape}\")\n",
+        "print(f\"Final test data shape: {test_final_df.shape}\")\n",
+        "\n",
+        "# --- 4b: Align Columns ---\n",
+        "# Get feature list from the training set (everything except the target, raw date and raw theater ID)\n",
+        "features = [col for col in train_final_df.columns if col not in [\n",
+        "    'audience_count', 'show_date', 'book_theater_id'\n",
+        "]]\n",
+        "\n",
+        "# Feature matrix and target for the final fit; the target is named here so the cell does not rely on Step 2's `target_col`\n",
+        "X_train_final = train_final_df[features]\n",
+        "y_train_final = train_final_df['audience_count']\n",
+        "\n",
+        "# Align test set columns: keep only the training features, in the training order;\n",
+        "# a feature column that is absent from the test frame is created and filled with 0\n",
+        "X_test_final = test_final_df.reindex(\n",
+        "    columns=features,\n",
+        "    fill_value=0,\n",
+        ")\n",
+        "\n",
+        "# Handle any NaNs in test features (e.g. lag values that would come from other test rows)\n",
+        "# For this problem, we'll fill with 0\n",
+        "X_test_final = X_test_final.fillna(0)\n",
+        "\n",
+        "# --- 4c: Train Final Model ---\n",
+        "print(\"Training final model on ALL available data...\")\n",
+        "rf_final = RandomForestRegressor(\n",
+        "    n_estimators=100,\n",
+        "    random_state=42,\n",
+        "    n_jobs=-1,\n",
+        "    min_samples_leaf=5,\n",
+        "    max_features=0.7\n",
+        ")\n",
+        "rf_final.fit(X_train_final, y_train_final)\n",
+        "print(\"Final model trained.\")\n",
+        "\n",
+        "# --- 4d: Make Predictions ---\n",
+        "print(\"Making final predictions...\")\n",
+        "predictions = rf_final.predict(X_test_final)\n",
+        "\n",
+        "# --- 4e: Format Submission File ---\n",
+        "submission_df = test_final_df[['book_theater_id', 'show_date']].copy()\n",
+        "submission_df['audience_count'] = predictions\n",
+        "\n",
+        "# Format the ID: book_theater_id + show_date (astype(str) keeps the concatenation valid for non-text IDs)\n",
+        "submission_df['show_date'] = submission_df['show_date'].dt.strftime('%Y-%m-%d')\n",
+        "submission_df['ID'] = submission_df['book_theater_id'].astype(str) + '_' + submission_df['show_date']\n",
+        "\n",
+        "# Ensure predictions are non-negative and integers\n",
+        "submission_df['audience_count'] = np.round(submission_df['audience_count']).astype(int)\n",
+        "submission_df['audience_count'] = submission_df['audience_count'].clip(lower=0)\n",
+        "\n",
+        "# Select final columns\n",
+        "final_submission = submission_df[['ID', 'audience_count']]\n",
+        "\n",
+        "# Save the file\n",
+        "final_submission.to_csv('submission.csv', index=False)\n",
+        "\n",
+        "print(\"\\n--- Submission File Created! ---\")\n",
+        "print(final_submission.head())\n",
+        "print(f\"File 'submission.csv' saved with {len(final_submission)} predictions.\")"
+      ],
+      "metadata": {
+        "id": "vxFSnZKTNiZK"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Welcome_to_Colab.ipynb b/Welcome_to_Colab.ipynb
new file mode 100644
index 0000000..c1d21c4
--- /dev/null
+++ b/Welcome_to_Colab.ipynb
@@ -0,0 +1,655 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Lokendra-parmar/python-programming-questions/blob/main/Welcome_to_Colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# GA_5_MLP"
+      ],
+      "metadata": {
+        "id": "v2RMPpA9fg29"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# Name of the CSV that the data-loading cell reads from the working directory.\n",
+        "DATASET_FILE = 'GA_5_dataset.csv'\n",
+        "\n",
+        "# Open the interactive upload widget only when the dataset is missing, so the\n",
+        "# notebook can be re-run from top to bottom without a manual step once the\n",
+        "# file is present. The Colab-only import stays inside the guard so it is not\n",
+        "# required when the file already exists.\n",
+        "if os.path.exists(DATASET_FILE):\n",
+        "    uploaded = {}  # nothing was uploaded in this run\n",
+        "    print(f\"Found {DATASET_FILE}; skipping upload.\")\n",
+        "else:\n",
+        "    from google.colab import files\n",
+        "    uploaded = files.upload()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 73
+        },
+        "id": "GcizXku4X4wC",
+        "outputId": "da4ce9b3-6868-46e1-8ab8-59b8fa65d3ba"
+      },
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "     <input type=\"file\" id=\"files-12f1d2cc-aaab-41f5-94e4-024c08557a08\" name=\"files[]\" multiple disabled\n",
+              "        style=\"border:none\" />\n",
+              "     <output id=\"result-12f1d2cc-aaab-41f5-94e4-024c08557a08\">\n",
+              "      Upload widget is only available when the cell has been executed in the\n",
+              "      current browser session. Please rerun this cell to enable.\n",
+              "      </output>\n",
+              "      <script>// Copyright 2017 Google LLC\n",
+              "//\n",
+              "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+              "// you may not use this file except in compliance with the License.\n",
+              "// You may obtain a copy of the License at\n",
+              "//\n",
+              "//      http://www.apache.org/licenses/LICENSE-2.0\n",
+              "//\n",
+              "// Unless required by applicable law or agreed to in writing, software\n",
+              "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+              "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+              "// See the License for the specific language governing permissions and\n",
+              "// limitations under the License.\n",
+              "\n",
+              "/**\n",
+              " * @fileoverview Helpers for google.colab Python module.\n",
+              " */\n",
+              "(function(scope) {\n",
+              "function span(text, styleAttributes = {}) {\n",
+              "  const element = document.createElement('span');\n",
+              "  element.textContent = text;\n",
+              "  for (const key of Object.keys(styleAttributes)) {\n",
+              "    element.style[key] = styleAttributes[key];\n",
+              "  }\n",
+              "  return element;\n",
+              "}\n",
+              "\n",
+              "// Max number of bytes which will be uploaded at a time.\n",
+              "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
+              "\n",
+              "function _uploadFiles(inputId, outputId) {\n",
+              "  const steps = uploadFilesStep(inputId, outputId);\n",
+              "  const outputElement = document.getElementById(outputId);\n",
+              "  // Cache steps on the outputElement to make it available for the next call\n",
+              "  // to uploadFilesContinue from Python.\n",
+              "  outputElement.steps = steps;\n",
+              "\n",
+              "  return _uploadFilesContinue(outputId);\n",
+              "}\n",
+              "\n",
+              "// This is roughly an async generator (not supported in the browser yet),\n",
+              "// where there are multiple asynchronous steps and the Python side is going\n",
+              "// to poll for completion of each step.\n",
+              "// This uses a Promise to block the python side on completion of each step,\n",
+              "// then passes the result of the previous step as the input to the next step.\n",
+              "function _uploadFilesContinue(outputId) {\n",
+              "  const outputElement = document.getElementById(outputId);\n",
+              "  const steps = outputElement.steps;\n",
+              "\n",
+              "  const next = steps.next(outputElement.lastPromiseValue);\n",
+              "  return Promise.resolve(next.value.promise).then((value) => {\n",
+              "    // Cache the last promise value to make it available to the next\n",
+              "    // step of the generator.\n",
+              "    outputElement.lastPromiseValue = value;\n",
+              "    return next.value.response;\n",
+              "  });\n",
+              "}\n",
+              "\n",
+              "/**\n",
+              " * Generator function which is called between each async step of the upload\n",
+              " * process.\n",
+              " * @param {string} inputId Element ID of the input file picker element.\n",
+              " * @param {string} outputId Element ID of the output display.\n",
+              " * @return {!Iterable<!Object>} Iterable of next steps.\n",
+              " */\n",
+              "function* uploadFilesStep(inputId, outputId) {\n",
+              "  const inputElement = document.getElementById(inputId);\n",
+              "  inputElement.disabled = false;\n",
+              "\n",
+              "  const outputElement = document.getElementById(outputId);\n",
+              "  outputElement.innerHTML = '';\n",
+              "\n",
+              "  const pickedPromise = new Promise((resolve) => {\n",
+              "    inputElement.addEventListener('change', (e) => {\n",
+              "      resolve(e.target.files);\n",
+              "    });\n",
+              "  });\n",
+              "\n",
+              "  const cancel = document.createElement('button');\n",
+              "  inputElement.parentElement.appendChild(cancel);\n",
+              "  cancel.textContent = 'Cancel upload';\n",
+              "  const cancelPromise = new Promise((resolve) => {\n",
+              "    cancel.onclick = () => {\n",
+              "      resolve(null);\n",
+              "    };\n",
+              "  });\n",
+              "\n",
+              "  // Wait for the user to pick the files.\n",
+              "  const files = yield {\n",
+              "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
+              "    response: {\n",
+              "      action: 'starting',\n",
+              "    }\n",
+              "  };\n",
+              "\n",
+              "  cancel.remove();\n",
+              "\n",
+              "  // Disable the input element since further picks are not allowed.\n",
+              "  inputElement.disabled = true;\n",
+              "\n",
+              "  if (!files) {\n",
+              "    return {\n",
+              "      response: {\n",
+              "        action: 'complete',\n",
+              "      }\n",
+              "    };\n",
+              "  }\n",
+              "\n",
+              "  for (const file of files) {\n",
+              "    const li = document.createElement('li');\n",
+              "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
+              "    li.append(span(\n",
+              "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
+              "        `last modified: ${\n",
+              "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
+              "                                    'n/a'} - `));\n",
+              "    const percent = span('0% done');\n",
+              "    li.appendChild(percent);\n",
+              "\n",
+              "    outputElement.appendChild(li);\n",
+              "\n",
+              "    const fileDataPromise = new Promise((resolve) => {\n",
+              "      const reader = new FileReader();\n",
+              "      reader.onload = (e) => {\n",
+              "        resolve(e.target.result);\n",
+              "      };\n",
+              "      reader.readAsArrayBuffer(file);\n",
+              "    });\n",
+              "    // Wait for the data to be ready.\n",
+              "    let fileData = yield {\n",
+              "      promise: fileDataPromise,\n",
+              "      response: {\n",
+              "        action: 'continue',\n",
+              "      }\n",
+              "    };\n",
+              "\n",
+              "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
+              "    let position = 0;\n",
+              "    do {\n",
+              "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
+              "      const chunk = new Uint8Array(fileData, position, length);\n",
+              "      position += length;\n",
+              "\n",
+              "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
+              "      yield {\n",
+              "        response: {\n",
+              "          action: 'append',\n",
+              "          file: file.name,\n",
+              "          data: base64,\n",
+              "        },\n",
+              "      };\n",
+              "\n",
+              "      let percentDone = fileData.byteLength === 0 ?\n",
+              "          100 :\n",
+              "          Math.round((position / fileData.byteLength) * 100);\n",
+              "      percent.textContent = `${percentDone}% done`;\n",
+              "\n",
+              "    } while (position < fileData.byteLength);\n",
+              "  }\n",
+              "\n",
+              "  // All done.\n",
+              "  yield {\n",
+              "    response: {\n",
+              "      action: 'complete',\n",
+              "    }\n",
+              "  };\n",
+              "}\n",
+              "\n",
+              "scope.google = scope.google || {};\n",
+              "scope.google.colab = scope.google.colab || {};\n",
+              "scope.google.colab._files = {\n",
+              "  _uploadFiles,\n",
+              "  _uploadFilesContinue,\n",
+              "};\n",
+              "})(self);\n",
+              "</script> "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saving GA_5_dataset.csv to GA_5_dataset.csv\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the dataset; the file name must match the uploaded CSV.\n",
+        "df = pd.read_csv('GA_5_dataset.csv')\n",
+        "\n",
+        "# The target is the Credit_Limit column; every remaining column is a feature.\n",
+        "y = df['Credit_Limit']\n",
+        "X = df.drop(columns=['Credit_Limit'])\n",
+        "\n",
+        "# Hold out 30% of the rows for testing, with a fixed seed.\n",
+        "X_train, X_test, y_train, y_test = train_test_split(\n",
+        "    X, y, test_size=0.3, random_state=42\n",
+        ")\n",
+        "\n",
+        "# Report the shape of each split.\n",
+        "for caption, part in (\n",
+        "    (\"Shape of X_train:\", X_train),\n",
+        "    (\"Shape of X_test :\", X_test),\n",
+        "    (\"Shape of y_train:\", y_train),\n",
+        "    (\"Shape of y_test :\", y_test),\n",
+        "):\n",
+        "    print(caption, part.shape)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "u1DmB_u6YT6u",
+        "outputId": "ef1206b7-6f61-488e-db54-09237e10d8b9"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Shape of X_train: (4200, 16)\n",
+            "Shape of X_test : (1800, 16)\n",
+            "Shape of y_train: (4200,)\n",
+            "Shape of y_test : (1800,)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.linear_model import LinearRegression\n",
+        "from sklearn.metrics import r2_score\n",
+        "\n",
+        "# Linear regression without an intercept term, fitted on the training split\n",
+        "model = LinearRegression(fit_intercept=False).fit(X_train, y_train)\n",
+        "\n",
+        "# Score the predictions for the held-out split with R²\n",
+        "y_pred = model.predict(X_test)\n",
+        "r2 = r2_score(y_test, y_pred)\n",
+        "print(\"R² Score on Test Set:\", r2)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0BK61pSnZC_Z",
+        "outputId": "7951a2ed-e47c-4e39-e5b6-7d14216eeb8d"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "R² Score on Test Set: -0.41121711792312987\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "\n",
+        "# Coefficients learned by the fitted linear model\n",
+        "coefficients = model.coef_\n",
+        "\n",
+        "# Position of the coefficient with the largest magnitude\n",
+        "coefficient_magnitudes = np.abs(coefficients)\n",
+        "index_max_coeff = coefficient_magnitudes.argmax()\n",
+        "\n",
+        "print(\"Index of feature with highest absolute coefficient value:\", index_max_coeff)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "OuZr9EawZtgL",
+        "outputId": "faef861a-95c0-4a9d-d322-be226ac16285"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Index of feature with highest absolute coefficient value: 5\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.linear_model import Ridge\n",
+        "\n",
+        "# Ridge regression with the SAG solver, a tolerance of 0.0005 and a fixed seed\n",
+        "ridge_settings = {'solver': 'sag', 'tol': 0.0005, 'random_state': 42}\n",
+        "ridge_model = Ridge(**ridge_settings).fit(X_train, y_train)\n",
+        "\n",
+        "# Score the predictions for the held-out split with R²\n",
+        "y_pred_ridge = ridge_model.predict(X_test)\n",
+        "r2_ridge = r2_score(y_test, y_pred_ridge)\n",
+        "print(\"R² Score of Ridge Regression on Test Set:\", r2_ridge)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "7WL6ZDkdbDtT",
+        "outputId": "09a378ac-464e-4dd4-e14f-91a32d3f5ed6"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "R² Score of Ridge Regression on Test Set: 0.5031632306039973\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(\"Intercept of Ridge Regression model:\", ridge_model.intercept_)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wlxpDa3FbgH9",
+        "outputId": "b46a973e-5dad-4c6e-99d5-4a9cfffadeec"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Intercept of Ridge Regression model: 8638.307615757858\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.linear_model import Lasso\n",
+        "\n",
+        "# Lasso regression with regularization strength alpha=100 and a fixed seed\n",
+        "lasso_model = Lasso(alpha=100, random_state=42).fit(X_train, y_train)\n",
+        "\n",
+        "# Score the predictions for the held-out split with R²\n",
+        "y_pred_lasso = lasso_model.predict(X_test)\n",
+        "r2_lasso = r2_score(y_test, y_pred_lasso)\n",
+        "print(\"R² Score of Lasso Regression on Test Set:\", r2_lasso)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3NLyRn2BbgEh",
+        "outputId": "c71d523b-15a6-42f8-a903-e82eb453611c"
+      },
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "R² Score of Lasso Regression on Test Set: 0.5013545795541585\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "\n",
+        "# Coefficients of the fitted Lasso model\n",
+        "lasso_coeffs = lasso_model.coef_\n",
+        "\n",
+        "# A coefficient lies in [-1, 1] exactly when its absolute value is at most 1\n",
+        "count_in_range = np.sum(np.abs(lasso_coeffs) <= 1)\n",
+        "\n",
+        "print(\"Number of coefficients in the range [-1, 1]:\", count_in_range)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TVC_y_B3cT4k",
+        "outputId": "72b42fba-7fdf-4d59-f0fe-6e1825c02559"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Number of coefficients in the range [-1, 1]: 9\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.neighbors import KNeighborsRegressor\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "\n",
+        "# 10-nearest-neighbours regressor; p=1 selects the Manhattan distance\n",
+        "knn_model = KNeighborsRegressor(n_neighbors=10, p=1).fit(X_train, y_train)\n",
+        "\n",
+        "# RMSE is the square root of the mean squared error on the held-out split\n",
+        "y_pred_knn = knn_model.predict(X_test)\n",
+        "mse_knn = mean_squared_error(y_test, y_pred_knn)\n",
+        "rmse_knn = np.sqrt(mse_knn)\n",
+        "print(\"Root Mean Squared Error (RMSE) of KNN on Test Set:\", rmse_knn)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "X5jo--1PcvYw",
+        "outputId": "525f31ae-cc7a-47ab-eb47-146a2089f249"
+      },
+      "execution_count": 13,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Root Mean Squared Error (RMSE) of KNN on Test Set: 6707.055787083381\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.tree import DecisionTreeRegressor\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "\n",
+        "# Depth-limited regression tree with minimum split and leaf sizes, fixed seed\n",
+        "tree_settings = {\n",
+        "    'max_depth': 10,\n",
+        "    'min_samples_split': 6,\n",
+        "    'min_samples_leaf': 6,\n",
+        "    'random_state': 42,\n",
+        "}\n",
+        "tree_model = DecisionTreeRegressor(**tree_settings).fit(X_train, y_train)\n",
+        "\n",
+        "# RMSE is the square root of the mean squared error on the held-out split\n",
+        "y_pred_tree = tree_model.predict(X_test)\n",
+        "mse_tree = mean_squared_error(y_test, y_pred_tree)\n",
+        "rmse_tree = np.sqrt(mse_tree)\n",
+        "print(\"Root Mean Squared Error (RMSE) of Decision Tree on Test Set:\", rmse_tree)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GcgGvaEQdcFY",
+        "outputId": "df107910-763a-4fa7-aa52-9edc4190729c"
+      },
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Root Mean Squared Error (RMSE) of Decision Tree on Test Set: 6740.833851583081\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.ensemble import AdaBoostRegressor\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn.metrics import r2_score\n",
+        "\n",
+        "# Hyper-parameter values to try for AdaBoost\n",
+        "param_grid = dict(\n",
+        "    n_estimators=[10, 50, 100, 200, 500],\n",
+        "    learning_rate=[0.1, 0.5, 1, 2],\n",
+        ")\n",
+        "\n",
+        "# Model whose hyper-parameters are tuned, created with a fixed seed\n",
+        "ada = AdaBoostRegressor(random_state=42)\n",
+        "\n",
+        "# 4-fold cross-validated grid search scored by R², run with n_jobs=-1\n",
+        "grid_search = GridSearchCV(\n",
+        "    estimator=ada, param_grid=param_grid, scoring='r2', cv=4, n_jobs=-1\n",
+        ")\n",
+        "grid_search.fit(X_train, y_train)\n",
+        "\n",
+        "# Evaluate the best estimator found by the search on the held-out split\n",
+        "best_ada_model = grid_search.best_estimator_\n",
+        "y_pred_ada = best_ada_model.predict(X_test)\n",
+        "r2_ada = r2_score(y_test, y_pred_ada)\n",
+        "\n",
+        "# Report the winning parameters and the test score\n",
+        "print(\"Best Parameters:\", grid_search.best_params_)\n",
+        "print(\"R² Score of Best AdaBoost Model on Test Set:\", r2_ada)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "7nZzB46geoNF",
+        "outputId": "497cb68d-51b9-488d-a147-c21cbb119b8f"
+      },
+      "execution_count": 15,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Best Parameters: {'learning_rate': 0.1, 'n_estimators': 10}\n",
+            "R² Score of Best AdaBoost Model on Test Set: 0.5400284992718735\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(\"Best n_estimators:\", grid_search.best_params_['n_estimators'])\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3zu1AJ7UfXJD",
+        "outputId": "a39a3a41-be25-42a3-e0cd-3865bcde9c4f"
+      },
+      "execution_count": 16,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Best n_estimators: 10\n"
+          ]
+        }
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "Welcome to Colab",
+      "toc_visible": true,
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/chapter_appendix-tools-for-deep-learning/jupyter.ipynb b/chapter_appendix-tools-for-deep-learning/jupyter.ipynb
new file mode 100644
index 0000000..d4b2714
--- /dev/null
+++ b/chapter_appendix-tools-for-deep-learning/jupyter.ipynb
@@ -0,0 +1,1753 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Lokendra-parmar/python-programming-questions/blob/main/chapter_appendix-tools-for-deep-learning/jupyter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Exploratory Data Analysis using Python"
+      ],
+      "metadata": {
+        "id": "sYAuY4XefN_D"
+      },
+      "id": "sYAuY4XefN_D"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd"
+      ],
+      "metadata": {
+        "id": "0vB2tXoDfhD6"
+      },
+      "id": "0vB2tXoDfhD6",
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Read the CSV relative to the working directory instead of a hard-coded\n",
+        "# absolute path. In Colab the default working directory is /content, so this\n",
+        "# resolves to the same file there and also works in other environments.\n",
+        "df = pd.read_csv(\"customer_shopping_behavior.csv\")"
+      ],
+      "metadata": {
+        "id": "zxQ7epxjf7S1"
+      },
+      "id": "zxQ7epxjf7S1",
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.head()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 313
+        },
+        "id": "uZe77-EMgE4T",
+        "outputId": "1952fd3e-a5a9-47f8-fdaf-aa08913a4308"
+      },
+      "id": "uZe77-EMgE4T",
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "   Customer ID  Age Gender Item Purchased  Category  Purchase Amount (USD)  \\\n",
+              "0            1   55   Male         Blouse  Clothing                     53   \n",
+              "1            2   19   Male        Sweater  Clothing                     64   \n",
+              "2            3   50   Male          Jeans  Clothing                     73   \n",
+              "3            4   21   Male        Sandals  Footwear                     90   \n",
+              "4            5   45   Male         Blouse  Clothing                     49   \n",
+              "\n",
+              "        Location Size      Color  Season  Review Rating Subscription Status  \\\n",
+              "0       Kentucky    L       Gray  Winter            3.1                 Yes   \n",
+              "1          Maine    L     Maroon  Winter            3.1                 Yes   \n",
+              "2  Massachusetts    S     Maroon  Spring            3.1                 Yes   \n",
+              "3   Rhode Island    M     Maroon  Spring            3.5                 Yes   \n",
+              "4         Oregon    M  Turquoise  Spring            2.7                 Yes   \n",
+              "\n",
+              "   Shipping Type Discount Applied Promo Code Used  Previous Purchases  \\\n",
+              "0        Express              Yes             Yes                  14   \n",
+              "1        Express              Yes             Yes                   2   \n",
+              "2  Free Shipping              Yes             Yes                  23   \n",
+              "3   Next Day Air              Yes             Yes                  49   \n",
+              "4  Free Shipping              Yes             Yes                  31   \n",
+              "\n",
+              "  Payment Method Frequency of Purchases  \n",
+              "0          Venmo            Fortnightly  \n",
+              "1           Cash            Fortnightly  \n",
+              "2    Credit Card                 Weekly  \n",
+              "3         PayPal                 Weekly  \n",
+              "4         PayPal               Annually  "
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-5e4ae1f2-0d01-4a37-bd1d-7ca9a802e108\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Customer ID</th>\n",
+              "      <th>Age</th>\n",
+              "      <th>Gender</th>\n",
+              "      <th>Item Purchased</th>\n",
+              "      <th>Category</th>\n",
+              "      <th>Purchase Amount (USD)</th>\n",
+              "      <th>Location</th>\n",
+              "      <th>Size</th>\n",
+              "      <th>Color</th>\n",
+              "      <th>Season</th>\n",
+              "      <th>Review Rating</th>\n",
+              "      <th>Subscription Status</th>\n",
+              "      <th>Shipping Type</th>\n",
+              "      <th>Discount Applied</th>\n",
+              "      <th>Promo Code Used</th>\n",
+              "      <th>Previous Purchases</th>\n",
+              "      <th>Payment Method</th>\n",
+              "      <th>Frequency of Purchases</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>55</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Blouse</td>\n",
+              "      <td>Clothing</td>\n",
+              "      <td>53</td>\n",
+              "      <td>Kentucky</td>\n",
+              "      <td>L</td>\n",
+              "      <td>Gray</td>\n",
+              "      <td>Winter</td>\n",
+              "      <td>3.1</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Express</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>14</td>\n",
+              "      <td>Venmo</td>\n",
+              "      <td>Fortnightly</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>19</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Sweater</td>\n",
+              "      <td>Clothing</td>\n",
+              "      <td>64</td>\n",
+              "      <td>Maine</td>\n",
+              "      <td>L</td>\n",
+              "      <td>Maroon</td>\n",
+              "      <td>Winter</td>\n",
+              "      <td>3.1</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Express</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>2</td>\n",
+              "      <td>Cash</td>\n",
+              "      <td>Fortnightly</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>50</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Jeans</td>\n",
+              "      <td>Clothing</td>\n",
+              "      <td>73</td>\n",
+              "      <td>Massachusetts</td>\n",
+              "      <td>S</td>\n",
+              "      <td>Maroon</td>\n",
+              "      <td>Spring</td>\n",
+              "      <td>3.1</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Free Shipping</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>23</td>\n",
+              "      <td>Credit Card</td>\n",
+              "      <td>Weekly</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>21</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Sandals</td>\n",
+              "      <td>Footwear</td>\n",
+              "      <td>90</td>\n",
+              "      <td>Rhode Island</td>\n",
+              "      <td>M</td>\n",
+              "      <td>Maroon</td>\n",
+              "      <td>Spring</td>\n",
+              "      <td>3.5</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Next Day Air</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>49</td>\n",
+              "      <td>PayPal</td>\n",
+              "      <td>Weekly</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>45</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Blouse</td>\n",
+              "      <td>Clothing</td>\n",
+              "      <td>49</td>\n",
+              "      <td>Oregon</td>\n",
+              "      <td>M</td>\n",
+              "      <td>Turquoise</td>\n",
+              "      <td>Spring</td>\n",
+              "      <td>2.7</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Free Shipping</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>Yes</td>\n",
+              "      <td>31</td>\n",
+              "      <td>PayPal</td>\n",
+              "      <td>Annually</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5e4ae1f2-0d01-4a37-bd1d-7ca9a802e108')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-5e4ae1f2-0d01-4a37-bd1d-7ca9a802e108 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-5e4ae1f2-0d01-4a37-bd1d-7ca9a802e108');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "df",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 3900,\n  \"fields\": [\n    {\n      \"column\": \"Customer ID\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1125,\n        \"min\": 1,\n        \"max\": 3900,\n        \"num_unique_values\": 3900,\n        \"samples\": [\n          840,\n          1718,\n          322\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 15,\n        \"min\": 18,\n        \"max\": 70,\n        \"num_unique_values\": 53,\n        \"samples\": [\n          56,\n          24,\n          51\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Gender\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"Female\",\n          \"Male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Item Purchased\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 25,\n        \"samples\": [\n          \"Handbag\",\n          \"Jewelry\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Category\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"Footwear\",\n          \"Accessories\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Purchase Amount (USD)\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 23,\n        \"min\": 20,\n        \"max\": 100,\n        \"num_unique_values\": 81,\n        \"samples\": [\n          60,\n          53\n        ],\n   
     \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Location\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 50,\n        \"samples\": [\n          \"New Hampshire\",\n          \"Connecticut\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Size\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"S\",\n          \"XL\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Color\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 25,\n        \"samples\": [\n          \"Olive\",\n          \"Red\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Season\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"Spring\",\n          \"Fall\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Review Rating\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.7169829842073647,\n        \"min\": 2.5,\n        \"max\": 5.0,\n        \"num_unique_values\": 26,\n        \"samples\": [\n          4.9,\n          5.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Subscription Status\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"No\",\n          \"Yes\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Shipping Type\",\n      \"properties\": {\n 
       \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Express\",\n          \"Free Shipping\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Discount Applied\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"No\",\n          \"Yes\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Promo Code Used\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"No\",\n          \"Yes\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Previous Purchases\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 14,\n        \"min\": 1,\n        \"max\": 50,\n        \"num_unique_values\": 50,\n        \"samples\": [\n          36,\n          47\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Payment Method\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Venmo\",\n          \"Cash\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Frequency of Purchases\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"Fortnightly\",\n          \"Weekly\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.info()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fYjWO4C7gcWH",
+        "outputId": "a2b1c2e1-7c09-4f07-f559-c38b4774d7e0"
+      },
+      "id": "fYjWO4C7gcWH",
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "RangeIndex: 3900 entries, 0 to 3899\n",
+            "Data columns (total 18 columns):\n",
+            " #   Column                  Non-Null Count  Dtype  \n",
+            "---  ------                  --------------  -----  \n",
+            " 0   Customer ID             3900 non-null   int64  \n",
+            " 1   Age                     3900 non-null   int64  \n",
+            " 2   Gender                  3900 non-null   object \n",
+            " 3   Item Purchased          3900 non-null   object \n",
+            " 4   Category                3900 non-null   object \n",
+            " 5   Purchase Amount (USD)   3900 non-null   int64  \n",
+            " 6   Location                3900 non-null   object \n",
+            " 7   Size                    3900 non-null   object \n",
+            " 8   Color                   3900 non-null   object \n",
+            " 9   Season                  3900 non-null   object \n",
+            " 10  Review Rating           3863 non-null   float64\n",
+            " 11  Subscription Status     3900 non-null   object \n",
+            " 12  Shipping Type           3900 non-null   object \n",
+            " 13  Discount Applied        3900 non-null   object \n",
+            " 14  Promo Code Used         3900 non-null   object \n",
+            " 15  Previous Purchases      3900 non-null   int64  \n",
+            " 16  Payment Method          3900 non-null   object \n",
+            " 17  Frequency of Purchases  3900 non-null   object \n",
+            "dtypes: float64(1), int64(4), object(13)\n",
+            "memory usage: 548.6+ KB\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.describe(include='all')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 466
+        },
+        "id": "3SaaS_RTge3A",
+        "outputId": "141ef1f3-8f27-49d4-85af-c0692ad5161c"
+      },
+      "id": "3SaaS_RTge3A",
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "        Customer ID          Age Gender Item Purchased  Category  \\\n",
+              "count   3900.000000  3900.000000   3900           3900      3900   \n",
+              "unique          NaN          NaN      2             25         4   \n",
+              "top             NaN          NaN   Male         Blouse  Clothing   \n",
+              "freq            NaN          NaN   2652            171      1737   \n",
+              "mean    1950.500000    44.068462    NaN            NaN       NaN   \n",
+              "std     1125.977353    15.207589    NaN            NaN       NaN   \n",
+              "min        1.000000    18.000000    NaN            NaN       NaN   \n",
+              "25%      975.750000    31.000000    NaN            NaN       NaN   \n",
+              "50%     1950.500000    44.000000    NaN            NaN       NaN   \n",
+              "75%     2925.250000    57.000000    NaN            NaN       NaN   \n",
+              "max     3900.000000    70.000000    NaN            NaN       NaN   \n",
+              "\n",
+              "        Purchase Amount (USD) Location  Size  Color  Season  Review Rating  \\\n",
+              "count             3900.000000     3900  3900   3900    3900    3863.000000   \n",
+              "unique                    NaN       50     4     25       4            NaN   \n",
+              "top                       NaN  Montana     M  Olive  Spring            NaN   \n",
+              "freq                      NaN       96  1755    177     999            NaN   \n",
+              "mean                59.764359      NaN   NaN    NaN     NaN       3.750065   \n",
+              "std                 23.685392      NaN   NaN    NaN     NaN       0.716983   \n",
+              "min                 20.000000      NaN   NaN    NaN     NaN       2.500000   \n",
+              "25%                 39.000000      NaN   NaN    NaN     NaN       3.100000   \n",
+              "50%                 60.000000      NaN   NaN    NaN     NaN       3.800000   \n",
+              "75%                 81.000000      NaN   NaN    NaN     NaN       4.400000   \n",
+              "max                100.000000      NaN   NaN    NaN     NaN       5.000000   \n",
+              "\n",
+              "       Subscription Status  Shipping Type Discount Applied Promo Code Used  \\\n",
+              "count                 3900           3900             3900            3900   \n",
+              "unique                   2              6                2               2   \n",
+              "top                     No  Free Shipping               No              No   \n",
+              "freq                  2847            675             2223            2223   \n",
+              "mean                   NaN            NaN              NaN             NaN   \n",
+              "std                    NaN            NaN              NaN             NaN   \n",
+              "min                    NaN            NaN              NaN             NaN   \n",
+              "25%                    NaN            NaN              NaN             NaN   \n",
+              "50%                    NaN            NaN              NaN             NaN   \n",
+              "75%                    NaN            NaN              NaN             NaN   \n",
+              "max                    NaN            NaN              NaN             NaN   \n",
+              "\n",
+              "        Previous Purchases Payment Method Frequency of Purchases  \n",
+              "count          3900.000000           3900                   3900  \n",
+              "unique                 NaN              6                      7  \n",
+              "top                    NaN         PayPal         Every 3 Months  \n",
+              "freq                   NaN            677                    584  \n",
+              "mean             25.351538            NaN                    NaN  \n",
+              "std              14.447125            NaN                    NaN  \n",
+              "min               1.000000            NaN                    NaN  \n",
+              "25%              13.000000            NaN                    NaN  \n",
+              "50%              25.000000            NaN                    NaN  \n",
+              "75%              38.000000            NaN                    NaN  \n",
+              "max              50.000000            NaN                    NaN  "
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-773596bd-0f04-48be-92b8-30c7d7cee283\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Customer ID</th>\n",
+              "      <th>Age</th>\n",
+              "      <th>Gender</th>\n",
+              "      <th>Item Purchased</th>\n",
+              "      <th>Category</th>\n",
+              "      <th>Purchase Amount (USD)</th>\n",
+              "      <th>Location</th>\n",
+              "      <th>Size</th>\n",
+              "      <th>Color</th>\n",
+              "      <th>Season</th>\n",
+              "      <th>Review Rating</th>\n",
+              "      <th>Subscription Status</th>\n",
+              "      <th>Shipping Type</th>\n",
+              "      <th>Discount Applied</th>\n",
+              "      <th>Promo Code Used</th>\n",
+              "      <th>Previous Purchases</th>\n",
+              "      <th>Payment Method</th>\n",
+              "      <th>Frequency of Purchases</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>count</th>\n",
+              "      <td>3900.000000</td>\n",
+              "      <td>3900.000000</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900.000000</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3863.000000</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900.000000</td>\n",
+              "      <td>3900</td>\n",
+              "      <td>3900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>unique</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2</td>\n",
+              "      <td>25</td>\n",
+              "      <td>4</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>50</td>\n",
+              "      <td>4</td>\n",
+              "      <td>25</td>\n",
+              "      <td>4</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2</td>\n",
+              "      <td>6</td>\n",
+              "      <td>2</td>\n",
+              "      <td>2</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>6</td>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>top</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>Male</td>\n",
+              "      <td>Blouse</td>\n",
+              "      <td>Clothing</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>Montana</td>\n",
+              "      <td>M</td>\n",
+              "      <td>Olive</td>\n",
+              "      <td>Spring</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>No</td>\n",
+              "      <td>Free Shipping</td>\n",
+              "      <td>No</td>\n",
+              "      <td>No</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>PayPal</td>\n",
+              "      <td>Every 3 Months</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>freq</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2652</td>\n",
+              "      <td>171</td>\n",
+              "      <td>1737</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>96</td>\n",
+              "      <td>1755</td>\n",
+              "      <td>177</td>\n",
+              "      <td>999</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2847</td>\n",
+              "      <td>675</td>\n",
+              "      <td>2223</td>\n",
+              "      <td>2223</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>677</td>\n",
+              "      <td>584</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>mean</th>\n",
+              "      <td>1950.500000</td>\n",
+              "      <td>44.068462</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>59.764359</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>3.750065</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>25.351538</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>std</th>\n",
+              "      <td>1125.977353</td>\n",
+              "      <td>15.207589</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>23.685392</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>0.716983</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>14.447125</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>min</th>\n",
+              "      <td>1.000000</td>\n",
+              "      <td>18.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>20.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2.500000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>1.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>25%</th>\n",
+              "      <td>975.750000</td>\n",
+              "      <td>31.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>39.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>3.100000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>13.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>50%</th>\n",
+              "      <td>1950.500000</td>\n",
+              "      <td>44.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>60.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>3.800000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>25.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>75%</th>\n",
+              "      <td>2925.250000</td>\n",
+              "      <td>57.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>81.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>4.400000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>38.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>max</th>\n",
+              "      <td>3900.000000</td>\n",
+              "      <td>70.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>100.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>5.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>50.000000</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-773596bd-0f04-48be-92b8-30c7d7cee283')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-773596bd-0f04-48be-92b8-30c7d7cee283 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-773596bd-0f04-48be-92b8-30c7d7cee283');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 11,\n  \"fields\": [\n    {\n      \"column\": \"Customer ID\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1405.274081775269,\n        \"min\": 1.0,\n        \"max\": 3900.0,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          3900.0,\n          1950.5,\n          2925.25\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1364.878318051572,\n        \"min\": 15.20758912716238,\n        \"max\": 3900.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          44.06846153846154,\n          44.0,\n          3900.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Gender\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          2,\n          \"2652\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Item Purchased\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          25,\n          \"171\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Category\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          4,\n          \"1737\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Purchase Amount (USD)\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1359.7647934740537,\n        \"min\": 20.0,\n        
\"max\": 3900.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          59.76435897435898,\n          60.0,\n          3900.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Location\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          50,\n          \"96\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Size\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          4,\n          \"1755\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Color\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          25,\n          \"177\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Season\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          4,\n          \"999\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Review Rating\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1364.602207410873,\n        \"min\": 0.7169829842073647,\n        \"max\": 3863.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          3.750064716541548,\n          3.8,\n          3863.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Subscription Status\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        
\"num_unique_values\": 4,\n        \"samples\": [\n          2,\n          \"2847\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Shipping Type\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          6,\n          \"675\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Discount Applied\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          2,\n          \"2223\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Promo Code Used\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          2,\n          \"2223\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Previous Purchases\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1370.5178374100155,\n        \"min\": 1.0,\n        \"max\": 3900.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          25.35153846153846,\n          25.0,\n          3900.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Payment Method\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          6,\n          \"677\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Frequency of Purchases\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        
\"num_unique_values\": 4,\n        \"samples\": [\n          7,\n          \"584\",\n          \"3900\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 7
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.isnull().sum()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 648
+        },
+        "id": "F8QhPcI2indu",
+        "outputId": "240677f0-bb6b-4e1f-bd6b-a6843353a655"
+      },
+      "id": "F8QhPcI2indu",
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Customer ID                0\n",
+              "Age                        0\n",
+              "Gender                     0\n",
+              "Item Purchased             0\n",
+              "Category                   0\n",
+              "Purchase Amount (USD)      0\n",
+              "Location                   0\n",
+              "Size                       0\n",
+              "Color                      0\n",
+              "Season                     0\n",
+              "Review Rating             37\n",
+              "Subscription Status        0\n",
+              "Shipping Type              0\n",
+              "Discount Applied           0\n",
+              "Promo Code Used            0\n",
+              "Previous Purchases         0\n",
+              "Payment Method             0\n",
+              "Frequency of Purchases     0\n",
+              "dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>0</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>Customer ID</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Age</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Gender</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Item Purchased</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Category</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Purchase Amount (USD)</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Location</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Size</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Color</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Season</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Review Rating</th>\n",
+              "      <td>37</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Subscription Status</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Shipping Type</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Discount Applied</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Promo Code Used</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Previous Purchases</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Payment Method</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Frequency of Purchases</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 8
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "There are 37 null values in the Review Rating column. We can either remove these rows or fill the nulls with some value; filling with the mean or median is the better option."
+      ],
+      "metadata": {
+        "id": "GTeriRgHjE89"
+      },
+      "id": "GTeriRgHjE89"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "So now the question is whether to fill with the mean or the median. The mean is affected by outliers, while the median is robust to them, so we choose the median over the mean."
+      ],
+      "metadata": {
+        "id": "PD-kZrZfjeHh"
+      },
+      "id": "PD-kZrZfjeHh"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df['Review Rating'] = df.groupby('Category')['Review Rating'].transform(lambda x:x.fillna(x.median()))"
+      ],
+      "metadata": {
+        "id": "cNaRp7DviyLE"
+      },
+      "id": "cNaRp7DviyLE",
+      "execution_count": 9,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Here we replace the null values with the median of each category instead of filling with the global median."
+      ],
+      "metadata": {
+        "id": "ubN24NUTlwnq"
+      },
+      "id": "ubN24NUTlwnq"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.isnull().sum()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 648
+        },
+        "id": "OyfjphiwmCbH",
+        "outputId": "5abbaf4a-1d5c-48cd-fae8-01d62fec8e08"
+      },
+      "id": "OyfjphiwmCbH",
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Customer ID               0\n",
+              "Age                       0\n",
+              "Gender                    0\n",
+              "Item Purchased            0\n",
+              "Category                  0\n",
+              "Purchase Amount (USD)     0\n",
+              "Location                  0\n",
+              "Size                      0\n",
+              "Color                     0\n",
+              "Season                    0\n",
+              "Review Rating             0\n",
+              "Subscription Status       0\n",
+              "Shipping Type             0\n",
+              "Discount Applied          0\n",
+              "Promo Code Used           0\n",
+              "Previous Purchases        0\n",
+              "Payment Method            0\n",
+              "Frequency of Purchases    0\n",
+              "dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>0</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>Customer ID</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Age</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Gender</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Item Purchased</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Category</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Purchase Amount (USD)</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Location</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Size</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Color</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Season</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Review Rating</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Subscription Status</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Shipping Type</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Discount Applied</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Promo Code Used</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Previous Purchases</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Payment Method</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Frequency of Purchases</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We transform the name of each column into snake_case format, so that the column names are easy to reference in our SQL queries."
+      ],
+      "metadata": {
+        "id": "OZxQxfDXmdVP"
+      },
+      "id": "OZxQxfDXmdVP"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.columns= df.columns.str.lower()"
+      ],
+      "metadata": {
+        "id": "IF0fbab0mY0I"
+      },
+      "id": "IF0fbab0mY0I",
+      "execution_count": 13,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.columns = df.columns.str.replace(' ','_')"
+      ],
+      "metadata": {
+        "id": "Tgm5yN_rm5rr"
+      },
+      "id": "Tgm5yN_rm5rr",
+      "execution_count": 14,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.columns"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "5DJowruTnQ0f",
+        "outputId": "2c50fe42-f04a-4e98-d76f-8201d710bd53"
+      },
+      "id": "5DJowruTnQ0f",
+      "execution_count": 15,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Index(['customer_id', 'age', 'gender', 'item_purchased', 'category',\n",
+              "       'purchase_amount_(usd)', 'location', 'size', 'color', 'season',\n",
+              "       'review_rating', 'subscription_status', 'shipping_type',\n",
+              "       'discount_applied', 'promo_code_used', 'previous_purchases',\n",
+              "       'payment_method', 'frequency_of_purchases'],\n",
+              "      dtype='object')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 15
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df = df.rename(columns={'purchase_amount_(usd)':'purchase_amount'})"
+      ],
+      "metadata": {
+        "id": "TuAiHzfNnZiO"
+      },
+      "id": "TuAiHzfNnZiO",
+      "execution_count": 16,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.columns"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "5feaMJnEn8-2",
+        "outputId": "e8fd7b9d-0a18-424c-d12f-2360dca1f0e8"
+      },
+      "id": "5feaMJnEn8-2",
+      "execution_count": 17,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Index(['customer_id', 'age', 'gender', 'item_purchased', 'category',\n",
+              "       'purchase_amount', 'location', 'size', 'color', 'season',\n",
+              "       'review_rating', 'subscription_status', 'shipping_type',\n",
+              "       'discount_applied', 'promo_code_used', 'previous_purchases',\n",
+              "       'payment_method', 'frequency_of_purchases'],\n",
+              "      dtype='object')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 17
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# create an age_group column by binning age into four quantile-based groups\n",
+        "labels = ['Young Adult','Adult','Middle aged','Senior']\n",
+        "df['age_group'] = pd.qcut(df['age'],q=4,labels= labels)"
+      ],
+      "metadata": {
+        "id": "_7OzOCVMoGgq"
+      },
+      "id": "_7OzOCVMoGgq",
+      "execution_count": 19,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df[['age','age_group']].head(10)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 363
+        },
+        "id": "0G1E_mAfp6mt",
+        "outputId": "4654bb9e-0635-436c-acac-56216071f8f3"
+      },
+      "id": "0G1E_mAfp6mt",
+      "execution_count": 21,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "   age    age_group\n",
+              "0   55  Middle aged\n",
+              "1   19  Young Adult\n",
+              "2   50  Middle aged\n",
+              "3   21  Young Adult\n",
+              "4   45  Middle aged\n",
+              "5   46  Middle aged\n",
+              "6   63       Senior\n",
+              "7   27  Young Adult\n",
+              "8   26  Young Adult\n",
+              "9   57  Middle aged"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-9b6c3f38-8d7d-46d5-b3d0-c669b4b849f5\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>age</th>\n",
+              "      <th>age_group</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>55</td>\n",
+              "      <td>Middle aged</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>19</td>\n",
+              "      <td>Young Adult</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>50</td>\n",
+              "      <td>Middle aged</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>21</td>\n",
+              "      <td>Young Adult</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>45</td>\n",
+              "      <td>Middle aged</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>46</td>\n",
+              "      <td>Middle aged</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>63</td>\n",
+              "      <td>Senior</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>7</th>\n",
+              "      <td>27</td>\n",
+              "      <td>Young Adult</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>26</td>\n",
+              "      <td>Young Adult</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>57</td>\n",
+              "      <td>Middle aged</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9b6c3f38-8d7d-46d5-b3d0-c669b4b849f5')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-9b6c3f38-8d7d-46d5-b3d0-c669b4b849f5 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-9b6c3f38-8d7d-46d5-b3d0-c669b4b849f5');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "summary": "{\n  \"name\": \"df[['age','age_group']]\",\n  \"rows\": 10,\n  \"fields\": [\n    {\n      \"column\": \"age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 16,\n        \"min\": 19,\n        \"max\": 63,\n        \"num_unique_values\": 10,\n        \"samples\": [\n          26,\n          19,\n          46\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"age_group\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Middle aged\",\n          \"Young Adult\",\n          \"Senior\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 21
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "I created the age_group column to understand customers' purchasing behaviour by age group."
+      ],
+      "metadata": {
+        "id": "UGbYFgeBqQXS"
+      },
+      "id": "UGbYFgeBqQXS"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# create column purchase_frequency_days"
+      ],
+      "metadata": {
+        "id": "W6cS9UBzq71f"
+      },
+      "id": "W6cS9UBzq71f",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Approximate number of days between purchases for each frequency label.\n",
+        "frequency_mapping = {\n",
+        "    'Fortnightly': 14,\n",
+        "    'Weekly': 7,\n",
+        "    'Monthly': 30,\n",
+        "    'Quarterly': 90,\n",
+        "    'Bi-Weekly': 14,\n",
+        "    'Annually': 365,\n",
+        "    'Every 3 Months': 90,  # label is plural in the data; the singular key never matched, leaving NaN\n",
+        "}\n",
+        "df['purchase_frequency_days'] = df['frequency_of_purchases'].map(frequency_mapping)"
+      ],
+      "metadata": {
+        "id": "wlvsoLH9rLVq"
+      },
+      "id": "wlvsoLH9rLVq",
+      "execution_count": 26,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df[['frequency_of_purchases','purchase_frequency_days']].head(10)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 363
+        },
+        "id": "lVwmDlzytmHA",
+        "outputId": "09ac7514-5600-48e8-adb3-5bdbe055c605"
+      },
+      "id": "lVwmDlzytmHA",
+      "execution_count": 27,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "  frequency_of_purchases  purchase_frequency_days\n",
+              "0            Fortnightly                     14.0\n",
+              "1            Fortnightly                     14.0\n",
+              "2                 Weekly                      7.0\n",
+              "3                 Weekly                      7.0\n",
+              "4               Annually                    365.0\n",
+              "5                 Weekly                      7.0\n",
+              "6              Quarterly                     90.0\n",
+              "7                 Weekly                      7.0\n",
+              "8               Annually                    365.0\n",
+              "9              Quarterly                     90.0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-88488a53-3378-421b-9349-fd83ae483c01\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>frequency_of_purchases</th>\n",
+              "      <th>purchase_frequency_days</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Fortnightly</td>\n",
+              "      <td>14.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Fortnightly</td>\n",
+              "      <td>14.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Weekly</td>\n",
+              "      <td>7.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Weekly</td>\n",
+              "      <td>7.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Annually</td>\n",
+              "      <td>365.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>Weekly</td>\n",
+              "      <td>7.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>Quarterly</td>\n",
+              "      <td>90.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>7</th>\n",
+              "      <td>Weekly</td>\n",
+              "      <td>7.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>Annually</td>\n",
+              "      <td>365.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>Quarterly</td>\n",
+              "      <td>90.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-88488a53-3378-421b-9349-fd83ae483c01')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-88488a53-3378-421b-9349-fd83ae483c01 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-88488a53-3378-421b-9349-fd83ae483c01');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "summary": "{\n  \"name\": \"df[['frequency_of_purchases','purchase_frequency_days']]\",\n  \"rows\": 10,\n  \"fields\": [\n    {\n      \"column\": \"frequency_of_purchases\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"Weekly\",\n          \"Quarterly\",\n          \"Fortnightly\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"purchase_frequency_days\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 145.2669573195807,\n        \"min\": 7.0,\n        \"max\": 365.0,\n        \"num_unique_values\": 4,\n        \"samples\": [\n          7.0,\n          90.0,\n          14.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 27
+        }
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "required_libs": [],
+    "colab": {
+      "provenance": [],
+      "include_colab_link": true
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/email.json b/email.json
new file mode 100644
index 0000000..4aa7711
--- /dev/null
+++ b/email.json
@@ -0,0 +1,3 @@
+{
+  "email": "23f3002893@ds.study.iitm.ac.in"
+}
diff --git a/problem_1.md b/problem_1.md
new file mode 100644
index 0000000..78f6cb1
--- /dev/null
+++ b/problem_1.md
@@ -0,0 +1,161 @@
+---
+title: Data type operations
+---
+
+# Problem Statement
+
+Implement the function data_type_operations(data: dict) -> dict that takes a dictionary containing keys 'int', 'float', 'list', and 'set'. Perform the following operations:
+
+- For 'int', add 5.
+- For 'float', multiply by 1.5.
+- For 'list', append the length of the list (its length before appending).
+- For 'set', add the square of the size of the set (its size before adding) as a new element.
+
+Return the modified dictionary.
+
+**Example**
+```
+data = {
+    'int': 10,
+    'float': 4.0,
+    'list': [1, 2, 3],
+    'set': {1, 2}
+}
+Result: {
+    'int': 15,
+    'float': 6.0,
+    'list': [1, 2, 3, 3],
+    'set': {1, 2, 4}
+}
+```
+
+# Solution
+
+```py3 test.py -r 'python test.py'
+<template>
+def data_type_operations(data: dict) -> dict:
+    '''
+    Modify the input dictionary as per the operations specified.
+
+    Arguments:
+    data: dict - Dictionary containing specific keys and values.
+
+    Returns:
+    dict - Modified dictionary after performing operations.
+    '''
+    <los>...</los>
+    <sol>
+    data['int'] += 5
+    data['float'] *= 1.5
+    data['list'].append(len(data['list']))
+    data['set'].add(len(data['set'])**2)
+    return data
+    </sol>
+
+</template>
+<suffix_invisible>
+{% include '../function_type_and_modify_check_suffix.py.jinja' %}
+</suffix_invisible>
+
+```
+
+# Public Test Cases
+
+## Input 1
+
+```
+data = {
+    'int': 3,
+    'float': 2.0,
+    'list': [10, 20],
+    'set': {2}
+}
+is_equal(
+    data_type_operations(data),
+    {
+        'int': 8,
+        'float': 3.0,
+        'list': [10, 20, 2],
+        'set': {2, 1}
+    }
+)
+```
+
+## Output 1
+
+```
+{
+    'int': 8,
+    'float': 3.0,
+    'list': [10, 20, 2],
+    'set': {2, 1}
+}
+```
+
+# Private Test Cases
+
+## Input 1
+
+```
+data = {
+    'int': 0,
+    'float': 0.0,
+    'list': [],
+    'set': set()
+}
+is_equal(
+    data_type_operations(data),
+    {
+        'int': 5,
+        'float': 0.0,
+        'list': [0],
+        'set': {0}
+    }
+)
+
+```
+
+## Output 1
+
+```
+{
+    'int': 5,
+    'float': 0.0,
+    'list': [0],
+    'set': {0}
+}
+
+```
+
+## Input 2
+
+```
+data = {
+    'int': -5,
+    'float': 10.0,
+    'list': [1, 2, 3, 4],
+    'set': {1, 3, 5}
+}
+is_equal(
+    data_type_operations(data),
+    {
+        'int': 0,
+        'float': 15.0,
+        'list': [1, 2, 3, 4, 4],
+        'set': {1, 3, 5, 9}
+    }
+)
+
+```
+
+## Output 2
+
+```
+{
+    'int': 0,
+    'float': 15.0,
+    'list': [1, 2, 3, 4, 4],
+    'set': {1, 3, 5, 9}
+}
+
+
+```
diff --git a/problem_2.md b/problem_2.md
new file mode 100644
index 0000000..25dcc70
--- /dev/null
+++ b/problem_2.md
@@ -0,0 +1,104 @@
+---
+title: Top Student
+---
+
+# Problem Statement
+
+Implement the function top_student(students: list) -> dict that takes a list of dictionaries where each dictionary contains 'name' and 'marks' of a student. Return a dictionary with the following keys:
+
+- 'average': the average marks rounded to 2 decimal places.
+- 'topper': the name of the student with the highest marks.
+
+**Example**
+```
+students = [
+    {"name": "lucky", "marks": 85},
+    {"name": "Rocky", "marks": 92},
+    {"name": "Chocky", "marks": 78}
+]
+top_student(students) # Output: {"average": 85.0, "topper": "Rocky"}
+```
+
+# Solution
+
+py3 test.py -r 'python test.py'
+<template>
+def top_student(students: list) -> dict:
+    '''
+    Find the average marks and the student with the highest marks.
+    Arguments:
+    students: list - a list of dictionaries containing 'name' and 'marks'.
+    Return: dict - a dictionary with 'average' and 'topper'.
+    '''
+    <los>...</los>
+    <sol>
+    average = round(sum(s['marks'] for s in students) / len(students), 2)
+    topper = max(students, key=lambda x: x['marks'])['name']
+    return {'average': average, 'topper': topper}  </sol>
+
+</template>
+<suffix_invisible>
+{% include '../function_type_and_modify_check_suffix.py.jinja' %}
+</suffix_invisible>
+
+
+# Public Test Cases
+
+## Input 1
+
+```
+students = [
+    {"name": "Johny", "marks": 70},
+    {"name": "Dony", "marks": 90},
+    {"name": "Smithy", "marks": 80}
+]
+is_equal(
+    top_student(students),
+    {"average": 80.0, "topper": "Dony"}
+)
+```
+
+## Output 1
+
+```
+{"average": 80.0, "topper": "Dony"}
+```
+
+# Private Test Cases
+
+## Input 1
+
+```
+students = [
+    {"name": "Ammu", "marks": 88},
+    {"name": "Evuram", "marks": 92}
+]
+is_equal(
+    top_student(students),
+    {"average": 90.0, "topper": "Evuram"}
+)
+```
+
+## Output 1
+
+```
+{"average": 90.0, "topper": "Evuram"}
+```
+
+## Input 2
+
+```
+students = [
+    {"name": "Manvendra", "marks": 70},
+    {"name": "Lokendra", "marks": 86}
+]
+is_equal(
+    top_student(students),
+    {"average": 78.0, "topper": "Lokendra"}
+)
+```
+
+## Output 2
+
+```
+{"average": 78.0, "topper": "Lokendra"}
+```
diff --git a/problem_3.md b/problem_3.md
new file mode 100644
index 0000000..8ea5e0a
--- /dev/null
+++ b/problem_3.md
@@ -0,0 +1,85 @@
+---
+title: City Temperatures
+---
+
+# Problem Statement
+
+Write a program that reads city temperatures from stdin. Each input line has the format `<city> <temperature>`, and the input ends when an empty line is encountered. Print the average temperature rounded to 2 decimal places, followed by the name of the city with the highest temperature (the highest temperature is guaranteed to be unique).
+
+**Example**
+```
+Input:
+Delhi 30
+Mumbai 35
+Chennai 33
+
+Output:
+Average Temperature: 32.67
+City with Highest Temperature: Mumbai
+```
+
+# Solution
+
+```py3 test.py -r 'python test.py'
+<template>
+import sys
+
+def main():
+    '''
+    Calculate the average temperature and find the city with the highest temperature.
+
+    Input: Reads lines containing '<city> <temperature>'. Ends on an empty line.
+    Output: Prints average temperature and city with highest temperature.
+    '''
+    <los>...</los>
+    <sol>
+    lines = sys.stdin.read().strip().split('\n')
+    data = [line.split() for line in lines if line]
+    cities = [d[0] for d in data]
+    temps = [float(d[1]) for d in data]
+
+    avg_temp = round(sum(temps) / len(temps), 2)
+    highest_temp_city = cities[temps.index(max(temps))]
+
+    print(f"Average Temperature: {avg_temp}")
+    print(f"City with Highest Temperature: {highest_temp_city}")
+    </sol>
+</template>
+<suffix_invisible>
+{% include '../function_type_and_modify_check_suffix.py.jinja' %}
+</suffix_invisible>
+```
+
+# Public Test Cases
+
+## Input 1
+
+```
+NYC 20
+London 25
+Berlin 22
+```
+
+## Output 1
+
+```
+Average Temperature: 22.33
+City with Highest Temperature: London
+```
+
+# Private Test Cases
+
+## Input 1
+
+```
+Delhi 27
+Bhopal 24
+Indore 21
+```
+
+## Output 1
+
+```
+Average Temperature: 24.00
+City with Highest Temperature: Delhi
+```
diff --git a/problem_4.md b/problem_4.md
new file mode 100644
index 0000000..e6cac6f
--- /dev/null
+++ b/problem_4.md
@@ -0,0 +1,135 @@
+---
+title: "Problem-Solving: Expense Tracker"
+---
+
+# Problem Statement
+
+Design an expense tracker application that allows users to:
+
+1. **Add an expense**: Record an expense with a category and amount.
+2. **Remove an expense**: Remove a specific expense by its ID.
+3. **Get total expenses**: Return the total amount of expenses.
+
+You need to implement the following functions:
+
+- `add_expense(expenses: list, category: str, amount: float) -> list`: Adds a new expense with a unique ID and returns the updated list.
+- `remove_expense(expenses: list, expense_id: int) -> list`: Removes an expense by its ID.
+- `total_expenses(expenses: list) -> float`: Returns the total amount of all expenses.
+
+**Example**
+```
+expenses = []
+expenses = add_expense(expenses, "Food", 50.0)
+expenses = add_expense(expenses, "Transport", 30.0)
+expenses = remove_expense(expenses, 1)
+print(total_expenses(expenses))  # Output: 30.0
+```
+
+# Solution
+
+```py3 test.py -r 'python test.py'
+<template>
+def add_expense(expenses: list, category: str, amount: float) -> dict:
+    '''
+    Add a new expense to the list with a unique ID.
+
+    Arguments:
+    expenses: list - List of existing expenses.
+    category: str - Category of the expense.
+    amount: float - Amount of the expense.
+
+    Return: dict - Newly added expense.
+    '''
+    <los>...</los>
+    <sol>
+    expense_id = len(expenses) + 1
+    expense = {"id": expense_id, "category": category, "amount": amount}
+    expenses.append(expense)
+    return expenses  </sol>
+
+def remove_expense(expenses: list, expense_id: int) -> list:
+    '''
+    Remove an expense from the list by its ID.
+
+    Arguments:
+    expenses: list - List of existing expenses.
+    expense_id: int - ID of the expense to remove.
+
+    Return: list - Updated list of expenses.
+    '''
+    <los>...</los>
+    <sol>
+    expenses = [expense for expense in expenses if expense["id"] != expense_id]
+    return expenses  </sol>
+
+def total_expenses(expenses: list) -> float:
+    '''
+    Calculate the total amount of all expenses.
+
+    Arguments:
+    expenses: list - List of existing expenses.
+
+    Return: float - Total expenses.
+    '''
+    <los>...</los>
+    <sol>
+    return sum(expense["amount"] for expense in expenses)  </sol>
+
+</template>
+<suffix_invisible>
+{% include '../function_type_and_modify_check_suffix.py.jinja' %}
+</suffix_invisible>
+```
+
+# Public Test Cases
+
+## Input 1
+
+```
+expenses = []
+expenses = add_expense(expenses, "Food", 50.0)
+expenses = add_expense(expenses, "Travel", 100.0)
+is_equal(
+    total_expenses(expenses),
+    150.0
+)
+expenses = remove_expense(expenses, 1)
+is_equal(
+    total_expenses(expenses),
+    100.0
+)
+```
+
+## Output 1
+
+```
+150.0
+100.0
+```
+
+# Private Test Cases
+
+## Input 1
+
+```
+expenses = []
+expenses = add_expense(expenses, "Utilities", 80.0)
+expenses = add_expense(expenses, "Groceries", 120.0)
+expenses = add_expense(expenses, "Entertainment", 60.0)
+is_equal(
+    total_expenses(expenses),
+    260.0
+)
+expenses = remove_expense(expenses, 2)
+is_equal(
+    total_expenses(expenses),
+    140.0
+)
+```
+
+## Output 1
+
+```
+260.0
+140.0
+```

	label	text
0	0	user id enrondlr pw bnawebescapenumber origina...
1	0	hi chris tonight we are rolling out a new repo...
2	0	rika r these new original message from thomas ...
3	0	john gerald we are currently trading under gtc...
4	0	gerald and stacy attached is a worksheet for a...
5	0	fyi below is a copy of my communication with m...
6	0	pg e gt nw plans lateral across washington sta...
7	0	mark i am working with the east power desk to ...
8	0	oops here it is kal original message from shah...
9	0	mark and charlie fmpa is ready to bill us for ...
	Customer ID	Age	Gender	Item Purchased	Category	Purchase Amount (USD)	Location	Size	Color	Season	Review Rating	Subscription Status	Shipping Type	Discount Applied	Promo Code Used	Previous Purchases	Payment Method	Frequency of Purchases
0	1	55	Male	Blouse	Clothing	53	Kentucky	L	Gray	Winter	3.1	Yes	Express	Yes	Yes	14	Venmo	Fortnightly
1	2	19	Male	Sweater	Clothing	64	Maine	L	Maroon	Winter	3.1	Yes	Express	Yes	Yes	2	Cash	Fortnightly
2	3	50	Male	Jeans	Clothing	73	Massachusetts	S	Maroon	Spring	3.1	Yes	Free Shipping	Yes	Yes	23	Credit Card	Weekly
3	4	21	Male	Sandals	Footwear	90	Rhode Island	M	Maroon	Spring	3.5	Yes	Next Day Air	Yes	Yes	49	PayPal	Weekly
4	5	45	Male	Blouse	Clothing	49	Oregon	M	Turquoise	Spring	2.7	Yes	Free Shipping	Yes	Yes	31	PayPal	Annually
	Customer ID	Age	Gender	Item Purchased	Category	Purchase Amount (USD)	Location	Size	Color	Season	Review Rating	Subscription Status	Shipping Type	Discount Applied	Promo Code Used	Previous Purchases	Payment Method	Frequency of Purchases
count	3900.000000	3900.000000	3900	3900	3900	3900.000000	3900	3900	3900	3900	3863.000000	3900	3900	3900	3900	3900.000000	3900	3900
unique	NaN	NaN	2	25	4	NaN	50	4	25	4	NaN	2	6	2	2	NaN	6	7
top	NaN	NaN	Male	Blouse	Clothing	NaN	Montana	M	Olive	Spring	NaN	No	Free Shipping	No	No	NaN	PayPal	Every 3 Months
freq	NaN	NaN	2652	171	1737	NaN	96	1755	177	999	NaN	2847	675	2223	2223	NaN	677	584
mean	1950.500000	44.068462	NaN	NaN	NaN	59.764359	NaN	NaN	NaN	NaN	3.750065	NaN	NaN	NaN	NaN	25.351538	NaN	NaN
std	1125.977353	15.207589	NaN	NaN	NaN	23.685392	NaN	NaN	NaN	NaN	0.716983	NaN	NaN	NaN	NaN	14.447125	NaN	NaN
min	1.000000	18.000000	NaN	NaN	NaN	20.000000	NaN	NaN	NaN	NaN	2.500000	NaN	NaN	NaN	NaN	1.000000	NaN	NaN
25%	975.750000	31.000000	NaN	NaN	NaN	39.000000	NaN	NaN	NaN	NaN	3.100000	NaN	NaN	NaN	NaN	13.000000	NaN	NaN
50%	1950.500000	44.000000	NaN	NaN	NaN	60.000000	NaN	NaN	NaN	NaN	3.800000	NaN	NaN	NaN	NaN	25.000000	NaN	NaN
75%	2925.250000	57.000000	NaN	NaN	NaN	81.000000	NaN	NaN	NaN	NaN	4.400000	NaN	NaN	NaN	NaN	38.000000	NaN	NaN
max	3900.000000	70.000000	NaN	NaN	NaN	100.000000	NaN	NaN	NaN	NaN	5.000000	NaN	NaN	NaN	NaN	50.000000	NaN	NaN
	0
Customer ID	0
Age	0
Gender	0
Item Purchased	0
Category	0
Purchase Amount (USD)	0
Location	0
Size	0
Color	0
Season	0
Review Rating	37
Subscription Status	0
Shipping Type	0
Discount Applied	0
Promo Code Used	0
Previous Purchases	0
Payment Method	0
Frequency of Purchases	0
	age	age_group
0	55	Middle aged
1	19	Young Adult
2	50	Middle aged
3	21	Young Adult
4	45	Middle aged
5	46	Middle aged
6	63	Senior
7	27	Young Adult
8	26	Young Adult
9	57	Middle aged
	frequency_of_purchases	purchase_frequency_days
0	Fortnightly	14.0
1	Fortnightly	14.0
2	Weekly	7.0
3	Weekly	7.0
4	Annually	365.0
5	Weekly	7.0
6	Quarterly	90.0
7	Weekly	7.0
8	Annually	365.0
9	Quarterly	90.0