diff --git a/model_practice/TitanicSurviverPredict.ipynb b/model_practice/TitanicSurviverPredict.ipynb
index 60d6851..fb989f9 100644
--- a/model_practice/TitanicSurviverPredict.ipynb
+++ b/model_practice/TitanicSurviverPredict.ipynb
@@ -32,7 +32,7 @@
"metadata": {
"id": "v96JX7Flle9E",
"colab_type": "code",
- "outputId": "f28b1aca-9269-4b53-d601-22c7e39d61af",
+ "outputId": "97fb4732-4afd-4489-b9fb-b89935ba71ed",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 255
@@ -42,7 +42,7 @@
"train_data = pd.read_csv('drive/My Drive/train_data/titanic_train.csv')\n",
"train_data.head()"
],
- "execution_count": 8,
+ "execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
@@ -174,7 +174,7 @@
"metadata": {
"tags": []
},
- "execution_count": 8
+ "execution_count": 2
}
]
},
@@ -183,7 +183,7 @@
"metadata": {
"id": "DzJlvYPl78aL",
"colab_type": "code",
- "outputId": "f3ba4960-e50a-429b-95a9-bdb1ccfe156a",
+ "outputId": "4a00e5a2-bc66-4178-c905-8e437fc956d8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 238
@@ -192,7 +192,7 @@
"source": [
"train_data.isnull().sum()"
],
- "execution_count": 9,
+ "execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
@@ -216,7 +216,7 @@
"metadata": {
"tags": []
},
- "execution_count": 9
+ "execution_count": 3
}
]
},
@@ -225,7 +225,7 @@
"metadata": {
"id": "1zILNZW08BSP",
"colab_type": "code",
- "outputId": "64cd24e0-1622-42f5-c118-87dbc2c2bfe7",
+ "outputId": "26ae2e62-0a67-408d-df55-f012e411bd63",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 255
@@ -237,7 +237,7 @@
"train_data['Sex'] = train_data['Sex'].map(sex_mapping)\n",
"train_data.head()"
],
- "execution_count": 10,
+ "execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
@@ -369,7 +369,422 @@
"metadata": {
"tags": []
},
- "execution_count": 10
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "FmmMODl4IS8W",
+ "colab_type": "code",
+ "outputId": "3597bb8b-b6fc-4aab-c320-34efab5e385c",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 238
+ }
+ },
+ "source": [
+ "# Fill in missing Age values using the median Age of each (Sex, Pclass) group\n",
+ "guess_ages = np.zeros((2,3))\n",
+ "\n",
+ "for i in range(0, 2):\n",
+ " for j in range(0, 3):\n",
+ " guess_df = train_data[(train_data['Sex'] == i) & (train_data['Pclass'] == j+1)]['Age'].dropna()\n",
+ " age_guess = guess_df.median()\n",
+ " guess_ages[i, j] = int( age_guess/0.5 + 0.5 ) * 0.5\n",
+ "\n",
+ "for i in range(0, 2):\n",
+ " for j in range(0, 3):\n",
+ " # fill guess_ages\n",
+ " train_data.loc[ (train_data.Age.isnull()) & (train_data.Sex == i) & (train_data.Pclass == j+1), 'Age'] = guess_ages[i, j]\n",
+ "\n",
+ "train_data['Age'] = train_data['Age'].astype(int)\n",
+ "train_data.head()"
+ ],
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " 1 | \n",
+ " 22 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " 0 | \n",
+ " 38 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " 0 | \n",
+ " 26 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " 0 | \n",
+ " 35 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " 1 | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass ... Fare Cabin Embarked\n",
+ "0 1 0 3 ... 7.2500 NaN S\n",
+ "1 2 1 1 ... 71.2833 C85 C\n",
+ "2 3 1 3 ... 7.9250 NaN S\n",
+ "3 4 1 1 ... 53.1000 C123 S\n",
+ "4 5 0 3 ... 8.0500 NaN S\n",
+ "\n",
+ "[5 rows x 12 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ZDdBkC88Xqg1",
+ "colab_type": "code",
+ "outputId": "039f55c9-bd05-4f75-af2c-e9305bf67b3f",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
+ }
+ },
+ "source": [
+ "train_data['AgeBand'] = pd.cut(train_data['Age'], 5)\n",
+ "train_data[['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True)"
+ ],
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AgeBand | \n",
+ " Survived | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " (-0.08, 16.0] | \n",
+ " 0.550000 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " (16.0, 32.0] | \n",
+ " 0.337374 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " (32.0, 48.0] | \n",
+ " 0.412037 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " (48.0, 64.0] | \n",
+ " 0.434783 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " (64.0, 80.0] | \n",
+ " 0.090909 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AgeBand Survived\n",
+ "0 (-0.08, 16.0] 0.550000\n",
+ "1 (16.0, 32.0] 0.337374\n",
+ "2 (32.0, 48.0] 0.412037\n",
+ "3 (48.0, 64.0] 0.434783\n",
+ "4 (64.0, 80.0] 0.090909"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Pj_U1OgZUj3e",
+ "colab_type": "code",
+ "outputId": "93d1ca92-513d-49db-bd9e-d69712df0ecb",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 289
+ }
+ },
+ "source": [
+ "train_data.loc[ train_data['Age'] <= 16, 'Age'] = 0\n",
+ "train_data.loc[ (train_data['Age'] > 16) & (train_data['Age'] <= 32), 'Age'] = 1\n",
+ "train_data.loc[ (train_data['Age'] > 32) & (train_data['Age'] <= 48), 'Age'] = 2\n",
+ "train_data.loc[ (train_data['Age'] > 48) & (train_data['Age'] <= 64), 'Age'] = 3\n",
+ "train_data.loc[ train_data['Age'] > 64, 'Age'] = 4  # oldest band; previously a no-op selection\n",
+ "train_data.head()"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ " AgeBand | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ " (16.0, 32.0] | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ " (32.0, 48.0] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ " (16.0, 32.0] | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ " (32.0, 48.0] | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ " (32.0, 48.0] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass ... Cabin Embarked AgeBand\n",
+ "0 1 0 3 ... NaN S (16.0, 32.0]\n",
+ "1 2 1 1 ... C85 C (32.0, 48.0]\n",
+ "2 3 1 3 ... NaN S (16.0, 32.0]\n",
+ "3 4 1 1 ... C123 S (32.0, 48.0]\n",
+ "4 5 0 3 ... NaN S (32.0, 48.0]\n",
+ "\n",
+ "[5 rows x 13 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 7
}
]
},
@@ -378,24 +793,30 @@
"metadata": {
"id": "vp49nn2usonr",
"colab_type": "code",
- "outputId": "4c32d3f9-8892-460e-9c04-6e70de6cbe65",
+ "outputId": "51b07567-bb9d-4c41-a1e0-513ef8d8cc42",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
}
},
"source": [
- "X = pd.DataFrame(train_data[['Pclass', 'Sex']])\n",
+ "feature_label = [\n",
+ " 'Pclass',\n",
+ " 'Sex',\n",
+ " 'Age',\n",
+ " 'SibSp',\n",
+ "]\n",
+ "X = pd.DataFrame(train_data[feature_label])\n",
"y = pd.DataFrame(train_data.iloc[:, 1]) # Survived\n",
"print(X.shape)\n",
"print(y.shape)"
],
- "execution_count": 11,
+ "execution_count": 33,
"outputs": [
{
"output_type": "stream",
"text": [
- "(891, 2)\n",
+ "(891, 4)\n",
"(891, 1)\n"
],
"name": "stdout"
@@ -407,7 +828,7 @@
"metadata": {
"id": "avgsvmL9t_sn",
"colab_type": "code",
- "outputId": "25f45bff-2a9e-4111-a9d2-8ae4f9eee4e9",
+ "outputId": "e8a5d053-b99b-453b-9c90-5afcc1f15216",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
@@ -419,13 +840,13 @@
"print(X_train.shape)\n",
"print(X_test.shape)"
],
- "execution_count": 15,
+ "execution_count": 34,
"outputs": [
{
"output_type": "stream",
"text": [
- "(623, 2)\n",
- "(268, 2)\n"
+ "(623, 4)\n",
+ "(268, 4)\n"
],
"name": "stdout"
}
@@ -436,7 +857,7 @@
"metadata": {
"id": "kbX47pzmu5D-",
"colab_type": "code",
- "outputId": "d7d1218e-a7c7-4360-f24d-1e6522336dc9",
+ "outputId": "2152512b-4c00-45a4-fc0a-c758eb56e126",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 156
@@ -448,7 +869,7 @@
"model = LogisticRegression(random_state=FIXED_RESULT)\n",
"model.fit(X_train, y_train)"
],
- "execution_count": 20,
+ "execution_count": 35,
"outputs": [
{
"output_type": "stream",
@@ -472,7 +893,7 @@
"metadata": {
"tags": []
},
- "execution_count": 20
+ "execution_count": 35
}
]
},
@@ -481,39 +902,26 @@
"metadata": {
"id": "WFqxHH6NvQ50",
"colab_type": "code",
+ "outputId": "bb33c855-a196-41bd-dd92-55bf60e4f25b",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
- },
- "outputId": "1fac9b1d-c84b-42e2-f011-0ae87a25fa60"
+ }
},
"source": [
"# prediction\n",
- "print(model.score(X_train, y_train))"
+ "print(model.score(X_test, y_test))"
],
- "execution_count": 21,
+ "execution_count": 36,
"outputs": [
{
"output_type": "stream",
"text": [
- "0.8009630818619583\n"
+ "0.7611940298507462\n"
],
"name": "stdout"
}
]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "v_89Rw7G7uc-",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- ""
- ],
- "execution_count": 0,
- "outputs": []
}
]
}
\ No newline at end of file
diff --git a/model_practice/titanicsurviverpredict.py b/model_practice/titanicsurviverpredict.py
index cdd8876..8dd7397 100644
--- a/model_practice/titanicsurviverpredict.py
+++ b/model_practice/titanicsurviverpredict.py
@@ -21,7 +21,40 @@
train_data['Sex'] = train_data['Sex'].map(sex_mapping)
train_data.head()
-X = pd.DataFrame(train_data[['Pclass', 'Sex']])
+# Fill in missing Age values using the median Age of each (Sex, Pclass) group
+guess_ages = np.zeros((2,3))  # rows: Sex code (0/1); columns: Pclass - 1 (0..2)
+
+for i in range(0, 2):
+ for j in range(0, 3):
+ guess_df = train_data[(train_data['Sex'] == i) & (train_data['Pclass'] == j+1)]['Age'].dropna()  # known ages in this group
+ age_guess = guess_df.median()
+ guess_ages[i, j] = int( age_guess/0.5 + 0.5 ) * 0.5  # round the median to the nearest 0.5
+
+for i in range(0, 2):
+ for j in range(0, 3):
+ # write the group's guessed age into rows of that group whose Age is null
+ train_data.loc[ (train_data.Age.isnull()) & (train_data.Sex == i) & (train_data.Pclass == j+1), 'Age'] = guess_ages[i, j]
+
+train_data['Age'] = train_data['Age'].astype(int)  # no nulls remain in the six groups above; fractional ages are truncated
+train_data.head()
+
+train_data['AgeBand'] = pd.cut(train_data['Age'], 5)
+train_data[['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True)
+
+train_data.loc[ train_data['Age'] <= 16, 'Age'] = 0  # replace Age in place with an ordinal band code (0..4)
+train_data.loc[ (train_data['Age'] > 16) & (train_data['Age'] <= 32), 'Age'] = 1
+train_data.loc[ (train_data['Age'] > 32) & (train_data['Age'] <= 48), 'Age'] = 2
+train_data.loc[ (train_data['Age'] > 48) & (train_data['Age'] <= 64), 'Age'] = 3
+train_data.loc[ train_data['Age'] > 64, 'Age'] = 4  # was a bare selection with no assignment, leaving raw ages > 64 in the feature
+train_data.head()
+
+feature_label = [
+ 'Pclass',
+ 'Sex',
+ 'Age',
+ 'SibSp',
+]
+X = pd.DataFrame(train_data[feature_label])
y = pd.DataFrame(train_data.iloc[:, 1]) # Survived
print(X.shape)
print(y.shape)
@@ -37,5 +70,4 @@
model.fit(X_train, y_train)
# prediction
-print(model.score(X_train, y_train))
-
+print(model.score(X_test, y_test))
\ No newline at end of file