From ea297ed9a467e997d6c2aee360a4fb8c01943de1 Mon Sep 17 00:00:00 2001 From: Nwabueze Ugoh Date: Tue, 28 Apr 2026 13:44:18 +0100 Subject: [PATCH 1/3] Update notebooks to enhance execution counts and output displays - Added execution counts to various code cells across multiple notebooks for better tracking of code execution. - Enhanced output displays in `boolean-data.ipynb`, `categorical-data.ipynb`, `communicate-plots.ipynb`, `data-import.ipynb`, `data-tidy.ipynb`, `data-transform.ipynb`, and `data-visualise.ipynb` to provide clearer results and visualizations. - Improved overall consistency in notebook formatting and presentation. --- boolean-data.ipynb | 1135 +- categorical-data.ipynb | 359 +- communicate-plots.ipynb | 7792 ++++++++++++- data-import.ipynb | 326 +- data-tidy.ipynb | 1013 +- data-transform.ipynb | 1486 ++- data-visualise.ipynb | 2522 ++++- data/bake_sale.xlsx | Bin 6218 -> 6192 bytes databases.ipynb | 420 +- dates-and-times.ipynb | 11631 ++++++++++++++++++- exploratory-data-analysis.ipynb | 17753 +++++++++++++++++++++++++++++- functions.ipynb | 249 +- introduction.ipynb | 122 +- iteration.ipynb | 549 +- joins.ipynb | 374 +- missing-values.ipynb | 1470 ++- numbers.ipynb | 7333 +++++++++++- prerequisites.ipynb | 2 +- rectangling.ipynb | 359 +- regex.ipynb | 105 +- spreadsheets.ipynb | 418 +- strings.ipynb | 810 +- vis-layers.ipynb | 3902 ++++++- webscraping-and-apis.ipynb | 500 +- whole-game.ipynb | 110 +- workflow-basics.ipynb | 180 +- workflow-style.ipynb | 36 +- workflow-writing-code.ipynb | 2 +- 28 files changed, 59878 insertions(+), 1080 deletions(-) diff --git a/boolean-data.ipynb b/boolean-data.ipynb index 931bbb2..7da9c0b 100644 --- a/boolean-data.ipynb +++ b/boolean-data.ipynb @@ -22,10 +22,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "7e35b9fc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "bool_variable = True\n", "bool_variable" @@ -51,10 +62,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "590cd75d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "not True" ] @@ -86,10 +108,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "51622575", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], "source": [ "boolean_condition = 10 == 20\n", "print(boolean_condition)" @@ -115,10 +145,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "0c550daa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ada, you achieved a high score.\n", + "You could be called Smith or have a high score\n", + "You are not called Smith and you have a high score\n" + ] + } + ], "source": [ "name = \"Ada\"\n", "score = 99\n", @@ -143,10 +183,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "7420e1c1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n" + ] + } + ], "source": [ "name_list = [\"Ada\", \"Adam\"]\n", "name_list_two = [\"Ada\", \"Adam\"]\n", @@ -176,10 +225,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "39caa7be", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n" + ] + } + ], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", "\n", @@ -210,10 +268,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": 
"95794e71", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "High score!\n" + ] + } + ], "source": [ "score = 98\n", "\n", @@ -243,10 +309,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "cd1cd061", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a, b = 3, 6\n", "\n", @@ -265,10 +342,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "59638407", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[x for x in range(12)]" ] @@ -283,10 +371,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "8e8072ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 2, 4, 6, 8, 10]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[x for x in range(12) if x % 2 == 0]" ] @@ -301,10 +400,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "ec01f460", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0,\n", + " 'Not divisible by 2',\n", + " 2,\n", + " 'Not divisible by 2',\n", + " 4,\n", + " 'Not divisible by 2',\n", + " 6,\n", + " 'Not divisible by 2',\n", + " 8,\n", + " 'Not divisible by 2',\n", + " 10,\n", + " 'Not divisible by 2']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[x if x % 2 == 0 else \"Not divisible by 2\" for x in range(12)]" ] @@ -328,10 +449,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": 
"dc605a93", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Falsy\n" + ] + } + ], "source": [ "listy = []\n", "other_listy = [1, 2, 3]\n", @@ -344,10 +473,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "da8fe682", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Truthy\n" + ] + } + ], "source": [ "if not (other_listy):\n", " print(\"Falsy\")\n", @@ -365,10 +502,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "d80ba0be", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Falsy\n" + ] + } + ], "source": [ "if not 0:\n", " print(\"Falsy\")\n", @@ -378,10 +523,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "1973d44d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Truthy\n" + ] + } + ], "source": [ "if not [0, 0, 0]:\n", " print(\"Falsy\")\n", @@ -399,10 +552,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "62840c4a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Falsy\n" + ] + } + ], "source": [ "if not None:\n", " print(\"Falsy\")\n", @@ -432,10 +593,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "bdcb09a5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "any([True, False, False])" ] @@ -450,10 +622,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "2f666185", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + 
"True" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "all([True, True, True, True])" ] @@ -468,10 +651,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "78777d9c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "all([0, 0, 0, 1])" ] @@ -490,10 +684,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "7f338fd7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bool_col_1bool_col_2
0FalseTrue
1FalseFalse
2FalseTrue
3TrueFalse
4TrueTrue
\n", + "
" + ], + "text/plain": [ + " bool_col_1 bool_col_2\n", + "0 False True\n", + "1 False False\n", + "2 False True\n", + "3 True False\n", + "4 True True" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -516,10 +779,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "9cdaec7a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 False\n", + "2 True\n", + "3 True\n", + "4 True\n", + "dtype: bool" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"bool_col_1\"] | df[\"bool_col_2\"]" ] @@ -534,10 +813,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "89ee3e44", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "bool_col_1 2\n", + "bool_col_2 3\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.sum()" ] @@ -552,10 +844,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "5e30cee7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 True\n", + "2 False\n", + "3 True\n", + "4 True\n", + "Name: bool_col, dtype: bool" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame.from_dict({\"bool_col\": [0, 1, 0, 1, 1]})\n", "df[\"bool_col\"].astype(bool)" @@ -574,10 +882,127 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "9f63005f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
00.23IdealESI261.555.03263.953.982.43
10.21PremiumESI159.861.03263.893.842.31
20.23GoodEVS156.965.03274.054.072.31
30.29PremiumIVS262.458.03344.204.232.63
40.31GoodJSI263.358.03354.344.352.75
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", + "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", + "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", + "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", + "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds = pd.read_csv(\n", " \"https://github.com/mwaskom/seaborn-data/raw/master/diamonds.csv\"\n", @@ -595,10 +1020,220 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "7a27f0a0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyzexpensive
93380.94PremiumESI160.058.045806.446.373.84True
11800.71IdealGVS162.757.029305.695.733.58True
283340.33PremiumGVS161.358.06664.424.462.72False
139191.23Very GoodJVVS261.257.056656.866.924.22True
264742.37IdealJVS262.257.0160598.528.585.32True
122331.19FairISI164.958.051986.646.554.28True
59570.90Very GoodFSI162.660.039506.106.143.83True
483250.33GoodFSI161.862.05364.404.452.74False
257421.51IdealGVS161.157.0146747.417.384.52True
434010.59Very GoodFSI161.757.014125.395.433.34True
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z \\\n", + "9338 0.94 Premium E SI1 60.0 58.0 4580 6.44 6.37 3.84 \n", + "1180 0.71 Ideal G VS1 62.7 57.0 2930 5.69 5.73 3.58 \n", + "28334 0.33 Premium G VS1 61.3 58.0 666 4.42 4.46 2.72 \n", + "13919 1.23 Very Good J VVS2 61.2 57.0 5665 6.86 6.92 4.22 \n", + "26474 2.37 Ideal J VS2 62.2 57.0 16059 8.52 8.58 5.32 \n", + "12233 1.19 Fair I SI1 64.9 58.0 5198 6.64 6.55 4.28 \n", + "5957 0.90 Very Good F SI1 62.6 60.0 3950 6.10 6.14 3.83 \n", + "48325 0.33 Good F SI1 61.8 62.0 536 4.40 4.45 2.74 \n", + "25742 1.51 Ideal G VS1 61.1 57.0 14674 7.41 7.38 4.52 \n", + "43401 0.59 Very Good F SI1 61.7 57.0 1412 5.39 5.43 3.34 \n", + "\n", + " expensive \n", + "9338 True \n", + "1180 True \n", + "28334 False \n", + "13919 True \n", + "26474 True \n", + "12233 True \n", + "5957 True \n", + "48325 False \n", + "25742 True \n", + "43401 True " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds[\"expensive\"] = diamonds[\"price\"] > 1000\n", "diamonds.sample(10)" @@ -614,10 +1249,140 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "c78a6d47", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyzexpensive
00.23IdealESI261.555.03263.953.982.43False
10.21PremiumESI159.861.03263.893.842.31False
20.23GoodEVS156.965.03274.054.072.31False
30.29PremiumIVS262.458.03344.204.232.63False
40.31GoodJSI263.358.03354.344.352.75False
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z \\\n", + "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 \n", + "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 \n", + "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 \n", + "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63 \n", + "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 \n", + "\n", + " expensive \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds.assign(expensive=lambda x: x[\"price\"] > 1000).head()" ] @@ -632,10 +1397,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "d12f537d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, False, False, False, True, True,\n", + " True, False])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds.columns.isin([\"x\", \"y\", \"z\"])" ] @@ -652,10 +1429,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "35e73305", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.True_" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds[\"expensive\"].any()" ] @@ -672,10 +1460,239 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "d1bbb0fa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyzexpensive
10.21PremiumESI159.861.03263.893.842.31False
80.22FairEVS265.161.03373.873.782.49False
110.23IdealJVS162.856.03403.933.902.46False
120.22PremiumFSI160.461.03423.883.842.33False
140.20PremiumESI260.262.03453.793.752.27False
....................................
539280.79PremiumESI261.458.027566.035.963.68True
539290.71IdealGVS161.456.027565.765.733.53True
539300.71PremiumESI160.555.027565.795.743.49True
539310.71PremiumFSI159.862.027565.745.733.43True
539380.86PremiumHSI261.058.027576.156.123.74True
\n", + "

23423 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z \\\n", + "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 \n", + "8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49 \n", + "11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.90 2.46 \n", + "12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33 \n", + "14 0.20 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27 \n", + "... ... ... ... ... ... ... ... ... ... ... \n", + "53928 0.79 Premium E SI2 61.4 58.0 2756 6.03 5.96 3.68 \n", + "53929 0.71 Ideal G VS1 61.4 56.0 2756 5.76 5.73 3.53 \n", + "53930 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 3.49 \n", + "53931 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 3.43 \n", + "53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74 \n", + "\n", + " expensive \n", + "1 False \n", + "8 False \n", + "11 False \n", + "12 False \n", + "14 False \n", + "... ... \n", + "53928 True \n", + "53929 True \n", + "53930 True \n", + "53931 True \n", + "53938 True \n", + "\n", + "[23423 rows x 11 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds[diamonds[\"x\"] > diamonds[\"y\"]]" ] @@ -714,7 +1731,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/categorical-data.ipynb b/categorical-data.ipynb index 5796a26..6cc6776 100644 --- a/categorical-data.ipynb +++ b/categorical-data.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -65,10 +65,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "535ef959", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 a\n", + "1 b\n", + "2 c\n", + "3 a\n", + "Name: A, dtype: category\n", + "Categories (3, object): ['a', 'b', 'c']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -91,10 +107,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "358c83bb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
valuegroup
03630 - 39
16160 - 69
28980 - 89
32820 - 29
48180 - 89
\n", + "
" + ], + "text/plain": [ + " value group\n", + "0 36 30 - 39\n", + "1 61 60 - 69\n", + "2 89 80 - 89\n", + "3 28 20 - 29\n", + "4 81 80 - 89" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame({\"value\": np.random.randint(0, 100, 20)})\n", "labels = [f\"{i} - {i+9}\" for i in range(0, 100, 10)]\n", @@ -114,10 +199,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "fb389105", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[NaN, 'b', 'c', NaN, 'd', NaN, 'c']\n", + "Categories (3, object): ['b', 'c', 'd']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "raw_cat = pd.Categorical(\n", " [\"a\", \"b\", \"c\", \"a\", \"d\", \"a\", \"c\"], categories=[\"b\", \"c\", \"d\"]\n", @@ -135,10 +232,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "0497fc16", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 b\n", + "2 c\n", + "3 NaN\n", + "4 d\n", + "5 NaN\n", + "6 c\n", + "Name: cat_type, dtype: category\n", + "Categories (3, object): ['b', 'c', 'd']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(raw_cat, columns=[\"cat_type\"])\n", "df[\"cat_type\"]" @@ -162,10 +278,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f7520d3d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['a', 'b', 'c', 'a', 'd', 'a', 'c']\n", + "Categories (4, object): ['a' < 'b' < 'c' < 'd']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ordered_cat = pd.Categorical(\n", " [\"a\", \"b\", \"c\", \"a\", \"d\", \"a\", \"c\"],\n", @@ -189,20 +317,42 @@ }, { "cell_type": "code", - "execution_count": 
null, + "execution_count": 7, "id": "2caba354", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['b', 'c', 'd'], dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"cat_type\"].cat.categories" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5f1fe093", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"cat_type\"].cat.ordered" ] @@ -227,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "097171b8", "metadata": {}, "outputs": [], @@ -245,10 +395,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4ae6df38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 alpha\n", + "2 beta\n", + "3 NaN\n", + "4 gamma\n", + "5 NaN\n", + "6 beta\n", + "Name: cat_type, dtype: category\n", + "Categories (4, object): ['alpha', 'beta', 'gamma', 'delta']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"cat_type\"] = df[\"cat_type\"].cat.add_categories([\"delta\"])\n", "df[\"cat_type\"]" @@ -274,10 +443,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "19f5bdda", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "cat_type\n", + "beta 2\n", + "alpha 1\n", + "gamma 1\n", + "delta 0\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"cat_type\"].value_counts()" ] @@ -294,10 +479,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f52d5d0d", "metadata": {}, - "outputs": [], + "outputs": [ 
+ { + "data": { + "text/plain": [ + "0 beta\n", + "Name: cat_type, dtype: category\n", + "Categories (4, object): ['alpha', 'beta', 'gamma', 'delta']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"cat_type\"].mode()" ] @@ -312,10 +510,81 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "4d43e94d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60895/379284818.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", + " pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"M\"), dtype=\"category\"),\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetime
02015-05-31
12015-06-30
22015-07-31
32015-08-31
42015-09-30
\n", + "
" + ], + "text/plain": [ + " datetime\n", + "0 2015-05-31\n", + "1 2015-06-30\n", + "2 2015-07-31\n", + "3 2015-08-31\n", + "4 2015-09-30" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "time_df = pd.DataFrame(\n", " pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"M\"), dtype=\"category\"),\n", @@ -326,10 +595,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "db697f86", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 5\n", + "1 6\n", + "2 7\n", + "3 8\n", + "4 9\n", + "Name: datetime, dtype: int32" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "time_df[\"datetime\"].dt.month" ] @@ -344,10 +629,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "13e7bc66", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "dtype: int8" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "time_df[\"datetime\"].cat.codes" ] @@ -378,7 +679,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/communicate-plots.ipynb b/communicate-plots.ipynb index fc14db8..f833d44 100644 --- a/communicate-plots.ipynb +++ b/communicate-plots.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "78eeea41", "metadata": {}, "outputs": [], @@ -42,10 +42,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "ae4a818a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import numpy as np\n", "import polars as pl\n", @@ -66,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "c36b4cd5", "metadata": {}, "outputs": [], @@ -88,10 +129,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c7574bc6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point())" ] @@ -116,10 +279,162 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "24b3513e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -147,10 +462,165 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "6489a6bf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -182,14 +652,153 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "683d547c", "metadata": { "tags": [ "remove-cell" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"cty\", y=\"hwy\", color=\"drv\", shape=\"drv\"))\n", @@ -231,10 +840,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "60826a32", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 4)
drvhwydispldrive_type
strf64f64str
"f"28.162.56"front-wheel drive"
"r"21.05.18"rear-wheel drive"
"4"19.174.0"4-wheel drive"
" + ], + "text/plain": [ + "shape: (3, 4)\n", + "┌─────┬───────┬───────┬───────────────────┐\n", + "│ drv ┆ hwy ┆ displ ┆ drive_type │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ f64 ┆ f64 ┆ str │\n", + "╞═════╪═══════╪═══════╪═══════════════════╡\n", + "│ f ┆ 28.16 ┆ 2.56 ┆ front-wheel drive │\n", + "│ r ┆ 21.0 ┆ 5.18 ┆ rear-wheel drive │\n", + "│ 4 ┆ 19.17 ┆ 4.0 ┆ 4-wheel drive │\n", + "└─────┴───────┴───────┴───────────────────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mapping = {\n", " \"4\": \"4-wheel drive\",\n", @@ -263,10 +902,183 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "6f90c2aa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -296,10 +1108,254 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "bdcd79bb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "potential_outliers = mpg.filter(\n", " (pl.col(\"hwy\") > 40) | ((pl.col(\"hwy\") > 20) & (pl.col(\"displ\") > 5))\n", @@ -336,10 +1392,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d1e2cc3a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Larger engine sizes tend to\\nhave lower fuel economy.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import textwrap\n", "\n", @@ -350,10 +1417,162 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "e8c09f57", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -485,10 +1704,137 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "a95604d8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -509,10 +1855,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "1a852304", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -539,7 +2013,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "40ac230e", "metadata": {}, "outputs": [], @@ -559,10 +2033,158 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "1520bb3c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(diamonds, aes(x=\"cut\", y=\"price\"))\n", @@ -582,10 +2204,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "9d1f993a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 6)
rownamesnamestartendpartyid
i64stri32i32stri64
1"Eisenhower"19531961"Republican"34
2"Kennedy"19611963"Democratic"35
3"Johnson"19631969"Democratic"36
4"Nixon"19691974"Republican"37
5"Ford"19741977"Republican"38
" + ], + "text/plain": [ + "shape: (5, 6)\n", + "┌──────────┬────────────┬───────┬──────┬────────────┬─────┐\n", + "│ rownames ┆ name ┆ start ┆ end ┆ party ┆ id │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ i32 ┆ i32 ┆ str ┆ i64 │\n", + "╞══════════╪════════════╪═══════╪══════╪════════════╪═════╡\n", + "│ 1 ┆ Eisenhower ┆ 1953 ┆ 1961 ┆ Republican ┆ 34 │\n", + "│ 2 ┆ Kennedy ┆ 1961 ┆ 1963 ┆ Democratic ┆ 35 │\n", + "│ 3 ┆ Johnson ┆ 1963 ┆ 1969 ┆ Democratic ┆ 36 │\n", + "│ 4 ┆ Nixon ┆ 1969 ┆ 1974 ┆ Republican ┆ 37 │\n", + "│ 5 ┆ Ford ┆ 1974 ┆ 1977 ┆ Republican ┆ 38 │\n", + "└──────────┴────────────┴───────┴──────┴────────────┴─────┘" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "presidential = pl.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/presidential.csv\",\n", @@ -601,10 +2255,128 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "7d88976d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(presidential, aes(x=\"start\", y=\"id\"))\n", @@ -631,10 +2403,355 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "52d6e86a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "base = ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"class\"))\n", "\n", @@ -673,10 +2790,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "2c1d3f8d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(\n", @@ -697,10 +2934,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "39b4ef8d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(diamonds, aes(x=\"carat\", y=\"price\"))\n", @@ -720,20 +3083,272 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "f06d7e40", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"drv\")))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "6186b520", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -762,14 +3377,1235 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "bd347524", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:54.100390\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# | echo: false\n", "cmaps = [\n", @@ -871,14 +4707,1126 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "d6350c71", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:54.622782\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# remove input\n", "for cmap_category, cmap_list in cmaps[3:4]:\n", @@ -887,14 +5835,1003 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "0063a574", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:54.717286\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# remove input\n", "for cmap_category, cmap_list in cmaps[2:3]:\n", @@ -911,10 +6848,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "9751058d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mini_presid = presidential.slice(5)\n", "\n", @@ -942,10 +7007,115 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "644fd814", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "prng = np.random.default_rng(1837) # prng=probabilistic random number generator\n", "df_rnd = pl.DataFrame(prng.standard_normal((1000, 2)), schema=[\"x\", \"y\"])\n", @@ -979,10 +7149,147 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "25a29f38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -993,10 +7300,147 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "42318a59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mpg_condition = (\n", " (mpg[\"displ\"] >= 5) & (mpg[\"displ\"] <= 6) & (mpg[\"hwy\"] >= 10) & (mpg[\"hwy\"] <= 25)\n", @@ -1021,10 +7465,153 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "03001d5e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -1037,10 +7624,153 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "dc3bb833", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -1061,10 +7791,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "aee538a8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "suv = mpg.filter(mpg[\"class\"] == \"suv\")\n", "compact = mpg.filter(mpg[\"class\"] == \"compact\")\n", @@ -1073,10 +7927,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "a82c8c23", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(compact, aes(x=\"displ\", y=\"hwy\", color=\"drv\")) + geom_point())" ] @@ -1091,7 +8069,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "db6fce43", "metadata": {}, "outputs": [], @@ -1103,10 +8081,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "dd9e6606", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(suv, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -1119,10 +8160,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "bdd8b2c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(compact, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -1172,10 +8276,148 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "0b2364ca", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -1197,10 +8439,159 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "67bfa9c8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", color=\"drv\"))\n", @@ -1241,10 +8632,228 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "a8081df4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "p1 = ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + labs(title=\"Plot 1\")\n", "p2 = ggplot(mpg, aes(x=\"drv\", y=\"hwy\")) + geom_boxplot() + labs(title=\"Plot 2\")\n", @@ -1265,10 +8874,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "710a6a4f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/omagic/Documents/GitHub/python4DSpolars/chart.svg'" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ggsave(p1, \"chart.svg\", path=\".\")" ] @@ -1283,17 +8903,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "bc831b1b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: stdout: Broken pipe\r\n" + ] + } + ], "source": [ "!ls | grep *.svg" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "9cc10ab7", "metadata": { "tags": [ @@ -1346,7 +8974,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/data-import.ipynb b/data-import.ipynb index c240a16..9369dde 100644 --- a/data-import.ipynb +++ b/data-import.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1cf01bda", "metadata": {}, "outputs": [], @@ -54,10 +54,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "eca85c47", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student ID,Full Name,favourite.food,mealPlan,AGE\r", + "\r\n", + "1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4\r", + "\r\n", + "2,Barclay 
Lynn,French fries,Lunch only,5\r", + "\r\n", + "3,Jayendra Lyne,N/A,Breakfast and lunch,7\r", + "\r\n", + "4,Leon Rossini,Anchovies,Lunch only,8\r", + "\r\n", + "5,Chidiegwu Dunkel,Pizza,Breakfast and lunch,five\r", + "\r\n", + "6,Güvenç Attila,Ice cream,Lunch only,6" + ] + } + ], "source": [ "! cat data/students.csv" ] @@ -72,10 +92,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "232fdfef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 5)
Student IDFull Namefavourite.foodmealPlanAGE
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only""8"
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" + ], + "text/plain": [ + "shape: (6, 5)\n", + "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", + "│ Student ID ┆ Full Name ┆ favourite.food ┆ mealPlan ┆ AGE │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", + "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", + "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", + "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", + "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", + "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ 8 │\n", + "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", + "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", + "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = pl.read_csv(\"data/students.csv\")\n", "students" @@ -133,10 +186,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "51969364", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only""8"
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" + ], + "text/plain": [ + "shape: (6, 5)\n", + "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", + "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", + "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", + "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", + "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", + "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", + "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ 8 │\n", + "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", + "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", + "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from skimpy import clean_columns\n", "\n", @@ -154,10 +240,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "f3c31e4a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6,)
age
i64
4
5
7
8
5
6
" + ], + "text/plain": [ + "shape: (6,)\n", + "Series: 'age' [i64]\n", + "[\n", + "\t4\n", + "\t5\n", + "\t7\n", + "\t8\n", + "\t5\n", + "\t6\n", + "]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = students.with_columns(pl.col(\"age\").replace(\"five\", 5).cast(pl.Int64))\n", "students[\"age\"]" @@ -181,10 +297,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "678fdd2d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6,)
meal_plan
cat
"Lunch only"
"Lunch only"
"Breakfast and lunch"
"Lunch only"
"Breakfast and lunch"
"Lunch only"
" + ], + "text/plain": [ + "shape: (6,)\n", + "Series: 'meal_plan' [cat]\n", + "[\n", + "\t\"Lunch only\"\n", + "\t\"Lunch only\"\n", + "\t\"Breakfast and lunch\"\n", + "\t\"Lunch only\"\n", + "\t\"Breakfast and lunch\"\n", + "\t\"Lunch only\"\n", + "]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = students.with_columns(pl.col(\"meal_plan\").cast(pl.Categorical))\n", "students[\"meal_plan\"]" @@ -202,10 +348,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "f54108d3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Schema([('student_id', Int32),\n", + " ('full_name', String),\n", + " ('favourite_food', String),\n", + " ('meal_plan', Categorical(ordering='physical')),\n", + " ('age', Int64)])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = students.cast(\n", " {\"student_id\": pl.Int32, \"full_name\": pl.String, \"age\": pl.Int64}\n", @@ -243,10 +404,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "b80b958b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (19, 5)
monthyearbranditemn
stri64i64i64i64
"January"2019112343
"January"2019187219
"January"2019118222
"January"2019233331
"January"2019221569
"March"2019136271
"March"2019188203
"March"2019272531
"March"2019287663
"March"2019282886
" + ], + "text/plain": [ + "shape: (19, 5)\n", + "┌─────────┬──────┬───────┬──────┬─────┐\n", + "│ month ┆ year ┆ brand ┆ item ┆ n │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞═════════╪══════╪═══════╪══════╪═════╡\n", + "│ January ┆ 2019 ┆ 1 ┆ 1234 ┆ 3 │\n", + "│ January ┆ 2019 ┆ 1 ┆ 8721 ┆ 9 │\n", + "│ January ┆ 2019 ┆ 1 ┆ 1822 ┆ 2 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 3333 ┆ 1 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 2156 ┆ 9 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ March ┆ 2019 ┆ 1 ┆ 3627 ┆ 1 │\n", + "│ March ┆ 2019 ┆ 1 ┆ 8820 ┆ 3 │\n", + "│ March ┆ 2019 ┆ 2 ┆ 7253 ┆ 1 │\n", + "│ March ┆ 2019 ┆ 2 ┆ 8766 ┆ 3 │\n", + "│ March ┆ 2019 ┆ 2 ┆ 8288 ┆ 6 │\n", + "└─────────┴──────┴───────┴──────┴─────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "list_of_dataframes = [\n", " pl.read_csv(x)\n", @@ -266,10 +465,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4a92056c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "List of csvs is:\n", + "['data/03-sales.csv', 'data/02-sales.csv', 'data/01-sales.csv'] \n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (19, 5)
monthyearbranditemn
stri64i64i64i64
"March"2019112343
"March"2019136271
"March"2019188203
"March"2019272531
"March"2019287663
"January"2019118222
"January"2019233331
"January"2019221569
"January"2019239876
"January"2019238276
" + ], + "text/plain": [ + "shape: (19, 5)\n", + "┌─────────┬──────┬───────┬──────┬─────┐\n", + "│ month ┆ year ┆ brand ┆ item ┆ n │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞═════════╪══════╪═══════╪══════╪═════╡\n", + "│ March ┆ 2019 ┆ 1 ┆ 1234 ┆ 3 │\n", + "│ March ┆ 2019 ┆ 1 ┆ 3627 ┆ 1 │\n", + "│ March ┆ 2019 ┆ 1 ┆ 8820 ┆ 3 │\n", + "│ March ┆ 2019 ┆ 2 ┆ 7253 ┆ 1 │\n", + "│ March ┆ 2019 ┆ 2 ┆ 8766 ┆ 3 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ January ┆ 2019 ┆ 1 ┆ 1822 ┆ 2 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 3333 ┆ 1 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 2156 ┆ 9 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 3987 ┆ 6 │\n", + "│ January ┆ 2019 ┆ 2 ┆ 3827 ┆ 6 │\n", + "└─────────┴──────┴───────┴──────┴─────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import glob\n", "\n", @@ -294,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "0bc97749", "metadata": {}, "outputs": [], @@ -312,10 +558,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "542c5223", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Schema([('student_id', Int64),\n", + " ('full_name', String),\n", + " ('favourite_food', String),\n", + " ('meal_plan', String),\n", + " ('age', Int64)])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_csv(\"data/students-clean.csv\").schema" ] @@ -336,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "16c6ca1b", "metadata": {}, "outputs": [], @@ -354,10 +615,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "bfd5104f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Schema([('student_id', Int32),\n", + " ('full_name', String),\n", + " ('favourite_food', String),\n", + " ('meal_plan', 
Categorical(ordering='physical')),\n", + " ('age', Int64)])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_ipc(\"data/students-clean.feather\").schema" ] @@ -376,7 +652,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "227f7c50", "metadata": { "tags": [ @@ -414,7 +690,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/data-tidy.ipynb b/data-tidy.ipynb index e0ba7a7..903b0a0 100644 --- a/data-tidy.ipynb +++ b/data-tidy.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -111,10 +111,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0f9fbf5a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Unmelted: \n", + " first last job height weight\n", + "0 John Doe Nurse 5.5 130\n", + "1 Mary Bo Economist 6.0 150\n", + "\n", + " Melted: \n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
firstlastquantityvalue
0JohnDoeheight5.5
1MaryBoheight6.0
2JohnDoeweight130.0
3MaryBoweight150.0
\n", + "
" + ], + "text/plain": [ + " first last quantity value\n", + "0 John Doe height 5.5\n", + "1 Mary Bo height 6.0\n", + "2 John Doe weight 130.0\n", + "3 Mary Bo weight 150.0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -149,10 +235,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "bfa121cf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
country19992000
0Afghanistan745.02666.0
1Brazil37737.080488.0
2China212258.0213766.0
\n", + "
" + ], + "text/plain": [ + " country 1999 2000\n", + "0 Afghanistan 745.0 2666.0\n", + "1 Brazil 37737.0 80488.0\n", + "2 China 212258.0 213766.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_tb = pd.read_parquet(\n", " \"https://github.com/aeturrell/python4DS/raw/refs/heads/main/data/who_tb_cases.parquet\"\n", @@ -170,10 +317,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "dc03ccd9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearcases
0Afghanistan1999745.0
1Brazil199937737.0
2China1999212258.0
3Afghanistan20002666.0
4Brazil200080488.0
5China2000213766.0
\n", + "
" + ], + "text/plain": [ + " country year cases\n", + "0 Afghanistan 1999 745.0\n", + "1 Brazil 1999 37737.0\n", + "2 China 1999 212258.0\n", + "3 Afghanistan 2000 2666.0\n", + "4 Brazil 2000 80488.0\n", + "5 China 2000 213766.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_tb.melt(\n", " id_vars=[\"country\"],\n", @@ -203,10 +432,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "293768c1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A1970A1980B1970B1980Xid
0ad2.53.22.2900010
1be1.21.3-0.5127471
2cf0.70.1-2.3662972
\n", + "
" + ], + "text/plain": [ + " A1970 A1980 B1970 B1980 X id\n", + "0 a d 2.5 3.2 2.290001 0\n", + "1 b e 1.2 1.3 -0.512747 1\n", + "2 c f 0.7 0.1 -2.366297 2" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -233,10 +535,107 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "a9ca2fa8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XAB
idyear
019702.290001a2.5
11970-0.512747b1.2
21970-2.366297c0.7
019802.290001d3.2
11980-0.512747e1.3
21980-2.366297f0.1
\n", + "
" + ], + "text/plain": [ + " X A B\n", + "id year \n", + "0 1970 2.290001 a 2.5\n", + "1 1970 -0.512747 b 1.2\n", + "2 1970 -2.366297 c 0.7\n", + "0 1980 2.290001 d 3.2\n", + "1 1980 -0.512747 e 1.3\n", + "2 1980 -2.366297 f 0.1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.wide_to_long(df, stubnames=[\"A\", \"B\"], i=\"id\", j=\"year\")" ] @@ -261,10 +660,109 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "2b791dd1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AB
firstsecond
barone1.3129600.427839
two-0.070985-0.738495
bazone1.042258-0.430793
two0.511735-0.782214
fooone-0.847108-1.179077
two1.0210641.015834
quxone-1.194002-0.313362
two2.226642-0.898217
\n", + "
" + ], + "text/plain": [ + " A B\n", + "first second \n", + "bar one 1.312960 0.427839\n", + " two -0.070985 -0.738495\n", + "baz one 1.042258 -0.430793\n", + " two 0.511735 -0.782214\n", + "foo one -0.847108 -1.179077\n", + " two 1.021064 1.015834\n", + "qux one -1.194002 -0.313362\n", + " two 2.226642 -0.898217" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "tuples = list(\n", " zip(\n", @@ -289,10 +787,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "d25eb012", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "first second \n", + "bar one A 1.312960\n", + " B 0.427839\n", + " two A -0.070985\n", + " B -0.738495\n", + "baz one A 1.042258\n", + " B -0.430793\n", + " two A 0.511735\n", + " B -0.782214\n", + "foo one A -0.847108\n", + " B -1.179077\n", + " two A 1.021064\n", + " B 1.015834\n", + "qux one A -1.194002\n", + " B -0.313362\n", + " two A 2.226642\n", + " B -0.898217\n", + "dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.stack()\n", "df" @@ -320,10 +846,95 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "b6742a54", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
firstbarbazfooqux
second
oneA1.3129601.042258-0.847108-1.194002
B0.427839-0.430793-1.179077-0.313362
twoA-0.0709850.5117351.0210642.226642
B-0.738495-0.7822141.015834-0.898217
\n", + "
" + ], + "text/plain": [ + "first bar baz foo qux\n", + "second \n", + "one A 1.312960 1.042258 -0.847108 -1.194002\n", + " B 0.427839 -0.430793 -1.179077 -0.313362\n", + "two A -0.070985 0.511735 1.021064 2.226642\n", + " B -0.738495 -0.782214 1.015834 -0.898217" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.unstack(level=0)" ] @@ -360,10 +971,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "fa612456", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyeartypecount
0Afghanistan1999-01-01cases745
1Afghanistan1999-01-01population19987071
2Afghanistan2000-01-01cases2666
3Afghanistan2000-01-01population20595360
4Brazil1999-01-01cases37737
\n", + "
" + ], + "text/plain": [ + " country year type count\n", + "0 Afghanistan 1999-01-01 cases 745\n", + "1 Afghanistan 1999-01-01 population 19987071\n", + "2 Afghanistan 2000-01-01 cases 2666\n", + "3 Afghanistan 2000-01-01 population 20595360\n", + "4 Brazil 1999-01-01 cases 37737" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_tb_cp = pd.read_parquet(\n", " \"https://github.com/aeturrell/python4DS/raw/refs/heads/main/data/who_tb_case_and_pop.parquet\"\n", @@ -389,10 +1081,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "e584cf37", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typecountryyearcasespopulation
0Afghanistan1999-01-0174519987071
1Afghanistan2000-01-01266620595360
2Brazil1999-01-0137737172006362
3Brazil2000-01-0180488174504898
4China1999-01-012122581272915272
5China2000-01-012137661280428583
\n", + "
" + ], + "text/plain": [ + "type country year cases population\n", + "0 Afghanistan 1999-01-01 745 19987071\n", + "1 Afghanistan 2000-01-01 2666 20595360\n", + "2 Brazil 1999-01-01 37737 172006362\n", + "3 Brazil 2000-01-01 80488 174504898\n", + "4 China 1999-01-01 212258 1272915272\n", + "5 China 2000-01-01 213766 1280428583" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pivoted = df_tb_cp.pivot(\n", " index=[\"country\", \"year\"], columns=[\"type\"], values=\"count\"\n", @@ -410,10 +1191,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "97c6d139", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datevariablevalue
12000-02-29A0.799993
22000-03-31A0.247382
92000-10-31A0.685868
182000-09-30B-0.911245
42000-05-31A0.874574
\n", + "
" + ], + "text/plain": [ + " date variable value\n", + "1 2000-02-29 A 0.799993\n", + "2 2000-03-31 A 0.247382\n", + "9 2000-10-31 A 0.685868\n", + "18 2000-09-30 B -0.911245\n", + "4 2000-05-31 A 0.874574" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -439,10 +1295,115 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "04f2bd28", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
variableAB
date
2000-01-31NaNNaN
2000-02-290.7116320.869024
2000-03-310.799993-0.276892
2000-04-300.247382-1.172654
2000-05-31-1.545182-1.452367
2000-06-300.874574-2.377642
2000-07-31-0.735886-2.036017
2000-08-310.0138172.211417
2000-09-30-1.401537-0.896416
2000-10-310.063176-0.911245
\n", + "
" + ], + "text/plain": [ + "variable A B\n", + "date \n", + "2000-01-31 NaN NaN\n", + "2000-02-29 0.711632 0.869024\n", + "2000-03-31 0.799993 -0.276892\n", + "2000-04-30 0.247382 -1.172654\n", + "2000-05-31 -1.545182 -1.452367\n", + "2000-06-30 0.874574 -2.377642\n", + "2000-07-31 -0.735886 -2.036017\n", + "2000-08-31 0.013817 2.211417\n", + "2000-09-30 -1.401537 -0.896416\n", + "2000-10-31 0.063176 -0.911245" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.pivot(index=\"date\", columns=\"variable\", values=\"value\").shift(1)" ] @@ -485,7 +1446,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/data-transform.ipynb b/data-transform.ipynb index 83588d0..03eed1b 100644 --- a/data-transform.ipynb +++ b/data-transform.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "04dcb195", "metadata": {}, "outputs": [], @@ -44,10 +44,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "09eb2e2e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'1.19.0'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.__version__" ] @@ -66,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "eff283e8", "metadata": {}, "outputs": [], @@ -97,10 +108,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "39f99d76", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"
" + ], + "text/plain": [ + "shape: (5, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.head()" ] @@ -115,10 +158,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "95dea97b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rows: 336776\n", + "Columns: 19\n", + "$ year 2013, 2013, 2013, 2013, 2013\n", + "$ month 1, 1, 1, 1, 1\n", + "$ day 1, 1, 1, 1, 1\n", + "$ dep_time 517, 533, 542, 544, 554\n", + "$ sched_dep_time 515, 529, 540, 545, 600\n", + "$ dep_delay 2, 4, 2, -1, -6\n", + "$ arr_time 830, 850, 923, 1004, 812\n", + "$ sched_arr_time 819, 830, 850, 1022, 837\n", + "$ arr_delay 11, 20, 33, -18, -25\n", + "$ carrier 'UA', 'UA', 'AA', 'B6', 'DL'\n", + "$ flight 1545, 1714, 1141, 725, 461\n", + "$ tailnum 'N14228', 'N24211', 'N619AA', 'N804JB', 'N668DN'\n", + "$ origin 'EWR', 'LGA', 'JFK', 'JFK', 'LGA'\n", + "$ dest 'IAH', 'IAH', 'MIA', 'BQN', 'ATL'\n", + "$ air_time 227, 227, 160, 183, 116\n", + "$ distance 1400, 1416, 1089, 1576, 762\n", + "$ hour 5, 5, 
5, 5, 6\n", + "$ minute 15, 29, 40, 45, 0\n", + "$ time_hour '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T11:00:00Z'\n", + "\n" + ] + } + ], "source": [ "flights.glimpse(max_items_per_column=5)" ] @@ -149,20 +221,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "ffb275b0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776,)
time_hour
str
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T11:00:00Z"
"2013-09-30T18:00:00Z"
"2013-10-01T02:00:00Z"
"2013-09-30T16:00:00Z"
"2013-09-30T15:00:00Z"
"2013-09-30T12:00:00Z"
" + ], + "text/plain": [ + "shape: (336_776,)\n", + "Series: 'time_hour' [str]\n", + "[\n", + "\t\"2013-01-01T10:00:00Z\"\n", + "\t\"2013-01-01T10:00:00Z\"\n", + "\t\"2013-01-01T10:00:00Z\"\n", + "\t\"2013-01-01T10:00:00Z\"\n", + "\t\"2013-01-01T11:00:00Z\"\n", + "\t…\n", + "\t\"2013-09-30T18:00:00Z\"\n", + "\t\"2013-10-01T02:00:00Z\"\n", + "\t\"2013-09-30T16:00:00Z\"\n", + "\t\"2013-09-30T15:00:00Z\"\n", + "\t\"2013-09-30T12:00:00Z\"\n", + "]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.get_column(\"time_hour\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "88a8b983", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64datetime[μs, UTC]
201311517515283081911"UA"1545"N14228""EWR""IAH"22714005152013-01-01 10:00:00 UTC
201311533529485083020"UA"1714"N24211""LGA""IAH"22714165292013-01-01 10:00:00 UTC
201311542540292385033"AA"1141"N619AA""JFK""MIA"16010895402013-01-01 10:00:00 UTC
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"18315765452013-01-01 10:00:00 UTC
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"116762602013-01-01 11:00:00 UTC
2013930null1455nullnull1634null"9E"3393null"JFK""DCA"null21314552013-09-30 18:00:00 UTC
2013930null2200nullnull2312null"9E"3525null"LGA""SYR"null1982202013-10-01 02:00:00 UTC
2013930null1210nullnull1330null"MQ"3461"N535MQ""LGA""BNA"null76412102013-09-30 16:00:00 UTC
2013930null1159nullnull1344null"MQ"3572"N511MQ""LGA""CLE"null41911592013-09-30 15:00:00 UTC
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null4318402013-09-30 12:00:00 UTC
" + ], + "text/plain": [ + "shape: (336_776, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬─────────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ datetime[μs, UTC] │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪═════════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01 10:00:00 UTC │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01 10:00:00 UTC │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01 10:00:00 UTC │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01 10:00:00 UTC │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01 11:00:00 UTC │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 213 ┆ 14 ┆ 55 ┆ 2013-09-30 18:00:00 UTC │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 198 ┆ 22 ┆ 0 ┆ 2013-10-01 02:00:00 UTC │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 764 ┆ 12 ┆ 10 ┆ 2013-09-30 16:00:00 UTC │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 419 ┆ 11 ┆ 59 ┆ 2013-09-30 15:00:00 UTC │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 431 ┆ 8 ┆ 40 ┆ 2013-09-30 12:00:00 UTC │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴─────────────────────────┘" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.with_columns(pl.col(\"time_hour\").str.to_datetime())" ] @@ -187,10 +332,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "1b6bd8b1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 5)
col0col1col2col3col4
i64i64i64strstr
000"a""alpha"
000"b""gamma"
000"b""gamma"
000"a""gamma"
" + ], + "text/plain": [ + "shape: (4, 5)\n", + "┌──────┬──────┬──────┬──────┬───────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ str ┆ str │\n", + "╞══════╪══════╪══════╪══════╪═══════╡\n", + "│ 0 ┆ 0 ┆ 0 ┆ a ┆ alpha │\n", + "│ 0 ┆ 0 ┆ 0 ┆ b ┆ gamma │\n", + "│ 0 ┆ 0 ┆ 0 ┆ b ┆ gamma │\n", + "│ 0 ┆ 0 ┆ 0 ┆ a ┆ gamma │\n", + "└──────┴──────┴──────┴──────┴───────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pl.DataFrame(\n", " data={\n", @@ -229,10 +405,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "cb114649", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (365, 4)
yearmonthdayarr_delay
i64i64i64f64
2013102-19.47619
201342630.380952
20131129-25.384615
2013430-9.52381
201381614.05
20135151.095238
20137912.714286
2013531-6.52381
201311235.375
2013416-1.736842
" + ], + "text/plain": [ + "shape: (365, 4)\n", + "┌──────┬───────┬─────┬────────────┐\n", + "│ year ┆ month ┆ day ┆ arr_delay │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ f64 │\n", + "╞══════╪═══════╪═════╪════════════╡\n", + "│ 2013 ┆ 10 ┆ 2 ┆ -19.47619 │\n", + "│ 2013 ┆ 4 ┆ 26 ┆ 30.380952 │\n", + "│ 2013 ┆ 11 ┆ 29 ┆ -25.384615 │\n", + "│ 2013 ┆ 4 ┆ 30 ┆ -9.52381 │\n", + "│ 2013 ┆ 8 ┆ 16 ┆ 14.05 │\n", + "│ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 5 ┆ 15 ┆ 1.095238 │\n", + "│ 2013 ┆ 7 ┆ 9 ┆ 12.714286 │\n", + "│ 2013 ┆ 5 ┆ 31 ┆ -6.52381 │\n", + "│ 2013 ┆ 11 ┆ 23 ┆ 5.375 │\n", + "│ 2013 ┆ 4 ┆ 16 ┆ -1.736842 │\n", + "└──────┴───────┴─────┴────────────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.filter(pl.col(\"dest\") == \"IAH\").group_by([\"year\", \"month\", \"day\"]).agg(\n", " pl.col(\"arr_delay\").mean()\n", @@ -267,10 +481,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "3958ddb5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
012345"apple"
67891011"orange"
121314151617"pineapple"
181920212223"mango"
242526272829"kiwi"
303132333435"lemon"
" + ], + "text/plain": [ + "shape: (6, 7)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -299,10 +546,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "0c2faf83", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(24, 25, 26, 27, 28, 29, 'kiwi')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Gets the first row of the DataFrame\n", "df.row(0)\n", @@ -321,10 +579,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "9d34599b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(18, 19, 20, 21, 22, 23, 'mango')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.row(by_predicate=pl.col(\"col6\") == \"mango\")" ] @@ -339,10 +608,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "9bf6313b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'col0': 18,\n", + " 'col1': 19,\n", + " 'col2': 20,\n", + " 'col3': 21,\n", + " 'col4': 22,\n", + " 'col5': 23,\n", + " 'col6': 'mango'}" + ] + }, + "execution_count": 13, + "metadata": 
{}, + "output_type": "execute_result" + } + ], "source": [ "# Get the first row of the DataFrame as a dictionary\n", "df.row(0, named=True)\n", @@ -361,10 +647,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "f7e8e892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
242526272829"kiwi"
303132333435"lemon"
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════╡\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.slice(-2, 2)" ] @@ -381,10 +696,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "54a9d2b1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
121314151617"pineapple"
242526272829"kiwi"
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.filter((pl.col(\"col6\") == \"kiwi\") | (pl.col(\"col6\") == \"pineapple\"))" ] @@ -399,10 +743,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "7849a962", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
121314151617"pineapple"
181920212223"mango"
242526272829"kiwi"
303132333435"lemon"
" + ], + "text/plain": [ + "shape: (4, 7)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.filter(pl.col(\"col0\") > 6)" ] @@ -417,10 +792,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "c6dd919f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4_334, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"
20131523552359-4425442-17"B6"707"N583JB""JFK""SJU"19315982359"2013-01-06T04:00:00Z"
20131523572359-2432437-5"B6"727"N649JB""JFK""BQN"19515762359"2013-01-06T04:00:00Z"
201315null1400nullnull1518null"EV"5712"N827AS""JFK""IAD"null228140"2013-01-05T19:00:00Z"
201315null840nullnull1001null"9E"3422null"JFK""BOS"null187840"2013-01-05T13:00:00Z"
201315null1430nullnull1735null"AA"883"N544AA""EWR""DFW"null13721430"2013-01-05T19:00:00Z"
" + ], + "text/plain": [ + "shape: (4_334, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01T10:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 1 ┆ 5 ┆ 2355 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2013-01-06T04:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 5 ┆ 2357 ┆ … ┆ 1576 ┆ 23 ┆ 59 ┆ 2013-01-06T04:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 228 ┆ 14 ┆ 0 ┆ 2013-01-05T19:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 187 ┆ 8 ┆ 40 ┆ 2013-01-05T13:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 1372 ┆ 14 ┆ 30 ┆ 2013-01-05T19:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Flights that departed on January 1\n", "flights.filter((pl.col(\"month\") == 1) & (pl.col(\"day\") <= 5))" @@ -446,20 +859,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "395c9c62", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201312null1540nullnull1747null"EV"4352"N10575""EWR""CVG"null5691540"2013-01-02T20:00:00Z"
201372824002359141134427"B6"1503"N503JB""JFK""SJU"20415982359"2013-07-29T03:00:00Z"
20138102400224575110169"B6"234"N328JB""JFK""BTV"532662245"2013-08-11T02:00:00Z"
20138202400235913543504"B6"745"N708JB""JFK""PSE"20116172359"2013-08-21T03:00:00Z"
20139224002359141134031"B6"839"N828JB""JFK""BQN"21715762359"2013-09-03T03:00:00Z"
2013912240020002402032230213"DL"1147"N910DE""LGA""ATL"101762200"2013-09-13T00:00:00Z"
" + ], + "text/plain": [ + "shape: (336_776, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 2 ┆ null ┆ … ┆ 569 ┆ 15 ┆ 40 ┆ 2013-01-02T20:00:00Z │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 7 ┆ 28 ┆ 2400 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2013-07-29T03:00:00Z │\n", + "│ 2013 ┆ 8 ┆ 10 ┆ 2400 ┆ … ┆ 266 ┆ 22 ┆ 45 ┆ 2013-08-11T02:00:00Z │\n", + "│ 2013 ┆ 8 ┆ 20 ┆ 2400 ┆ … ┆ 1617 ┆ 23 ┆ 59 ┆ 2013-08-21T03:00:00Z │\n", + "│ 2013 ┆ 9 ┆ 2 ┆ 2400 ┆ … ┆ 1576 ┆ 23 ┆ 59 ┆ 2013-09-03T03:00:00Z │\n", + "│ 2013 ┆ 9 ┆ 12 ┆ 2400 ┆ … ┆ 762 ┆ 20 ┆ 0 ┆ 2013-09-13T00:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.sort(\"dep_time\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "9d5f4270", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
20131231232122503146838"B6"2002"N179JB""JFK""BUF"663012250"2014-01-01T03:00:00Z"
2013123123282330-24124093"B6"1389"N651JB""EWR""SJU"19816082330"2014-01-01T04:00:00Z"
20131231233222454758355"B6"486"N334JB""JFK""ROC"602642245"2014-01-01T03:00:00Z"
2013123123552359-4430440-10"B6"1503"N509JB""JFK""SJU"19515982359"2014-01-01T04:00:00Z"
2013123123562359-3436445-9"B6"745"N665JB""JFK""PSE"20016172359"2014-01-01T04:00:00Z"
" + ], + "text/plain": [ + "shape: (336_776, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 12 ┆ 31 ┆ 2321 ┆ … ┆ 301 ┆ 22 ┆ 50 ┆ 2014-01-01T03:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 31 ┆ 2328 ┆ … ┆ 1608 ┆ 23 ┆ 30 ┆ 2014-01-01T04:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 31 ┆ 2332 ┆ … ┆ 264 ┆ 22 ┆ 45 ┆ 2014-01-01T03:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 31 ┆ 2355 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2014-01-01T04:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 31 ┆ 2356 ┆ … ┆ 1617 ┆ 23 ┆ 59 ┆ 2014-01-01T04:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Sort by multiple columns by passing a list of columns.\n", "flights.sort([\"year\", \"month\", \"day\", \"dep_time\"])\n", @@ -479,20 +968,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "483acdc1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201312null1540nullnull1747null"EV"4352"N10575""EWR""CVG"null5691540"2013-01-02T20:00:00Z"
201312917031730-2719471957-10"F9"837"N208FR""LGA""DEN"25016201730"2013-01-29T22:00:00Z"
201311119001930-3022332243-10"DL"1435"N934DL""LGA""TPA"13910101930"2013-01-12T00:00:00Z"
2013111014081440-3215491559-10"EV"5713"N825AS""LGA""IAD"522291440"2013-11-10T19:00:00Z"
20132320222055-3322402338-58"DL"1715"N612DL""LGA""MSY"16211832055"2013-02-04T01:00:00Z"
201312720402123-4340235248"B6"97"N592JB""JFK""DEN"26516262123"2013-12-08T02:00:00Z"
" + ], + "text/plain": [ + "shape: (336_776, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 2 ┆ null ┆ … ┆ 569 ┆ 15 ┆ 40 ┆ 2013-01-02T20:00:00Z │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 1 ┆ 29 ┆ 1703 ┆ … ┆ 1620 ┆ 17 ┆ 30 ┆ 2013-01-29T22:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 11 ┆ 1900 ┆ … ┆ 1010 ┆ 19 ┆ 30 ┆ 2013-01-12T00:00:00Z │\n", + "│ 2013 ┆ 11 ┆ 10 ┆ 1408 ┆ … ┆ 229 ┆ 14 ┆ 40 ┆ 2013-11-10T19:00:00Z │\n", + "│ 2013 ┆ 2 ┆ 3 ┆ 2022 ┆ … ┆ 1183 ┆ 20 ┆ 55 ┆ 2013-02-04T01:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 7 ┆ 2040 ┆ … ┆ 1626 ┆ 21 ┆ 23 ┆ 2013-12-08T02:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.sort(\"dep_delay\", descending=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "80bf3df7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201338null1800nullnull1937null"UA"1177null"LGA""ORD"null733180"2013-03-08T23:00:00Z"
201328null1659nullnull1822null"UA"531null"EWR""BOS"null2001659"2013-02-08T21:00:00Z"
2013813null1727nullnull1941null"EV"5892"N16561""EWR""CVG"null5691727"2013-08-13T21:00:00Z"
2013813null1225nullnull1338null"EV"5897"N15973""EWR""ORF"null2841225"2013-08-13T16:00:00Z"
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null431840"2013-09-30T12:00:00Z"
201312917031730-2719471957-10"F9"837"N208FR""LGA""DEN"25016201730"2013-01-29T22:00:00Z"
201311119001930-3022332243-10"DL"1435"N934DL""LGA""TPA"13910101930"2013-01-12T00:00:00Z"
2013111014081440-3215491559-10"EV"5713"N825AS""LGA""IAD"522291440"2013-11-10T19:00:00Z"
20132320222055-3322402338-58"DL"1715"N612DL""LGA""MSY"16211832055"2013-02-04T01:00:00Z"
201312720402123-4340235248"B6"97"N592JB""JFK""DEN"26516262123"2013-12-08T02:00:00Z"
" + ], + "text/plain": [ + "shape: (336_776, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 3 ┆ 8 ┆ null ┆ … ┆ 733 ┆ 18 ┆ 0 ┆ 2013-03-08T23:00:00Z │\n", + "│ 2013 ┆ 2 ┆ 8 ┆ null ┆ … ┆ 200 ┆ 16 ┆ 59 ┆ 2013-02-08T21:00:00Z │\n", + "│ 2013 ┆ 8 ┆ 13 ┆ null ┆ … ┆ 569 ┆ 17 ┆ 27 ┆ 2013-08-13T21:00:00Z │\n", + "│ 2013 ┆ 8 ┆ 13 ┆ null ┆ … ┆ 284 ┆ 12 ┆ 25 ┆ 2013-08-13T16:00:00Z │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 431 ┆ 8 ┆ 40 ┆ 2013-09-30T12:00:00Z │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 1 ┆ 29 ┆ 1703 ┆ … ┆ 1620 ┆ 17 ┆ 30 ┆ 2013-01-29T22:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 11 ┆ 1900 ┆ … ┆ 1010 ┆ 19 ┆ 30 ┆ 2013-01-12T00:00:00Z │\n", + "│ 2013 ┆ 11 ┆ 10 ┆ 1408 ┆ … ┆ 229 ┆ 14 ┆ 40 ┆ 2013-11-10T19:00:00Z │\n", + "│ 2013 ┆ 2 ┆ 3 ┆ 2022 ┆ … ┆ 1183 ┆ 20 ┆ 55 ┆ 2013-02-04T01:00:00Z │\n", + "│ 2013 ┆ 12 ┆ 7 ┆ 2040 ┆ … ┆ 1626 ┆ 21 ┆ 23 ┆ 2013-12-08T02:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.sort([\"dep_delay\", \"arr_delay\"], descending=[True, False])" ] @@ -508,10 +1073,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "a939f3c2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
20131115251530-519341805null"MQ"4525"N719MQ""LGA""XNA"null11471530"2013-01-01T20:00:00Z"
20131117401745-521582020null"MQ"4413"N739MQ""LGA""XNA"null11471745"2013-01-01T22:00:00Z"
20131218481840823332151null"9E"3325"N920XJ""JFK""DFW"null13911840"2013-01-02T23:00:00Z"
" + ], + "text/plain": [ + "shape: (3, 19)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 1525 ┆ … ┆ 1147 ┆ 15 ┆ 30 ┆ 2013-01-01T20:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 1740 ┆ … ┆ 1147 ┆ 17 ┆ 45 ┆ 2013-01-01T22:00:00Z │\n", + "│ 2013 ┆ 1 ┆ 2 ┆ 1848 ┆ … ┆ 1391 ┆ 18 ┆ 40 ┆ 2013-01-02T23:00:00Z │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " flights.filter((pl.col(\"dep_delay\") <= 10) & (pl.col(\"dep_delay\") >= -10))\n", @@ -572,10 +1167,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "86827cf9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 8)
col0col1col2col3col4col5col6new_column0
i64i64i64i64i64i64stri32
012345"apple"5
67891011"orange"5
121314151617"pineapple"5
181920212223"mango"5
242526272829"kiwi"5
303132333435"lemon"5
" + ], + "text/plain": [ + "shape: (6, 8)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┬─────────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 ┆ new_column0 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str ┆ i32 │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple ┆ 5 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange ┆ 5 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple ┆ 5 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango ┆ 5 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi ┆ 5 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon ┆ 5 │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┴─────────────┘" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.with_columns(new_column0=pl.lit(5))\n", "df" @@ -591,10 +1219,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "0ab01f9d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 8)
col0col1col2col3col4col5col6new_column0
i64i64i64i64i64i64stri64
012345"apple"0
67891011"orange"1
121314151617"pineapple"2
181920212223"mango"3
242526272829"kiwi"4
303132333435"lemon"5
" + ], + "text/plain": [ + "shape: (6, 8)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┬─────────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 ┆ new_column0 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str ┆ i64 │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple ┆ 0 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange ┆ 1 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple ┆ 2 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango ┆ 3 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi ┆ 4 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon ┆ 5 │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┴─────────────┘" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.with_columns(new_column0=pl.Series([0, 1, 2, 3, 4, 5]))\n", "df" @@ -614,10 +1275,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "fff10e83", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 10)
col0col1col2col3col4col5col6new_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" + ], + "text/plain": [ + "shape: (6, 10)\n", + "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ … ┆ col6 ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", + "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", + "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.with_columns(new_column1=pl.lit(5), new_column2=pl.lit(6))\n", "df" @@ -633,10 +1327,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "82477100", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 10)
col0col1col2col3col4col5col6new_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i64
012345"apple"050
67891011"orange"155
121314151617"pineapple"2510
181920212223"mango"3515
242526272829"kiwi"4520
303132333435"lemon"5525
" + ], + "text/plain": [ + "shape: (6, 10)\n", + "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ … ┆ col6 ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i64 │\n", + "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 0 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 5 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 10 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 15 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 20 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 25 │\n", + "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.with_columns(new_column2=pl.col(\"col0\") - pl.col(\"new_column0\"))" ] @@ -652,10 +1379,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "55645bdd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 21)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hourgainspeed
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64stri64f64
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"-9370.044053
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"-16374.273128
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"-31408.375
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"17516.721311
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"19394.137931
2013930null1455nullnull1634null"9E"3393null"JFK""DCA"null2131455"2013-09-30T18:00:00Z"nullnull
2013930null2200nullnull2312null"9E"3525null"LGA""SYR"null198220"2013-10-01T02:00:00Z"nullnull
2013930null1210nullnull1330null"MQ"3461"N535MQ""LGA""BNA"null7641210"2013-09-30T16:00:00Z"nullnull
2013930null1159nullnull1344null"MQ"3572"N511MQ""LGA""CLE"null4191159"2013-09-30T15:00:00Z"nullnull
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null431840"2013-09-30T12:00:00Z"nullnull
" + ], + "text/plain": [ + "shape: (336_776, 21)\n", + "┌──────┬───────┬─────┬──────────┬───┬────────┬──────────────────────┬──────┬────────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ minute ┆ time_hour ┆ gain ┆ speed │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ str ┆ i64 ┆ f64 │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪════════╪══════════════════════╪══════╪════════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 15 ┆ 2013-01-01T10:00:00Z ┆ -9 ┆ 370.044053 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 29 ┆ 2013-01-01T10:00:00Z ┆ -16 ┆ 374.273128 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 40 ┆ 2013-01-01T10:00:00Z ┆ -31 ┆ 408.375 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 45 ┆ 2013-01-01T10:00:00Z ┆ 17 ┆ 516.721311 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 0 ┆ 2013-01-01T11:00:00Z ┆ 19 ┆ 394.137931 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 55 ┆ 2013-09-30T18:00:00Z ┆ null ┆ null │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 0 ┆ 2013-10-01T02:00:00Z ┆ null ┆ null │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 10 ┆ 2013-09-30T16:00:00Z ┆ null ┆ null │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 59 ┆ 2013-09-30T15:00:00Z ┆ null ┆ null │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 40 ┆ 2013-09-30T12:00:00Z ┆ null ┆ null │\n", + "└──────┴───────┴─────┴──────────┴───┴────────┴──────────────────────┴──────┴────────────┘" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.with_columns(\n", " (pl.col(\"dep_delay\") - pl.col(\"arr_delay\")).alias(\"gain\"),\n", @@ -681,10 +1446,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "4643b978", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6,)
col0
i64
0
6
12
18
24
30
" + ], + "text/plain": [ + "shape: (6,)\n", + "Series: 'col0' [i64]\n", + "[\n", + "\t0\n", + "\t6\n", + "\t12\n", + "\t18\n", + "\t24\n", + "\t30\n", + "]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"col0\"]" ] @@ -699,10 +1494,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "219852d8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 3)
col0new_column0col2
i64i64i64
002
618
12214
18320
24426
30532
" + ], + "text/plain": [ + "shape: (6, 3)\n", + "┌──────┬─────────────┬──────┐\n", + "│ col0 ┆ new_column0 ┆ col2 │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪═════════════╪══════╡\n", + "│ 0 ┆ 0 ┆ 2 │\n", + "│ 6 ┆ 1 ┆ 8 │\n", + "│ 12 ┆ 2 ┆ 14 │\n", + "│ 18 ┆ 3 ┆ 20 │\n", + "│ 24 ┆ 4 ┆ 26 │\n", + "│ 30 ┆ 5 ┆ 32 │\n", + "└──────┴─────────────┴──────┘" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[[\"col0\", \"new_column0\", \"col2\"]]" ] @@ -717,10 +1545,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "1bc0cd22", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 3)
col1col2col3
i64i64i64
002
618
12214
18320
24426
30532
" + ], + "text/plain": [ + "shape: (6, 3)\n", + "┌──────┬──────┬──────┐\n", + "│ col1 ┆ col2 ┆ col3 │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪══════╪══════╡\n", + "│ 0 ┆ 0 ┆ 2 │\n", + "│ 6 ┆ 1 ┆ 8 │\n", + "│ 12 ┆ 2 ┆ 14 │\n", + "│ 18 ┆ 3 ┆ 20 │\n", + "│ 24 ┆ 4 ┆ 26 │\n", + "│ 30 ┆ 5 ┆ 32 │\n", + "└──────┴──────┴──────┘" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# selecting a single column\n", "df.select(\"col0\")\n", @@ -742,10 +1603,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "ed447fb7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 3)
col0new_column0col2
i64i64i64
024
6316
12428
18540
24652
30764
" + ], + "text/plain": [ + "shape: (6, 3)\n", + "┌──────┬─────────────┬──────┐\n", + "│ col0 ┆ new_column0 ┆ col2 │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪═════════════╪══════╡\n", + "│ 0 ┆ 2 ┆ 4 │\n", + "│ 6 ┆ 3 ┆ 16 │\n", + "│ 12 ┆ 4 ┆ 28 │\n", + "│ 18 ┆ 5 ┆ 40 │\n", + "│ 24 ┆ 6 ┆ 52 │\n", + "│ 30 ┆ 7 ┆ 64 │\n", + "└──────┴─────────────┴──────┘" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.select(pl.col(\"col0\"), pl.col(\"new_column0\") + 2, pl.col(\"col2\") * 2)" ] @@ -760,10 +1654,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "eabfd313", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 3)
col0new_column0col2
i64i64i64
002
618
" + ], + "text/plain": [ + "shape: (2, 3)\n", + "┌──────┬─────────────┬──────┐\n", + "│ col0 ┆ new_column0 ┆ col2 │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪═════════════╪══════╡\n", + "│ 0 ┆ 0 ┆ 2 │\n", + "│ 6 ┆ 1 ┆ 8 │\n", + "└──────┴─────────────┴──────┘" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.select(\"col0\", \"new_column0\", \"col2\").slice(0, 2)" ] @@ -778,10 +1701,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "aed67406", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 14)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delayflightair_timedistancehourminute
i64i64i64i64i64i64i64i64i64i64i64i64i64i64
20131151751528308191115452271400515
20131153352948508302017142271416529
20131154254029238503311411601089540
201311544545-110041022-187251831576545
201311554600-6812837-2546111676260
2013930null1455nullnull1634null3393null2131455
2013930null2200nullnull2312null3525null198220
2013930null1210nullnull1330null3461null7641210
2013930null1159nullnull1344null3572null4191159
2013930null840nullnull1020null3531null431840
" + ], + "text/plain": [ + "shape: (336_776, 14)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────────┬──────┬────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ air_time ┆ distance ┆ hour ┆ minute │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════════╪══════╪════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 227 ┆ 1400 ┆ 5 ┆ 15 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 227 ┆ 1416 ┆ 5 ┆ 29 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 160 ┆ 1089 ┆ 5 ┆ 40 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 183 ┆ 1576 ┆ 5 ┆ 45 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 116 ┆ 762 ┆ 6 ┆ 0 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 213 ┆ 14 ┆ 55 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 198 ┆ 22 ┆ 0 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 764 ┆ 12 ┆ 10 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 419 ┆ 11 ┆ 59 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 431 ┆ 8 ┆ 40 │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────────┴──────┴────────┘" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.select(pl.col(pl.Int64))" ] @@ -796,10 +1757,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "62f578d1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 14)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delayflightair_timedistancehourminute
i64i64i64i64i64i64i64i64i64i64i64i64i64i64
20131151751528308191115452271400515
20131153352948508302017142271416529
20131154254029238503311411601089540
201311544545-110041022-187251831576545
201311554600-6812837-2546111676260
2013930null1455nullnull1634null3393null2131455
2013930null2200nullnull2312null3525null198220
2013930null1210nullnull1330null3461null7641210
2013930null1159nullnull1344null3572null4191159
2013930null840nullnull1020null3531null431840
" + ], + "text/plain": [ + "shape: (336_776, 14)\n", + "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────────┬──────┬────────┐\n", + "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ air_time ┆ distance ┆ hour ┆ minute │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════════╪══════╪════════╡\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 227 ┆ 1400 ┆ 5 ┆ 15 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 227 ┆ 1416 ┆ 5 ┆ 29 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 160 ┆ 1089 ┆ 5 ┆ 40 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 183 ┆ 1576 ┆ 5 ┆ 45 │\n", + "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 116 ┆ 762 ┆ 6 ┆ 0 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 213 ┆ 14 ┆ 55 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 198 ┆ 22 ┆ 0 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 764 ┆ 12 ┆ 10 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 419 ┆ 11 ┆ 59 │\n", + "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 431 ┆ 8 ┆ 40 │\n", + "└──────┴───────┴─────┴──────────┴───┴──────────┴──────────┴──────┴────────┘" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars.selectors as S\n", "\n", @@ -812,10 +1811,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "d4e486db", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (336_776, 2)
arr_timearr_delay
i64i64
83011
85020
92333
1004-18
812-25
nullnull
nullnull
nullnull
nullnull
nullnull
" + ], + "text/plain": [ + "shape: (336_776, 2)\n", + "┌──────────┬───────────┐\n", + "│ arr_time ┆ arr_delay │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ i64 │\n", + "╞══════════╪═══════════╡\n", + "│ 830 ┆ 11 │\n", + "│ 850 ┆ 20 │\n", + "│ 923 ┆ 33 │\n", + "│ 1004 ┆ -18 │\n", + "│ 812 ┆ -25 │\n", + "│ … ┆ … │\n", + "│ null ┆ null │\n", + "│ null ┆ null │\n", + "│ null ┆ null │\n", + "│ null ┆ null │\n", + "│ null ┆ null │\n", + "└──────────┴───────────┘" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Select columns that contain \"delay\" in their name\n", "flights.select(S.contains(\"delay\"))\n", @@ -844,10 +1881,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "5e5c0dd0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 10)
col0col1col2lettersnamescol5fruitnew_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" + ], + "text/plain": [ + "shape: (6, 10)\n", + "┌──────┬──────┬──────┬─────────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ letters ┆ … ┆ fruit ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", + "╞══════╪══════╪══════╪═════════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", + "└──────┴──────┴──────┴─────────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.rename({\"col3\": \"letters\", \"col4\": \"names\", \"col6\": \"fruit\"})" ] @@ -862,10 +1932,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "482d301f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 10)
COL0COL1COL2COL3COL4COL5COL6NEW_COLUMN0NEW_COLUMN1NEW_COLUMN2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" + ], + "text/plain": [ + "shape: (6, 10)\n", + "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", + "│ COL0 ┆ COL1 ┆ COL2 ┆ COL3 ┆ … ┆ COL6 ┆ NEW_COLUMN0 ┆ NEW_COLUMN1 ┆ NEW_COLUMN2 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", + "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", + "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.rename(lambda column_name: column_name.upper())" ] @@ -902,10 +2005,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "b7c0d519", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 6)
col0col1col2col3col4col5
i64i64i64i64i64i64
012345
67891011
121314151617
181920212223
242526272829
303132333435
" + ], + "text/plain": [ + "shape: (6, 6)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", + "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", + "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 │\n", + "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 │\n", + "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 │\n", + "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 │\n", + "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 │\n", + "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┘" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pl.DataFrame(\n", " data=np.reshape(range(36), (6, 6)), schema=[\"col\" + str(i) for i in range(6)]\n", @@ -915,10 +2051,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "3c2029cc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 6)
col5col3col1col4col2col0
i64i64i64i64i64i64
531420
11971086
171513161412
232119222018
292725282624
353331343230
" + ], + "text/plain": [ + "shape: (6, 6)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", + "│ col5 ┆ col3 ┆ col1 ┆ col4 ┆ col2 ┆ col0 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", + "│ 5 ┆ 3 ┆ 1 ┆ 4 ┆ 2 ┆ 0 │\n", + "│ 11 ┆ 9 ┆ 7 ┆ 10 ┆ 8 ┆ 6 │\n", + "│ 17 ┆ 15 ┆ 13 ┆ 16 ┆ 14 ┆ 12 │\n", + "│ 23 ┆ 21 ┆ 19 ┆ 22 ┆ 20 ┆ 18 │\n", + "│ 29 ┆ 27 ┆ 25 ┆ 28 ┆ 26 ┆ 24 │\n", + "│ 35 ┆ 33 ┆ 31 ┆ 34 ┆ 32 ┆ 30 │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┘" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.select([\"col5\", \"col3\", \"col1\", \"col4\", \"col2\", \"col0\"])\n", "df" @@ -934,10 +2103,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "e7ab5f64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 6)
col5col4col3col2col1col0
i64i64i64i64i64i64
543210
11109876
171615141312
232221201918
292827262524
353433323130
" + ], + "text/plain": [ + "shape: (6, 6)\n", + "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", + "│ col5 ┆ col4 ┆ col3 ┆ col2 ┆ col1 ┆ col0 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", + "│ 5 ┆ 4 ┆ 3 ┆ 2 ┆ 1 ┆ 0 │\n", + "│ 11 ┆ 10 ┆ 9 ┆ 8 ┆ 7 ┆ 6 │\n", + "│ 17 ┆ 16 ┆ 15 ┆ 14 ┆ 13 ┆ 12 │\n", + "│ 23 ┆ 22 ┆ 21 ┆ 20 ┆ 19 ┆ 18 │\n", + "│ 29 ┆ 28 ┆ 27 ┆ 26 ┆ 25 ┆ 24 │\n", + "│ 35 ┆ 34 ┆ 33 ┆ 32 ┆ 31 ┆ 30 │\n", + "└──────┴──────┴──────┴──────┴──────┴──────┘" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Alphabetical order\n", "df.select(sorted(df.columns))\n", @@ -982,10 +2184,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "62e540c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12, 2)
monthdep_delay
i64f64
721.727787
620.846332
512.986859
1216.576688
210.816843
812.61104
110.036665
313.227076
96.722476
106.243988
" + ], + "text/plain": [ + "shape: (12, 2)\n", + "┌───────┬───────────┐\n", + "│ month ┆ dep_delay │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ f64 │\n", + "╞═══════╪═══════════╡\n", + "│ 7 ┆ 21.727787 │\n", + "│ 6 ┆ 20.846332 │\n", + "│ 5 ┆ 12.986859 │\n", + "│ 12 ┆ 16.576688 │\n", + "│ 2 ┆ 10.816843 │\n", + "│ … ┆ … │\n", + "│ 8 ┆ 12.61104 │\n", + "│ 1 ┆ 10.036665 │\n", + "│ 3 ┆ 13.227076 │\n", + "│ 9 ┆ 6.722476 │\n", + "│ 10 ┆ 6.243988 │\n", + "└───────┴───────────┘" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.group_by(\"month\").agg(pl.col(\"dep_delay\").mean())" ] @@ -1023,10 +2263,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "af588177", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12, 3)
monthmean_delaycount_flights
i64f64u32
313.22707627973
1216.57668827110
620.84633227234
96.72247627122
110.03666526483
721.72778728485
210.81684323690
812.6110428841
512.98685928233
115.43536227035
" + ], + "text/plain": [ + "shape: (12, 3)\n", + "┌───────┬────────────┬───────────────┐\n", + "│ month ┆ mean_delay ┆ count_flights │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ u32 │\n", + "╞═══════╪════════════╪═══════════════╡\n", + "│ 3 ┆ 13.227076 ┆ 27973 │\n", + "│ 12 ┆ 16.576688 ┆ 27110 │\n", + "│ 6 ┆ 20.846332 ┆ 27234 │\n", + "│ 9 ┆ 6.722476 ┆ 27122 │\n", + "│ 1 ┆ 10.036665 ┆ 26483 │\n", + "│ … ┆ … ┆ … │\n", + "│ 7 ┆ 21.727787 ┆ 28485 │\n", + "│ 2 ┆ 10.816843 ┆ 23690 │\n", + "│ 8 ┆ 12.61104 ┆ 28841 │\n", + "│ 5 ┆ 12.986859 ┆ 28233 │\n", + "│ 11 ┆ 5.435362 ┆ 27035 │\n", + "└───────┴────────────┴───────────────┘" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Multiple aggregations using polars' syntactic sugar (shorthand) for mean and count\n", "flights.group_by(\"month\").agg(\n", @@ -1055,10 +2333,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "b0e56ff1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12, 4)
monthyearmean_delaycount_flights
i64i64f64u32
2201310.81684323690
6201320.84633227234
7201321.72778728485
1020136.24398828653
1201310.03666526483
920136.72247627122
3201313.22707627973
1120135.43536227035
12201316.57668827110
4201313.93803827662
" + ], + "text/plain": [ + "shape: (12, 4)\n", + "┌───────┬──────┬────────────┬───────────────┐\n", + "│ month ┆ year ┆ mean_delay ┆ count_flights │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ f64 ┆ u32 │\n", + "╞═══════╪══════╪════════════╪═══════════════╡\n", + "│ 2 ┆ 2013 ┆ 10.816843 ┆ 23690 │\n", + "│ 6 ┆ 2013 ┆ 20.846332 ┆ 27234 │\n", + "│ 7 ┆ 2013 ┆ 21.727787 ┆ 28485 │\n", + "│ 10 ┆ 2013 ┆ 6.243988 ┆ 28653 │\n", + "│ 1 ┆ 2013 ┆ 10.036665 ┆ 26483 │\n", + "│ … ┆ … ┆ … ┆ … │\n", + "│ 9 ┆ 2013 ┆ 6.722476 ┆ 27122 │\n", + "│ 3 ┆ 2013 ┆ 13.227076 ┆ 27973 │\n", + "│ 11 ┆ 2013 ┆ 5.435362 ┆ 27035 │\n", + "│ 12 ┆ 2013 ┆ 16.576688 ┆ 27110 │\n", + "│ 4 ┆ 2013 ┆ 13.938038 ┆ 27662 │\n", + "└───────┴──────┴────────────┴───────────────┘" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "month_year_delay = flights.group_by(\"month\", \"year\").agg(\n", " mean_delay=pl.mean(\"dep_delay\"),\n", @@ -1104,7 +2420,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/data-visualise.ipynb b/data-visualise.ipynb index 488a1db..0e2feeb 100644 --- a/data-visualise.ipynb +++ b/data-visualise.ipynb @@ -62,10 +62,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "a86fb211", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import polars as pl\n", "from lets_plot import *\n", @@ -122,10 +163,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0cf986aa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (344, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
"Chinstrap""Dream"55.819.8207.04000.0"male"2009
"Chinstrap""Dream"43.518.1202.03400.0"female"2009
"Chinstrap""Dream"49.618.2193.03775.0"male"2009
"Chinstrap""Dream"50.819.0210.04100.0"male"2009
"Chinstrap""Dream"50.218.7198.03775.0"female"2009
" + ], + "text/plain": [ + "shape: (344, 8)\n", + "┌───────────┬───────────┬──────────────┬──────────────┬──────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_ ┆ bill_depth_m ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ mm ┆ m ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ --- ┆ --- ┆ --- ┆ f64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ f64 ┆ f64 ┆ f64 ┆ ┆ ┆ │\n", + "╞═══════════╪═══════════╪══════════════╪══════════════╪══════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ Chinstrap ┆ Dream ┆ 55.8 ┆ 19.8 ┆ 207.0 ┆ 4000.0 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 43.5 ┆ 18.1 ┆ 202.0 ┆ 3400.0 ┆ female ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 49.6 ┆ 18.2 ┆ 193.0 ┆ 3775.0 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 50.8 ┆ 19.0 ┆ 210.0 ┆ 4100.0 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 50.2 ┆ 18.7 ┆ 198.0 ┆ 3775.0 ┆ female ┆ 2009 │\n", + "└───────────┴───────────┴──────────────┴──────────────┴──────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "penguins = pl.from_pandas(load_penguins())\n", "penguins" @@ -141,10 +221,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "23c75ba7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
" + ], + "text/plain": [ + "shape: (5, 8)\n", + "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ --- ┆ f64 ┆ --- ┆ f64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ f64 ┆ ┆ f64 ┆ ┆ ┆ │\n", + "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", + "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "penguins.head()" ] @@ -177,14 +290,158 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "574fe39f", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -247,10 +504,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "15c3848b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -291,10 +658,122 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "6b0e1c38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(\n", @@ -325,10 +804,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "943efd36", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(\n", @@ -356,10 +961,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "9e12b3bf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -382,10 +1112,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "17d5803b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -406,10 +1262,154 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "b9b98ec4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -470,14 +1470,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "7c76be4b", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -590,10 +1714,119 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "21b45061", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"species\")) + geom_bar())" ] @@ -610,10 +1843,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "4e046bb2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
catstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
" + ], + "text/plain": [ + "shape: (5, 8)\n", + "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", + "│ cat ┆ str ┆ --- ┆ f64 ┆ --- ┆ f64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ f64 ┆ ┆ f64 ┆ ┆ ┆ │\n", + "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", + "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "penguins = penguins.cast({\"species\": pl.Categorical})\n", "penguins.head()" @@ -641,10 +1907,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "93675336", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\")) + geom_histogram(binwidth=200))" ] @@ -680,10 +2056,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "6a58021f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\")) + geom_density())" ] @@ -752,10 +2238,141 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "a636947a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"species\", y=\"body_mass_g\")) + geom_boxplot())" ] @@ -770,10 +2387,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "9b85a2df", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\", color=\"species\")) + geom_density(size=2))" ] @@ -792,10 +2522,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "353189e5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(penguins, aes(x=\"body_mass_g\", color=\"species\", fill=\"species\"))\n", @@ -833,10 +2677,121 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "e091e211", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"island\", fill=\"species\")) + geom_bar())" ] @@ -853,10 +2808,122 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "7df8fb7a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"island\", fill=\"species\")) + geom_bar(position=\"fill\"))" ] @@ -882,10 +2949,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "5066527d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\")) + geom_point())" ] @@ -904,10 +3081,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "8ca23d34", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -931,10 +3222,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "00dd36e3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -1004,10 +3415,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "3410634b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/omagic/Documents/GitHub/python4DSpolars/lets-plot-images/penguin-plot.svg'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "plotted_data = (\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\")) + geom_point()\n", @@ -1029,7 +3451,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "852afe51", "metadata": { "tags": [ @@ -1119,7 +3541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/data/bake_sale.xlsx b/data/bake_sale.xlsx index e122900cc12c6d5ee3e5bbbecc52b1a202f53865..5de6f9a30425e19aa46b807093c289a907c83332 100644 GIT binary patch literal 6192 zcmZ`-1yoe+79F}fBnD6rq+3D+>Fy3`#$gy3I;5n#1(jw95fBulq(ea%NQ!g1ONcIsOtg1m}0Wu3=IGv!2$p%Q2&_8 zIm0|aFb_-p$1Wgub6y`O$MV!cSO-607t$|=bG!s^i(i8xyz>$$T3CsL8?DANd^-js zv@UlFeez|zV0)1v^z1A^vpkM1bMg^1%D#!~YZ8euw&r7}>3l2XyVdgq(2K&6VP71q zDfFtI69?fb?6$ZJ2I|+U9Mc5Br6H!w$7n#1Dibt)?Jlo;k^{$$n=tH9GkBBQylxI1 zkqD!tEQgT@NQ?e-nl{=h8AM`k-O2fau$_*T;@ii`b*5B*rTNi`F)I+dg&rD3D|trW zT#)%3SGKw^T~$fky26+~YqinH8U}vpYPUgP)E7yF>wkHw)EG2Uc%;|Rq`35mb@vhGD@ku&T zJm!g)2%e)Zu-?Kw)AL?-J9UQW;K!rLtO%nOIH&f|s=48K zvpp9az9Ty15BQT2 zc~7&QH53!?QH;=`Rv$;cU!-_DyFqQ7ouNPL^FIVRbn=tRBQNkPk4?SERXLK~Il>=K z9bkl2*Qz_J*w;ZWj>+&e7;xb@xgX-brsFhj_74ulVz6z57->_n;vTSl9jL>X@^(%f zyctzf*FeHZ6{q~RglP=<6*?fa+@Q=H>7zUUVktJ&4oI94L;86&m#t97{aHzxPG&Pq zhqrf6x~1F1^(ib=5BjkKia%D|T5NEU>61!Mehfd{;7smH9u)LGBD<)cU@bRR0KfL 
zHjYufZwb*%fFUd!Sv_NZail4eyydntmi}Z_R45zbd-%D=k)(@rl$OK@4V*{7omL@q zdFtjJ*pu$5`@Bkli1rE-@n-McLD0&K2q1w>dMZwI;4qrB);{m4SGc6TE~jv|hh2v0 z*V!QM$h-9fx(v4&`d1(o~b@3ZKgCn}hD`(c&rE!*# zJsN!qImGF6l)$a6zN~2}IfC%el+)Ed(OL!n<3)`3=uDiNHpqzl96~O5O5@l?cchb99Y24~FV*ahU9u&-B%}3}f!H*;nHp z&fZ#}WMqJM(|?X4em1eGaC92hEM@Zsywxvw&qBWY&e{ns!{E?!C3^wCJ&Um&c8&6C z5Rd)`0>djYp{1~hcb$P^PGVE}?q4sAiJUlSoOM{J^3Me9ORIT!?yD?ubGZ~>RR8Eh zsOQ^*`{Zbeq5FQ>aqEa>%RjRiO5?z)jLIS#s>(@zXYoJv9j}8xz5Aq1`&47I5Jm?U z)4L@f)bwS8v%-c95>*Vtmy(?!vwwGiSSCBy+tF?0Xd~W`M#oV=|Sm3p>7xlP)=( zbk?oL^fCUO9=Hn zxp6}qWMBk3s5^VK<3PMnuVD*f*F$LhU_P6&h3!`C>W%Ms-&&5u``&RAuYBe^UI?@W z`Qjcky2xHpZe$$YalDZdWFBkhLes5Up{h#JJM=;m+ygZJ!r`ejSr#bb&yOo-o3cp% zC+VjLCZ7pWq<=wOKaWF{w0KzCID*`NEq}c>pv0b^q7s1&z!x}H)MPIT`wAzBkY0yF zn5-NUZG)+egkXzxZ?Zh1dsNoOTm4PTNd`*!VjM#Tt)&ps$!}zx&Uqt`>-J8GDG5&L zoOV(>r3p6|9avASRf^uO0!@=)C3lT@#FtIhJ{3P_laEZPa4mix8+Nx&e(|-1-9)*U z{e5=5VgqePb51_nd*m`s_3y-*)ITU|5FhV<>m*{w3wdVQ88{UYusD!sY|RH3He}B+ z^K_LBAguMyF`)A06X&+d*Aj?JQX^$Q@vs*;G=9VPJ=XFJoM;h8`1F&1+}i$nZbUiZ zGm#d&7z#fKGopuo09XTVO6tm06=v8ab5GSwcyRY@j;jUjPxf#`R<){7tdaa?&)weI z4P>Y1;Rb<$-G7P>C{YH6E(n2Wx4==RUa+T-+Q0_Rd17s?*L7Z zp!b8GWEFWsc$46;@#1H}FlKpSy(G@QFJ=W?<9SM#PtTd#$ z3D{%8XrNg^WcvmWn_ZN5K7*sFoC!Sw@)-ayxy)2-NH*}G@Em6E+wwsUd{)RtX_(bm_bJ@k@O6LHs=l6EX zr#p1J#;|H3Kr=#wej+NxWs2aGl=9n${uYX1f{RGkTX!COc?az;u=DTguCd(y6iS!# zefL;UD4|wt^!3v~Q5f|B`4cqWdxx;sr<@60BnHn<-0M%vG$@;9L3d^gcZoRX($+2>MhsY{@c=q7nvvw|6W@*Q~jztTC%d3 z*6($C56oTeI~g9H4(W~CFd(URNm_lXL3L#346M^*i`v49%!+c~@Yflp2t zRyS&J1mu+Dm3yDoetTG95_MaO#GbsiTq5+c=vjDShe{Bwmhw~vImCSGbe?lopDY>= zeNP^?SWCr`2Aixsu%<^K?z7@e)4P({VyTf$2Df#$i9GUZHiS+Jyo?Sf2V#s*6_6d` z(b;jAm(%VuhQ5*SaqI*y4h^zyP>PGtNK7PV_3~1z;l8WIazhWif0HCNoaPxmth(Iv zvU01u>?kX$gsPg({J~YK2dfN!#i|E0Z~!m6rO_1x9%!}LvFtkZZH5h|?<3H@9=Gd) z@ivdlG{IeZ7*x*@O^tWotQhXnU3HNM7L2U>&{O%_275o>DK{ONsW95vMo02&&P{B4 zWl}ZOwTV98x!|JLsEZktAuH(d!OQ;W&X_K4_hEvFr3q_ij2n%b-3(C?BBfL2#?y8D zehuR$39q5Bj6+5W*%8|K$6amhEWEZ%OsIDZk0q}Ys4D3IOT%F5l>MVN8NsKr)dx+! 
zmz2%Em&?lw@190J$i46DvA1&vV}U3u?A`(bDRs4(sWp_V)f({QL`}$I<76)I{9xR& zg&!l0*njuS)45#9$GeB%C;nGo0dv$ouR3vr*~n*@yVructukc*v@!cSgv{z(lw#3HL}3|42Th5+hqn@`WE%t=@TbVdQw@r(E_iSA-X}~iEWyH- zMqdeOFf*LNfpRf6)q`m|coyYJjzq3)J-se-Me^BiUL%M21@9<(fthF4X5RHwkE@oLa zxhXU1bP<0^P0kc_?k7(rK7E_c=a&CNJx9^SB&KX$Hh3R;yZ-b==^B3rN0h4@9-P}G zlDz0)hf14ikHiB_U4E&#{YPMFY1^9aP@6`0F43yvo&me3Onh8>LkFGFtmX?nzi+QWg1s3J-^TwlUi5z8PDdbDxdWX&+j*?%*J3#} zMV7@fDv4Oo<3ue=vPd15xClpPV*a)7)Du+Nc!P1Lj<@n;X9Rak(<}|!*Nucho>tEk z>$g;Rc#g2{x8`gny%9d#qH1V`bHoz}fAsU#=n=fpHKv%v^va5mlm;X|SzE777Rj5c zIEo~o0lJyw-3Y1rq%X_uH;Hm@W6o?jP7SUyCySTFgeon@L=OGLWfcf8P~|7w{ik5z1^3ozO6cK;Z3$87Psm3yor|^bP*R4>NvK1n_xqSD z#tv$f+PkJoTn3b!W*l8-%+7Q&I~FiNmP7R=I} zgO&Il{G{0PGsU=dnbpM1!_~34=0#F%=?_P2GQ)1i1o&axV;2*fy*Qt9HQ0Y~$E%oB z3Cx<>1W+^-mo#Nrm}VH4uo8fO5ffJ5B`PqEo}E7@%w3@zK6?($Wx66no(iT zqXnbdwNu)DK=+ANb7V+8SzB_-a{t_M)j2~wD{8mes7ZtjwPQPHTWvRI7k55eXE)H# zwkJCU@v|dVIKIFyXy%!MXpq)oIQ5hU%bJ@uPz>KDt6@}j$UYCCYE6+gG*8&h7V^eU zD+_fjuzag&Zvf|3(ZP&?@Qzl-*qt*7cPhOZ=JTrNv zyH+wth>%BFdv<}lXJ=xwUDf*?+=w?z_oN^=b4(>`2ItNr+i_2oy0!9KPa+#m9}d@w^h-=#UAcV1$aW7=WIyeT)W^27MPq$pH> z6=LyWJs^uOllx9=$TEp#ak9|`I1dEAiE zc5(SdX}jj;PfAhNj-;pVtpj~=#aBVbJMu-;#`NG}W3k$AwE558bgO(xLg%V#5OVhn z5Uj1AtP$C~nZH|M_mLY+EB2jhsg~7mayt9UmNvyLt!h@L=O}1PH7DBO0F9Zpjhj`B z+eD6%I;#MdXCS6#`<$&Nu{>YDi|5$WNqE}lVm5?DYH(kTSOVQh#*?v2+DrMN$HDke z?`953uIO3PUSU0PJ6$>ZMd7)#$H5x37v@d(dVU11ECU@vejc(|3DHuk9P8~6#6p;w zV79l?Ei&r2zDpA%D_XPv9L&ZfB+LYXx<6~?3}Y;|%Z^bLx51f$(^L~F>$IGlL=XYs z^S16udPx0D0u#cB+nVwIm@21~uC|}hrN+3Z)^9A@sNUy)i?`IU$`!c7pmqAJ`M^n= zmnSK4qrG0#AD{k70dB(PeWw`D0%rYn>@LqlBG=sd{O=bC#rHh=N`r$kPkszW;_$}) z;MSURS?$i5w{$iML|JcW=#&`$-v~uz>aR~w@XzHRY|-lg*WJ5c;Q&BTa2zW8fAjLL z2VOV1eh22DUc>)ydHoO0H8bcpP7A8^P&j`RuivtD9p$U*!CNN9`6^ze}}m1q1QY5-=PvHKNj`y z>;3(8fa@p5ZvY@lPXA27Kb{}g0j~@4Z$M)5e~{@H`g>o0^8fy~>$B(DuAO_=bD#6MpL3rX>f+$i0000&%%u##eNRbcg9QLk-~j;C zm`|+LJU#qi9)7kaL0&Ll8__^_x9W^Bk8W|&K6LsK(De09rdj)3UMt|a@28YLR(5XD zM6GmwAn9VL2Z{4DdRT;R^~}-nd7c$RBBD{-jKWaiS-riq9g)R)M$>*?h@S1o;4C{D 
zGolsYRX3aueeVyaom+Pw5&2sWThhJ=`@#&vYkW5x^d>{Vrd*>0A&$EJA^Is!CkyF! zxSw=HqKg~Y0`wrlSXf`L0CCXbOZARjm-TITOs3>g`-HLj;C`lQC;xg@ijk9bWad$z z8eRe$H#nyg3)vwfpIb!BO-;F6Szhs2h>x!;Xw{^U#azyAGV^7Y3gb|&m4q{?fMErn zHlb0^;=WOnAJzr==X|5d5KeD**}g<9#bA>Itf+2c_8Hzy_23VwM?|ft9k)=SNt_>k z_8?ym?{)iUPp2VB>JxX+o^+n_p#fL$&yGks$uk8TctfmNgXdR4N|pA0mE-aEGU&HQ zqXeGl`ehQszp`QY5ZbkY$$}m>06>CSZGB*FzG9-6uhnTIIyfMTN9YJN#oFRXQ$!pX zsZ05Qx`JGsw_bURmpaX;+0&DPMyT=N3n)FuvxC90J?Cf1BLCO$$Z(ixp~7`R@I-_GDIg-aDl`P&hjTu zx)sgvh|d~^Hhi)>QBwuDEX1xu@z+3^8{BGho6SiM=N8?kk}G-RBVnSkj>TU-9ck*l$(>Jbmun@$|fRS)%`8sLQoSoI)KPeGxeu zETSOtGUG!_6y&x085YFKnzxC&0a7&RVga7S}%ICA0ZM)0@ytknKpLgkOC za34^+p{(%=kJWtN-yP-kV? zufETW^|o#4gS@>D$^T|ZA@cZFD&`grzySd0e=~H+jjuBd=I1N+&jPw6DKTBwph}$P z(Xq^~GI&7&FJ_k!n?4cLDh}#6Vv$whI!I}f^hI|b)6fi%k_4b8_Vb-4E5mGtj_j%+ z`MX}UUihu=jMwE?JZl0_*?E?H{sPuhDZ9VyPEA9P_^ub7z9D<$pgu0WlIR=4870E_ zt;EKjJXeu67bD~Zw6`;@!1kz}^)Ff=1dZ$5I7Dh5 zW}#dvKA@QH;J-?+jUCV*)+hkxe5PgI(sP7**cL^1t7_wX8J20%_^~reAc3c3lGnRP zrg5FY48jvo9o*M*y}Z>McTR84<7Ik~>(0AjKy&*+J#l*6hH%?%ZWYbS<|0Z}@F=6| zWN%?OWNDSU_gT>d=ahA9)XhM@C{|l_E_n;b8oIen>Z7x`_JUfOp>KhdEQLiYTz-mb zwyeeP-B#PHmW*<(T!yU5s48$m)7RQ>63UC*-y|8e?A5Bp}LDi6=sY8cbWW zt`(GcXVJnZLy-?1VZ26ul554JvGu1x(7^-Pp26Q~G#>tj*2Sce4^!ymf71BBC7zh7 zZ_p!78iUsQ5&I!vhTNU@*$i}9R}b>2cXqVc8{zC?%p<_^i?!?~!6(_d8VBXQO=0tX z&02#7C}X~wM0|L$XkyBP!k+WBr47cX`?pA5C-gCQjCZ|)w^FMV}4KA?1qaq0rM zwj5q|Xd_}x@V+jMxS8)pLi4tyr!)Emb}4-S81jHdPER|R^%spBV$shS5A?)|g4QX$ zCX1*luS8PmF7jbCo5-hF_2I4f90ArF#R0?#x!S;oY@i&AQzACuT^5kKu5ur2+A4`;5jaiLX-IdW z#PjbKeehI+{Sb{!KHghnr6D;ZcHhZRGawyWK6B-O*OnO1`vMcS6cD-1WOA$8&I4uH z^6If^uf5A!5~FBk)R(IYU~~ERofY|SzO*!CwQ&`5kf(yc;El7LNxfSBK=R4c6%l@- zh^)`up$kzV*keT&c6`NPGyVc=e{V<#ag%Ssw6x!CpQl|~Y!37s+3bcpr-n7Jif`M_ zJ9Qq70o&=-y=82&Vlwb~=4&S$VH2sw~8 z?1#8s00aZ0(5~JF1KopQV^ghi;m_y4#5#~6*#mjCI*xlg@b!1>Jl`c*u0L!;BgE{~ zxemSWK*;H6W}~Mgsg3I0v=vET?N>IY;R_o{D{_ep@FA%DyB;)1m?yN8##!;6Px+5E z_3GJC$rzJcizn_?EFr}R_~c~CENf*eyXJ&Gf2z>+fw6pwsJ)FA)lNs2pyLj!vo7@HIt%HqilS6)B zU$QCcvkekT}EKVZUGh(UEOS{bZy%eFxVr 
zn`TFI61Bp}>)dMTxsvGn$Y~F&sg>TG*~x_KGsAbATZ)h3qN4cYNT_6hgymB@w+3r0 z`6D~O?rp2)rC4c4hur4+pD0SCmYpQMgKuy)t4x*zp zdqWd`o$7?}M05Bx!c||P8{A}=z-1SFPq9=m}}^GS|+K4(u7 zFZ7Jdscr~YzPHLfJdaYDG39&^LaK`=GjAqxZgDgN3s&tl!K>Cpj49#SsEX%KNy<}_ z(nUc(FTfk>zu1nQ)+|40zuk-yK7j$Y&h0AdBmB=r$|O-0;d?`-iwn8tzC(eDVGekm z6YG9$iBui6y>Eq1%&ONt5W0Z@@yBL|Xt#j3BUFpl%g=;PE?y>rA!3YML<_t z;)by$*0ypf7qJ8H6##TIs$u3(w#rwxp6px~Jp+7*ctV?)n#*%HA>^IEH<*T|410&S zYd-(sP_~eDg)n!BD)n@e5>(uzmuhcsE?KPbx+>;U3f(z4=@RQFXJb`9nNtZQ>ko&X zET4;ATpZmzK%}E@E1nG|WXoS17b=}^Bg95jUg8vn{X9u7OD+uCKP^;Nlz7*S$GWkD zl^CZkGe9|yqMUW4P>ES~qlu)Lq{0JYxd4t#EU*{z>>a!PwdR5XaKQlTosEOr+K5O) zJz6P1O{@_Sd`NE7%N~q+AgC+j#7N+{bm!H|5l9I<@V$W0-7{B(_a^m_hn9}(cTk)( zP)m*b;KbSt7G{;M%R|b2jNk7RAJ4 z!qv0$j`I$5$CHreS6PVdrXl=$o4($CVy~Ff`ZH>Aq#RabhOO#xU)~6r9lpjeafjCc z0vGPh^a_7BCq;ED1Y#OUv5ea*n-isStqLceN_2_a&U9}yDV-)^?5OROfnwZZd(Wt2 zVSFrxM0!pzrL{^K1Bh`MJmY7*IemMw#2vh-H!rXH@qG*Ja+`0Qu{)2>k*i!CybW@r zDAW8{cy*=3J<&cbVu&9rRKi{5DV{fS&(wZU=*ekG*II+0YbJiSIydAMi*Qe#;9IFM ze=4!VnqZHO*nQBu@x+j$yG$U#XO+wEsVOc~MQKw}J5e`|(+t2}KhX|ok zWm?!x&cP}Xkg}V_>~{@l?{`Gx+(Z=C_-bktT9yl?{|L|m5;2ubBlWhM4Q0{1(OeRr z28!lNHyH7MHV5u`3-K0C_D8;CKNhZ!tCR}aHhQz`D44fvWG!QT>v$$~ir4EzWt|S} zA^5N<&y(OG+wn8Ns}eRyZP{ z$!VuCptW*)nyRreQm@JCbS0h~9lpgP%del4c8V-H_nO)o{UrfVI!g1~ykbVn^9}*^ zn8Tk3(+F_=ZUX+xYW?=5e+<_@t~7D1mbhD-0{@H64omkoZ*^oGc$*VqUP10vijbb= z<%qxYdGQhO8Z$FG^a2fk{s77qoI*a&NF+D*DAlNGx-{F%1^Vpz8;XerSt~UvM`FXw zO;hhuN(aGc)wl1`?nFd!Xa-PlmWT6_ev+3ESg$G+FP%8jSYD}-^{OJLn$?085$RdNTrJFDB};`KVG9=RSahzga{W1T{+gksg&h> znEe`Idg6FBu-St za>F#S;8djAle3H0cVooemquK1W=On@*adDptr#B-RujMPMv&IZEo(v{Gq1Cu=F&WC zc%jOhwvdTv%A6_dX_r47dg4W3muyz8uYR<+~16$_M-xjlf@Bl2<1Z9Mko5@y`sTqFBGcaOQEM_D8Nt`3!CO2EBNmt zQcSk~c{GJzF8?wxy$W!3s`on_0B8zN!c@S2&G@bcUY&jY2`t9kjQ{^h*j1dX6QVyj zJ(%*t;QW_}{gJP$C|3s*e^4G{y59ew{O`EpYVcK;{U;ck;NLy>RfMZP^A7?9lPAp2 z|39wtD#BF@_Xk0M=r_Xu8M>>XS9|+Ep}LqkE#}9scKTNVuAUr!07NmO`gaQc^&Ghh xcvY7F0CH0Oi%4G$zp6if!UHjX1N@)H|3{4sb@A{ozd?$5Qe#SngZA>>{{X;;djbFe diff --git a/databases.ipynb b/databases.ipynb 
index 661cfd1..3c786b9 100644 --- a/databases.ipynb +++ b/databases.ipynb @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "9e54fbf6", "metadata": {}, "outputs": [], @@ -92,10 +92,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "970d2c19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 'AC/DC'),\n", + " (2, 'Accept'),\n", + " (3, 'Aerosmith'),\n", + " (4, 'Alanis Morissette'),\n", + " (5, 'Alice In Chains'),\n", + " (6, 'Antônio Carlos Jobim'),\n", + " (7, 'Apocalyptica'),\n", + " (8, 'Audioslave'),\n", + " (9, 'BackBeat'),\n", + " (10, 'Billy Cobham')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import sqlite3\n", "\n", @@ -116,10 +136,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "c5871b6e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 2)
column_0column_1
i64str
1"AC/DC"
2"Accept"
3"Aerosmith"
4"Alanis Morissette"
5"Alice In Chains"
6"Antônio Carlos Jobim"
7"Apocalyptica"
8"Audioslave"
9"BackBeat"
10"Billy Cobham"
" + ], + "text/plain": [ + "shape: (10, 2)\n", + "┌──────────┬──────────────────────┐\n", + "│ column_0 ┆ column_1 │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ str │\n", + "╞══════════╪══════════════════════╡\n", + "│ 1 ┆ AC/DC │\n", + "│ 2 ┆ Accept │\n", + "│ 3 ┆ Aerosmith │\n", + "│ 4 ┆ Alanis Morissette │\n", + "│ 5 ┆ Alice In Chains │\n", + "│ 6 ┆ Antônio Carlos Jobim │\n", + "│ 7 ┆ Apocalyptica │\n", + "│ 8 ┆ Audioslave │\n", + "│ 9 ┆ BackBeat │\n", + "│ 10 ┆ Billy Cobham │\n", + "└──────────┴──────────────────────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -138,10 +195,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "62791eab", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ArtistId', 'Name']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[i[0] for i in cursor.description]" ] @@ -158,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "0074d4d3", "metadata": {}, "outputs": [], @@ -181,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "8020a73a", "metadata": {}, "outputs": [], @@ -202,10 +270,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8a8354b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('US', 1.0, 3), ('UK', 0.6, 2), ('France', 0.8, 1)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "con_new.execute(\"SELECT * FROM test\").fetchall()" ] @@ -220,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "927e804d", "metadata": { "tags": [ @@ -278,10 +357,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "be55f957", 
"metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('Bodies', 180035),\n", + " ('Vivo Isolado Do Mundo', 180035),\n", + " ('Elvis Ate America', 180166),\n", + " ('Remote Control', 180297),\n", + " ('Promises', 180401),\n", + " ('Emergency', 180427),\n", + " ('À Vontade (Live Mix)', 180636),\n", + " ('Hyperconectividade', 180636),\n", + " ('On Fire', 180636),\n", + " ('Fascinação', 180793)]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sql_query = \"SELECT name, milliseconds FROM track WHERE milliseconds > 1e3*3*60 ORDER BY milliseconds ASC LIMIT 10;\"\n", "cursor = con.execute(sql_query)\n", @@ -299,10 +398,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "3f894066", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(340, 0.863),\n", + " (345, 1.11065),\n", + " (318, 1.6882166666666667),\n", + " (314, 1.69135),\n", + " (328, 1.8377666666666668)]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sql_groupby = \"SELECT albumid, AVG(milliseconds)/1e3/60 FROM track GROUP BY albumid ORDER BY AVG(milliseconds) ASC LIMIT 5;\"\n", "cursor = con.execute(sql_groupby)\n", @@ -324,10 +438,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "9824b70a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(340, 0.863, \"Liszt - 12 Études D'Execution Transcendante\"),\n", + " (345, 1.11065, \"Monteverdi: L'Orfeo\"),\n", + " (318, 1.6882166666666667, 'SCRIABIN: Vers la flamme'),\n", + " (314, 1.69135, 'English Renaissance'),\n", + " (328, 1.8377666666666668, 'Charpentier: Divertissements, Airs & Concerts')]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sql_join = \"SELECT track.albumid, AVG(milliseconds)/1e3/60, 
album.title FROM track INNER JOIN album ON (track.albumid = album.albumid) GROUP BY album.albumid ORDER BY AVG(milliseconds) ASC LIMIT 5;\"\n", "cursor = con.execute(sql_join)\n", @@ -411,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f5c83909", "metadata": {}, "outputs": [], @@ -448,10 +577,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "fc89c429", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
+       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                                                Milliseconds  Bytes     UnitPrice       ┃\n",
+       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
+       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2) │\n",
+       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼─────────────────┤\n",
+       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson                             343719111703340.99 │\n",
+       "│       2Balls to the Wall                      221NULL34256255104240.99 │\n",
+       "│       3Fast As a Shark                        321F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman                   23061939909940.99 │\n",
+       "│       4Restless and Wild                      321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman25205143317790.99 │\n",
+       "│       5Princess of the Dawn                   321Deaffy & R.A. Smith-Diesel                                            37541862905210.99 │\n",
+       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴─────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │\n", + "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼─────────────────┤\n", + "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m343719\u001b[0m │ \u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", + "│ \u001b[1;36m2\u001b[0m │ \u001b[32mBalls to the Wall \u001b[0m │ 
\u001b[1;36m2\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m342562\u001b[0m │ \u001b[1;36m5510424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", + "│ \u001b[1;36m3\u001b[0m │ \u001b[32mFast As a Shark \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman \u001b[0m │ \u001b[1;36m230619\u001b[0m │ \u001b[1;36m3990994\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", + "│ \u001b[1;36m4\u001b[0m │ \u001b[32mRestless and Wild \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman\u001b[0m │ \u001b[1;36m252051\u001b[0m │ \u001b[1;36m4331779\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", + "│ \u001b[1;36m5\u001b[0m │ \u001b[32mPrincess of the Dawn \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mDeaffy & R.A. Smith-Diesel \u001b[0m │ \u001b[1;36m375418\u001b[0m │ \u001b[1;36m6290521\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", + "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴─────────────────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import ibis\n", "\n", @@ -471,10 +635,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "5e5a482b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
+       "┃ TrackId  Name                                                      AlbumId  MediaTypeId  GenreId  Composer            Milliseconds  Bytes    UnitPrice        mean_mins_track ┃\n",
+       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
+       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2)float64         │\n",
+       "├─────────┼──────────────────────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────┼──────────────┼─────────┼─────────────────┼─────────────────┤\n",
+       "│    3496Étude 1, In C Major - Preludio (Presto) - Liszt         340424NULL5178022296170.990.863000 │\n",
+       "│    3501L'orfeo, Act 3, Sinfonia (Orchestra)                    345224Claudio Monteverdi6663911890620.991.110650 │\n",
+       "│    3452SCRIABIN: Prelude in B Major, Op. 11, No. 11            318424NULL10129338195350.991.688217 │\n",
+       "│    3448Lamentations of Jeremiah, First Set \\ Incipit Lamentatio314224Thomas Tallis     6919412080800.991.691350 │\n",
+       "│    3492Sing Joyfully                                           314224William Byrd      13376822564840.991.691350 │\n",
+       "└─────────┴──────────────────────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────┴──────────────┴─────────┴─────────────────┴─────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean_mins_track\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │ \u001b[2mfloat64\u001b[0m │\n", + "├─────────┼──────────────────────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────┼──────────────┼─────────┼─────────────────┼─────────────────┤\n", + "│ \u001b[1;36m3496\u001b[0m │ \u001b[32mÉtude 1, In C Major - Preludio (Presto) - Liszt \u001b[0m │ \u001b[1;36m340\u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m51780\u001b[0m │ \u001b[1;36m2229617\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m0.863000\u001b[0m │\n", + "│ \u001b[1;36m3501\u001b[0m │ 
\u001b[32mL'orfeo, Act 3, Sinfonia (Orchestra) \u001b[0m │ \u001b[1;36m345\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mClaudio Monteverdi\u001b[0m │ \u001b[1;36m66639\u001b[0m │ \u001b[1;36m1189062\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.110650\u001b[0m │\n", + "│ \u001b[1;36m3452\u001b[0m │ \u001b[32mSCRIABIN: Prelude in B Major, Op. 11, No. 11 \u001b[0m │ \u001b[1;36m318\u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m101293\u001b[0m │ \u001b[1;36m3819535\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.688217\u001b[0m │\n", + "│ \u001b[1;36m3448\u001b[0m │ \u001b[32mLamentations of Jeremiah, First Set \\ Incipit Lamentatio\u001b[0m │ \u001b[1;36m314\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mThomas Tallis \u001b[0m │ \u001b[1;36m69194\u001b[0m │ \u001b[1;36m1208080\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.691350\u001b[0m │\n", + "│ \u001b[1;36m3492\u001b[0m │ \u001b[32mSing Joyfully \u001b[0m │ \u001b[1;36m314\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mWilliam Byrd \u001b[0m │ \u001b[1;36m133768\u001b[0m │ \u001b[1;36m2256484\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.691350\u001b[0m │\n", + "└─────────┴──────────────────────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────┴──────────────┴─────────┴─────────────────┴─────────────────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "track.group_by(\"AlbumId\").mutate(\n", " mean_mins_track=track.Milliseconds.mean() / 1e3 / 60\n", @@ -483,10 +682,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "39c80365", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
+       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                   Milliseconds  Bytes     UnitPrice        mean_mins_track ┃\n",
+       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
+       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2)float64         │\n",
+       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼───────────────────────────────────────────┼──────────────┼──────────┼─────────────────┼─────────────────┤\n",
+       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.994.000692 │\n",
+       "│       6Put The Finger On You                  111Angus Young, Malcolm Young, Brian Johnson20566267134510.994.000692 │\n",
+       "│       7Let's Get It Up                        111Angus Young, Malcolm Young, Brian Johnson23392676365610.994.000692 │\n",
+       "│       8Inject The Venom                       111Angus Young, Malcolm Young, Brian Johnson21083468528600.994.000692 │\n",
+       "│       9Snowballed                             111Angus Young, Malcolm Young, Brian Johnson20310265994240.994.000692 │\n",
+       "│      10Evil Walks                             111Angus Young, Malcolm Young, Brian Johnson26349786112450.994.000692 │\n",
+       "│      11C.O.D.                                 111Angus Young, Malcolm Young, Brian Johnson19983665663140.994.000692 │\n",
+       "│      12Breaking The Rules                     111Angus Young, Malcolm Young, Brian Johnson26328885968400.994.000692 │\n",
+       "│      13Night Of The Long Knives               111Angus Young, Malcolm Young, Brian Johnson20568867063470.994.000692 │\n",
+       "│      14Spellbound                             111Angus Young, Malcolm Young, Brian Johnson27086388170380.994.000692 │\n",
+       "│        │\n",
+       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴───────────────────────────────────────────┴──────────────┴──────────┴─────────────────┴─────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean_mins_track\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │ \u001b[2mfloat64\u001b[0m │\n", + "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼───────────────────────────────────────────┼──────────────┼──────────┼─────────────────┼─────────────────┤\n", + "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m343719\u001b[0m │ \u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ 
\u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m6\u001b[0m │ \u001b[32mPut The Finger On You \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m205662\u001b[0m │ \u001b[1;36m6713451\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m7\u001b[0m │ \u001b[32mLet's Get It Up \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m233926\u001b[0m │ \u001b[1;36m7636561\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m8\u001b[0m │ \u001b[32mInject The Venom \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m210834\u001b[0m │ \u001b[1;36m6852860\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m9\u001b[0m │ \u001b[32mSnowballed \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m203102\u001b[0m │ \u001b[1;36m6599424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m10\u001b[0m │ \u001b[32mEvil Walks \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m263497\u001b[0m │ \u001b[1;36m8611245\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m11\u001b[0m │ \u001b[32mC.O.D. 
\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m199836\u001b[0m │ \u001b[1;36m6566314\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m12\u001b[0m │ \u001b[32mBreaking The Rules \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m263288\u001b[0m │ \u001b[1;36m8596840\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m13\u001b[0m │ \u001b[32mNight Of The Long Knives \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m205688\u001b[0m │ \u001b[1;36m6706347\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[1;36m14\u001b[0m │ \u001b[32mSpellbound \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m270863\u001b[0m │ \u001b[1;36m8817038\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", + "│ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │\n", + "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴───────────────────────────────────────────┴──────────────┴──────────┴─────────────────┴─────────────────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "track.group_by(\"AlbumId\").mutate(mean_mins_track=track.Milliseconds.mean() / 1e3 / 60)" ] @@ -501,10 +747,57 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 16, "id": "02e6602a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n",
+       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                                                Milliseconds  Bytes     UnitPrice       Name_right ┃\n",
+       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n",
+       "│ int64stringint64int64int64stringint64int64decimal(10, 2)string     │\n",
+       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼────────────────┼────────────┤\n",
+       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson                             343719111703340.99Rock       │\n",
+       "│       2Balls to the Wall                      221NULL34256255104240.99Rock       │\n",
+       "│       3Fast As a Shark                        321F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman                   23061939909940.99Rock       │\n",
+       "│       4Restless and Wild                      321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman25205143317790.99Rock       │\n",
+       "│       5Princess of the Dawn                   321Deaffy & R.A. Smith-Diesel                                            37541862905210.99Rock       │\n",
+       "│       6Put The Finger On You                  111Angus Young, Malcolm Young, Brian Johnson                             20566267134510.99Rock       │\n",
+       "│       7Let's Get It Up                        111Angus Young, Malcolm Young, Brian Johnson                             23392676365610.99Rock       │\n",
+       "│       8Inject The Venom                       111Angus Young, Malcolm Young, Brian Johnson                             21083468528600.99Rock       │\n",
+       "│       9Snowballed                             111Angus Young, Malcolm Young, Brian Johnson                             20310265994240.99Rock       │\n",
+       "│      10Evil Walks                             111Angus Young, Malcolm Young, Brian Johnson                             26349786112450.99Rock       │\n",
+       "│                 │\n",
+       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴────────────────┴────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName_right\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n", + "│ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mdecimal(10, 2)\u001b[0m │ \u001b[2mstring\u001b[0m │\n", + "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼────────────────┼────────────┤\n", + "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m343719\u001b[0m │ 
\u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m2\u001b[0m │ \u001b[32mBalls to the Wall \u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m342562\u001b[0m │ \u001b[1;36m5510424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m3\u001b[0m │ \u001b[32mFast As a Shark \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman \u001b[0m │ \u001b[1;36m230619\u001b[0m │ \u001b[1;36m3990994\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m4\u001b[0m │ \u001b[32mRestless and Wild \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman\u001b[0m │ \u001b[1;36m252051\u001b[0m │ \u001b[1;36m4331779\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m5\u001b[0m │ \u001b[32mPrincess of the Dawn \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mDeaffy & R.A. 
Smith-Diesel \u001b[0m │ \u001b[1;36m375418\u001b[0m │ \u001b[1;36m6290521\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m6\u001b[0m │ \u001b[32mPut The Finger On You \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m205662\u001b[0m │ \u001b[1;36m6713451\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m7\u001b[0m │ \u001b[32mLet's Get It Up \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m233926\u001b[0m │ \u001b[1;36m7636561\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m8\u001b[0m │ \u001b[32mInject The Venom \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m210834\u001b[0m │ \u001b[1;36m6852860\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m9\u001b[0m │ \u001b[32mSnowballed \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m203102\u001b[0m │ \u001b[1;36m6599424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[1;36m10\u001b[0m │ \u001b[32mEvil Walks \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m263497\u001b[0m │ \u001b[1;36m8611245\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", + "│ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ 
\u001b[2m…\u001b[0m │\n", + "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴────────────────┴────────────┘" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "genre = connection.table(\"genre\")\n", "genre_and_track = track.inner_join(\n", @@ -557,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "325c8789", "metadata": { "tags": [ @@ -577,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "9117af26", "metadata": {}, "outputs": [], @@ -604,7 +897,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "e8277743", "metadata": {}, "outputs": [], @@ -625,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "2097c995", "metadata": {}, "outputs": [], @@ -652,10 +945,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "14c14fc8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 'Deadpond', 'Dive Wilson', None),\n", + " (2, 'Spider-Boy', 'Pedro Parqueador', None),\n", + " (3, 'Rusty-Man', 'Tommy Sharp', 48),\n", + " (4, 'Ms Amazing', 'Barjabeen Bhabra', 17)]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "con = sqlite3.connect(Path(\"data/hero.db\"))\n", "\n", @@ -674,10 +981,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "afa69365", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name='Deadpond' secret_name='Dive Wilson' id=1 age=None\n", + "name='Spider-Boy' secret_name='Pedro Parqueador' id=2 age=None\n", + "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n", + 
"name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n" + ] + } + ], "source": [ "from sqlmodel import select\n", "\n", @@ -698,10 +1016,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "8fae7081", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name='Deadpond' secret_name='Dive Wilson' id=1 age=None\n", + "name='Spider-Boy' secret_name='Pedro Parqueador' id=2 age=None\n", + "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n", + "name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n" + ] + } + ], "source": [ "with Session(engine) as session:\n", " statement = select(Hero)\n", @@ -721,10 +1050,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "b2dbde03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n", + "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n" + ] + } + ], "source": [ "with Session(engine) as session:\n", " statement = select(Hero).where(Hero.age < 100).limit(2).order_by(Hero.age)\n", @@ -744,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "349e5d3d", "metadata": { "tags": [ @@ -797,7 +1135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/dates-and-times.ipynb b/dates-and-times.ipynb index 4fe0c68..1ef38ab 100644 --- a/dates-and-times.ipynb +++ b/dates-and-times.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -74,10 +74,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "84829a6b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "2026-04-28 13:36:00.720344\n" + ] + } + ], "source": [ "from datetime import datetime\n", "\n", @@ -121,10 +129,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "fc224a47", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-11-28 00:00:00\n" + ] + } + ], "source": [ "specific_datetime = datetime(2019, 11, 28)\n", "print(specific_datetime)" @@ -150,10 +166,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "4558d476", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2002, 2, 16, 0, 0)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "date_string = \"16 February in 2002\"\n", "datetime.strptime(date_string, \"%d %B in %Y\")" @@ -169,10 +196,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "fb90ac84", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2002, 2, 16, 0, 0)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "date_string = \"16 Feb in 2002\"\n", "datetime.strptime(date_string, \"%d %b in %Y\")" @@ -188,10 +226,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "7a568f5b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2002-02-03 00:00:00\n", + "2002-02-03 00:00:00\n" + ] + } + ], "source": [ "from dateutil.parser import parse\n", "\n", @@ -211,10 +258,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "d6b5e3c3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Tuesday, 04, 2026'" + ] + }, + "execution_count": 7, + "metadata": {}, 
+ "output_type": "execute_result" + } + ], "source": [ "now.strftime(\"%A, %m, %Y\")" ] @@ -273,10 +331,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "39ba17e6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "now > specific_datetime" ] @@ -291,10 +360,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "fda57a44", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2309 days, 13:36:00.720344\n" + ] + } + ], "source": [ "time_diff = now - datetime(year=2020, month=1, day=1)\n", "print(time_diff)" @@ -310,10 +387,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "e1e25736", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.timedelta" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(time_diff)" ] @@ -361,10 +449,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "ed526fbc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array('2020-01-01', dtype='datetime64[D]')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -382,10 +481,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "537895c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',\n", + " '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',\n", + " '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',\n", + " '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',\n", + " 
'2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',\n", + " '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',\n", + " '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',\n", + " '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01'],\n", + " dtype='datetime64[D]')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "date + range(32)" ] @@ -402,10 +520,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "cd7a15e3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.datetime64('2020-01-01T09:00')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "np.datetime64(\"2020-01-01 09:00\")" ] @@ -436,10 +565,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "48f0e9c6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2020-02-16 00:00:00')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -469,10 +609,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "462b26da", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-02-16', '2020-02-17', '2020-02-18', '2020-02-19',\n", + " '2020-02-20', '2020-02-21', '2020-02-22', '2020-02-23',\n", + " '2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "date + pd.to_timedelta(np.arange(12), \"D\")" ] @@ -489,10 +643,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "10e71325", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + 
"DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n", + " '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],\n", + " dtype='datetime64[ns]', freq='D')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.date_range(start=\"2018/1/1\", end=\"2018/1/8\")" ] @@ -507,10 +674,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "291ace2c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',\n", + " '2018-01-01 02:00:00'],\n", + " dtype='datetime64[ns]', freq='h')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.date_range(\"2018-01-01\", periods=3, freq=\"h\")" ] @@ -525,10 +705,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "6703682c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',\n", + " '2017-12-31 18:00:00-08:00'],\n", + " dtype='datetime64[ns, US/Pacific]', freq='h')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "dti = pd.date_range(\"2018-01-01\", periods=3, freq=\"h\").tz_localize(\"UTC\")\n", "dti.tz_convert(\"US/Pacific\")" @@ -544,10 +737,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "dd00df7f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateVacancies (ICT), thousands
02001 MAY568
12001 JUN563
22001 JUL554
32001 AUG554
42001 SEP536
\n", + "
" + ], + "text/plain": [ + " date Vacancies (ICT), thousands\n", + "0 2001 MAY 568\n", + "1 2001 JUN 563\n", + "2 2001 JUL 554\n", + "3 2001 AUG 554\n", + "4 2001 SEP 536" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import requests\n", "\n", @@ -572,10 +834,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "247d9725", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 281 entries, 0 to 280\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 281 non-null object\n", + " 1 Vacancies (ICT), thousands 281 non-null int64 \n", + "dtypes: int64(1), object(1)\n", + "memory usage: 4.5+ KB\n" + ] + } + ], "source": [ "df.info()" ] @@ -590,10 +868,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "b90f8038", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/3535541307.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. 
To ensure parsing is consistent and as-expected, please specify a format.\n", + " df[\"date\"] = pd.to_datetime(df[\"date\"])\n" + ] + }, + { + "data": { + "text/plain": [ + "0 2001-05-01\n", + "1 2001-06-01\n", + "2 2001-07-01\n", + "3 2001-08-01\n", + "4 2001-09-01\n", + "Name: date, dtype: datetime64[ns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"date\"] = pd.to_datetime(df[\"date\"])\n", "df[\"date\"].head()" @@ -611,10 +913,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "05d056ae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 1, '19, 22\n", + "1 1, '19, 23\n", + "Name: date, dtype: object" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "small_df = pd.DataFrame({\"date\": [\"1, '19, 22\", \"1, '19, 23\"], \"values\": [\"1\", \"2\"]})\n", "small_df[\"date\"]" @@ -630,10 +945,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "514c9052", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 2019-01-22\n", + "1 2019-01-23\n", + "Name: date, dtype: datetime64[ns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.to_datetime(small_df[\"date\"], format=\"%m, '%y, %d\")" ] @@ -650,10 +978,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "ac3addbc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateVacancies (ICT), thousands
02001-05-31568
12001-06-30563
22001-07-31554
32001-08-31554
42001-09-30536
\n", + "
" + ], + "text/plain": [ + " date Vacancies (ICT), thousands\n", + "0 2001-05-31 568\n", + "1 2001-06-30 563\n", + "2 2001-07-31 554\n", + "3 2001-08-31 554\n", + "4 2001-09-30 536" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"date\"] = df[\"date\"] + pd.offsets.MonthEnd()\n", "df.head()" @@ -679,10 +1076,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "a6c3d2d9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using `dt.day_name()`\n", + "0 Thursday\n", + "1 Saturday\n", + "2 Tuesday\n", + "3 Friday\n", + "4 Sunday\n", + "Name: date, dtype: object\n", + "Using `dt.isocalendar()`\n", + " year week day\n", + "0 2001 22 4\n", + "1 2001 26 6\n", + "2 2001 31 2\n", + "3 2001 35 5\n", + "4 2001 39 7\n", + "Using `dt.month`\n", + "0 5\n", + "1 6\n", + "2 7\n", + "3 8\n", + "4 9\n", + "Name: date, dtype: int32\n" + ] + } + ], "source": [ "print(\"Using `dt.day_name()`\")\n", "print(df[\"date\"].dt.day_name().head())\n", @@ -704,10 +1129,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "e0a4f68d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-05-31568
2001-06-30563
2001-07-31554
2001-08-31554
2001-09-30536
\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-05-31 568\n", + "2001-06-30 563\n", + "2001-07-31 554\n", + "2001-08-31 554\n", + "2001-09-30 536" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.set_index(\"date\")\n", "df.head()" @@ -723,10 +1216,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "acf1ae60", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',\n", + " '2001-09-30'],\n", + " dtype='datetime64[ns]', name='date', freq=None)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.index[:5]" ] @@ -741,10 +1247,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "9146c99d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/2067773505.py:1: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", + " df = df.asfreq(\"M\")\n" + ] + }, + { + "data": { + "text/plain": [ + "DatetimeIndex(['2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',\n", + " '2001-09-30'],\n", + " dtype='datetime64[ns]', name='date', freq='ME')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = df.asfreq(\"M\")\n", "df.index[:5]" @@ -794,10 +1321,1352 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "b4c5f841", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:36:02.756256\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "df.plot();" ] @@ -818,10 +2687,181 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "e56ba5c4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/311401334.py:1: FutureWarning: 'A' is deprecated and will be removed in a future version, please use 'YE' instead.\n", + " df.resample(\"A\").mean()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-12-31540.625000
2002-12-31517.500000
2003-12-31504.166667
2004-12-31551.916667
2005-12-31544.666667
2006-12-31529.500000
2007-12-31576.333333
2008-12-31544.583333
2009-12-31402.750000
2010-12-31424.166667
2011-12-31413.250000
2012-12-31423.916667
2013-12-31480.250000
2014-12-31592.416667
2015-12-31655.166667
2016-12-31671.250000
2017-12-31704.750000
2018-12-31742.666667
2019-12-31734.166667
2020-12-31487.500000
2021-12-31843.416667
2022-12-311092.083333
2023-12-31894.500000
2024-12-31767.888889
\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-12-31 540.625000\n", + "2002-12-31 517.500000\n", + "2003-12-31 504.166667\n", + "2004-12-31 551.916667\n", + "2005-12-31 544.666667\n", + "2006-12-31 529.500000\n", + "2007-12-31 576.333333\n", + "2008-12-31 544.583333\n", + "2009-12-31 402.750000\n", + "2010-12-31 424.166667\n", + "2011-12-31 413.250000\n", + "2012-12-31 423.916667\n", + "2013-12-31 480.250000\n", + "2014-12-31 592.416667\n", + "2015-12-31 655.166667\n", + "2016-12-31 671.250000\n", + "2017-12-31 704.750000\n", + "2018-12-31 742.666667\n", + "2019-12-31 734.166667\n", + "2020-12-31 487.500000\n", + "2021-12-31 843.416667\n", + "2022-12-31 1092.083333\n", + "2023-12-31 894.500000\n", + "2024-12-31 767.888889" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.resample(\"A\").mean()" ] @@ -836,10 +2876,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "fbbbcdff", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
meanstd
date
2001-12-31540.62500022.398581
2006-12-31529.55000020.434621
2011-12-31472.21666777.919796
2016-12-31564.60000099.829210
2021-12-31702.500000164.019480
\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands \n", + " mean std\n", + "date \n", + "2001-12-31 540.625000 22.398581\n", + "2006-12-31 529.550000 20.434621\n", + "2011-12-31 472.216667 77.919796\n", + "2016-12-31 564.600000 99.829210\n", + "2021-12-31 702.500000 164.019480" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.resample(\"5YE\").agg([\"mean\", \"std\"]).head()" ] @@ -854,10 +2978,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "9a48a45f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-05-31568.0
2001-06-01NaN
2001-06-02NaN
2001-06-03NaN
2001-06-04NaN
......
2024-09-26NaN
2024-09-27NaN
2024-09-28NaN
2024-09-29NaN
2024-09-30727.0
\n", + "

8524 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-05-31 568.0\n", + "2001-06-01 NaN\n", + "2001-06-02 NaN\n", + "2001-06-03 NaN\n", + "2001-06-04 NaN\n", + "... ...\n", + "2024-09-26 NaN\n", + "2024-09-27 NaN\n", + "2024-09-28 NaN\n", + "2024-09-29 NaN\n", + "2024-09-30 727.0\n", + "\n", + "[8524 rows x 1 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.resample(\"D\").asfreq()" ] @@ -872,10 +3097,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "d3ac1789", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-05-31568.000000
2001-06-01567.833333
2001-06-02567.666667
2001-06-03567.500000
2001-06-04NaN
2001-06-05NaN
\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-05-31 568.000000\n", + "2001-06-01 567.833333\n", + "2001-06-02 567.666667\n", + "2001-06-03 567.500000\n", + "2001-06-04 NaN\n", + "2001-06-05 NaN" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.resample(\"D\").interpolate(method=\"linear\", limit_direction=\"forward\", limit=3)[:6]" ] @@ -890,10 +3188,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "51647c56", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[*********************100%***********************] 1 of 1 completed" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "# Get stock market data\n", "import yfinance as yf\n", @@ -905,20 +3219,1640 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "ab1efae3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
'color'
'#bc80bd'
'#fb8072'
'#b3de69'
'#fdb462'
'#fccde5'
'#8dd3c7'
'#ffed6f'
'#bebada'
'#80b1d3'
'#ccebc5'
'#d9d9d9'
" + ], + "text/plain": [ + "cycler('color', ['#bc80bd', '#fb8072', '#b3de69', '#fdb462', '#fccde5', '#8dd3c7', '#ffed6f', '#bebada', '#80b1d3', '#ccebc5', '#d9d9d9'])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "plt.rcParams[\"axes.prop_cycle\"]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "fa0c9973", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:36:08.720742\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from itertools import cycle\n", "\n", @@ -957,10 +4891,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "1ddc4fb2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-05-31NaN
2001-06-30565.5
2001-07-31558.5
2001-08-31554.0
2001-09-30545.0
......
2024-05-31776.5
2024-06-30760.0
2024-07-31748.0
2024-08-31737.0
2024-09-30729.5
\n", + "

281 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-05-31 NaN\n", + "2001-06-30 565.5\n", + "2001-07-31 558.5\n", + "2001-08-31 554.0\n", + "2001-09-30 545.0\n", + "... ...\n", + "2024-05-31 776.5\n", + "2024-06-30 760.0\n", + "2024-07-31 748.0\n", + "2024-08-31 737.0\n", + "2024-09-30 729.5\n", + "\n", + "[281 rows x 1 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.rolling(2).mean()" ] @@ -983,10 +5018,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "0ea9c8ce", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousands
date
2001-05-31568.000000
2001-06-30565.222222
2001-07-31560.622951
2001-08-31558.379404
2001-09-30551.722037
......
2024-05-31813.183347
2024-06-30801.346677
2024-07-31789.477342
2024-08-31777.981873
2024-09-30767.785499
\n", + "

281 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands\n", + "date \n", + "2001-05-31 568.000000\n", + "2001-06-30 565.222222\n", + "2001-07-31 560.622951\n", + "2001-08-31 558.379404\n", + "2001-09-30 551.722037\n", + "... ...\n", + "2024-05-31 813.183347\n", + "2024-06-30 801.346677\n", + "2024-07-31 789.477342\n", + "2024-08-31 777.981873\n", + "2024-09-30 767.785499\n", + "\n", + "[281 rows x 1 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.ewm(alpha=0.2).mean()" ] @@ -1001,10 +5137,3078 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "0af7b5e6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:36:08.899458\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig, ax = plt.subplots()\n", "roll_num = 28\n", @@ -1031,10 +8235,2366 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "134199ae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:36:08.991604\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
"\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "roll = xf[\"Close\"].rolling(50, center=True)\n", "\n", @@ -1057,10 +10617,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "3078fbb4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Vacancies (ICT), thousandslead (12 months)lag (3 months)
date
2001-05-31568518.0NaN
2001-06-30563514.0NaN
2001-07-31554517.0NaN
2001-08-31554517.0568.0
2001-09-30536519.0563.0
\n", + "
" + ], + "text/plain": [ + " Vacancies (ICT), thousands lead (12 months) lag (3 months)\n", + "date \n", + "2001-05-31 568 518.0 NaN\n", + "2001-06-30 563 514.0 NaN\n", + "2001-07-31 554 517.0 NaN\n", + "2001-08-31 554 517.0 568.0\n", + "2001-09-30 536 519.0 563.0" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "lead = 12\n", "lag = 3\n", @@ -1072,10 +10714,1831 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "18b9afb3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:36:09.071401\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "df.iloc[100:300, :].plot();" ] @@ -1103,7 +12566,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/exploratory-data-analysis.ipynb b/exploratory-data-analysis.ipynb index 2bf323b..b08137e 100644 --- a/exploratory-data-analysis.ipynb +++ b/exploratory-data-analysis.ipynb @@ -28,10 +28,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "a3377aa6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import pandas as pd\n", "from lets_plot import *\n", @@ -44,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "51a55374", "metadata": { "tags": [ @@ -102,10 +143,127 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "069caa7c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
00.23IdealESI261.555.03263.953.982.43
10.21PremiumESI159.861.03263.893.842.31
20.23GoodEVS156.965.03274.054.072.31
30.29PremiumIVS262.458.03344.204.232.63
40.31GoodJSI263.358.03354.344.352.75
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", + "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", + "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", + "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", + "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds = pd.read_csv(\n", " \"https://github.com/mwaskom/seaborn-data/raw/master/diamonds.csv\"\n", @@ -131,10 +289,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "97900f58", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(diamonds, aes(x=\"carat\")) + geom_histogram(binwidth=0.5))" ] @@ -172,10 +455,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "20d75550", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "smaller_diamonds = diamonds.query(\"carat < 3\").copy()\n", "\n", @@ -225,10 +633,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "d9d7e995", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(diamonds, aes(x=\"y\")) + geom_histogram(binwidth=0.5))" ] @@ -244,10 +777,140 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "ea8f8bf3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(diamonds, aes(x=\"y\"))\n", @@ -269,10 +932,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "e81ffb55", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyzprice
119630.000.00.005139
159510.000.00.006381
240678.0958.98.0612210
245200.000.00.0012800
262430.000.00.0015686
274290.000.00.0018034
491895.1531.85.122075
495560.000.00.002130
495570.000.00.002130
\n", + "
" + ], + "text/plain": [ + " x y z price\n", + "11963 0.00 0.0 0.00 5139\n", + "15951 0.00 0.0 0.00 6381\n", + "24067 8.09 58.9 8.06 12210\n", + "24520 0.00 0.0 0.00 12800\n", + "26243 0.00 0.0 0.00 15686\n", + "27429 0.00 0.0 0.00 18034\n", + "49189 5.15 31.8 5.12 2075\n", + "49556 0.00 0.0 0.00 2130\n", + "49557 0.00 0.0 0.00 2130" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "unusual = diamonds.query(\"y < 3 or y > 20\").loc[:, [\"x\", \"y\", \"z\", \"price\"]]\n", "unusual" @@ -344,7 +1120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "ecf345a7", "metadata": {}, "outputs": [], @@ -364,10 +1140,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "15a43255", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(diamonds2, aes(x=\"x\", y=\"y\")) + geom_point())" ] @@ -385,10 +1286,195 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "0a4ea922", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
0201311517.05152.0830.081911.0UA1545N14228EWRIAH227.014005152013-01-01T10:00:00Z
1201311533.05294.0850.083020.0UA1714N24211LGAIAH227.014165292013-01-01T10:00:00Z
2201311542.05402.0923.085033.0AA1141N619AAJFKMIA160.010895402013-01-01T10:00:00Z
3201311544.0545-1.01004.01022-18.0B6725N804JBJFKBQN183.015765452013-01-01T10:00:00Z
4201311554.0600-6.0812.0837-25.0DL461N668DNLGAATL116.0762602013-01-01T11:00:00Z
\n", + "
" + ], + "text/plain": [ + " year month day dep_time sched_dep_time dep_delay arr_time \\\n", + "0 2013 1 1 517.0 515 2.0 830.0 \n", + "1 2013 1 1 533.0 529 4.0 850.0 \n", + "2 2013 1 1 542.0 540 2.0 923.0 \n", + "3 2013 1 1 544.0 545 -1.0 1004.0 \n", + "4 2013 1 1 554.0 600 -6.0 812.0 \n", + "\n", + " sched_arr_time arr_delay carrier flight tailnum origin dest air_time \\\n", + "0 819 11.0 UA 1545 N14228 EWR IAH 227.0 \n", + "1 830 20.0 UA 1714 N24211 LGA IAH 227.0 \n", + "2 850 33.0 AA 1141 N619AA JFK MIA 160.0 \n", + "3 1022 -18.0 B6 725 N804JB JFK BQN 183.0 \n", + "4 837 -25.0 DL 461 N668DN LGA ATL 116.0 \n", + "\n", + " distance hour minute time_hour \n", + "0 1400 5 15 2013-01-01T10:00:00Z \n", + "1 1416 5 29 2013-01-01T10:00:00Z \n", + "2 1089 5 40 2013-01-01T10:00:00Z \n", + "3 1576 5 45 2013-01-01T10:00:00Z \n", + "4 762 6 0 2013-01-01T11:00:00Z " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "url = \"https://raw.githubusercontent.com/byuidatascience/data4python4ds/master/data-raw/flights/flights.csv\"\n", "flights = pd.read_csv(url)\n", @@ -397,10 +1483,164 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "6849f4d9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights2 = flights.assign(\n", " cancelled=lambda x: pd.isna(x[\"dep_time\"]),\n", @@ -452,10 +1692,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "e1719d8f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(diamonds, aes(x=\"price\"))\n", @@ -476,10 +1844,140 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "9388e24b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(diamonds, aes(x=\"price\"))\n", @@ -499,10 +1997,163 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "a3f333a6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(diamonds, aes(x=\"cut\", y=\"price\")) + geom_boxplot())" ] @@ -521,10 +2172,150 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "6949db81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mpg = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv\", index_col=0\n", @@ -544,10 +2335,168 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "a5b1ed09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg) + geom_boxplot(aes(as_discrete(\"class\", order_by=\"..middle..\"), \"hwy\")))" ] @@ -562,10 +2511,172 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "920a4268", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg)\n", @@ -608,7 +2719,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "68d330d2", "metadata": {}, "outputs": [], @@ -630,10 +2741,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "e858cd22", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(ct_cut_color, aes(x=\"color\", y=\"cut\")) + geom_tile(aes(fill=\"value\")))" ] @@ -667,10 +2880,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "2afe2535", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(smaller_diamonds, aes(x=\"carat\", y=\"price\")) + geom_point())" ] @@ -688,10 +3026,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "b55707a9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(smaller_diamonds, aes(x=\"carat\", y=\"price\")) + geom_point(alpha=1 / 20))" ] @@ -720,10 +3184,152 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "13079065", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratdepthtablepricexyz
count53940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.000000
mean0.79794061.74940557.4571843932.7997225.7311575.7345263.538734
std0.4740111.4326212.2344913989.4397381.1217611.1421350.705699
min0.20000043.00000043.000000326.0000000.0000000.0000000.000000
25%0.40000061.00000056.000000950.0000004.7100004.7200002.910000
50%0.70000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.04000062.50000059.0000005324.2500006.5400006.5400004.040000
max5.01000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", + "
" + ], + "text/plain": [ + " carat depth table price x \\\n", + "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n", + "mean 0.797940 61.749405 57.457184 3932.799722 5.731157 \n", + "std 0.474011 1.432621 2.234491 3989.439738 1.121761 \n", + "min 0.200000 43.000000 43.000000 326.000000 0.000000 \n", + "25% 0.400000 61.000000 56.000000 950.000000 4.710000 \n", + "50% 0.700000 61.800000 57.000000 2401.000000 5.700000 \n", + "75% 1.040000 62.500000 59.000000 5324.250000 6.540000 \n", + "max 5.010000 79.000000 95.000000 18823.000000 10.740000 \n", + "\n", + " y z \n", + "count 53940.000000 53940.000000 \n", + "mean 5.734526 3.538734 \n", + "std 1.142135 0.705699 \n", + "min 0.000000 0.000000 \n", + "25% 4.720000 2.910000 \n", + "50% 5.710000 3.530000 \n", + "75% 6.540000 4.040000 \n", + "max 58.900000 31.800000 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds.describe()" ] @@ -738,10 +3344,142 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "b4144440", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratdepthtablepricexyz
count53940.053940.053940.053940.053940.053940.053940.0
mean0.861.757.53932.85.75.73.5
std0.51.42.23989.41.11.10.7
min0.243.043.0326.00.00.00.0
25%0.461.056.0950.04.74.72.9
50%0.761.857.02401.05.75.73.5
75%1.062.559.05324.26.56.54.0
max5.079.095.018823.010.758.931.8
\n", + "
" + ], + "text/plain": [ + " carat depth table price x y z\n", + "count 53940.0 53940.0 53940.0 53940.0 53940.0 53940.0 53940.0\n", + "mean 0.8 61.7 57.5 3932.8 5.7 5.7 3.5\n", + "std 0.5 1.4 2.2 3989.4 1.1 1.1 0.7\n", + "min 0.2 43.0 43.0 326.0 0.0 0.0 0.0\n", + "25% 0.4 61.0 56.0 950.0 4.7 4.7 2.9\n", + "50% 0.7 61.8 57.0 2401.0 5.7 5.7 3.5\n", + "75% 1.0 62.5 59.0 5324.2 6.5 6.5 4.0\n", + "max 5.0 79.0 95.0 18823.0 10.7 58.9 31.8" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sum_table = diamonds.describe().round(1)\n", "sum_table" @@ -757,10 +3495,139 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "cd2f8772", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
carat53940.00.80.50.20.40.71.05.0
depth53940.061.71.443.061.061.862.579.0
table53940.057.52.243.056.057.059.095.0
price53940.03932.83989.4326.0950.02401.05324.218823.0
x53940.05.71.10.04.75.76.510.7
y53940.05.71.10.04.75.76.558.9
z53940.03.50.70.02.93.54.031.8
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "carat 53940.0 0.8 0.5 0.2 0.4 0.7 1.0 5.0\n", + "depth 53940.0 61.7 1.4 43.0 61.0 61.8 62.5 79.0\n", + "table 53940.0 57.5 2.2 43.0 56.0 57.0 59.0 95.0\n", + "price 53940.0 3932.8 3989.4 326.0 950.0 2401.0 5324.2 18823.0\n", + "x 53940.0 5.7 1.1 0.0 4.7 5.7 6.5 10.7\n", + "y 53940.0 5.7 1.1 0.0 4.7 5.7 6.5 58.9\n", + "z 53940.0 3.5 0.7 0.0 2.9 3.5 4.0 31.8" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sum_table = sum_table.T\n", "sum_table" @@ -780,10 +3647,110 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "5afcacbc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_58745/1534868389.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " diamonds.groupby([\"cut\", \"color\"])[\"price\"]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sale price (thousands)
colorDEFGHIJ
cut       
Fair4.293.683.834.245.144.694.98
Good3.413.423.504.124.285.084.57
Very Good3.473.213.783.874.545.265.10
Premium3.633.544.324.505.225.956.29
Ideal2.632.603.373.723.894.454.92
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " diamonds.groupby([\"cut\", \"color\"])[\"price\"]\n", @@ -810,10 +3777,203 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "21e65189", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"]).style.background_gradient(cmap=\"plasma\")" ] @@ -836,10 +3996,223 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "bb0162ba", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"])\n", @@ -858,10 +4231,106 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "5d19072c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"]).style.highlight_max().format(\"{:.0f}\")" ] @@ -886,10 +4355,165 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "b479d5b1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pickupdropoffpassengersdistancefaretiptollstotalcolorpaymentpickup_zonedropoff_zonepickup_boroughdropoff_borough
02019-03-23 20:21:092019-03-23 20:27:2411.607.02.150.012.95yellowcredit cardLenox Hill WestUN/Turtle Bay SouthManhattanManhattan
12019-03-04 16:11:552019-03-04 16:19:0010.795.00.000.09.30yellowcashUpper West Side SouthUpper West Side SouthManhattanManhattan
22019-03-27 17:53:012019-03-27 18:00:2511.377.52.360.014.16yellowcredit cardAlphabet CityWest VillageManhattanManhattan
32019-03-10 01:23:592019-03-10 01:49:5117.7027.06.150.036.95yellowcredit cardHudson SqYorkville WestManhattanManhattan
42019-03-30 13:27:422019-03-30 13:37:1432.169.01.100.013.40yellowcredit cardMidtown EastYorkville WestManhattanManhattan
\n", + "
" + ], + "text/plain": [ + " pickup dropoff passengers distance fare tip \\\n", + "0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 \n", + "1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 \n", + "2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 \n", + "3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 6.15 \n", + "4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 1.10 \n", + "\n", + " tolls total color payment pickup_zone \\\n", + "0 0.0 12.95 yellow credit card Lenox Hill West \n", + "1 0.0 9.30 yellow cash Upper West Side South \n", + "2 0.0 14.16 yellow credit card Alphabet City \n", + "3 0.0 36.95 yellow credit card Hudson Sq \n", + "4 0.0 13.40 yellow credit card Midtown East \n", + "\n", + " dropoff_zone pickup_borough dropoff_borough \n", + "0 UN/Turtle Bay South Manhattan Manhattan \n", + "1 Upper West Side South Manhattan Manhattan \n", + "2 West Village Manhattan Manhattan \n", + "3 Yorkville West Manhattan Manhattan \n", + "4 Yorkville West Manhattan Manhattan " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "taxis = pd.read_csv(\"https://github.com/mwaskom/seaborn-data/raw/master/taxis.csv\")\n", "# turn the pickup time column into a datetime\n", @@ -912,20 +4536,1289 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "ee971c9c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 6433 entries, 0 to 6432\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 pickup 6433 non-null datetime64[ns]\n", + " 1 dropoff 6433 non-null datetime64[ns]\n", + " 2 passengers 6433 non-null int64 \n", + " 3 distance 6433 non-null float64 \n", + " 4 fare 6433 non-null float64 \n", + " 5 tip 6433 non-null float64 \n", + " 6 tolls 6433 non-null float64 \n", + " 7 total 6433 non-null float64 
\n", + " 8 color 6433 non-null category \n", + " 9 payment 6389 non-null category \n", + " 10 pickup_zone 6407 non-null string \n", + " 11 dropoff_zone 6388 non-null string \n", + " 12 pickup_borough 6407 non-null category \n", + " 13 dropoff_borough 6388 non-null category \n", + "dtypes: category(4), datetime64[ns](2), float64(5), int64(1), string(2)\n", + "memory usage: 528.5 KB\n" + ] + } + ], "source": [ "taxis.info()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "2015b1dc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.401430\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "(\n", " taxis.set_index(\"pickup\")\n", @@ -949,10 +5842,1476 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "51e86185", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.501760\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "(\n", " taxis.set_index(\"pickup\")\n", @@ -985,10 +7344,888 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "79ceca92", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.567782\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "taxis.value_counts(\"payment\").sort_index().plot.bar(title=\"Counts\", rot=0);" ] @@ -1003,10 +8240,1011 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "5efc5817", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.648471\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "taxis[\"tip\"].plot.hist(bins=30, title=\"Tip\");" ] @@ -1021,10 +9259,2055 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "0b735d15", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.718910\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "(taxis[[\"fare\", \"tolls\", \"tip\"]].plot.box());" ] @@ -1039,10 +11322,7210 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "66adada2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:34:43.811014\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "taxis.plot.scatter(x=\"fare\", y=\"tip\", alpha=0.7, ylim=(0, None));" ] @@ -1071,10 +18554,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "32796b5f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n",
+       "│          Data Summary                Data Types               Categories                                        │\n",
+       "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ ┏━━━━━━━━━━━━━━━━━━━━━━━┓                                │\n",
+       "│ ┃ Dataframe          Values ┃ ┃ Column Type  Count ┃ ┃ Categorical Variables ┃                                │\n",
+       "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ ┡━━━━━━━━━━━━━━━━━━━━━━━┩                                │\n",
+       "│ │ Number of rows    │ 6433   │ │ float64     │ 5     │ │ color                 │                                │\n",
+       "│ │ Number of columns │ 14     │ │ category    │ 4     │ │ payment               │                                │\n",
+       "│ └───────────────────┴────────┘ │ datetime64  │ 2     │ │ pickup_borough        │                                │\n",
+       "│                                │ string      │ 2     │ │ dropoff_borough       │                                │\n",
+       "│                                │ int64       │ 1     │ └───────────────────────┘                                │\n",
+       "│                                └─────────────┴───────┘                                                          │\n",
+       "│                                                     number                                                      │\n",
+       "│ ┏━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓  │\n",
+       "│ ┃ column          NA    NA %    mean       sd       p0     p25     p50      p75    p100    hist    ┃  │\n",
+       "│ ┡━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩  │\n",
+       "│ │ passengers       0     0    1.539  1.204    0     1      1    2     6 █▁  ▁  │  │\n",
+       "│ │ distance         0     0    3.025  3.828    0  0.98   1.64 3.21  36.7  █▁    │  │\n",
+       "│ │ fare             0     0    13.09  11.55    1   6.5    9.5   15   150  █▁    │  │\n",
+       "│ │ tip              0     0    1.979  2.449    0     0    1.7  2.8  33.2 │  │\n",
+       "│ │ tolls            0     0   0.3253  1.415    0     0      0    0 24.02 │  │\n",
+       "│ │ total            0     0    18.52  13.82  1.3  10.8  14.16 20.3 174.8  █▁    │  │\n",
+       "│ └────────────────┴──────┴────────┴───────────┴─────────┴───────┴────────┴─────────┴───────┴────────┴─────────┘  │\n",
+       "│                                                    category                                                     │\n",
+       "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓  │\n",
+       "│ ┃ column                         NA      NA %                                ordered         unique      ┃  │\n",
+       "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩  │\n",
+       "│ │ color                             0                                 0False                   2 │  │\n",
+       "│ │ payment                          44                0.6839732628633608False                   3 │  │\n",
+       "│ │ pickup_borough                   26                0.4041660189647132False                   5 │  │\n",
+       "│ │ dropoff_borough                  45                 0.699518109746619False                   6 │  │\n",
+       "│ └───────────────────────────────┴────────┴────────────────────────────────────┴────────────────┴─────────────┘  │\n",
+       "│                                                    datetime                                                     │\n",
+       "│ ┏━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓  │\n",
+       "│ ┃ column       NA    NA %     first                         last                          frequency     ┃  │\n",
+       "│ ┡━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩  │\n",
+       "│ │ pickup        0      0    2019-02-28 23:29:03         2019-03-31 23:43:45     None          │  │\n",
+       "│ │ dropoff       0      0    2019-02-28 23:32:35         2019-04-01 00:13:58     None          │  │\n",
+       "│ └─────────────┴──────┴─────────┴──────────────────────────────┴──────────────────────────────┴───────────────┘  │\n",
+       "│                                                     string                                                      │\n",
+       "│ ┏━━━━━━━━━━━┳━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓  │\n",
+       "│ ┃                                                                     chars     words per  total    ┃  │\n",
+       "│ ┃ column     NA  NA %       shortest  longest    min       max        per row   row        words    ┃  │\n",
+       "│ ┡━━━━━━━━━━━╇━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩  │\n",
+       "│ │ pickup_zo260.4041660SoHo    RiverdaleAllertonYorkville    16.2      2.5   15791 │  │\n",
+       "│ │ ne        │    │ 189647132 │          │ /North   /Pelham West      │          │           │          │  │\n",
+       "│ │           │    │           │          │ RiverdaleGardens  │           │          │           │          │  │\n",
+       "│ │           │    │           │          │ /Fieldsto │          │           │          │           │          │  │\n",
+       "│ │           │    │           │          │ n         │          │           │          │           │          │  │\n",
+       "│ │ dropoff_z450.6995181SoHo    RiverdaleAllertonYorkville    16.3      2.5   15851 │  │\n",
+       "│ │ one       │    │  09746619 │          │ /North   /Pelham West      │          │           │          │  │\n",
+       "│ │           │    │           │          │ RiverdaleGardens  │           │          │           │          │  │\n",
+       "│ │           │    │           │          │ /Fieldsto │          │           │          │           │          │  │\n",
+       "│ │           │    │           │          │ n         │          │           │          │           │          │  │\n",
+       "│ └───────────┴────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┘  │\n",
+       "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", + "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m \u001b[3m Categories \u001b[0m │\n", + "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ ┏━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", + "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mDataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mCategorical Variables\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", + "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ ┡━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", + "│ │ Number of rows │ 6433 │ │ float64 │ 5 │ │ color │ │\n", + "│ │ Number of columns │ 14 │ │ category │ 4 │ │ payment │ │\n", + "│ └───────────────────┴────────┘ │ datetime64 │ 2 │ │ pickup_borough │ │\n", + "│ │ string │ 2 │ │ dropoff_borough │ │\n", + "│ │ int64 │ 1 │ └───────────────────────┘ │\n", + "│ └─────────────┴───────┘ │\n", + "│ \u001b[3m number \u001b[0m │\n", + "│ ┏━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", + "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist 
\u001b[0m\u001b[1m \u001b[0m┃ │\n", + "│ ┡━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", + "│ │ \u001b[38;5;141mpassengers \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.539\u001b[0m │ \u001b[36m 1.204\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 6\u001b[0m │ \u001b[32m █▁ ▁ \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mdistance \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.025\u001b[0m │ \u001b[36m 3.828\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0.98\u001b[0m │ \u001b[36m 1.64\u001b[0m │ \u001b[36m 3.21\u001b[0m │ \u001b[36m 36.7\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mfare \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 13.09\u001b[0m │ \u001b[36m 11.55\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 6.5\u001b[0m │ \u001b[36m 9.5\u001b[0m │ \u001b[36m 15\u001b[0m │ \u001b[36m 150\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mtip \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.979\u001b[0m │ \u001b[36m 2.449\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.7\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 33.2\u001b[0m │ \u001b[32m █ \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mtolls \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0.3253\u001b[0m │ \u001b[36m 1.415\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 24.02\u001b[0m │ \u001b[32m █ \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mtotal \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 18.52\u001b[0m │ \u001b[36m 13.82\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 10.8\u001b[0m │ \u001b[36m 14.16\u001b[0m │ \u001b[36m 20.3\u001b[0m │ \u001b[36m 174.8\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", + "│ 
└────────────────┴──────┴────────┴───────────┴─────────┴───────┴────────┴─────────┴───────┴────────┴─────────┘ │\n", + "│ \u001b[3m category \u001b[0m │\n", + "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ │\n", + "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mordered \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1munique \u001b[0m\u001b[1m \u001b[0m┃ │\n", + "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ │\n", + "│ │ \u001b[38;5;141mcolor \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 2\u001b[0m │ │\n", + "│ │ \u001b[38;5;141mpayment \u001b[0m │ \u001b[36m 44\u001b[0m │ \u001b[36m 0.6839732628633608\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 3\u001b[0m │ │\n", + "│ │ \u001b[38;5;141mpickup_borough \u001b[0m │ \u001b[36m 26\u001b[0m │ \u001b[36m 0.4041660189647132\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 5\u001b[0m │ │\n", + "│ │ \u001b[38;5;141mdropoff_borough \u001b[0m │ \u001b[36m 45\u001b[0m │ \u001b[36m 0.699518109746619\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 6\u001b[0m │ │\n", + "│ └───────────────────────────────┴────────┴────────────────────────────────────┴────────────────┴─────────────┘ │\n", + "│ \u001b[3m datetime \u001b[0m │\n", + "│ ┏━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ │\n", + "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfirst \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mlast 
\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfrequency \u001b[0m\u001b[1m \u001b[0m┃ │\n", + "│ ┡━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │\n", + "│ │ \u001b[38;5;141mpickup \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[31m 2019-02-28 23:29:03 \u001b[0m │ \u001b[31m 2019-03-31 23:43:45 \u001b[0m │ \u001b[38;5;141mNone \u001b[0m │ │\n", + "│ │ \u001b[38;5;141mdropoff \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[31m 2019-02-28 23:32:35 \u001b[0m │ \u001b[31m 2019-04-01 00:13:58 \u001b[0m │ \u001b[38;5;141mNone \u001b[0m │ │\n", + "│ └─────────────┴──────┴─────────┴──────────────────────────────┴──────────────────────────────┴───────────────┘ │\n", + "│ \u001b[3m string \u001b[0m │\n", + "│ ┏━━━━━━━━━━━┳━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓ │\n", + "│ ┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mchars \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal \u001b[0m\u001b[1m \u001b[0m┃ │\n", + "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mshortest\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mlongest \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmin \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmax \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mper row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mrow \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords \u001b[0m\u001b[1m \u001b[0m┃ │\n", + "│ 
┡━━━━━━━━━━━╇━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩ │\n", + "│ │ \u001b[38;5;141mpickup_zo\u001b[0m │ \u001b[36m26\u001b[0m │ \u001b[36m0.4041660\u001b[0m │ \u001b[38;5;141mSoHo \u001b[0m │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mAllerton\u001b[0m │ \u001b[38;5;141mYorkville\u001b[0m │ \u001b[36m 16.2\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[36m 15791\u001b[0m │ │\n", + "│ │ \u001b[38;5;141mne \u001b[0m │ │ \u001b[36m189647132\u001b[0m │ │ \u001b[38;5;141m/North \u001b[0m │ \u001b[38;5;141m/Pelham \u001b[0m │ \u001b[38;5;141mWest \u001b[0m │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mGardens \u001b[0m │ │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141m/Fieldsto\u001b[0m │ │ │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141mn \u001b[0m │ │ │ │ │ │ │\n", + "│ │ \u001b[38;5;141mdropoff_z\u001b[0m │ \u001b[36m45\u001b[0m │ \u001b[36m0.6995181\u001b[0m │ \u001b[38;5;141mSoHo \u001b[0m │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mAllerton\u001b[0m │ \u001b[38;5;141mYorkville\u001b[0m │ \u001b[36m 16.3\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[36m 15851\u001b[0m │ │\n", + "│ │ \u001b[38;5;141mone \u001b[0m │ │ \u001b[36m 09746619\u001b[0m │ │ \u001b[38;5;141m/North \u001b[0m │ \u001b[38;5;141m/Pelham \u001b[0m │ \u001b[38;5;141mWest \u001b[0m │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mGardens \u001b[0m │ │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141m/Fieldsto\u001b[0m │ │ │ │ │ │ │\n", + "│ │ │ │ │ │ \u001b[38;5;141mn \u001b[0m │ │ │ │ │ │ │\n", + "│ └───────────┴────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┘ │\n", + "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "skim(taxis)" ] @@ -1119,7 +18724,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/functions.ipynb b/functions.ipynb index 7d55cbc..70c4fd3 100644 --- a/functions.ipynb +++ b/functions.ipynb @@ -48,10 +48,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "0450ad6c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello Ada, and welcome!\n" + ] + } + ], "source": [ "def welcome_message(name):\n", " return f\"Hello {name}, and welcome!\"\n", @@ -73,10 +81,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "dd49bee5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('Hello student', 4.8)\n", + "('Hello Ada', 4.8)\n" + ] + } + ], "source": [ "def score_message(score, name=\"student\"):\n", " \"\"\"This is a doc-string, a string describing a function.\n", @@ -121,10 +138,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "50dfff24", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function score_message in module __main__:\n", + "\n", + "score_message(score, name='student')\n", + " This is a doc-string, a string describing a function.\n", + " Args:\n", + " score (float): Raw score\n", + " name (str): Name of student\n", + " Returns:\n", + " str: A hello message.\n", + " float: A normalised score.\n", + "\n" + ] + } + ], "source": [ "help(score_message)" ] @@ -159,10 +194,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "43d10e40", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 4)
abcd
f64f64f64f64
0.4345330.9990720.3288630.269584
0.00.4799171.00.676026
1.00.00.3837450.381636
0.6584960.4682960.0809160.283739
0.3495880.1646770.6419610.38563
0.3624860.3741330.4753370.024468
0.0426121.1409950.4003250.121255
0.5385220.3190620.5273250.292675
0.5442450.6919370.01.0
0.3524920.2954480.4701810.0
" + ], + "text/plain": [ + "shape: (10, 4)\n", + "┌──────────┬──────────┬──────────┬──────────┐\n", + "│ a ┆ b ┆ c ┆ d │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞══════════╪══════════╪══════════╪══════════╡\n", + "│ 0.434533 ┆ 0.999072 ┆ 0.328863 ┆ 0.269584 │\n", + "│ 0.0 ┆ 0.479917 ┆ 1.0 ┆ 0.676026 │\n", + "│ 1.0 ┆ 0.0 ┆ 0.383745 ┆ 0.381636 │\n", + "│ 0.658496 ┆ 0.468296 ┆ 0.080916 ┆ 0.283739 │\n", + "│ 0.349588 ┆ 0.164677 ┆ 0.641961 ┆ 0.38563 │\n", + "│ 0.362486 ┆ 0.374133 ┆ 0.475337 ┆ 0.024468 │\n", + "│ 0.042612 ┆ 1.140995 ┆ 0.400325 ┆ 0.121255 │\n", + "│ 0.538522 ┆ 0.319062 ┆ 0.527325 ┆ 0.292675 │\n", + "│ 0.544245 ┆ 0.691937 ┆ 0.0 ┆ 1.0 │\n", + "│ 0.352492 ┆ 0.295448 ┆ 0.470181 ┆ 0.0 │\n", + "└──────────┴──────────┴──────────┴──────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "import polars as pl\n", @@ -241,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "406648b6", "metadata": {}, "outputs": [], @@ -275,20 +347,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "74834d90", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3,)
f64
0.0
0.5
1.0
" + ], + "text/plain": [ + "shape: (3,)\n", + "Series: '' [f64]\n", + "[\n", + "\t0.0\n", + "\t0.5\n", + "\t1.0\n", + "]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rescale(pl.Series([-10, 0, 10]))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8a0e643e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5,)
f64
0.0
0.25
0.5
null
1.0
" + ], + "text/plain": [ + "shape: (5,)\n", + "Series: '' [f64]\n", + "[\n", + "\t0.0\n", + "\t0.25\n", + "\t0.5\n", + "\tnull\n", + "\t1.0\n", + "]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rescale(pl.Series([1, 2, 3, None, 5]))" ] @@ -306,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "ce066e4f", "metadata": {}, "outputs": [], @@ -334,10 +462,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "f8b80894", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5,)
f64
0.0
0.0
0.0
NaN
0.0
" + ], + "text/plain": [ + "shape: (5,)\n", + "Series: '' [f64]\n", + "[\n", + "\t0.0\n", + "\t0.0\n", + "\t0.0\n", + "\tNaN\n", + "\t0.0\n", + "]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rescale(pl.Series([1, 2, 3, float(\"inf\"), 5], strict=False))" ] @@ -352,10 +509,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "cd63ddea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5,)
f64
0.0
0.25
0.5
null
1.0
" + ], + "text/plain": [ + "shape: (5,)\n", + "Series: '' [f64]\n", + "[\n", + "\t0.0\n", + "\t0.25\n", + "\t0.5\n", + "\tnull\n", + "\t1.0\n", + "]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def rescale(x):\n", " x = x.cast(pl.Float64).replace(float(\"inf\"), None)\n", @@ -441,10 +627,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "647da443", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello World!\n" + ] + } + ], "source": [ "def var_func():\n", " str_variable = \"Hello World!\"\n", @@ -467,10 +661,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "c5fb049d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "y is inside a function: I'm a global variable\n", + "y is outside a function: I'm a global variable\n" + ] + } + ], "source": [ "y = \"I'm a global variable\"\n", "\n", @@ -506,7 +709,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/introduction.ipynb b/introduction.ipynb index 60bfa83..2a70fd3 100644 --- a/introduction.ipynb +++ b/introduction.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -33,14 +33,116 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "209ef434", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster_0\n", + "\n", + "Understand\n", + "\n", + "\n", + "\n", + "Import\n", + "\n", + "Import\n", + "\n", + "\n", + "\n", + "Clean\n", + "\n", + "Clean\n", + "\n", + "\n", + "\n", + 
"Import->Clean\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Analyse\n", + "\n", + "Analyse\n", + "\n", + "\n", + "\n", + "Clean->Analyse\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Visualise\n", + "\n", + "Visualise\n", + "\n", + "\n", + "\n", + "Visualise->Analyse\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Transform\n", + "\n", + "Transform\n", + "\n", + "\n", + "\n", + "Analyse->Transform\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Communicate\n", + "\n", + "Communicate\n", + "\n", + "\n", + "\n", + "Analyse->Communicate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Transform->Visualise\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# | echo: false\n", "import graphviz\n", @@ -138,14 +240,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "26faf349", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Compiled with Python version: 3.12.13 (main, Mar 3 2026, 12:39:30) [Clang 17.0.0 (clang-1700.6.3.2)]\n" + ] + } + ], "source": [ "# | echo: false\n", "import sys\n", @@ -187,7 +297,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/iteration.ipynb b/iteration.ipynb index fc1452a..74021f3 100644 --- a/iteration.ipynb +++ b/iteration.ipynb @@ -21,12 +21,12 @@ "\n", "One tool for reducing duplication is functions, which reduce duplication by identifying repeated patterns of code and extract them out into independent pieces that can be easily reused and updated. 
Another tool for reducing duplication is *iteration*, which helps you when you need to do the same thing to multiple inputs: repeating the same operation on different columns, or on different datasets.\n", "\n", - "In this chapter you'll learn about iteration in three ways: explicit iteration, using for loops and while loops; iteration via comprehensions (eg list comprehensions); and iteration for **pandas** data frames." + "In this chapter you'll learn about iteration in three ways: explicit iteration, using for loops and while loops; iteration via comprehensions (eg list comprehensions); and iteration for **polars** data frames." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -51,7 +51,7 @@ "source": [ "### Prerequisites\n", "\n", - "This chapter will use the **pandas** data analysis package." + "This chapter will use the **polars** data analysis package." ] }, { @@ -68,10 +68,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a2bbd41c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lovelace\n", + "Smith\n", + "Pigou\n", + "Babbage\n" + ] + } + ], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Pigou\", \"Babbage\"]\n", "\n", @@ -106,10 +117,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "835ebda7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hello\n", + "---end entry---\n" + ] + } + ], "source": [ "for entry in [\"hello\"]:\n", " print(entry)\n", @@ -126,10 +146,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "2a19ac2e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "h\n", + "---end entry---\n", + "e\n", + "---end entry---\n", + "l\n", + "---end entry---\n", + 
"l\n", + "---end entry---\n", + "o\n", + "---end entry---\n" + ] + } + ], "source": [ "for entry in \"hello\":\n", " print(entry)\n", @@ -150,10 +187,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "239e133f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The name in position 0 is Lovelace\n", + "The name in position 1 is Smith\n", + "The name in position 2 is Hopper\n", + "The name in position 3 is Babbage\n" + ] + } + ], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", "\n", @@ -171,10 +219,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "b66c5c53", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The name in position 1 is Lovelace\n", + "The name in position 2 is Smith\n", + "The name in position 3 is Hopper\n", + "The name in position 4 is Babbage\n" + ] + } + ], "source": [ "for i, name in enumerate(name_list, start=1):\n", " print(f\"The name in position {i} is {name}\")" @@ -190,10 +249,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "010239fe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In Paris, the temperature is 28 degrees C today.\n", + "In London, the temperature is 22 degrees C today.\n", + "In Seville, the temperature is 36 degrees C today.\n", + "In Wellesley, the temperature is 29 degrees C today.\n" + ] + } + ], "source": [ "cities_to_temps = {\"Paris\": 28, \"London\": 22, \"Seville\": 36, \"Wellesley\": 29}\n", "\n", @@ -217,10 +287,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8ea3efc5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ada Lovelace\n", + "Adam Smith\n", + "Grace 
Hopper\n", + "Charles Babbage\n" + ] + } + ], "source": [ "first_names = [\"Ada\", \"Adam\", \"Grace\", \"Charles\"]\n", "last_names = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", @@ -257,10 +338,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "7efed381", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[51, 52, 53, 54, 55, 56, 57, 58, 59, 60]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "num_list = range(50, 60)\n", "[1 + num for num in num_list]" @@ -286,10 +378,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "722fda21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39]\n" + ] + } + ], "source": [ "number_list = range(1, 40)\n", "divide_list = [x for x in number_list if x % 3 == 0]\n", @@ -308,10 +408,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "b6e80d6b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Adam Smith', 'leonara smith']\n" + ] + } + ], "source": [ "names_list = [\"Joe Bloggs\", \"Adam Smith\", \"Sandra Noone\", \"leonara smith\"]\n", "smith_list = [x for x in names_list if \"smith\" in x.lower()]\n", @@ -330,10 +438,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f348bfb6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Not Smith!', 'Adam Smith', 'Not Smith!', 'leonara smith']\n" + ] + } + ], "source": [ "names_list = [\"Joe Bloggs\", \"Adam Smith\", \"Sandra Noone\", \"leonara smith\"]\n", "smith_list = [x if \"smith\" in x.lower() else \"Not Smith!\" for x in names_list]\n", @@ -350,10 +466,18 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 13, "id": "74e4fcc7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Ada Lovelace', 'Adam Smith', 'Grace Hopper', 'Charles Babbage']\n" + ] + } + ], "source": [ "first_names = [\"Ada\", \"Adam\", \"Grace\", \"Charles\"]\n", "last_names = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", @@ -371,10 +495,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "2c82cf1f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['Ada Lovelace', 'Adam Lovelace'], ['Ada Smith', 'Adam Smith']]\n" + ] + } + ], "source": [ "first_names = [\"Ada\", \"Adam\"]\n", "last_names = [\"Lovelace\", \"Smith\"]\n", @@ -394,10 +526,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "acef16ae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Ada': 'Lovelace', 'Adam': 'Smith'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "{key: value for key, value in zip(first_names, last_names)}" ] @@ -420,10 +563,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "3e47ba02", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "9\n", + "8\n", + "7\n", + "6\n", + "5\n", + "4\n", + "3\n", + "2\n", + "1\n", + "execution complete\n" + ] + } + ], "source": [ "n = 10\n", "while n > 0:\n", @@ -452,11 +613,13 @@ "id": "5ec0643e", "metadata": {}, "source": [ - "## Iteration with **pandas** Data Frames\n", + "## Iteration with **polars** Data Frames\n", "\n", - "For loops, while loops, and comprehensions all work on **pandas** data frames, but they are generally a bad way to get things done because they are slow and not memory efficient. 
To aid cases where iteration is needed, **pandas** has built-in methods for iteration depending on what you need to do.\n", + "For loops, while loops, and comprehensions can be used with data frames, but in **Polars**, they are even more strongly discouraged than in pandas. **Polars** is built on a columnar, vectorized, and expression-based engine, so row-by-row iteration breaks performance and prevents optimizations.\n", "\n", - "These built-in methods for iteration have an overlap with what we've seen in @sec-data-transform but we'll dig a little deeper into `assign()`/assignment operations, `apply()`, and `eval()` here.\n", + "The expression-based approach has an overlap with what we've seen in @sec-data-transform but we'll dig a little deeper into `with_columns()`, `select()`, and column expressions here.\n", + "\n", + "Instead of iterating, **Polars** encourages you to use expressions and lazy evaluation, which are much faster and more memory efficient.\n", "\n" ] }, @@ -474,15 +637,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "b3116809", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 4)
abcd
f64f64f64f64
-0.405242-1.3517780.4098180.975997
1.425904-1.610260.456314-0.371164
-0.555008-0.13544-0.3194540.612513
1.842544-1.7097670.0183770.304892
0.926347-1.3718410.3992051.433585
0.649944-0.011038-0.904321-0.5888
" + ], + "text/plain": [ + "shape: (6, 4)\n", + "┌───────────┬───────────┬───────────┬───────────┐\n", + "│ a ┆ b ┆ c ┆ d │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ -0.405242 ┆ -1.351778 ┆ 0.409818 ┆ 0.975997 │\n", + "│ 1.425904 ┆ -1.61026 ┆ 0.456314 ┆ -0.371164 │\n", + "│ -0.555008 ┆ -0.13544 ┆ -0.319454 ┆ 0.612513 │\n", + "│ 1.842544 ┆ -1.709767 ┆ 0.018377 ┆ 0.304892 │\n", + "│ 0.926347 ┆ -1.371841 ┆ 0.399205 ┆ 1.433585 │\n", + "│ 0.649944 ┆ -0.011038 ┆ -0.904321 ┆ -0.5888 │\n", + "└───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", - "import pandas as pd\n", + "import polars as pl\n", "\n", - "df = pd.DataFrame(np.random.normal(size=(6, 4)), columns=[\"a\", \"b\", \"c\", \"d\"])\n", + "df = pl.DataFrame(np.random.normal(size=(6, 4)), schema=[\"a\", \"b\", \"c\", \"d\"])\n", "df" ] }, @@ -491,27 +687,88 @@ "id": "0e826ad5", "metadata": {}, "source": [ - "**pandas** has many built-in functions that are already built to iterate over rows and columns; for example, to compute the median of rows or columns respectively:" + "**polars** has built-in expressions designed to operate over columns and rows. For example, to compute the median:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "ac909c2f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 4)
abcd
f64f64f64f64
0.788146-1.361810.2087910.458703
" + ], + "text/plain": [ + "shape: (1, 4)\n", + "┌──────────┬──────────┬──────────┬──────────┐\n", + "│ a ┆ b ┆ c ┆ d │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞══════════╪══════════╪══════════╪══════════╡\n", + "│ 0.788146 ┆ -1.36181 ┆ 0.208791 ┆ 0.458703 │\n", + "└──────────┴──────────┴──────────┴──────────┘" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.median(axis=\"rows\") # can also use axis=1" + "df.select(pl.all().median())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "96426002", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 1)
row_median
f64
0.002288
0.042575
-0.227447
0.161635
0.662776
-0.299919
" + ], + "text/plain": [ + "shape: (6, 1)\n", + "┌────────────┐\n", + "│ row_median │\n", + "│ --- │\n", + "│ f64 │\n", + "╞════════════╡\n", + "│ 0.002288 │\n", + "│ 0.042575 │\n", + "│ -0.227447 │\n", + "│ 0.161635 │\n", + "│ 0.662776 │\n", + "│ -0.299919 │\n", + "└────────────┘" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.median(axis=\"columns\") # can also use axis=0" + "df.select(pl.concat_list(pl.all()).list.median().alias(\"row_median\"))" ] }, { @@ -524,10 +781,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "060b6815", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "291 μs ± 1.95 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" + ] + } + ], "source": [ "# Do not do this!\n", "\n", @@ -535,7 +800,7 @@ "def add_five_slow(df):\n", " for i in range(len(df)):\n", " for j in range(len(df.columns)):\n", - " df.iloc[i, j] = df.iloc[i, j] + 5\n", + " df[i, j] = df[i, j] + 5\n", "\n", "\n", "%timeit add_five_slow(df)" @@ -546,15 +811,23 @@ "id": "8246132e", "metadata": {}, "source": [ - "But to do this, every individual cell must be accessed and operated on—so it is very slow, taking milliseconds. **pandas** has far faster ways of performing the same operation. For simple operations on data frames with consistent type, you can simply add five to the whole data frame:" + "But to do this, every individual cell must be accessed and operated on—so it is very slow, taking milliseconds. **polars** has far faster ways of performing the same operation. 
Note that `add_five_slow()` also changes `df` in place, and `%timeit` calls it thousands of times, which is why the values in `df` are so large in later outputs. For simple operations on data frames with consistent type, you can simply add five to the whole data frame:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "1a48ae52", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "56.7 μs ± 601 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" + ] + } + ], "source": [ "%timeit df + 5" ] @@ -572,19 +845,19 @@ "id": "7313616e", "metadata": {}, "source": [ - "This also works on a per column basis, so you can do `df[\"a\"] = df[\"a\"] + 5` and so on.\n", + "This also works on a per column basis, so you can do `df.with_columns(pl.col(\"a\") + 5)` and so on.\n", "\n", - "These operations have equivalents using the `assign()` operator, which allows for *method chaining*; stringing multiple operations together. The `assign()` operator version of `df[\"new_a\"] = df[\"a\"] + 5` would be" + "Because `with_columns()` returns a new data frame rather than modifying `df` in place, these operations can be strung together by *method chaining*. To keep the result, give the new column a name and assign the returned data frame back to `df`:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "f7391dc5", "metadata": {}, "outputs": [], "source": [ - "df = df.assign(new_a=lambda x: x[\"a\"] + 5)" + "df = df.with_columns(new_a=pl.col(\"a\") + 5)" ] }, { @@ -592,19 +865,63 @@ "id": "76aec162", "metadata": {}, "source": [ - "### Apply\n", + "### Expressions (Polars' Alternative to apply)\n", + "\n", + "What happens if you have a more complicated operation you want to perform? In pandas, you might reach for `apply()`. In **polars**, you almost never need an equivalent because its expression API is incredibly expressive.\n", "\n", - "What happens if you have a more complicated function you want to iterate over? This is where **pandas**' `apply()` comes in, and can be used with assignment. `apply()` can also be used across rows or columns.
Like `assign()`, it can be combined with a lambda function and used with either the whole data frame or just a column (in which case no need to specify `axis=`)." + "Most \"complicated\" operations can be expressed directly using **polars'** built-in expressions:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "31adcb3f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 6)
abcdnew_aresult
f64f64f64f64f64f64
40554.59475840553.64822240555.40981840555.97599740559.594758-7.814557
40556.42590440553.3897440555.45631440554.62883640561.425904-6.288454
40554.44499240554.8645640554.68054640555.61251340559.444992-6.018383
40556.84254440553.29023340555.01837740555.30489240561.842544-5.533329
40555.92634740553.62815940555.39920540556.43358540560.926347-6.492421
40555.64994440554.98896240554.09567940554.411240560.649944-4.104063
" + ], + "text/plain": [ + "shape: (6, 6)\n", + "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬───────────┐\n", + "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ result │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪═══════════╡\n", + "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ -7.814557 │\n", + "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ -6.288454 │\n", + "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ -6.018383 │\n", + "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ -5.533329 │\n", + "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ -6.492421 │\n", + "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ -4.104063 │\n", + "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴───────────┘" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.apply(lambda x: x[\"a\"] - x[\"new_a\"].mean() * x[\"c\"] / x[\"b\"], axis=1)" + "# Don't do this (slow, row-wise)\n", + "mean_new_a = df.select(pl.col(\"new_a\").mean()).item()\n", + "df.with_columns(\n", + " result=pl.struct([\"a\", \"b\", \"c\"]).map_elements(\n", + " lambda x: x[\"a\"] - mean_new_a * x[\"c\"] / x[\"b\"], return_dtype=pl.Float64\n", + " )\n", + ")\n", + "\n", + "# Do this instead (fast, vectorized)\n", + "df.with_columns(result=pl.col(\"a\") - pl.col(\"new_a\").mean() * pl.col(\"c\") / pl.col(\"b\"))" ] }, { @@ -612,24 +929,7 @@ "id": "78b558f4", "metadata": {}, "source": [ - "Note that this is just an example: you could still do this entire operation without using apply! 
But you will sometimes find yourself with cases where you do need to use it.\n", - "\n", - "Apply also works with functions, including user-defined functions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "136d435d", - "metadata": {}, - "outputs": [], - "source": [ - "def complicated_function(x):\n", - " return x - x.mean()\n", - "\n", - "\n", - "df = df.apply(complicated_function, axis=1)\n", - "df" + "The first version works, but it evaluates the computation row by row using a Python lambda, which is slow and prevents **polars** from optimizing the query. The second version uses native expressions, allowing **polars** to execute the computation efficiently in a fully vectorized and optimized manner. Only the second result is displayed, because a notebook cell shows just the value of its last expression." ] }, { @@ -637,19 +937,50 @@ "id": "171be2c9", "metadata": {}, "source": [ - "### Eval(uate)\n", - "\n", - "`eval()` evaluates a string describing operations on DataFrame columns to create new columns. It operates on columns only, not rows or elements. Here's an example:" + "In **polars**, there's no `eval()` — you use expressions directly instead:\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "8d9defd3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 6)
abcdnew_aratio
f64f64f64f64f64f64
40554.59475840553.64822240555.40981840555.97599740559.5947580.999877
40556.42590440553.3897440555.45631440554.62883640561.4259040.999877
40554.44499240554.8645640554.68054640555.61251340559.4449920.999877
40556.84254440553.29023340555.01837740555.30489240561.8425440.999877
40555.92634740553.62815940555.39920540556.43358540560.9263470.999877
40555.64994440554.98896240554.09567940554.411240560.6499440.999877
" + ], + "text/plain": [ + "shape: (6, 6)\n", + "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────┐\n", + "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ ratio │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╡\n", + "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ 0.999877 │\n", + "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ 0.999877 │\n", + "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ 0.999877 │\n", + "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ 0.999877 │\n", + "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ 0.999877 │\n", + "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ 0.999877 │\n", + "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┘" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df[\"ratio\"] = df.eval(\"a / new_a\")\n", + "df = df.with_columns((pl.col(\"a\") / pl.col(\"new_a\")).alias(\"ratio\"))\n", "df" ] }, @@ -658,7 +989,51 @@ "id": "8b275b5b", "metadata": {}, "source": [ - "Evaluate can also be used to create new boolean columns using, for example, a string `\"a > 0.5\"` in the above example." + "You can also create boolean columns the same way:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f301c8cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 7)
abcdnew_aratioa_gt_0.5
f64f64f64f64f64f64bool
40554.59475840553.64822240555.40981840555.97599740559.5947580.999877true
40556.42590440553.3897440555.45631440554.62883640561.4259040.999877true
40554.44499240554.8645640554.68054640555.61251340559.4449920.999877true
40556.84254440553.29023340555.01837740555.30489240561.8425440.999877true
40555.92634740553.62815940555.39920540556.43358540560.9263470.999877true
40555.64994440554.98896240554.09567940554.411240560.6499440.999877true
" + ], + "text/plain": [ + "shape: (6, 7)\n", + "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────┬──────────┐\n", + "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ ratio ┆ a_gt_0.5 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ bool │\n", + "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╪══════════╡\n", + "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ 0.999877 ┆ true │\n", + "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ 0.999877 ┆ true │\n", + "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ 0.999877 ┆ true │\n", + "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ 0.999877 ┆ true │\n", + "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ 0.999877 ┆ true │\n", + "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ 0.999877 ┆ true │\n", + "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┴──────────┘" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.with_columns((pl.col(\"a\") > 0.5).alias(\"a_gt_0.5\"))\n", + "df" ] } ], @@ -687,7 +1062,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/joins.ipynb b/joins.ipynb index e58d2d3..a1b57a3 100644 --- a/joins.ipynb +++ b/joins.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -64,10 +64,98 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "f5ef4f37", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " county pop\n", + "0 Los Angeles 
9878554\n", + "1 Orange 2997033\n", + "2 Ventura 798364\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countypop
ca0Los Angeles9878554
1Orange2997033
2Ventura798364
il0Cook5285107
1DeKalb103729
2Will673586
\n", + "
" + ], + "text/plain": [ + " county pop\n", + "ca 0 Los Angeles 9878554\n", + " 1 Orange 2997033\n", + " 2 Ventura 798364\n", + "il 0 Cook 5285107\n", + " 1 DeKalb 103729\n", + " 2 Will 673586" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -130,10 +218,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "53c66d5d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
key1key2ABCD
0K0K0A0B0C0D0
1K1K0A2B2C1D1
2K1K0A2B2C2D2
3K2K0NaNNaNC3D3
\n", + "
" + ], + "text/plain": [ + " key1 key2 A B C D\n", + "0 K0 K0 A0 B0 C0 D0\n", + "1 K1 K0 A2 B2 C1 D1\n", + "2 K1 K0 A2 B2 C2 D2\n", + "3 K2 K0 NaN NaN C3 D3" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "left = pd.DataFrame(\n", " {\n", @@ -167,10 +338,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "5e73608f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
key1key2ABCD
0K0K0A0B0C0D0
1K1K0A2B2C1D1
2K1K0A2B2C2D2
\n", + "
" + ], + "text/plain": [ + " key1 key2 A B C D\n", + "0 K0 K0 A0 B0 C0 D0\n", + "1 K1 K0 A2 B2 C1 D1\n", + "2 K1 K0 A2 B2 C2 D2" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.merge(left, right, on=[\"key1\", \"key2\"], how=\"inner\")" ] @@ -187,10 +431,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "5d209fb9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
key1key2ABCD_merge
0K0K0A0B0C0D0both
1K0K1A1B1NaNNaNleft_only
2K1K0A2B2C1D1both
3K1K0A2B2C2D2both
4K2K0NaNNaNC3D3right_only
5K2K1A3B3NaNNaNleft_only
\n", + "
" + ], + "text/plain": [ + " key1 key2 A B C D _merge\n", + "0 K0 K0 A0 B0 C0 D0 both\n", + "1 K0 K1 A1 B1 NaN NaN left_only\n", + "2 K1 K0 A2 B2 C1 D1 both\n", + "3 K1 K0 A2 B2 C2 D2 both\n", + "4 K2 K0 NaN NaN C3 D3 right_only\n", + "5 K2 K1 A3 B3 NaN NaN left_only" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.merge(left, right, on=[\"key1\", \"key2\"], how=\"outer\", indicator=True)" ] @@ -255,7 +609,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/missing-values.ipynb b/missing-values.ipynb index 262f015..08aed7c 100644 --- a/missing-values.ipynb +++ b/missing-values.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -61,10 +61,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "535ef959", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
numbers
05.0
127.3
2NaN
3-16.0
\n", + "
" + ], + "text/plain": [ + " numbers\n", + "0 5.0\n", + "1 27.3\n", + "2 NaN\n", + "3 -16.0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -83,10 +141,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "bf317bce", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
numbers
0<NA>
127.3
2NaN
3-16
4None
\n", + "
" + ], + "text/plain": [ + " numbers\n", + "0 \n", + "1 27.3\n", + "2 NaN\n", + "3 -16\n", + "4 None" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "numbers = pd.DataFrame([pd.NA, 27.3, np.nan, -16, None], columns=[\"numbers\"])\n", "numbers" @@ -102,10 +223,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "b1c864d9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fruit
0orange
1NaN
2apple
3None
4banana
5<NA>
\n", + "
" + ], + "text/plain": [ + " fruit\n", + "0 orange\n", + "1 NaN\n", + "2 apple\n", + "3 None\n", + "4 banana\n", + "5 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fruits = pd.DataFrame(\n", " [\"orange\", np.nan, \"apple\", None, \"banana\", pd.NA], columns=[\"fruit\"]\n", @@ -123,10 +312,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "cbcfe630", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fruit
0False
1True
2False
3True
4False
5True
\n", + "
" + ], + "text/plain": [ + " fruit\n", + "0 False\n", + "1 True\n", + "2 False\n", + "3 True\n", + "4 False\n", + "5 True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fruits.isna()" ] @@ -151,10 +408,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c96f89e6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.0NaNNaN<NA>
3NaN3.0NaN4
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 NaN 2.0 NaN 0\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 NaN NaN \n", + "3 NaN 3.0 NaN 4" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df = pd.DataFrame(\n", " [\n", @@ -179,10 +509,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "a6bc5fe2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/4054961691.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " nan_df.fillna(0)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
00.02.00.00
13.04.00.01
25.00.00.00
30.03.00.04
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 0.0 2.0 0.0 0\n", + "1 3.0 4.0 0.0 1\n", + "2 5.0 0.0 0.0 0\n", + "3 0.0 3.0 0.0 4" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.fillna(0)" ] @@ -197,10 +608,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "e65b67c2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2397886090.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3})\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
00.02.02.00
13.04.02.01
25.01.02.03
30.03.02.04
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 0.0 2.0 2.0 0\n", + "1 3.0 4.0 2.0 1\n", + "2 5.0 1.0 2.0 3\n", + "3 0.0 3.0 2.0 4" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3})" ] @@ -215,20 +707,186 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "2a19e196", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1353804149.py:1: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", + " nan_df.fillna(method=\"ffill\")\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1353804149.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " nan_df.fillna(method=\"ffill\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.04.0NaN1
35.03.0NaN4
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 NaN 2.0 NaN 0\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 4.0 NaN 1\n", + "3 5.0 3.0 NaN 4" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.fillna(method=\"ffill\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "8b5b001e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2505504399.py:1: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", + " nan_df.fillna(method=\"bfill\")\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2505504399.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " nan_df.fillna(method=\"bfill\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
03.02.0NaN0
13.04.0NaN1
25.03.0NaN4
3NaN3.0NaN4
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 3.0 2.0 NaN 0\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 3.0 NaN 4\n", + "3 NaN 3.0 NaN 4" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.fillna(method=\"bfill\")" ] @@ -251,10 +909,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "558e7a23", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1730877720.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3}, limit=1)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
00.02.02.00
13.04.0NaN1
25.01.0NaN3
3NaN3.0NaN4
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 0.0 2.0 2.0 0\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 1.0 NaN 3\n", + "3 NaN 3.0 NaN 4" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3}, limit=1)" ] @@ -269,20 +1008,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "0e3a81c1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1 3.0\n", + "2 5.0\n", + "Name: A, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df[\"A\"].dropna(axis=0) # on a single column" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "9c1e312e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
1
2
3
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [0, 1, 2, 3]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.dropna(axis=1)" ] @@ -297,10 +1100,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "3296ea35", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.0NaNNaN<NA>
3NaN3.0NaN4
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 NaN 2.0 NaN 0\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 NaN NaN \n", + "3 NaN 3.0 NaN 4" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df.dropna(how=\"all\")" ] @@ -323,10 +1199,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "d3e1af81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
13.04.0NaN1
25.0NaNNaN<NA>
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "1 3.0 4.0 NaN 1\n", + "2 5.0 NaN NaN " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan_df[nan_df[\"A\"].notna()]" ] @@ -345,10 +1278,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "45668d12", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
0345
1-74-99
2-9965
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 3 4 5\n", + "1 -7 4 -99\n", + "2 -99 6 5" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stata_df = pd.DataFrame([[3, 4, 5], [-7, 4, -99], [-99, 6, 5]], columns=list(\"ABC\"))\n", "\n", @@ -365,10 +1359,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "c43070f4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
0345
1-74<NA>
2<NA>65
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 3 4 5\n", + "1 -7 4 \n", + "2 6 5" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stata_df.replace({-99: pd.NA})" ] @@ -383,10 +1438,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "12cc567a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
0345
1<NA>4<NA>
2<NA>65
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 3 4 5\n", + "1 4 \n", + "2 6 5" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stata_df.replace({-99: pd.NA, -7: pd.NA})" ] @@ -413,10 +1529,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "057c5203", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearqtrprice
0202011.88
1202020.59
2202030.35
320204NaN
4202120.92
5202130.17
6202142.66
\n", + "
" + ], + "text/plain": [ + " year qtr price\n", + "0 2020 1 1.88\n", + "1 2020 2 0.59\n", + "2 2020 3 0.35\n", + "3 2020 4 NaN\n", + "4 2021 2 0.92\n", + "5 2021 3 0.17\n", + "6 2021 4 2.66" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stocks = pd.DataFrame(\n", " {\n", @@ -462,10 +1667,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "87ab37d0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
qtr1234
year
20201.880.590.35NaN
2021NaN0.920.172.66
\n", + "
" + ], + "text/plain": [ + "qtr 1 2 3 4\n", + "year \n", + "2020 1.88 0.59 0.35 NaN\n", + "2021 NaN 0.92 0.17 2.66" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stocks.pivot(columns=\"qtr\", values=\"price\", index=\"year\")" ] @@ -492,7 +1762,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "92a914e3", "metadata": {}, "outputs": [], @@ -517,10 +1787,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "cc904247", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesmokerage
0Ikaiano34
1Olettano88
2Leriahpreviously75
3Dashayno47
\n", + "
" + ], + "text/plain": [ + " name smoker age\n", + "0 Ikaia no 34\n", + "1 Oletta no 88\n", + "2 Leriah previously 75\n", + "3 Dashay no 47" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "health_cut = health.iloc[:-1, :]\n", "health_cut" @@ -536,10 +1874,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "546bc395", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "smoker\n", + "no 3\n", + "previously 1\n", + "yes 0\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "health_cut[\"smoker\"].value_counts()" ] @@ -554,10 +1907,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "efb6c6ab", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/3998383890.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " health_cut.groupby(\"smoker\")[\"age\"].mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "smoker\n", + "no 56.333333\n", + "previously 75.000000\n", + "yes NaN\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "health_cut.groupby(\"smoker\")[\"age\"].mean()" ] @@ -596,7 +1972,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/numbers.ipynb b/numbers.ipynb index 388db20..129d361 100644 --- a/numbers.ipynb +++ b/numbers.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "6c89ca3d", "metadata": {}, "outputs": [], @@ -62,10 +62,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "18f1ee4c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(336776)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights[\"dest\"].count()" ] @@ -80,10 +91,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "161a24ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dest\n", + "ORD 17283\n", + "ATL 17215\n", + "LAX 16174\n", + "BOS 15508\n", + "MCO 14082\n", + " ... 
\n", + "MTJ 15\n", + "SBN 10\n", + "ANC 8\n", + "LEX 1\n", + "LGA 1\n", + "Name: count, Length: 105, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights[\"dest\"].value_counts()" ] @@ -98,10 +132,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "8554277a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean_delaycount_flights
dest
ORD13.57048417283
ATL12.50982417215
LAX9.40134416174
BOS8.73061315508
MCO11.27599814082
.........
MTJ17.64285715
SBN21.10000010
ANC12.8750008
LGANaN1
LEX-9.0000001
\n", + "

105 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " mean_delay count_flights\n", + "dest \n", + "ORD 13.570484 17283\n", + "ATL 12.509824 17215\n", + "LAX 9.401344 16174\n", + "BOS 8.730613 15508\n", + "MCO 11.275998 14082\n", + "... ... ...\n", + "MTJ 17.642857 15\n", + "SBN 21.100000 10\n", + "ANC 12.875000 8\n", + "LGA NaN 1\n", + "LEX -9.000000 1\n", + "\n", + "[105 rows x 2 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " flights.groupby([\"dest\"])\n", @@ -123,10 +271,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "061decae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
miles
tailnum
D942DN3418
N0EGMQ250866
N10156115966
N102UW25722
N103US24619
......
N997DL54669
N998AT15432
N998DL66052
N999DN54623
N9EAMQ167317
\n", + "

4043 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " miles\n", + "tailnum \n", + "D942DN 3418\n", + "N0EGMQ 250866\n", + "N10156 115966\n", + "N102UW 25722\n", + "N103US 24619\n", + "... ...\n", + "N997DL 54669\n", + "N998AT 15432\n", + "N998DL 66052\n", + "N999DN 54623\n", + "N9EAMQ 167317\n", + "\n", + "[4043 rows x 1 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(flights.groupby(\"tailnum\").agg(miles=(\"distance\", \"sum\")))" ] @@ -141,10 +390,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "ecdb5630", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_cancelled
dest
ABQ0
ACK0
ALB20
ANC0
ATL317
......
TPA59
TUL16
TVC5
TYS52
XNA25
\n", + "

105 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " n_cancelled\n", + "dest \n", + "ABQ 0\n", + "ACK 0\n", + "ALB 20\n", + "ANC 0\n", + "ATL 317\n", + "... ...\n", + "TPA 59\n", + "TUL 16\n", + "TVC 5\n", + "TYS 52\n", + "XNA 25\n", + "\n", + "[105 rows x 1 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(flights.groupby(\"dest\").agg(n_cancelled=(\"dep_time\", lambda x: x.isnull().sum())))" ] @@ -188,10 +538,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "a473cd56", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(4983)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights[\"distance\"].max()" ] @@ -206,10 +567,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "96285702", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
013
152
27<NA>
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 1 3\n", + "1 5 2\n", + "2 7 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame({\"x\": [1, 5, 7], \"y\": [3, 2, pd.NA]})\n", "df" @@ -225,10 +643,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "5bae5499", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 7\n", + "dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.min(axis=1)" ] @@ -245,10 +677,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "3af6ce86", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", + "divided by 3 gives\n", + "remainder:\n", + "[1, 2, 0, 1, 2, 0, 1, 2, 0, 1]\n", + "divisions:\n", + "[0, 0, 1, 1, 1, 2, 2, 2, 3, 3]\n" + ] + } + ], "source": [ "print([x for x in range(1, 11)])\n", "print(\"divided by 3 gives\")\n", @@ -268,10 +713,348 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "6be57e34", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
0201311517.05152.0830.081911.0UA1545N14228EWRIAH227.014005152013-01-01T10:00:00Z
1201311533.05294.0850.083020.0UA1714N24211LGAIAH227.014165292013-01-01T10:00:00Z
2201311542.05402.0923.085033.0AA1141N619AAJFKMIA160.010895402013-01-01T10:00:00Z
3201311544.0545-1.01004.01022-18.0B6725N804JBJFKBQN183.015765452013-01-01T10:00:00Z
4201311554.0600-6.0812.0837-25.0DL461N668DNLGAATL116.0762602013-01-01T11:00:00Z
............................................................
3367712013930NaN1455NaNNaN1634NaN9E3393NaNJFKDCANaN21314552013-09-30T18:00:00Z
3367722013930NaN2200NaNNaN2312NaN9E3525NaNLGASYRNaN1982202013-10-01T02:00:00Z
3367732013930NaN1210NaNNaN1330NaNMQ3461N535MQLGABNANaN76412102013-09-30T16:00:00Z
3367742013930NaN1159NaNNaN1344NaNMQ3572N511MQLGACLENaN41911592013-09-30T15:00:00Z
3367752013930NaN840NaNNaN1020NaNMQ3531N839MQLGARDUNaN4318402013-09-30T12:00:00Z
\n", + "

336776 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " year month day dep_time sched_dep_time dep_delay arr_time \\\n", + "0 2013 1 1 517.0 515 2.0 830.0 \n", + "1 2013 1 1 533.0 529 4.0 850.0 \n", + "2 2013 1 1 542.0 540 2.0 923.0 \n", + "3 2013 1 1 544.0 545 -1.0 1004.0 \n", + "4 2013 1 1 554.0 600 -6.0 812.0 \n", + "... ... ... ... ... ... ... ... \n", + "336771 2013 9 30 NaN 1455 NaN NaN \n", + "336772 2013 9 30 NaN 2200 NaN NaN \n", + "336773 2013 9 30 NaN 1210 NaN NaN \n", + "336774 2013 9 30 NaN 1159 NaN NaN \n", + "336775 2013 9 30 NaN 840 NaN NaN \n", + "\n", + " sched_arr_time arr_delay carrier flight tailnum origin dest \\\n", + "0 819 11.0 UA 1545 N14228 EWR IAH \n", + "1 830 20.0 UA 1714 N24211 LGA IAH \n", + "2 850 33.0 AA 1141 N619AA JFK MIA \n", + "3 1022 -18.0 B6 725 N804JB JFK BQN \n", + "4 837 -25.0 DL 461 N668DN LGA ATL \n", + "... ... ... ... ... ... ... ... \n", + "336771 1634 NaN 9E 3393 NaN JFK DCA \n", + "336772 2312 NaN 9E 3525 NaN LGA SYR \n", + "336773 1330 NaN MQ 3461 N535MQ LGA BNA \n", + "336774 1344 NaN MQ 3572 N511MQ LGA CLE \n", + "336775 1020 NaN MQ 3531 N839MQ LGA RDU \n", + "\n", + " air_time distance hour minute time_hour \n", + "0 227.0 1400 5 15 2013-01-01T10:00:00Z \n", + "1 227.0 1416 5 29 2013-01-01T10:00:00Z \n", + "2 160.0 1089 5 40 2013-01-01T10:00:00Z \n", + "3 183.0 1576 5 45 2013-01-01T10:00:00Z \n", + "4 116.0 762 6 0 2013-01-01T11:00:00Z \n", + "... ... ... ... ... ... 
\n", + "336771 NaN 213 14 55 2013-09-30T18:00:00Z \n", + "336772 NaN 198 22 0 2013-10-01T02:00:00Z \n", + "336773 NaN 764 12 10 2013-09-30T16:00:00Z \n", + "336774 NaN 419 11 59 2013-09-30T15:00:00Z \n", + "336775 NaN 431 8 40 2013-09-30T12:00:00Z \n", + "\n", + "[336776 rows x 19 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "flights.assign(\n", " hour=lambda x: x[\"sched_dep_time\"] // 100,\n", @@ -291,10 +1074,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "a3ffc085", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmoney
02001105.000000
12002110.250000
22003115.762500
32004121.550625
42005127.628156
\n", + "
" + ], + "text/plain": [ + " year money\n", + "0 2001 105.000000\n", + "1 2002 110.250000\n", + "2 2003 115.762500\n", + "3 2004 121.550625\n", + "4 2005 127.628156" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -316,10 +1168,821 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "aaad2abb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:35:12.580055\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "money.plot(x=\"year\", y=\"money\");" ] @@ -334,10 +1997,767 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "bbcf4d7c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:35:12.673392\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "money.plot(x=\"year\", y=\"money\", logy=True);" ] @@ -366,10 +2786,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "ec1a09ca", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmoney
02001105.00
12002110.25
22003115.76
32004121.55
42005127.63
\n", + "
" + ], + "text/plain": [ + " year money\n", + "0 2001 105.00\n", + "1 2002 110.25\n", + "2 2003 115.76\n", + "3 2004 121.55\n", + "4 2005 127.63" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money.head().round(2)" ] @@ -384,10 +2873,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "9a306519", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmoney
02001105.0
12002110.2
22003115.8
32004121.6
42005127.6
\n", + "
" + ], + "text/plain": [ + " year money\n", + "0 2001 105.0\n", + "1 2002 110.2\n", + "2 2003 115.8\n", + "3 2004 121.6\n", + "4 2005 127.6" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money.head().round({\"year\": 0, \"money\": 1})" ] @@ -402,10 +2960,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "fb9dfb59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmoney
452046900.0
4620471000.0
4720481000.0
4820491100.0
4920501100.0
\n", + "
" + ], + "text/plain": [ + " year money\n", + "45 2046 900.0\n", + "46 2047 1000.0\n", + "47 2048 1000.0\n", + "48 2049 1100.0\n", + "49 2050 1100.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money.tail().round({\"year\": 0, \"money\": -2})" ] @@ -420,10 +3047,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "f0290c0a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 100.0\n", + "1 110.0\n", + "2 120.0\n", + "3 120.0\n", + "4 130.0\n", + "Name: money, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money[\"money\"].head().apply(lambda x: float(f'{float(f\"{x:.2g}\"):g}'))" ] @@ -438,10 +3081,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "6a6a4644", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([2., 2., 1.])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "np.round([1.5, 2.5, 1.4])" ] @@ -456,10 +3110,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "6270c3cc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([27.15101204, 15.41426421, 76.7650241 , 70.51407739, 88.22482077,\n", + " 51.56875497, 63.03191713, 86.16710762, 72.35185576, 87.16963342])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "real_nums = 100 * np.random.random(size=10)\n", "real_nums" @@ -467,20 +3133,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "dc3608d6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([28., 16., 77., 71., 89., 52., 64., 87., 73., 88.])" + ] + }, + "execution_count": 22, + "metadata": {}, 
+ "output_type": "execute_result" + } + ], "source": [ "np.ceil(real_nums)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "d577bb21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([27., 15., 76., 70., 88., 51., 63., 86., 72., 87.])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "np.floor(real_nums)" ] @@ -495,10 +3183,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "dca417b7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 105.0\n", + "1 111.0\n", + "2 116.0\n", + "3 122.0\n", + "4 128.0\n", + "Name: money, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money[\"money\"].head().apply(np.ceil)" ] @@ -515,10 +3219,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "c5d77818", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "45 943.425818\n", + "46 1934.022928\n", + "47 2974.149892\n", + "48 4066.283205\n", + "49 5213.023184\n", + "Name: money, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money[\"money\"].tail().cumsum()" ] @@ -547,20 +3267,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "db1272a3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
013
152
27<NA>
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 1 3\n", + "1 5 2\n", + "2 7 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "11ee0bac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
01.02.0
12.01.0
23.0NaN
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 1.0 2.0\n", + "1 2.0 1.0\n", + "2 3.0 NaN" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.rank()" ] @@ -575,10 +3409,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "edc7bd81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
00.3333331.0
10.6666670.5
21.000000NaN
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 0.333333 1.0\n", + "1 0.666667 0.5\n", + "2 1.000000 NaN" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.rank(pct=True)" ] @@ -597,10 +3488,1078 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "9e499d1b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:35:12.825403\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "money[\"money_lag_5\"] = money[\"money\"].shift(5)\n", "money[\"money_lead_10\"] = money[\"money\"].shift(-10)\n", @@ -643,10 +4602,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "81389031", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(190.92197566022773)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money[\"money\"].quantile(0.25)" ] @@ -661,10 +4631,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "9eb35866", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.00 105.000000\n", + "0.25 190.921976\n", + "0.50 347.101381\n", + "0.75 630.945970\n", + "Name: money, dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "money[\"money\"].quantile([0, 0.25, 0.5, 0.75])" ] @@ -684,10 +4669,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "e2efca3a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distance_sdcount
origindest
EWREGE1.0110
JFKEGE1.0103
\n", + "
" + ], + "text/plain": [ + " distance_sd count\n", + "origin dest \n", + "EWR EGE 1.0 110\n", + "JFK EGE 1.0 103" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " flights.groupby([\"origin\", \"dest\"])\n", @@ -713,20 +4759,3173 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "b5ac4861", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:35:13.026671\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "flights[\"dep_delay\"].plot.hist(bins=50, title=\" Distribution: length of delay\");" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "d4e76051", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2026-04-28T13:35:13.184191\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.10.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "flights.query(\"dep_delay <= 120\")[\"dep_delay\"].plot.hist(\n", " bins=50, title=\" Distribution: length of delay\"\n", @@ -792,7 +7991,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/prerequisites.ipynb b/prerequisites.ipynb index 68bea6d..07119a8 100644 --- a/prerequisites.ipynb +++ b/prerequisites.ipynb @@ -305,7 +305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/rectangling.ipynb b/rectangling.ipynb index e565c1d..4c0ff4f 100644 --- a/rectangling.ipynb +++ b/rectangling.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -56,10 +56,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "4d1c3fa4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[10, 1.23, 'like this', True, None]\n" + ] + } + ], "source": [ "list_example = [10, 1.23, \"like this\", True, None]\n", "print(list_example)" @@ -75,10 +83,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "96c4b86b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[10, 1.23, 'like this', True, None, 'one more entry']\n" + ] + } + ], "source": [ "list_example.append(\"one more entry\")\n", "print(list_example)" @@ -94,10 +110,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "138ac0e0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "one more entry\n" + ] + } + ], "source": [ 
"print(list_example[0])\n", "print(list_example[-1])" @@ -123,10 +148,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "ef390263", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[10, 1.23, 'like this']\n", + "[True, None, 'one more entry']\n" + ] + } + ], "source": [ "print(list_example[:3])\n", "print(list_example[-3:])" @@ -142,10 +176,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "13584953", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6, 8]\n" + ] + } + ], "source": [ "list_of_numbers = list(range(1, 11))\n", "start = 1\n", @@ -164,10 +206,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "f2bc8926", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n" + ] + } + ], "source": [ "print(list_of_numbers[::-1])" ] @@ -192,10 +242,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "703fb0b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[3.1415,\n", + " 16,\n", + " ['five', 4, 3],\n", + " (91, 93, 90),\n", + " 'Hello World!',\n", + " True,\n", + " None,\n", + " {'key': 'value', 'key2': 'value2'}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wacky_list = [\n", " 3.1415,\n", @@ -222,10 +290,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "28fe13d2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "multilayer_list = [[1, 2, 3, 4, 5], [6, 
7, 8, 9, 10], [11, 12, 13, 14, 15]]\n", "multilayer_list" @@ -241,10 +320,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "73a71803", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[x for little_list in multilayer_list for x in little_list]" ] @@ -269,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "007dadfb", "metadata": {}, "outputs": [], @@ -287,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "aeb70f97", "metadata": {}, "outputs": [], @@ -307,10 +397,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "fa1d828c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 3)
alphabetagamma
stri64list[str]
"["0,1,2"]"1["a", "b", "c"]
"foo"1null
"[]"1[]
"["3,4"]"1["d", "e"]
" + ], + "text/plain": [ + "shape: (4, 3)\n", + "┌───────────┬──────┬─────────────────┐\n", + "│ alpha ┆ beta ┆ gamma │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ list[str] │\n", + "╞═══════════╪══════╪═════════════════╡\n", + "│ [\"0,1,2\"] ┆ 1 ┆ [\"a\", \"b\", \"c\"] │\n", + "│ foo ┆ 1 ┆ null │\n", + "│ [] ┆ 1 ┆ [] │\n", + "│ [\"3,4\"] ┆ 1 ┆ [\"d\", \"e\"] │\n", + "└───────────┴──────┴─────────────────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pl.DataFrame(\n", " {\n", @@ -333,10 +454,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "0854bcf3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (7, 3)
alphabetagamma
stri64str
"["0,1,2"]"1"a"
"["0,1,2"]"1"b"
"["0,1,2"]"1"c"
"foo"1null
"[]"1null
"["3,4"]"1"d"
"["3,4"]"1"e"
" + ], + "text/plain": [ + "shape: (7, 3)\n", + "┌───────────┬──────┬───────┐\n", + "│ alpha ┆ beta ┆ gamma │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ str │\n", + "╞═══════════╪══════╪═══════╡\n", + "│ [\"0,1,2\"] ┆ 1 ┆ a │\n", + "│ [\"0,1,2\"] ┆ 1 ┆ b │\n", + "│ [\"0,1,2\"] ┆ 1 ┆ c │\n", + "│ foo ┆ 1 ┆ null │\n", + "│ [] ┆ 1 ┆ null │\n", + "│ [\"3,4\"] ┆ 1 ┆ d │\n", + "│ [\"3,4\"] ┆ 1 ┆ e │\n", + "└───────────┴──────┴───────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.explode(\"gamma\")" ] @@ -384,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "285ade5e", "metadata": {}, "outputs": [], @@ -407,10 +562,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "75399bf0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(json_data)" ] @@ -427,10 +593,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "8cc4f408", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[{'date': '1971 FEB',\n", + " 'value': '3.8',\n", + " 'label': '1971 JAN-MAR',\n", + " 'year': '1971',\n", + " 'month': 'February',\n", + " 'quarter': '',\n", + " 'sourceDataset': 'LMS',\n", + " 'updateDate': '2015-10-13T23:00:00.000Z'},\n", + " {'date': '1971 MAR',\n", + " 'value': '3.9',\n", + " 'label': '1971 FEB-APR',\n", + " 'year': '1971',\n", + " 'month': 'March',\n", + " 'quarter': '',\n", + " 'sourceDataset': 'LMS',\n", + " 'updateDate': '2015-10-13T23:00:00.000Z'}]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "json_data[\"months\"][:2]" ] @@ -447,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "05069731", 
"metadata": {}, "outputs": [], @@ -473,10 +665,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "eca7982f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'food': 'doughnut',\n", + " 'good_with': ['coffee', 'tea'],\n", + " 'flavour': None,\n", + " 'toppings': [{'id': 0, 'type': 'glazed'}, {'id': 1, 'type': 'sugar'}]}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import json\n", "\n", @@ -494,10 +700,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "ea6f887d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"food\": \"doughnut\", \"good_with\": [\"coffee\", \"tea\"], \"flavour\": null, \"toppings\": [{\"id\": 0, \"type\": \"glazed\"}, {\"id\": 1, \"type\": \"sugar\"}]}'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "json_stream = json.dumps(result)\n", "json_stream" @@ -546,7 +763,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "1dbfd7c3", "metadata": {}, "outputs": [], @@ -566,10 +783,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "b3ea0e0b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 6)
idname.firstname.lastname.givenname.familyname
i64strstrstrstrstr
1"Coleen""Volk"nullnullnull
nullnullnull"Mark""Regner"null
2nullnullnullnull"Faye Raker"
" + ], + "text/plain": [ + "shape: (3, 6)\n", + "┌──────┬────────────┬───────────┬────────────┬─────────────┬────────────┐\n", + "│ id ┆ name.first ┆ name.last ┆ name.given ┆ name.family ┆ name │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str ┆ str │\n", + "╞══════╪════════════╪═══════════╪════════════╪═════════════╪════════════╡\n", + "│ 1 ┆ Coleen ┆ Volk ┆ null ┆ null ┆ null │\n", + "│ null ┆ null ┆ null ┆ Mark ┆ Regner ┆ null │\n", + "│ 2 ┆ null ┆ null ┆ null ┆ null ┆ Faye Raker │\n", + "└──────┴────────────┴───────────┴────────────┴─────────────┴────────────┘" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data = [\n", " {\"id\": 1, \"name\": {\"first\": \"Coleen\", \"last\": \"Volk\"}},\n", @@ -589,10 +836,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "9349a3a7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 2)
idname
i64str
1"{"first": "Coleen", "last": "V…
null"{"given": "Mark", "family": "R…
2"Faye Raker"
" + ], + "text/plain": [ + "shape: (3, 2)\n", + "┌──────┬─────────────────────────────────┐\n", + "│ id ┆ name │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ str │\n", + "╞══════╪═════════════════════════════════╡\n", + "│ 1 ┆ {\"first\": \"Coleen\", \"last\": \"V… │\n", + "│ null ┆ {\"given\": \"Mark\", \"family\": \"R… │\n", + "│ 2 ┆ Faye Raker │\n", + "└──────┴─────────────────────────────────┘" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.json_normalize(data, max_level=0)" ] @@ -628,7 +905,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/regex.ipynb b/regex.ipynb index a8055fc..d8cb9d0 100644 --- a/regex.ipynb +++ b/regex.ipynb @@ -23,9 +23,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:4: SyntaxWarning: invalid escape sequence '\\w'\n", + "<>:4: SyntaxWarning: invalid escape sequence '\\w'\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/970224743.py:4: SyntaxWarning: invalid escape sequence '\\w'\n", + " re.findall(\"string \\w+\\s\", text)\n" + ] + }, + { + "data": { + "text/plain": [ + "['string cleaning ', 'string editing ']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import re\n", "\n", @@ -57,9 +78,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:2: SyntaxWarning: invalid escape sequence '\\w'\n", + "<>:2: SyntaxWarning: invalid escape sequence '\\w'\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/3231031485.py:2: SyntaxWarning: invalid escape sequence '\\w'\n", + " 
re.sub(\"string \\w+\\s\", new_text, text)\n" + ] + }, + { + "data": { + "text/plain": [ + "'It is true that new text here! is a topic in this chapter. new text here! is another.'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "new_text = \"new text here! \"\n", "re.sub(\"string \\w+\\s\", new_text, text)" @@ -141,9 +183,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The greedy match is stackoverflo\n", + "The lazy match is stacko\n" + ] + } + ], "source": [ "test_string = \"stackoverflow\"\n", "greedy_regex = \"s.*o\"\n", @@ -171,9 +222,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:2: SyntaxWarning: invalid escape sequence '\\$'\n", + "<>:2: SyntaxWarning: invalid escape sequence '\\$'\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/3729052804.py:2: SyntaxWarning: invalid escape sequence '\\$'\n", + " re.findall(\"\\$(\\d{2}.\\d{2})\", text)\n" + ] + }, + { + "data": { + "text/plain": [ + "['45.34', '50.00']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text = \"Product 1 was $45.34, while product 2 came in at $50.00 however it was assessed that the $4.66 difference did not make up for the higher quality of product 2.\"\n", "re.findall(\"\\$(\\d{2}.\\d{2})\", text)" @@ -192,9 +264,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['30500.00 to 35000']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sal_r_per = 
r\"\\b([0-9]{1,6}(?:\\.)?(?:[0-9]{1,2})?(?:\\s?-\\s?|\\s?to\\s?)[0-9]{1,6}(?:\\.)?(?:[0-9]{1,2})?)(?:\\s?per)\\b\"\n", "text = \"This job pays gbp 30500.00 to 35000 per year. Apply at number 100 per the below address.\"\n", @@ -233,7 +316,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" } }, "nbformat": 4, diff --git a/spreadsheets.ipynb b/spreadsheets.ipynb index 1c1f378..4f5ebfd 100644 --- a/spreadsheets.ipynb +++ b/spreadsheets.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -68,10 +68,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "bf00d4c6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 5)
Student IDFull Namefavourite.foodmealPlanAGE
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" + ], + "text/plain": [ + "shape: (6, 5)\n", + "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", + "│ Student ID ┆ Full Name ┆ favourite.food ┆ mealPlan ┆ AGE │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", + "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", + "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", + "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", + "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", + "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", + "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", + "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", + "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -93,10 +126,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "ecce26ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" + ], + "text/plain": [ + "shape: (6, 5)\n", + "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", + "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", + "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", + "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", + "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", + "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", + "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", + "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", + "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", + "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students.columns = [\n", " \"student_id\",\n", @@ -118,10 +184,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "22e45cc2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""5"
6"Güvenç Attila""Ice cream""Lunch only""6"
" + ], + "text/plain": [ + "shape: (6, 5)\n", + "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", + "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", + "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", + "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", + "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", + "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", + "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", + "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ 5 │\n", + "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", + "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = pl.read_excel(\"data/students.xlsx\")\n", "students.columns = [\n", @@ -145,10 +244,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "f67490d3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Schema([('student_id', Int64),\n", + " ('full_name', String),\n", + " ('favourite_food', String),\n", + " ('meal_plan', Categorical(ordering='physical')),\n", + " ('age', Int64)])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "students = students.with_columns(\n", " [\n", @@ -186,10 +300,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "e37f9e3d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Torgersen"39.118.71813750"male"2007
"Adelie""Torgersen"39.517.41863800"female"2007
"Adelie""Torgersen"40.318.01953250"female"2007
"Adelie""Torgersen"nullnullnullnull"NA"2007
"Adelie""Torgersen"36.719.31933450"female"2007
" + ], + "text/plain": [ + "shape: (5, 8)\n", + "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ --- ┆ f64 ┆ --- ┆ i64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ f64 ┆ ┆ i64 ┆ ┆ ┆ │\n", + "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181 ┆ 3750 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186 ┆ 3800 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195 ┆ 3250 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ NA ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193 ┆ 3450 ┆ female ┆ 2007 │\n", + "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_excel(\n", " \"data/penguins.xlsx\",\n", @@ -209,10 +356,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "0f3a3b78", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Torgersen Island', 'Biscoe Island', 'Dream Island']\n" + ] + } + ], "source": [ "penguins_dict = pl.read_excel(\n", " \"data/penguins.xlsx\",\n", @@ -231,10 +386,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "15495426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Biscoe Island\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Biscoe"37.818.31743400"female"2007
"Adelie""Biscoe"37.718.71803600"male"2007
"Adelie""Biscoe"35.919.21893800"female"2007
"Adelie""Biscoe"38.218.11853950"male"2007
"Adelie""Biscoe"38.817.21803800"male"2007
" + ], + "text/plain": [ + "shape: (5, 8)\n", + "┌─────────┬────────┬────────────────┬───────────────┬────────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_mm ┆ bill_depth_mm ┆ flipper_length ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ _mm ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ f64 ┆ f64 ┆ --- ┆ i64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ ┆ ┆ i64 ┆ ┆ ┆ │\n", + "╞═════════╪════════╪════════════════╪═══════════════╪════════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Biscoe ┆ 37.8 ┆ 18.3 ┆ 174 ┆ 3400 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Biscoe ┆ 37.7 ┆ 18.7 ┆ 180 ┆ 3600 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Biscoe ┆ 35.9 ┆ 19.2 ┆ 189 ┆ 3800 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Biscoe ┆ 38.2 ┆ 18.1 ┆ 185 ┆ 3950 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Biscoe ┆ 38.8 ┆ 17.2 ┆ 180 ┆ 3800 ┆ male ┆ 2007 │\n", + "└─────────┴────────┴────────────────┴───────────────┴────────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "print(list(penguins_dict.keys())[1])\n", "list(penguins_dict.values())[1].head()" @@ -250,10 +445,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "151ba846", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (344, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Torgersen"39.118.71813750"male"2007
"Adelie""Torgersen"39.517.41863800"female"2007
"Adelie""Torgersen"40.318.01953250"female"2007
"Adelie""Torgersen"nullnullnullnull"NA"2007
"Adelie""Torgersen"36.719.31933450"female"2007
"Chinstrap""Dream"55.819.82074000"male"2009
"Chinstrap""Dream"43.518.12023400"female"2009
"Chinstrap""Dream"49.618.21933775"male"2009
"Chinstrap""Dream"50.819.02104100"male"2009
"Chinstrap""Dream"50.218.71983775"female"2009
" + ], + "text/plain": [ + "shape: (344, 8)\n", + "┌───────────┬───────────┬──────────────┬──────────────┬──────────────┬─────────────┬────────┬──────┐\n", + "│ species ┆ island ┆ bill_length_ ┆ bill_depth_m ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", + "│ --- ┆ --- ┆ mm ┆ m ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ --- ┆ --- ┆ --- ┆ i64 ┆ str ┆ i64 │\n", + "│ ┆ ┆ f64 ┆ f64 ┆ i64 ┆ ┆ ┆ │\n", + "╞═══════════╪═══════════╪══════════════╪══════════════╪══════════════╪═════════════╪════════╪══════╡\n", + "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181 ┆ 3750 ┆ male ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186 ┆ 3800 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195 ┆ 3250 ┆ female ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ NA ┆ 2007 │\n", + "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193 ┆ 3450 ┆ female ┆ 2007 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ Chinstrap ┆ Dream ┆ 55.8 ┆ 19.8 ┆ 207 ┆ 4000 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 43.5 ┆ 18.1 ┆ 202 ┆ 3400 ┆ female ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 49.6 ┆ 18.2 ┆ 193 ┆ 3775 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 50.8 ┆ 19.0 ┆ 210 ┆ 4100 ┆ male ┆ 2009 │\n", + "│ Chinstrap ┆ Dream ┆ 50.2 ┆ 18.7 ┆ 198 ┆ 3775 ┆ female ┆ 2009 │\n", + "└───────────┴───────────┴──────────────┴──────────────┴──────────────┴─────────────┴────────┴──────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "penguins = pl.concat(penguins_dict.values())\n", "penguins" @@ -285,10 +519,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "29987b9d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (14, 6)
Lots of people__UNNAMED__1__UNNAMED__2__UNNAMED__3__UNNAMED__4__UNNAMED__5
strstrstrstrdatestr
"David Bowie""musician""69""true"1947-01-08"2016-01-10 00:00:00"
"Carrie Fisher""actor""60""true"1956-10-21"2016-12-27 00:00:00"
"Chuck Berry""musician""90""true"1926-10-18"2017-03-18 00:00:00"
"Bill Paxton""actor""61""true"1955-05-17"2017-02-25 00:00:00"
"Prince""musician""57""true"1958-06-07"2016-04-21 00:00:00"
"George Michael""musician""53""false"1963-06-25"2016-12-25 00:00:00"
"Some"nullnullnullnullnull
null"also like to write stuff"nullnullnullnull
nullnull"at the""bottom,"nullnull
nullnullnullnullnull"too!"
" + ], + "text/plain": [ + "shape: (14, 6)\n", + "┌────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬─────────────────────┐\n", + "│ Lots of people ┆ __UNNAMED__1 ┆ __UNNAMED__2 ┆ __UNNAMED__3 ┆ __UNNAMED__4 ┆ __UNNAMED__5 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ str ┆ str ┆ date ┆ str │\n", + "╞════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪═════════════════════╡\n", + "│ David Bowie ┆ musician ┆ 69 ┆ true ┆ 1947-01-08 ┆ 2016-01-10 00:00:00 │\n", + "│ Carrie Fisher ┆ actor ┆ 60 ┆ true ┆ 1956-10-21 ┆ 2016-12-27 00:00:00 │\n", + "│ Chuck Berry ┆ musician ┆ 90 ┆ true ┆ 1926-10-18 ┆ 2017-03-18 00:00:00 │\n", + "│ Bill Paxton ┆ actor ┆ 61 ┆ true ┆ 1955-05-17 ┆ 2017-02-25 00:00:00 │\n", + "│ Prince ┆ musician ┆ 57 ┆ true ┆ 1958-06-07 ┆ 2016-04-21 00:00:00 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ George Michael ┆ musician ┆ 53 ┆ false ┆ 1963-06-25 ┆ 2016-12-25 00:00:00 │\n", + "│ Some ┆ null ┆ null ┆ null ┆ null ┆ null │\n", + "│ null ┆ also like to ┆ null ┆ null ┆ null ┆ null │\n", + "│ ┆ write stuff ┆ ┆ ┆ ┆ │\n", + "│ null ┆ null ┆ at the ┆ bottom, ┆ null ┆ null │\n", + "│ null ┆ null ┆ null ┆ null ┆ null ┆ too! │\n", + "└────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴─────────────────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_excel(\n", " \"data/deaths.xlsx\",\n", @@ -306,10 +579,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "9d7a3db3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 6)
Lots of people__UNNAMED__1__UNNAMED__2__UNNAMED__3__UNNAMED__4__UNNAMED__5
strstri64booldatedate
"David Bowie""musician"69true1947-01-082016-01-10
"Carrie Fisher""actor"60true1956-10-212016-12-27
"Chuck Berry""musician"90true1926-10-182017-03-18
"Bill Paxton""actor"61true1955-05-172017-02-25
"Prince""musician"57true1958-06-072016-04-21
"Alan Rickman""actor"69false1946-02-212016-01-14
"Florence Henderson""actor"82true1934-02-142016-11-24
"Harper Lee""author"89false1926-04-282016-02-19
"Zsa Zsa Gábor""actor"99true1917-02-062016-12-18
"George Michael""musician"53false1963-06-252016-12-25
" + ], + "text/plain": [ + "shape: (10, 6)\n", + "┌────────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐\n", + "│ Lots of people ┆ __UNNAMED__1 ┆ __UNNAMED__2 ┆ __UNNAMED__3 ┆ __UNNAMED__4 ┆ __UNNAMED__5 │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ i64 ┆ bool ┆ date ┆ date │\n", + "╞════════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡\n", + "│ David Bowie ┆ musician ┆ 69 ┆ true ┆ 1947-01-08 ┆ 2016-01-10 │\n", + "│ Carrie Fisher ┆ actor ┆ 60 ┆ true ┆ 1956-10-21 ┆ 2016-12-27 │\n", + "│ Chuck Berry ┆ musician ┆ 90 ┆ true ┆ 1926-10-18 ┆ 2017-03-18 │\n", + "│ Bill Paxton ┆ actor ┆ 61 ┆ true ┆ 1955-05-17 ┆ 2017-02-25 │\n", + "│ Prince ┆ musician ┆ 57 ┆ true ┆ 1958-06-07 ┆ 2016-04-21 │\n", + "│ Alan Rickman ┆ actor ┆ 69 ┆ false ┆ 1946-02-21 ┆ 2016-01-14 │\n", + "│ Florence Henderson ┆ actor ┆ 82 ┆ true ┆ 1934-02-14 ┆ 2016-11-24 │\n", + "│ Harper Lee ┆ author ┆ 89 ┆ false ┆ 1926-04-28 ┆ 2016-02-19 │\n", + "│ Zsa Zsa Gábor ┆ actor ┆ 99 ┆ true ┆ 1917-02-06 ┆ 2016-12-18 │\n", + "│ George Michael ┆ musician ┆ 53 ┆ false ┆ 1963-06-25 ┆ 2016-12-25 │\n", + "└────────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_excel(\n", " \"data/deaths.xlsx\",\n", @@ -356,10 +666,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "15963e18", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 2)
itemquantity
cati64
"brownie"10
"cupcake"5
"cookie"8
" + ], + "text/plain": [ + "shape: (3, 2)\n", + "┌─────────┬──────────┐\n", + "│ item ┆ quantity │\n", + "│ --- ┆ --- │\n", + "│ cat ┆ i64 │\n", + "╞═════════╪══════════╡\n", + "│ brownie ┆ 10 │\n", + "│ cupcake ┆ 5 │\n", + "│ cookie ┆ 8 │\n", + "└─────────┴──────────┘" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "bake_sale = pl.DataFrame(\n", " {\n", @@ -380,10 +720,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "1fc17141", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "bake_sale.write_excel(\"data/bake_sale.xlsx\")" ] @@ -408,10 +759,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "27e128f9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Schema([('item', String), ('quantity', Int64)])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_excel(\"data/bake_sale.xlsx\").schema" ] @@ -449,7 +811,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/strings.ipynb b/strings.ipynb index e401876..10d93ae 100644 --- a/strings.ipynb +++ b/strings.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "478a847a", "metadata": {}, "outputs": [], @@ -49,10 +49,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d7f4ea2d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(string_one)" ] @@ -67,7 +78,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 3, "id": "01379fe7", "metadata": {}, "outputs": [], @@ -77,10 +88,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "d88f7928", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ban'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "var[:3]" ] @@ -95,10 +117,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "e03d95d1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'aaa'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "var[1::2]" ] @@ -115,10 +148,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "83ab201b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "len(var)" ] @@ -133,10 +177,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "7801bd5d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\"This is a string. If I want to include a 'quote' inside a string, I use double quotes on the outside.\"" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "string_one + \". \" + string_two + \".\"" ] @@ -151,10 +206,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "138cef18", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\"This is a string. If I want to include a 'quote' inside a string, I use double quotes on the outside\"" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\". 
\".join([string_one, string_two])" ] @@ -169,10 +235,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "e11896f8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['INPUT TEXT', 'input text', 'Input Text']\n" + ] + } + ], "source": [ "var = \"input TEXT\"\n", "var_list = [var.upper(), var.lower(), var.title()]\n", @@ -201,10 +275,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "bf0aadec", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT TEXT; and \n", + "input text; and \n", + "Input Text\n" + ] + } + ], "source": [ "print(*var_list, sep=\"; and \\n\")" ] @@ -219,10 +303,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "a96f048c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'A boolean is either True or False, there are only 2 options.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " \"A boolean is either \"\n", @@ -255,10 +350,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "9dddf0da", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You scored 15.32399\n" + ] + } + ], "source": [ "variable = 15.32399\n", "print(f\"You scored {variable}\")" @@ -274,10 +377,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "795e7c07", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You scored 234.8246695201\n" + ] + } + ], "source": [ "print(f\"You scored {variable**2}\")" ] @@ -300,10 +411,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "1f3d3806", "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You scored +15.32\n" + ] + } + ], "source": [ "print(f\"You scored {variable:+.2f}\")" ] @@ -352,10 +471,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "0ccd65aa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import string\n", "\n", @@ -372,10 +502,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "16205c36", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "string.ascii_letters" ] @@ -390,10 +531,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "0c67f5cd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'0123456789'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "string.digits" ] @@ -416,10 +568,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "16e9904a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is a \n", + " new line\n", + "Here is an \\n escaped new line \n" + ] + } + ], "source": [ "print(\"Here is a \\n new line\")\n", "print(\"Here is an \\\\n escaped new line \")" @@ -452,10 +614,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "af423bd1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a\tb\n", + "A\tB\n" + ] + } + ], "source": [ 
"print(\"a\\tb\\nA\\tB\")" ] @@ -472,10 +643,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "c2b9c689", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a\\tb\\nA\\tB\n" + ] + } + ], "source": [ "print(r\"a\\tb\\nA\\tB\")" ] @@ -496,10 +675,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "229ada3a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Value is subjective'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"Value is objective\".replace(\"objective\", \"subjective\")" ] @@ -514,10 +704,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "79f754dc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Value is subjective'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text = \"Value is objective\"\n", "old_substr = \"objective\"\n", @@ -549,10 +750,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "99675fee", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': '', 'e': '', 'i': '', 'o': '', 'u': ''}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "example_text = \"Much recent work has focused on the influence of social capital on innovative outcomes. Little research has been done on disadvantaged groups who were often restricted from participation in social networks that provide information necessary for invention and innovation. Unique new data on African American inventors and patentees between 1843 and 1930 permit an empirical investigation of the relation between social capital and economic outcomes. 
I find that African Americans used both traditional, i.e., occupation-based, and nontraditional, i.e., civic, networks to maximize inventive output and that laws constraining social-capital formation are most negatively correlated with economically important inventive activity.\"\n", "vowels = \"aeiou\"\n", @@ -570,10 +782,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "e48763cb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Mch rcnt wrk hs fcsd n th nflnc f scl cptl n nnvtv tcms. Lttl rsrch hs bn dn n dsdvntgd grps wh wr ftn rstrctd frm prtcptn n scl ntwrks tht prvd nfrmtn ncssry fr nvntn nd nnvtn. Unq nw dt n Afrcn Amrcn nvntrs nd ptnts btwn 1843 nd 1930 prmt n mprcl nvstgtn f th rltn btwn scl cptl nd cnmc tcms. I fnd tht Afrcn Amrcns sd bth trdtnl, .., ccptn-bsd, nd nntrdtnl, .., cvc, ntwrks t mxmz nvntv tpt nd tht lws cnstrnng scl-cptl frmtn r mst ngtvly crrltd wth cnmclly mprtnt nvntv ctvty.'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "translator = example_text.maketrans(translation_dict)\n", "example_text.translate(translator)" @@ -599,10 +822,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "ac758b38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Example string with excess punctuation'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"\".join(\n", " [\n", @@ -625,10 +859,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "122619bf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'This is a sentence'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"This is a sentence and we will split it at character 18\"[:18]" ] @@ -643,10 +888,21 @@ 
}, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "9fc432ed", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['This is a sentence', ' And another sentence', ' And a third sentence']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"This is a sentence. And another sentence. And a third sentence\".split(\".\")" ] @@ -661,10 +917,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "6904e486", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['This is a ', '. And another ', '. And a third ', '']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"This is a sentence. And another sentence. And a third sentence\".split(\"sentence\")" ] @@ -689,10 +956,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "22f94993", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The word \"coffee\" appears 2 times.\n" + ] + } + ], "source": [ "text = \"At six o'clock we were waiting for coffee, \\n waiting for coffee and the charitable crumb \\n that was going to be served from a certain balcony \\n --like kings of old, or like a miracle. \\n It was still dark. 
One foot of the sun \\n steadied itself on a long ripple in the river.\"\n", "word = \"coffee\"\n", @@ -709,10 +984,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "a351a11b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "35" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text.find(word)" ] @@ -727,10 +1013,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "8e0a7020", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'coffee'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text[text.find(word) : text.find(word) + len(word)]" ] @@ -745,10 +1042,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "e18f64a3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "57" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text.rfind(word)" ] @@ -775,10 +1083,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "bbc3eb7b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Ada', 'Adam', 'Elinor', 'Grace', 'Jean']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[name.capitalize() for name in [\"ada\", \"adam\", \"elinor\", \"grace\", \"jean\"]]" ] @@ -793,10 +1112,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "c8a7f68b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 ada lovelace\n", + "1 adam smith\n", + "2 elinor ostrom\n", + "3 grace hopper\n", + "4 jean bartik\n", + "dtype: string" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -817,10 +1152,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "7cf149b5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 Ada Lovelace\n", + "1 Adam Smith\n", + "2 Elinor Ostrom\n", + "3 Grace Hopper\n", + "4 Jean Bartik\n", + "dtype: string" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "dfs.str.title()" ] @@ -835,10 +1186,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "26dc9a7b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 Ada Lovelace\n", + "1 Adam Smith\n", + "2 Elinor Ostrom\n", + "3 Grace Hopper\n", + "4 Jean Bartik\n", + "Name: names, dtype: string" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(dfs, columns=[\"names\"])\n", "df[\"names\"].str.title()" @@ -892,10 +1259,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "d7a29663", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 [ada, lovelace]\n", + "1 [adam, smith]\n", + "2 [elinor, ostrom]\n", + "3 [grace, hopper]\n", + "4 [jean, bartik]\n", + "Name: names, dtype: object" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"names\"].str.split(\" \")" ] @@ -910,10 +1293,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "85a5cd2c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0adalovelace
1adamsmith
2elinorostrom
3gracehopper
4jeanbartik
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 ada lovelace\n", + "1 adam smith\n", + "2 elinor ostrom\n", + "3 grace hopper\n", + "4 jean bartik" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"names\"].str.split(\" \", n=2, expand=True)" ] @@ -932,10 +1384,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "2e8781ba", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:1: SyntaxWarning: invalid escape sequence '\\w'\n", + "<>:1: SyntaxWarning: invalid escape sequence '\\w'\n", + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60781/3354940721.py:1: SyntaxWarning: invalid escape sequence '\\w'\n", + " df[\"names\"].str.extract(\"(\\w+)\", expand=False)\n" + ] + }, + { + "data": { + "text/plain": [ + "0 ada\n", + "1 adam\n", + "2 elinor\n", + "3 grace\n", + "4 jean\n", + "Name: names, dtype: string" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"names\"].str.extract(\"(\\w+)\", expand=False)" ] @@ -967,10 +1445,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "ba13d894", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 lovelace\n", + "1 smith\n", + "2 ostrom\n", + "3 hopper\n", + "4 bartik\n", + "Name: names, dtype: object" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"names\"].str.split().str.get(-1)" ] @@ -985,10 +1479,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "056147d6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namestags
0ada lovelaceuk; cs
1adam smithuk; econ
2elinor ostromusa; econ
3grace hopperusa; cs
4jean bartikusa; cs
\n", + "
" + ], + "text/plain": [ + " names tags\n", + "0 ada lovelace uk; cs\n", + "1 adam smith uk; econ\n", + "2 elinor ostrom usa; econ\n", + "3 grace hopper usa; cs\n", + "4 jean bartik usa; cs" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(\n", " {\n", @@ -1015,10 +1578,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "a5cbc10f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cseconukusa
01010
10110
20101
31001
41001
\n", + "
" + ], + "text/plain": [ + " cs econ uk usa\n", + "0 1 0 1 0\n", + "1 0 1 1 0\n", + "2 0 1 0 1\n", + "3 1 0 0 1\n", + "4 1 0 0 1" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"tags\"].str.get_dummies(\";\")" ] @@ -1089,7 +1733,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/vis-layers.ipynb b/vis-layers.ipynb index ba22bb2..062b285 100644 --- a/vis-layers.ipynb +++ b/vis-layers.ipynb @@ -42,10 +42,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "a86fb211", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import pandas as pd\n", "from lets_plot import *\n", @@ -67,10 +108,156 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "39a6d993", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
manufacturermodeldisplyearcyltransdrvctyhwyflclass
rownames
1audia41.819994auto(l5)f18.029.0pcompact
2audia41.819994manual(m5)f21.029.0pcompact
3audia42.020084manual(m6)f20.031.0pcompact
4audia42.020084auto(av)f21.030.0pcompact
5audia42.819996auto(l5)f16.026.0pcompact
\n", + "
" + ], + "text/plain": [ + " manufacturer model displ year cyl trans drv cty hwy fl \\\n", + "rownames \n", + "1 audi a4 1.8 1999 4 auto(l5) f 18.0 29.0 p \n", + "2 audi a4 1.8 1999 4 manual(m5) f 21.0 29.0 p \n", + "3 audi a4 2.0 2008 4 manual(m6) f 20.0 31.0 p \n", + "4 audi a4 2.0 2008 4 auto(av) f 21.0 30.0 p \n", + "5 audi a4 2.8 1999 6 auto(l5) f 16.0 26.0 p \n", + "\n", + " class \n", + "rownames \n", + "1 compact \n", + "2 compact \n", + "3 compact \n", + "4 compact \n", + "5 compact " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mpg = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv\", index_col=0\n", @@ -117,20 +304,262 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "fe77349a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"class\")) + geom_point())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "e77b5640", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", shape=\"class\")) + geom_point())" ] @@ -145,20 +574,262 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "ef221330", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", size=\"class\")) + geom_point())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "d042255e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", alpha=\"class\")) + geom_point())" ] @@ -181,10 +852,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "618edcb4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(color=\"blue\"))" ] @@ -250,20 +1041,263 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "277a4c0f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(size=4))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "07247ba9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_smooth(method=\"loess\", size=2))" ] @@ -298,20 +1332,207 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4b20c825", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", line=\"drv\")) + geom_smooth(method=\"loess\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "84df3e78", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", linetype=\"drv\")) + geom_smooth(method=\"loess\"))" ] @@ -330,10 +1551,146 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "c9e8d92f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -362,10 +1719,143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "b3916558", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"class\")) + geom_smooth())" ] @@ -382,10 +1872,222 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "38870eb5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -442,14 +2144,149 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "ae75c5c1", "metadata": { "tags": [ "remove-cell" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -470,10 +2307,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "cb651300", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + facet_wrap(\"cyl\"))" ] @@ -488,10 +2451,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "61481052", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + facet_grid(\"drv\", \"cyl\"))" ] @@ -508,10 +2599,139 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "adcd9079", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -522,10 +2742,137 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "ceb2a354", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg) + geom_point(aes(x=\"displ\", y=\"hwy\")) + facet_wrap(\"class\", nrow=2))" ] @@ -615,10 +2962,141 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "f379e31b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
rownames
10.23IdealESI261.555.03263.953.982.43
20.21PremiumESI159.861.03263.893.842.31
30.23GoodEVS156.965.03274.054.072.31
40.29PremiumIVS262.458.03344.204.232.63
50.31GoodJSI263.358.03354.344.352.75
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "rownames \n", + "1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", + "2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", + "3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", + "4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", + "5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "diamonds = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv\",\n", @@ -633,10 +3111,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "d8faf1ab", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(diamonds, aes(x=\"cut\")) + geom_bar())" ] @@ -679,10 +3277,108 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "ca772dd5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " ggplot(\n", @@ -706,20 +3402,258 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "f8da7d91", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", color=\"drv\")) + geom_bar())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "088e7550", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"drv\")) + geom_bar())" ] @@ -735,10 +3669,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "181c70d2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar())" ] @@ -758,10 +3812,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "a8e9c378", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(alpha=0.5, position=\"identity\"))" ] @@ -779,10 +3955,131 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "14205000", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(position=\"fill\"))" ] @@ -798,10 +4095,131 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "c33c4a03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(position=\"dodge\"))" ] @@ -818,10 +4236,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "ba4161de", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point())" ] @@ -843,10 +4380,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "414ce7af", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(position=\"jitter\"))" ] @@ -878,10 +4535,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "9bc38aef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(ggplot(mpg, aes(x=\"cty\", y=\"hwy\")) + geom_point())" ] @@ -995,7 +4771,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/webscraping-and-apis.ipynb b/webscraping-and-apis.ipynb index 5171a4d..4bd60c2 100644 --- a/webscraping-and-apis.ipynb +++ b/webscraping-and-apis.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a7f62293", "metadata": {}, "outputs": [], @@ -101,10 +101,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "06108a4d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 14)
rownamesnameyearmonthdayhourlatlongstatuscategorywindpressuretropicalstorm_force_diameterhurricane_force_diameter
i64stri64i64i64i64f64f64stri64i64i64strstr
1"Amy"1975627027.5-79.0"tropical depression"null251013nullnull
2"Amy"1975627628.5-79.0"tropical depression"null251013nullnull
3"Amy"19756271229.5-79.0"tropical depression"null251013nullnull
4"Amy"19756271830.5-79.0"tropical depression"null251013nullnull
5"Amy"1975628031.5-78.8"tropical depression"null251012nullnull
6"Amy"1975628632.4-78.7"tropical depression"null251012nullnull
7"Amy"19756281233.3-78.0"tropical depression"null251011nullnull
8"Amy"19756281834.0-77.0"tropical depression"null301006nullnull
9"Amy"1975629034.4-75.8"tropical storm"null351004nullnull
10"Amy"1975629634.0-74.8"tropical storm"null401002nullnull
" + ], + "text/plain": [ + "shape: (10, 14)\n", + "┌──────────┬──────┬──────┬───────┬───┬──────┬──────────┬─────────────────────┬─────────────────────┐\n", + "│ rownames ┆ name ┆ year ┆ month ┆ … ┆ wind ┆ pressure ┆ tropicalstorm_force ┆ hurricane_force_dia │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ _diameter ┆ meter │\n", + "│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ --- ┆ --- │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ str ┆ str │\n", + "╞══════════╪══════╪══════╪═══════╪═══╪══════╪══════════╪═════════════════════╪═════════════════════╡\n", + "│ 1 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", + "│ 2 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", + "│ 3 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", + "│ 4 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", + "│ 5 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1012 ┆ null ┆ null │\n", + "│ 6 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1012 ┆ null ┆ null │\n", + "│ 7 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1011 ┆ null ┆ null │\n", + "│ 8 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 30 ┆ 1006 ┆ null ┆ null │\n", + "│ 9 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 35 ┆ 1004 ┆ null ┆ null │\n", + "│ 10 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 40 ┆ 1002 ┆ null ┆ null │\n", + "└──────────┴──────┴──────┴───────┴───┴──────┴──────────┴─────────────────────┴─────────────────────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pl.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/storms.csv\", n_rows=10\n", @@ -136,10 +174,113 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6107093c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "url = \"https://api.beta.ons.gov.uk/v1/data?uri=/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/jp9z/lms/previous/v108\"\n", "\n", @@ -202,12 +343,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "380ca743", "metadata": { "lines_to_next_cell": 2 }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61971/1219974464.py:18: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", + " .with_columns(pl.col(\"country\").map_elements(lambda x: textwrap.fill(x, 10)))\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 3)
economycountryEN.GHG.ALL.PC.CE.AR5
strstrf64
"USA""United\n", + "States"18.921098
"CHN""China"9.982534
"ECS""Europe &\n", + "Central\n", + "Asia"9.171446
"EAS""East Asia\n", + "& Pacific"8.435052
"IND""India"2.621464
" + ], + "text/plain": [ + "shape: (5, 3)\n", + "┌─────────┬───────────┬──────────────────────┐\n", + "│ economy ┆ country ┆ EN.GHG.ALL.PC.CE.AR5 │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ f64 │\n", + "╞═════════╪═══════════╪══════════════════════╡\n", + "│ USA ┆ United ┆ 18.921098 │\n", + "│ ┆ States ┆ │\n", + "│ CHN ┆ China ┆ 9.982534 │\n", + "│ ECS ┆ Europe & ┆ 9.171446 │\n", + "│ ┆ Central ┆ │\n", + "│ ┆ Asia ┆ │\n", + "│ EAS ┆ East Asia ┆ 8.435052 │\n", + "│ ┆ & Pacific ┆ │\n", + "│ IND ┆ India ┆ 2.621464 │\n", + "└─────────┴───────────┴──────────────────────┘" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# World Bank CO2 equivalent emissions (metric tons per capita)\n", "# https://data.worldbank.org/indicator/EN.GHG.ALL.PC.CE.AR5\n", @@ -235,10 +424,170 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "00379b93", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "lp.LetsPlot.setup_html()\n", "\n", @@ -386,10 +735,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "073d89e3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'\\n\\n\\n\\n\\n\\n\\n\\n\\n'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "url = \"http://aeturrell.com/research\"\n", "page = requests.get(url)\n", @@ -409,10 +769,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "22f96be0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">\n", + "
\n", + " \n", + " \n", + " \n", + " <\n" + ] + } + ], "source": [ "soup = BeautifulSoup(page.text, \"html.parser\")\n", "print(soup.prettify()[60000:60500])" @@ -429,10 +814,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "d82775de", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "

Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766

" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Get all paragraphs\n", "all_paras = soup.find_all(\"p\")\n", @@ -451,10 +847,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "11321154", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "all_paras[1].text" ] @@ -470,10 +877,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "feac1fdd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766',\n", + " 'Botta, Federico, Robin Lovelace, Laura Gilbert, and Arthur Turrell. \"Packaging code and data for reproducible research: A case study of journey time statistics.\" Environment and Planning B: Urban Analytics and City Science (2024): 23998083241267331. doi: 10.1177/23998083241267331',\n", + " 'Kalamara, Eleni, Arthur Turrell, Chris Redl, George Kapetanios, and Sujit Kapadia. \"Making text count: economic forecasting using newspaper text.\" Journal of Applied Econometrics 37, no. 5 (2022): 896-919. doi: 10.1002/jae.2907',\n", + " 'Turrell, A., Speigner, B., Copple, D., Djumalieva, J. and Thurgood, J., 2021. Is the UK’s productivity puzzle mostly driven by occupational mismatch? An analysis using big data on job vacancies. Labour Economics, 71, p.102013. doi: 10.1016/j.labeco.2021.102013',\n", + " 'Haldane, Andrew G., and Arthur E. Turrell. 
\"Drawing on different disciplines: macroeconomic agent-based models.\" Journal of Evolutionary Economics 29 (2019): 39-66. doi: 10.1007/s00191-018-0557-5',\n", + " 'Haldane, Andrew G., and Arthur E. Turrell. \"An interdisciplinary model for macroeconomics.\" Oxford Review of Economic Policy 34, no. 1-2 (2018): 219-251. doi: 10.1093/oxrep/grx051',\n", + " 'Braun-Munzinger, Karen, Z. Liu, and A. E. Turrell. \"An agent-based model of corporate bond trading.\" Quantitative Finance 18, no. 4 (2018): 591-608. doi: 10.1080/14697688.2017.1380310',\n", + " 'Turrell, A. E., M. Sherlock, and S. J. Rose. \"Efficient evaluation of collisional energy transfer terms for plasma particle simulations.\" Journal of Plasma Physics 82, no. 1 (2016): 905820107. doi: 10.1017/S0022377816000131',\n", + " 'Turrell, A. E., M. Sherlock, and S. J. Rose. \"Ultrafast collisional ion heating by electrostatic shocks.\" Nature Communications 6, no. 1 (2015): 8905. doi: 10.1038/ncomms9905',\n", + " 'Turrell, Arthur E., Mark Sherlock, and Steven J. Rose. \"Self-consistent inclusion of classical large-angle Coulomb collisions in plasma Monte Carlo simulations.\" Journal of Computational Physics 299 (2015): 144-155. doi: 10.1016/j.jcp.2015.06.034',\n", + " 'Turrell, Arthur E., Mark Sherlock, and Steven J. Rose. \"A Monte Carlo algorithm for degenerate plasmas.\" Journal of Computational Physics 249 (2013): 13-21. doi: 10.1016/j.jcp.2013.03.052',\n", + " 'Turrell, Arthur. \"Cutting through Complexity: How Data Science Can Help Policymakers Understand the World.\" In The Economy as a Complex Evolving System, Part IV. Sante Fe Institute, 2025. doi: https://doi.org/10.37911/9781947864665.11',\n", + " 'Duchini, Emma, Stefania Simion, and Arthur Turrell. \"A Review of the Effects of Pay Transparency.\" In Oxford Research Encyclopedia of Economics and Finance, Oxford University Press, 2024. 
doi: 10.1093/acrefore/9780190625979.013.860',\n", + " 'Turrell, Arthur, Bradley Speigner, Jyldyz Djumalieva, David Copple, and James Thurgood. \"6. Transforming Naturally Occurring Text Data into Economic Statistics.\" In Big Data for Twenty-First-Century Economic Statistics, pp. 173-208. University of Chicago Press, 2022. doi: 10.7208/chicago/9780226801391-008',\n", + " 'Turrell, Arthur. \"Agent-based models: understanding the economy from the bottom up\" In Quarterly Bulletin, Q4. Bank of England, 2016.',\n", + " 'Cohen, Samuel N., Giulia Mantoan, Lars Nesheim, Áureo de Paula, Arthur Turrell, and Lingyi Yang. Nowcasting using regression on signatures arXiv preprint arXiv:2305.10256v2 (2025).',\n", + " 'Van Dijcke, David, Marcus Buckmann, Arthur Turrell, and Tomas Key. \"Vacancy Posting, Firm Balance Sheets, and Pandemic Policy Interventions.\" Bank of England Staff Working Paper Series 1033 (2022).',\n", + " 'Draca, Mirko, Emma Duchini, Roland Rathelot, Arthur Turrell, and Giulia Vattuone. Revolution in Progress? The Rise of Remote Work in the UK. University of Warwick, Department of Economics, 2022.',\n", + " 'Hill, Edward, Marco Bardoscia, and Arthur Turrell. \"Solving heterogeneous general equilibrium economic models with deep reinforcement learning.\" arXiv arXiv:2103.16977 (2021).',\n", + " 'Turrell, Arthur, James Thurgood, David Copple, Jyldyz Djumalieva, and Bradley Speigner. 
\"Using online job vacancies to understand the UK labour market from the bottom-up.\" Bank of England Staff Working Papers 742 (2018).']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "projects = soup.find_all(\"div\", class_=\"project-content listing-pub-info\")\n", "projects = [x.text.strip() for x in projects]\n", @@ -518,10 +955,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "0ada9ce7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (3, 4)\n", + "┌─────┬────────────┬───────────┬──────────┐\n", + "│ # ┆ First Name ┆ Last Name ┆ Username │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ str │\n", + "╞═════╪════════════╪═══════════╪══════════╡\n", + "│ 1 ┆ Mark ┆ Otto ┆ @mdo │\n", + "│ 2 ┆ Jacob ┆ Thornton ┆ @fat │\n", + "│ 3 ┆ Larry ┆ the Bird ┆ @twitter │\n", + "└─────┴────────────┴───────────┴──────────┘\n" + ] + } + ], "source": [ "import polars as pl\n", "\n", @@ -566,7 +1020,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/whole-game.ipynb b/whole-game.ipynb index eee6c2c..5498d0e 100644 --- a/whole-game.ipynb +++ b/whole-game.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "51a55374", "metadata": { "tags": [ @@ -29,14 +29,116 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "209ef434", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster_0\n", + "\n", + "Understand\n", + "\n", + "\n", + "\n", + "Import\n", + "\n", + "Import\n", + "\n", + "\n", + "\n", + "Clean\n", + "\n", + "Clean\n", + "\n", + "\n", + "\n", + 
"Import->Clean\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Analyse\n", + "\n", + "Analyse\n", + "\n", + "\n", + "\n", + "Clean->Analyse\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Visualise\n", + "\n", + "Visualise\n", + "\n", + "\n", + "\n", + "Visualise->Analyse\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Transform\n", + "\n", + "Transform\n", + "\n", + "\n", + "\n", + "Analyse->Transform\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Communicate\n", + "\n", + "Communicate\n", + "\n", + "\n", + "\n", + "Analyse->Communicate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Transform->Visualise\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# | echo: false\n", "import graphviz\n", @@ -107,7 +209,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/workflow-basics.ipynb b/workflow-basics.ipynb index a0a7e47..27b2168 100644 --- a/workflow-basics.ipynb +++ b/workflow-basics.ipynb @@ -26,10 +26,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "23465996", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.15\n", + "44.666666666666664\n" + ] + } + ], "source": [ "print(1 / 200 * 30)\n", "print((59 + 73 + 2) / 3)" @@ -46,10 +55,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "bdd3c2c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n" + ] + } + ], "source": [ "import numpy as np\n", "\n", @@ -67,10 +84,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "cead1264", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "12\n" + ] + } + ], "source": [ "x = 3 * 4\n", "print(x)" @@ -87,10 +112,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "91a44d83", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 5, 7, 11, 13]\n" + ] + } + ], "source": [ "primes = [1, 2, 3, 5, 7, 11, 13]\n", "print(primes)" @@ -107,10 +140,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "1a526124", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[3, 6, 9, 15, 21, 33, 39]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[element * 3 for element in primes]" ] @@ -154,10 +198,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c061f9cc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[2, 4, 6, 10, 14, 22, 26]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# define primes\n", "primes = [1, 2, 3, 5, 7, 11, 13]\n", @@ -193,10 +248,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "e77540e0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 5, 7, 11, 13]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "primes" ] @@ -212,10 +278,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "9dd0fd49", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(primes)" ] @@ -273,10 +350,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a5998cb5", "metadata": {}, - "outputs": [], 
+ "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 5, 7, 11, 13]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "primes" ] @@ -291,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "ae55d78e", "metadata": {}, "outputs": [], @@ -313,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "038c7d52", "metadata": {}, "outputs": [], @@ -369,10 +457,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "97af119d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "42" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sum(primes)" ] @@ -388,10 +487,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "14ce9b99", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "52" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sum(primes, start=10)" ] @@ -407,10 +517,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "f5e45616", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function sum in module builtins:\n", + "\n", + "sum(iterable, /, start=0)\n", + " Return the sum of a 'start' value (default: 0) plus an iterable of numbers\n", + "\n", + " When the iterable is empty, return the start value.\n", + " This function is intended specifically for use with numeric values and may\n", + " reject non-numeric types.\n", + "\n" + ] + } + ], "source": [ "help(sum)" ] @@ -528,7 +654,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git 
a/workflow-style.ipynb b/workflow-style.ipynb index 6921e86..7a1a2a4 100644 --- a/workflow-style.ipynb +++ b/workflow-style.ipynb @@ -148,10 +148,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "f0f5bb37", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 4)
col3col4col1col2
strstru32f64
"a""alpha"10.0
"b""gamma"20.0
"a""gamma"10.0
" + ], + "text/plain": [ + "shape: (3, 4)\n", + "┌──────┬───────┬──────┬──────┐\n", + "│ col3 ┆ col4 ┆ col1 ┆ col2 │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ u32 ┆ f64 │\n", + "╞══════╪═══════╪══════╪══════╡\n", + "│ a ┆ alpha ┆ 1 ┆ 0.0 │\n", + "│ b ┆ gamma ┆ 2 ┆ 0.0 │\n", + "│ a ┆ gamma ┆ 1 ┆ 0.0 │\n", + "└──────┴───────┴──────┴──────┘" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -271,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.13" }, "toc-showtags": true }, diff --git a/workflow-writing-code.ipynb b/workflow-writing-code.ipynb index 28e2edd..bb442d2 100644 --- a/workflow-writing-code.ipynb +++ b/workflow-writing-code.ipynb @@ -150,7 +150,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.12.13" } }, "nbformat": 4, From 2ae8c35a23c9920aef5b16c0c787e090a816ccef Mon Sep 17 00:00:00 2001 From: Nwabueze Ugoh Date: Tue, 28 Apr 2026 13:49:11 +0100 Subject: [PATCH 2/3] Enhance command line documentation and update notebook outputs - Updated the command line chapter in `command-line.md` to improve formatting by replacing asterisks with underscores for emphasis. - Modified output displays in `functions.ipynb` and `iteration.ipynb` to reflect new data and execution counts, ensuring consistency and clarity in results. - Adjusted various execution counts across notebooks to maintain accurate tracking of code execution. 
--- command-line.md | 104 +++++++++++++++++++++++------------------------ functions.ipynb | 22 +++++----- iteration.ipynb | 106 ++++++++++++++++++++++++------------------------ 3 files changed, 116 insertions(+), 116 deletions(-) diff --git a/command-line.md b/command-line.md index 1079842..2cdce9b 100644 --- a/command-line.md +++ b/command-line.md @@ -1,6 +1,6 @@ # The Command Line {#sec-command-line} -In this chapter, you'll meet the *command line* and learn how to use it. Beyond a few key commands like `uv add ` you don't strictly need to know how to use the command line to follow the rest of this book. However, even a tiny bit of knowledge of the command line goes a long way in coding and will serve you well. +In this chapter, you'll meet the _command line_ and learn how to use it. Beyond a few key commands like `uv add ` you don't strictly need to know how to use the command line to follow the rest of this book. However, even a tiny bit of knowledge of the command line goes a long way in coding and will serve you well. To try out any of the commands in this chapter on your machine, you can select 'New Terminal' from the menu bar in Visual Studio Code (Mac and Linux), use the Windows Subsystem for Linux or git bash (Windows), or use a free [online terminal](https://cocalc.com/doc/terminal.html). @@ -8,17 +8,17 @@ This chapter has benefited from numerous sources, including absolutely excellent ## What is the command line? -The command line is a way to directly issue text-based commands to a computer one line at a time (as distinct from a graphical user interface, or GUI, that you navigate with a mouse). It goes under many names: shell, bash, terminal, CLI, and command line. These are actually different things but most people tend to use them to mean the same thing most of the time. The *shell* is the part of an operating system that you interact with but mostly people use shell to mean the command line. 
*bash* is the programming language that is used in the command line; it's actually a synonym for 'Born Again SHell'. The *terminal* is sometimes used to refer to the command line on Macs. Finally, a *CLI* is just an acronym for command line interface, and is often used in the context of an application; for example, uv has a command line interface because you run it on the command line to install packages (`uv add packagename`). +The command line is a way to directly issue text-based commands to a computer one line at a time (as distinct from a graphical user interface, or GUI, that you navigate with a mouse). It goes under many names: shell, bash, terminal, CLI, and command line. These are actually different things but most people tend to use them to mean the same thing most of the time. The _shell_ is the part of an operating system that you interact with but mostly people use shell to mean the command line. _bash_ is the programming language that is used in the command line; its name is an acronym for 'Bourne Again SHell'. The _terminal_ is sometimes used to refer to the command line on Macs. Finally, a _CLI_ is just an acronym for command line interface, and is often used in the context of an application; for example, uv has a command line interface because you run it on the command line to install packages (`uv add packagename`). It's worth mentioning that there's a big difference between the command line on UNIX based systems (MacOS and Linux), and on Windows systems. Here, we'll only address the UNIX version. There is a command line on Windows but it's not widely used for coding. If you're on a Windows machine, you can access a UNIX command line using the Windows Subsystem for Linux. ## Why is the command line useful? -The command line has many uses. Graphical user interfaces are, generally, a bit easier to use *but* they're not very repeatable or scalable.
Because the command line uses text-based instructions and can be programmed, it is both repeatable and scalable; properties that are very useful for research and analysis. +The command line has many uses. Graphical user interfaces are, generally, a bit easier to use _but_ they're not very repeatable or scalable. Because the command line uses text-based instructions and can be programmed, it is both repeatable and scalable; properties that are very useful for research and analysis. The broad reasons you might use the command line to issue instructions include: -- software functionality: some software *only* has a command line interface +- software functionality: some software _only_ has a command line interface - efficiency: your computer has limited memory, which graphical user interfaces use a lot of—the command line uses less @@ -71,7 +71,7 @@ The flags or options, such as `-n` in the example above, typically begin with a Spaces take on a special role when using the command line. For this reason, it's good practice to avoid spaces in file names. If you need to refer to a filename with spaces in, you’ll need to use quotes or escape the spaces in the file names using a `\`, for example `this is my file.txt` becomes `this\ is\ my\ file.txt` ::: -To run programmes from the command line, all you need is the name of the programme as the command: in fact, commands *are* programmes. The `date` command refers to an actual programme on your computer that you can find. And this also explains a bit of what's going on when you *run a script from the command line* (more on that later). +To run programmes from the command line, all you need is the name of the programme as the command: in fact, commands _are_ programmes. The `date` command refers to an actual programme on your computer that you can find. And this also explains a bit of what's going on when you _run a script from the command line_ (more on that later). 
Once you've run a few commands, you'll notice that you can't navigate around the command line like you can a text file or Python script. Here are some tips for navigating the command line: @@ -93,20 +93,20 @@ Once you've run a few commands, you'll notice that you can't navigate around the ### Navigating directories -While we're on navigating, it's useful to understand *where* in the computer you are when you open the command line. If you open a terminal pane within VS Code, you will start (by default at least) within the same folder as your project. Starting a terminal instance outside of VS Code will get you a terminal in a root directory for your computer; for example, on a Mac, opening a new terminal window starts you in `/Users/yourusername/`. +While we're on navigating, it's useful to understand _where_ in the computer you are when you open the command line. If you open a terminal pane within VS Code, you will start (by default at least) within the same folder as your project. Starting a terminal instance outside of VS Code will get you a terminal in a root directory for your computer; for example, on a Mac, opening a new terminal window starts you in `/Users/yourusername/`. To find out "where" you are when you open a terminal, you can use the `pwd` command, which stands for "print working directory". -The table below shows some useful commands for moving around your computer using the command line. Note that `cd` accepts a location *relative* to your current directory. +The table below shows some useful commands for moving around your computer using the command line. Note that `cd` accepts a location _relative_ to your current directory. 
- | Command | What it does | - | --------------------- | ------------------------------------------------------------ | - | `pwd` | Shows current directory | - | `cd` | Change directory command | - | `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | - | `cd ~` | Go to your home directory | - | `cd -` | Go to the previous directory | - | `cd documents/papers` | Go directly to a directory named 'papers' | +| Command | What it does | +| --------------------- | ------------------------------------------------------------ | +| `pwd` | Shows current directory | +| `cd` | Change directory command | +| `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | +| `cd ~` | Go to your home directory | +| `cd -` | Go to the previous directory | +| `cd documents/papers` | Go directly to a directory named 'papers' | ## Using Python on the command line @@ -124,7 +124,7 @@ Say you have a script called `analysis.py`, you can run it with Python on the co uv run python analysis.py ``` -which calls Python as a programme and gives it `analysis.py` as the argument. If you have multiple versions of Python, which you should do if you're following best practice and using a version per project, then you can see *which* version of Python is being used with +which calls Python as a programme and gives it `analysis.py` as the argument. If you have multiple versions of Python, which you should do if you're following best practice and using a version per project, then you can see _which_ version of Python is being used with ```bash which python @@ -134,39 +134,39 @@ which python Now we'll see some useful commands for the terminal. 
- | Command                                          | What it does | - | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | - | `man ` | Shows a manual for the given command | - | `touch ` | Creates an empty file named `` | - | `code ` | Open a file in VS Code (creating it, if it does not exist) | - | `mkdir ` | creates a new folder called `foldername` | - | `echo ` | Prints `` | - | `cat ` | Print the full contents of `` | - | `head ` | Print the start of a file | - | `tail ` | Print the end of a file | - | `> ` | Redirects output from screen to ``. For example, `echo "Hello World" > hello.txt` | - | `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | - | ` | ` | The pipe symbol: uses output from one command as input into another. For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt | - | `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit. | - | `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. 
| - | `sort` | Arrange lines in a file in alphabetical order | - | `uniq` | Remove duplicate lines from input, for example `cat | uniq` or `uniq -d` to show duplicate files | - | `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory | - | `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory | - | `rm ` | Permanently remove a file | - | `rmdir ` | Permanently remove an empty directory | - | `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | - | `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | - | `ls` | Basically, this means list stuff (files and folders) in the current directory | - | `ls -a` | List stuff in the current directory even if it's hidden | - | `ls -l` | List stuff in a more readable format and show permissions | - | `ls -S` | List stuff by size | - | `file ` | Give information on the file type of `` | - | `find` | Find specific files on your computer, can be piped into other commands for example `find *.md -size +5k -type f | xargs wc -l` will count the number of lines `wc -l` of all files, `-type f`, ending in `.md` that are greater than 5 kilobytes in size, `-size +5k`. | - | `diff -u ` | Show a single summary of the differences between two files. 
| +| Command                                          | What it does | +| --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `man ` | Shows a manual for the given command | +| `touch ` | Creates an empty file named `` | +| `code ` | Open a file in VS Code (creating it, if it does not exist) | +| `mkdir ` | creates a new folder called `foldername` | +| `echo ` | Prints `` | +| `cat ` | Print the full contents of `` | +| `head ` | Print the start of a file | +| `tail ` | Print the end of a file | +| `> ` | Redirects output from screen to ``. For example, `echo "Hello World" > hello.txt` | +| `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | +| ` | ` | The pipe symbol: uses output from one command as input into another. For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt | +| `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit. | +| `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. 
| +| `sort` | Arrange lines in a file in alphabetical order | +| `uniq` | Remove duplicate lines from input, for example `cat | uniq` or `uniq -d` to show duplicate lines | +| `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory | +| `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory | +| `rm ` | Permanently remove a file | +| `rmdir ` | Permanently remove an empty directory | +| `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | +| `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | +| `ls` | Basically, this means list stuff (files and folders) in the current directory | +| `ls -a` | List stuff in the current directory even if it's hidden | +| `ls -l` | List stuff in a more readable format and show permissions | +| `ls -S` | List stuff by size | +| `file ` | Give information on the file type of `` | +| `find` | Find specific files on your computer, can be piped into other commands for example `find *.md -size +5k -type f | xargs wc -l` will count the number of lines `wc -l` of all files, `-type f`, ending in `.md` that are greater than 5 kilobytes in size, `-size +5k`. | +| `diff -u ` | Show a single summary of the differences between two files. | ![More details of the grep command](https://pbs.twimg.com/media/DcPeD_CW0AEkSar?format=jpg&name=small) -*More details of the grep command, by [\@b0rk](https://twitter.com/b0rk).* +_More details of the grep command, by [\@b0rk](https://twitter.com/b0rk)._ You can write for loops in bash (remember, it's a language). The general structure is @@ -187,7 +187,7 @@ A more interesting example is giving the number of lines of text, number of word ```bash for i in $(ls *.csv) -do +do wc $i done ``` @@ -204,7 +204,7 @@ done A couple of new features appeared in the examples above.
-`*` is a *wildcard character*, it tells bash to look for anything that ends in ".csv". This is not the only special case; `?` serves a similar purpose of standing in for any character but just *one* character rather than arbitrarily many. If you had a folder with `file1.csv`, `file2.csv`, etc., up to 9, then you could use `file?.csv` to refer to all of them but this would not pick up `file10.csv`. +`*` is a _wildcard character_, it tells bash to look for anything that ends in ".csv". This is not the only special case; `?` serves a similar purpose of standing in for any character but just _one_ character rather than arbitrarily many. If you had a folder with `file1.csv`, `file2.csv`, etc., up to 9, then you could use `file?.csv` to refer to all of them but this would not pick up `file10.csv`. Another special character we've already seen is the curly brace, `{}`. Whenever you have a common substring in a series of commands using curly braces tells the command line to expand what's in them automatically. In an example above, this is used on 1 to 5. But it can also be used in, for example, file names: @@ -272,7 +272,7 @@ You can find more of these special variables [here](https://tldp.org/LDP/abs/htm [**pandoc**](https://pandoc.org/) is absolutely brilliant: if you need to convert files containing text from one format to another, it really is a swiss-army knife. There isn't space here to list the ridiculous number of documents it can convert between, but, importantly, it can translate back and forth between all of the following: markdown, $\LaTeX$, Microsoft Word's docx, OpenOffice's ODT, HTML, and Jupyter Notebook. -It can also write from any of those formats (and more) in one direction *to* PDF, Microsoft Powerpoint, and $\LaTeX$ Beamer. +It can also write from any of those formats (and more) in one direction _to_ PDF, Microsoft Powerpoint, and $\LaTeX$ Beamer. 
To use **pandoc**, install it following the instructions on the website and then call it like this: @@ -284,9 +284,9 @@ This is an example where the input is a .tex document and the output, `-o`, is a You can get quite fancy with **pandoc**, for example you can translate a whole book's worth of latex into a Word doc complete with a Word style, a bibliography via biblatex, equations, and figures. Nothing can save Word from being painful to use, but **pandoc** certainly helps. -[**eza**](https://eza.rocks/) is an upgrade on the `ls` command. It is designed to be an improved file lister with more features and better defaults. It uses colours to distinguish file types and metadata. Follow the instructions on the website to install it on your operating system. To replace `ls` with `eza`, you can use a terminal *alias*. There's a good guide [available here](https://denisrasulev.medium.com/eza-the-best-ls-command-replacement-9621252323e). +[**eza**](https://eza.rocks/) is an upgrade on the `ls` command. It is designed to be an improved file lister with more features and better defaults. It uses colours to distinguish file types and metadata. Follow the instructions on the website to install it on your operating system. To replace `ls` with `eza`, you can use a terminal _alias_. There's a good guide [available here](https://denisrasulev.medium.com/eza-the-best-ls-command-replacement-9621252323e). -**nano** is a built-in text editor that runs *within* the terminal. This can be really useful if you're working on the cloud (but it's not got the rich features of a GUI-based text editor like VS Code). To open a file using **nano**, the command is `nano file.txt`. Nano displays instructions on how to navigate when it loads up but exiting is the hardest part: when you're done, hit `Ctrl+X`, then `y` to save, and then `enter` to exit. +**nano** is a built-in text editor that runs _within_ the terminal. 
This can be really useful if you're working on the cloud (but it's not got the rich features of a GUI-based text editor like VS Code). To open a file using **nano**, the command is `nano file.txt`. Nano displays instructions on how to navigate when it loads up but exiting is the hardest part: when you're done, hit `Ctrl+X`, then `y` to save, and then `enter` to exit. [**wget**](https://www.gnu.org/software/wget/) is a command-line utility for downloading files from the internet. It's very simple to use, the syntax is just `wget [options] [url]`. For example, to download the starwars csv file used in this book, the command is diff --git a/functions.ipynb b/functions.ipynb index 70c4fd3..0f2cce3 100644 --- a/functions.ipynb +++ b/functions.ipynb @@ -208,7 +208,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (10, 4)
abcd
f64f64f64f64
0.4345330.9990720.3288630.269584
0.00.4799171.00.676026
1.00.00.3837450.381636
0.6584960.4682960.0809160.283739
0.3495880.1646770.6419610.38563
0.3624860.3741330.4753370.024468
0.0426121.1409950.4003250.121255
0.5385220.3190620.5273250.292675
0.5442450.6919370.01.0
0.3524920.2954480.4701810.0
" + "shape: (10, 4)
abcd
f64f64f64f64
0.4736410.3145550.780770.683499
1.00.1921670.8502790.186199
0.4507660.4572280.5157880.0
0.3617790.2195290.2433410.934585
0.028510.00.00.948462
0.4747280.1190681.00.556861
0.1679770.6302890.6544850.727404
0.279770.5057040.9460620.527484
0.4111420.8182760.1750241.0
0.00.1704820.3733090.45716
" ], "text/plain": [ "shape: (10, 4)\n", @@ -217,16 +217,16 @@ "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞══════════╪══════════╪══════════╪══════════╡\n", - "│ 0.434533 ┆ 0.999072 ┆ 0.328863 ┆ 0.269584 │\n", - "│ 0.0 ┆ 0.479917 ┆ 1.0 ┆ 0.676026 │\n", - "│ 1.0 ┆ 0.0 ┆ 0.383745 ┆ 0.381636 │\n", - "│ 0.658496 ┆ 0.468296 ┆ 0.080916 ┆ 0.283739 │\n", - "│ 0.349588 ┆ 0.164677 ┆ 0.641961 ┆ 0.38563 │\n", - "│ 0.362486 ┆ 0.374133 ┆ 0.475337 ┆ 0.024468 │\n", - "│ 0.042612 ┆ 1.140995 ┆ 0.400325 ┆ 0.121255 │\n", - "│ 0.538522 ┆ 0.319062 ┆ 0.527325 ┆ 0.292675 │\n", - "│ 0.544245 ┆ 0.691937 ┆ 0.0 ┆ 1.0 │\n", - "│ 0.352492 ┆ 0.295448 ┆ 0.470181 ┆ 0.0 │\n", + "│ 0.473641 ┆ 0.314555 ┆ 0.78077 ┆ 0.683499 │\n", + "│ 1.0 ┆ 0.192167 ┆ 0.850279 ┆ 0.186199 │\n", + "│ 0.450766 ┆ 0.457228 ┆ 0.515788 ┆ 0.0 │\n", + "│ 0.361779 ┆ 0.219529 ┆ 0.243341 ┆ 0.934585 │\n", + "│ 0.02851 ┆ 0.0 ┆ 0.0 ┆ 0.948462 │\n", + "│ 0.474728 ┆ 0.119068 ┆ 1.0 ┆ 0.556861 │\n", + "│ 0.167977 ┆ 0.630289 ┆ 0.654485 ┆ 0.727404 │\n", + "│ 0.27977 ┆ 0.505704 ┆ 0.946062 ┆ 0.527484 │\n", + "│ 0.411142 ┆ 0.818276 ┆ 0.175024 ┆ 1.0 │\n", + "│ 0.0 ┆ 0.170482 ┆ 0.373309 ┆ 0.45716 │\n", "└──────────┴──────────┴──────────┴──────────┘" ] }, diff --git a/iteration.ipynb b/iteration.ipynb index 74021f3..fd727cd 100644 --- a/iteration.ipynb +++ b/iteration.ipynb @@ -348,7 +348,7 @@ "[51, 52, 53, 54, 55, 56, 57, 58, 59, 60]" ] }, - "execution_count": 10, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +536,7 @@ "{'Ada': 'Lovelace', 'Adam': 'Smith'}" ] }, - "execution_count": 16, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -651,7 +651,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6, 4)
abcd
f64f64f64f64
-0.405242-1.3517780.4098180.975997
1.425904-1.610260.456314-0.371164
-0.555008-0.13544-0.3194540.612513
1.842544-1.7097670.0183770.304892
0.926347-1.3718410.3992051.433585
0.649944-0.011038-0.904321-0.5888
" + "shape: (6, 4)
abcd
f64f64f64f64
-0.871189-0.681725-1.562824-0.560553
-1.2694980.148269-0.217629-0.221637
-0.555918-1.4479222.4705161.641996
0.35463-1.664464-0.0148610.030461
-0.8667982.2567060.968797-0.078444
0.5406780.8744340.286314-0.138539
" ], "text/plain": [ "shape: (6, 4)\n", @@ -660,16 +660,16 @@ "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪═══════════╪═══════════╪═══════════╡\n", - "│ -0.405242 ┆ -1.351778 ┆ 0.409818 ┆ 0.975997 │\n", - "│ 1.425904 ┆ -1.61026 ┆ 0.456314 ┆ -0.371164 │\n", - "│ -0.555008 ┆ -0.13544 ┆ -0.319454 ┆ 0.612513 │\n", - "│ 1.842544 ┆ -1.709767 ┆ 0.018377 ┆ 0.304892 │\n", - "│ 0.926347 ┆ -1.371841 ┆ 0.399205 ┆ 1.433585 │\n", - "│ 0.649944 ┆ -0.011038 ┆ -0.904321 ┆ -0.5888 │\n", + "│ -0.871189 ┆ -0.681725 ┆ -1.562824 ┆ -0.560553 │\n", + "│ -1.269498 ┆ 0.148269 ┆ -0.217629 ┆ -0.221637 │\n", + "│ -0.555918 ┆ -1.447922 ┆ 2.470516 ┆ 1.641996 │\n", + "│ 0.35463 ┆ -1.664464 ┆ -0.014861 ┆ 0.030461 │\n", + "│ -0.866798 ┆ 2.256706 ┆ 0.968797 ┆ -0.078444 │\n", + "│ 0.540678 ┆ 0.874434 ┆ 0.286314 ┆ -0.138539 │\n", "└───────────┴───────────┴───────────┴───────────┘" ] }, - "execution_count": 18, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -706,20 +706,20 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (1, 4)
abcd
f64f64f64f64
0.788146-1.361810.2087910.458703
" + "shape: (1, 4)
abcd
f64f64f64f64
-0.711358-0.2667280.135727-0.108492
" ], "text/plain": [ "shape: (1, 4)\n", - "┌──────────┬──────────┬──────────┬──────────┐\n", - "│ a ┆ b ┆ c ┆ d │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞══════════╪══════════╪══════════╪══════════╡\n", - "│ 0.788146 ┆ -1.36181 ┆ 0.208791 ┆ 0.458703 │\n", - "└──────────┴──────────┴──────────┴──────────┘" + "┌───────────┬───────────┬──────────┬───────────┐\n", + "│ a ┆ b ┆ c ┆ d │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═══════════╪═══════════╪══════════╪═══════════╡\n", + "│ -0.711358 ┆ -0.266728 ┆ 0.135727 ┆ -0.108492 │\n", + "└───────────┴───────────┴──────────┴───────────┘" ] }, - "execution_count": 19, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -744,7 +744,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6, 1)
row_median
f64
0.002288
0.042575
-0.227447
0.161635
0.662776
-0.299919
" + "shape: (6, 1)
row_median
f64
-0.776457
-0.219633
0.543039
0.0078
0.445177
0.413496
" ], "text/plain": [ "shape: (6, 1)\n", @@ -753,16 +753,16 @@ "│ --- │\n", "│ f64 │\n", "╞════════════╡\n", - "│ 0.002288 │\n", - "│ 0.042575 │\n", - "│ -0.227447 │\n", - "│ 0.161635 │\n", - "│ 0.662776 │\n", - "│ -0.299919 │\n", + "│ -0.776457 │\n", + "│ -0.219633 │\n", + "│ 0.543039 │\n", + "│ 0.0078 │\n", + "│ 0.445177 │\n", + "│ 0.413496 │\n", "└────────────┘" ] }, - "execution_count": 20, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -789,7 +789,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "291 μs ± 1.95 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" + "294 μs ± 4.83 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" ] } ], @@ -824,7 +824,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "56.7 μs ± 601 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" + "56.5 μs ± 929 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" ] } ], @@ -888,7 +888,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6, 6)
abcdnew_aresult
f64f64f64f64f64f64
40554.59475840553.64822240555.40981840555.97599740559.594758-7.814557
40556.42590440553.3897440555.45631440554.62883640561.425904-6.288454
40554.44499240554.8645640554.68054640555.61251340559.444992-6.018383
40556.84254440553.29023340555.01837740555.30489240561.842544-5.533329
40555.92634740553.62815940555.39920540556.43358540560.926347-6.492421
40555.64994440554.98896240554.09567940554.411240560.649944-4.104063
" + "shape: (6, 6)
abcdnew_aresult
f64f64f64f64f64f64
40554.12881140554.31827540553.43717640554.43944740559.128811-4.545293
40553.73050240555.14826940554.78237140554.77836340558.730502-5.458877
40554.44408240553.55207840557.47051640556.64199640559.444082-9.030254
40555.3546340553.33553640554.98513940555.03046140560.35463-5.850543
40554.13320240557.25670640555.96879740554.92155640559.133202-4.134134
40555.54067840555.87443440555.28631440554.86146140560.540678-3.426466
" ], "text/plain": [ "shape: (6, 6)\n", @@ -897,16 +897,16 @@ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪═══════════╡\n", - "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ -7.814557 │\n", - "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ -6.288454 │\n", - "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ -6.018383 │\n", - "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ -5.533329 │\n", - "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ -6.492421 │\n", - "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ -4.104063 │\n", + "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ -4.545293 │\n", + "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ -5.458877 │\n", + "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ -9.030254 │\n", + "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ -5.850543 │\n", + "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ -4.134134 │\n", + "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ -3.426466 │\n", "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴───────────┘" ] }, - "execution_count": 24, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -956,7 +956,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6, 6)
abcdnew_aratio
f64f64f64f64f64f64
40554.59475840553.64822240555.40981840555.97599740559.5947580.999877
40556.42590440553.3897440555.45631440554.62883640561.4259040.999877
40554.44499240554.8645640554.68054640555.61251340559.4449920.999877
40556.84254440553.29023340555.01837740555.30489240561.8425440.999877
40555.92634740553.62815940555.39920540556.43358540560.9263470.999877
40555.64994440554.98896240554.09567940554.411240560.6499440.999877
" + "shape: (6, 6)
abcdnew_aratio
f64f64f64f64f64f64
40554.12881140554.31827540553.43717640554.43944740559.1288110.999877
40553.73050240555.14826940554.78237140554.77836340558.7305020.999877
40554.44408240553.55207840557.47051640556.64199640559.4440820.999877
40555.3546340553.33553640554.98513940555.03046140560.354630.999877
40554.13320240557.25670640555.96879740554.92155640559.1332020.999877
40555.54067840555.87443440555.28631440554.86146140560.5406780.999877
" ], "text/plain": [ "shape: (6, 6)\n", @@ -965,16 +965,16 @@ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╡\n", - "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ 0.999877 │\n", - "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ 0.999877 │\n", - "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ 0.999877 │\n", - "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ 0.999877 │\n", - "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ 0.999877 │\n", - "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ 0.999877 │\n", + "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ 0.999877 │\n", + "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ 0.999877 │\n", + "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ 0.999877 │\n", + "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ 0.999877 │\n", + "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ 0.999877 │\n", + "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ 0.999877 │\n", "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┘" ] }, - "execution_count": 25, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1008,7 +1008,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6, 7)
abcdnew_aratioa_gt_0.5
f64f64f64f64f64f64bool
40554.59475840553.64822240555.40981840555.97599740559.5947580.999877true
40556.42590440553.3897440555.45631440554.62883640561.4259040.999877true
40554.44499240554.8645640554.68054640555.61251340559.4449920.999877true
40556.84254440553.29023340555.01837740555.30489240561.8425440.999877true
40555.92634740553.62815940555.39920540556.43358540560.9263470.999877true
40555.64994440554.98896240554.09567940554.411240560.6499440.999877true
" + "shape: (6, 7)
abcdnew_aratioa_gt_0.5
f64f64f64f64f64f64bool
40554.12881140554.31827540553.43717640554.43944740559.1288110.999877true
40553.73050240555.14826940554.78237140554.77836340558.7305020.999877true
40554.44408240553.55207840557.47051640556.64199640559.4440820.999877true
40555.3546340553.33553640554.98513940555.03046140560.354630.999877true
40554.13320240557.25670640555.96879740554.92155640559.1332020.999877true
40555.54067840555.87443440555.28631440554.86146140560.5406780.999877true
" ], "text/plain": [ "shape: (6, 7)\n", @@ -1017,16 +1017,16 @@ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ bool │\n", "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╪══════════╡\n", - "│ 40554.594758 ┆ 40553.648222 ┆ 40555.409818 ┆ 40555.975997 ┆ 40559.594758 ┆ 0.999877 ┆ true │\n", - "│ 40556.425904 ┆ 40553.38974 ┆ 40555.456314 ┆ 40554.628836 ┆ 40561.425904 ┆ 0.999877 ┆ true │\n", - "│ 40554.444992 ┆ 40554.86456 ┆ 40554.680546 ┆ 40555.612513 ┆ 40559.444992 ┆ 0.999877 ┆ true │\n", - "│ 40556.842544 ┆ 40553.290233 ┆ 40555.018377 ┆ 40555.304892 ┆ 40561.842544 ┆ 0.999877 ┆ true │\n", - "│ 40555.926347 ┆ 40553.628159 ┆ 40555.399205 ┆ 40556.433585 ┆ 40560.926347 ┆ 0.999877 ┆ true │\n", - "│ 40555.649944 ┆ 40554.988962 ┆ 40554.095679 ┆ 40554.4112 ┆ 40560.649944 ┆ 0.999877 ┆ true │\n", + "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ 0.999877 ┆ true │\n", + "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ 0.999877 ┆ true │\n", + "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ 0.999877 ┆ true │\n", + "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ 0.999877 ┆ true │\n", + "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ 0.999877 ┆ true │\n", + "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ 0.999877 ┆ true │\n", "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┴──────────┘" ] }, - "execution_count": 26, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } From d93ca65a5dc162a87c09fd2400cfe57e1933a1c9 Mon Sep 17 00:00:00 2001 From: Nwabueze Ugoh Date: Tue, 28 Apr 2026 14:05:14 +0100 Subject: [PATCH 3/3] Refactor notebooks to reset execution counts and clear outputs - Updated execution counts to null and cleared outputs in `boolean-data.ipynb`, 
`categorical-data.ipynb`, `communicate-plots.ipynb`, `data-import.ipynb`, `data-tidy.ipynb`, `data-transform.ipynb`, and other notebooks for a cleaner presentation. - Enhanced overall consistency in notebook formatting by removing previous output displays, ensuring a more streamlined user experience. --- boolean-data.ipynb | 1133 +- categorical-data.ipynb | 357 +- command-line.md | 76 +- communicate-plots.ipynb | 7790 +------------ data-import.ipynb | 324 +- data-tidy.ipynb | 1011 +- data-transform.ipynb | 1484 +-- data-visualise.ipynb | 2520 +---- databases.ipynb | 418 +- dates-and-times.ipynb | 11629 +------------------ exploratory-data-analysis.ipynb | 17751 +----------------------------- functions.ipynb | 247 +- introduction.ipynb | 120 +- iteration.ipynb | 466 +- joins.ipynb | 372 +- missing-values.ipynb | 1468 +-- numbers.ipynb | 7331 +----------- rectangling.ipynb | 357 +- regex.ipynb | 103 +- spreadsheets.ipynb | 416 +- strings.ipynb | 808 +- vis-layers.ipynb | 3900 +------ webscraping-and-apis.ipynb | 498 +- whole-game.ipynb | 108 +- workflow-basics.ipynb | 178 +- workflow-style.ipynb | 34 +- 26 files changed, 1053 insertions(+), 59846 deletions(-) diff --git a/boolean-data.ipynb b/boolean-data.ipynb index 7da9c0b..d389d25 100644 --- a/boolean-data.ipynb +++ b/boolean-data.ipynb @@ -22,21 +22,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "7e35b9fc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "bool_variable = True\n", "bool_variable" @@ -62,21 +51,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "590cd75d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "not True" ] @@ -108,18 
+86,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "51622575", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "False\n" - ] - } - ], + "outputs": [], "source": [ "boolean_condition = 10 == 20\n", "print(boolean_condition)" @@ -145,20 +115,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "0c550daa", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ada, you achieved a high score.\n", - "You could be called Smith or have a high score\n", - "You are not called Smith and you have a high score\n" - ] - } - ], + "outputs": [], "source": [ "name = \"Ada\"\n", "score = 99\n", @@ -183,19 +143,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "7420e1c1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n", - "False\n" - ] - } - ], + "outputs": [], "source": [ "name_list = [\"Ada\", \"Adam\"]\n", "name_list_two = [\"Ada\", \"Adam\"]\n", @@ -225,19 +176,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "39caa7be", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n", - "False\n" - ] - } - ], + "outputs": [], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", "\n", @@ -268,18 +210,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "95794e71", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "High score!\n" - ] - } - ], + "outputs": [], "source": [ "score = 98\n", "\n", @@ -309,21 +243,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "cd1cd061", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 8, - "metadata": {}, - 
"output_type": "execute_result" - } - ], + "outputs": [], "source": [ "a, b = 3, 6\n", "\n", @@ -342,21 +265,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "59638407", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[x for x in range(12)]" ] @@ -371,21 +283,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "8e8072ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 2, 4, 6, 8, 10]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[x for x in range(12) if x % 2 == 0]" ] @@ -400,32 +301,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "ec01f460", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0,\n", - " 'Not divisible by 2',\n", - " 2,\n", - " 'Not divisible by 2',\n", - " 4,\n", - " 'Not divisible by 2',\n", - " 6,\n", - " 'Not divisible by 2',\n", - " 8,\n", - " 'Not divisible by 2',\n", - " 10,\n", - " 'Not divisible by 2']" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[x if x % 2 == 0 else \"Not divisible by 2\" for x in range(12)]" ] @@ -449,18 +328,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "dc605a93", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Falsy\n" - ] - } - ], + "outputs": [], "source": [ "listy = []\n", "other_listy = [1, 2, 3]\n", @@ -473,18 +344,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "da8fe682", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Truthy\n" - ] 
- } - ], + "outputs": [], "source": [ "if not (other_listy):\n", " print(\"Falsy\")\n", @@ -502,18 +365,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "d80ba0be", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Falsy\n" - ] - } - ], + "outputs": [], "source": [ "if not 0:\n", " print(\"Falsy\")\n", @@ -523,18 +378,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "1973d44d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Truthy\n" - ] - } - ], + "outputs": [], "source": [ "if not [0, 0, 0]:\n", " print(\"Falsy\")\n", @@ -552,18 +399,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "62840c4a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Falsy\n" - ] - } - ], + "outputs": [], "source": [ "if not None:\n", " print(\"Falsy\")\n", @@ -593,21 +432,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "bdcb09a5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "any([True, False, False])" ] @@ -622,21 +450,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "2f666185", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "all([True, True, True, True])" ] @@ -651,21 +468,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "78777d9c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + 
"outputs": [], "source": [ "all([0, 0, 0, 1])" ] @@ -684,79 +490,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "7f338fd7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
bool_col_1bool_col_2
0FalseTrue
1FalseFalse
2FalseTrue
3TrueFalse
4TrueTrue
\n", - "
" - ], - "text/plain": [ - " bool_col_1 bool_col_2\n", - "0 False True\n", - "1 False False\n", - "2 False True\n", - "3 True False\n", - "4 True True" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -779,26 +516,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "9cdaec7a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 True\n", - "1 False\n", - "2 True\n", - "3 True\n", - "4 True\n", - "dtype: bool" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"bool_col_1\"] | df[\"bool_col_2\"]" ] @@ -813,23 +534,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "89ee3e44", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "bool_col_1 2\n", - "bool_col_2 3\n", - "dtype: int64" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.sum()" ] @@ -844,26 +552,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "5e30cee7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 True\n", - "2 False\n", - "3 True\n", - "4 True\n", - "Name: bool_col, dtype: bool" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame.from_dict({\"bool_col\": [0, 1, 0, 1, 1]})\n", "df[\"bool_col\"].astype(bool)" @@ -882,127 +574,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "9f63005f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyz
00.23IdealESI261.555.03263.953.982.43
10.21PremiumESI159.861.03263.893.842.31
20.23GoodEVS156.965.03274.054.072.31
30.29PremiumIVS262.458.03344.204.232.63
40.31GoodJSI263.358.03354.344.352.75
\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z\n", - "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", - "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", - "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", - "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", - "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds = pd.read_csv(\n", " \"https://github.com/mwaskom/seaborn-data/raw/master/diamonds.csv\"\n", @@ -1020,220 +595,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "7a27f0a0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyzexpensive
93380.94PremiumESI160.058.045806.446.373.84True
11800.71IdealGVS162.757.029305.695.733.58True
283340.33PremiumGVS161.358.06664.424.462.72False
139191.23Very GoodJVVS261.257.056656.866.924.22True
264742.37IdealJVS262.257.0160598.528.585.32True
122331.19FairISI164.958.051986.646.554.28True
59570.90Very GoodFSI162.660.039506.106.143.83True
483250.33GoodFSI161.862.05364.404.452.74False
257421.51IdealGVS161.157.0146747.417.384.52True
434010.59Very GoodFSI161.757.014125.395.433.34True
\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z \\\n", - "9338 0.94 Premium E SI1 60.0 58.0 4580 6.44 6.37 3.84 \n", - "1180 0.71 Ideal G VS1 62.7 57.0 2930 5.69 5.73 3.58 \n", - "28334 0.33 Premium G VS1 61.3 58.0 666 4.42 4.46 2.72 \n", - "13919 1.23 Very Good J VVS2 61.2 57.0 5665 6.86 6.92 4.22 \n", - "26474 2.37 Ideal J VS2 62.2 57.0 16059 8.52 8.58 5.32 \n", - "12233 1.19 Fair I SI1 64.9 58.0 5198 6.64 6.55 4.28 \n", - "5957 0.90 Very Good F SI1 62.6 60.0 3950 6.10 6.14 3.83 \n", - "48325 0.33 Good F SI1 61.8 62.0 536 4.40 4.45 2.74 \n", - "25742 1.51 Ideal G VS1 61.1 57.0 14674 7.41 7.38 4.52 \n", - "43401 0.59 Very Good F SI1 61.7 57.0 1412 5.39 5.43 3.34 \n", - "\n", - " expensive \n", - "9338 True \n", - "1180 True \n", - "28334 False \n", - "13919 True \n", - "26474 True \n", - "12233 True \n", - "5957 True \n", - "48325 False \n", - "25742 True \n", - "43401 True " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds[\"expensive\"] = diamonds[\"price\"] > 1000\n", "diamonds.sample(10)" @@ -1249,140 +614,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "c78a6d47", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyzexpensive
00.23IdealESI261.555.03263.953.982.43False
10.21PremiumESI159.861.03263.893.842.31False
20.23GoodEVS156.965.03274.054.072.31False
30.29PremiumIVS262.458.03344.204.232.63False
40.31GoodJSI263.358.03354.344.352.75False
\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z \\\n", - "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 \n", - "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 \n", - "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 \n", - "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63 \n", - "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 \n", - "\n", - " expensive \n", - "0 False \n", - "1 False \n", - "2 False \n", - "3 False \n", - "4 False " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds.assign(expensive=lambda x: x[\"price\"] > 1000).head()" ] @@ -1397,22 +632,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "d12f537d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([False, False, False, False, False, False, False, True, True,\n", - " True, False])" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds.columns.isin([\"x\", \"y\", \"z\"])" ] @@ -1429,21 +652,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "35e73305", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "np.True_" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds[\"expensive\"].any()" ] @@ -1460,239 +672,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "d1bbb0fa", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyzexpensive
10.21PremiumESI159.861.03263.893.842.31False
80.22FairEVS265.161.03373.873.782.49False
110.23IdealJVS162.856.03403.933.902.46False
120.22PremiumFSI160.461.03423.883.842.33False
140.20PremiumESI260.262.03453.793.752.27False
....................................
539280.79PremiumESI261.458.027566.035.963.68True
539290.71IdealGVS161.456.027565.765.733.53True
539300.71PremiumESI160.555.027565.795.743.49True
539310.71PremiumFSI159.862.027565.745.733.43True
539380.86PremiumHSI261.058.027576.156.123.74True
\n", - "

23423 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z \\\n", - "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 \n", - "8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49 \n", - "11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.90 2.46 \n", - "12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33 \n", - "14 0.20 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27 \n", - "... ... ... ... ... ... ... ... ... ... ... \n", - "53928 0.79 Premium E SI2 61.4 58.0 2756 6.03 5.96 3.68 \n", - "53929 0.71 Ideal G VS1 61.4 56.0 2756 5.76 5.73 3.53 \n", - "53930 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 3.49 \n", - "53931 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 3.43 \n", - "53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74 \n", - "\n", - " expensive \n", - "1 False \n", - "8 False \n", - "11 False \n", - "12 False \n", - "14 False \n", - "... ... \n", - "53928 True \n", - "53929 True \n", - "53930 True \n", - "53931 True \n", - "53938 True \n", - "\n", - "[23423 rows x 11 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds[diamonds[\"x\"] > diamonds[\"y\"]]" ] diff --git a/categorical-data.ipynb b/categorical-data.ipynb index 6cc6776..ab6ba30 100644 --- a/categorical-data.ipynb +++ b/categorical-data.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -65,26 +65,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "535ef959", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 a\n", - "1 b\n", - "2 c\n", - "3 a\n", - "Name: A, dtype: category\n", - "Categories (3, object): ['a', 'b', 'c']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -107,79 +91,10 @@ }, { "cell_type": "code", - "execution_count": 3, + 
"execution_count": null, "id": "358c83bb", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
valuegroup
03630 - 39
16160 - 69
28980 - 89
32820 - 29
48180 - 89
\n", - "
" - ], - "text/plain": [ - " value group\n", - "0 36 30 - 39\n", - "1 61 60 - 69\n", - "2 89 80 - 89\n", - "3 28 20 - 29\n", - "4 81 80 - 89" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame({\"value\": np.random.randint(0, 100, 20)})\n", "labels = [f\"{i} - {i+9}\" for i in range(0, 100, 10)]\n", @@ -199,22 +114,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "fb389105", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[NaN, 'b', 'c', NaN, 'd', NaN, 'c']\n", - "Categories (3, object): ['b', 'c', 'd']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "raw_cat = pd.Categorical(\n", " [\"a\", \"b\", \"c\", \"a\", \"d\", \"a\", \"c\"], categories=[\"b\", \"c\", \"d\"]\n", @@ -232,29 +135,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "0497fc16", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 NaN\n", - "1 b\n", - "2 c\n", - "3 NaN\n", - "4 d\n", - "5 NaN\n", - "6 c\n", - "Name: cat_type, dtype: category\n", - "Categories (3, object): ['b', 'c', 'd']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame(raw_cat, columns=[\"cat_type\"])\n", "df[\"cat_type\"]" @@ -278,22 +162,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "f7520d3d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['a', 'b', 'c', 'a', 'd', 'a', 'c']\n", - "Categories (4, object): ['a' < 'b' < 'c' < 'd']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ordered_cat = pd.Categorical(\n", " [\"a\", \"b\", \"c\", \"a\", \"d\", \"a\", \"c\"],\n", @@ -317,42 +189,20 @@ }, { "cell_type": "code", - 
"execution_count": 7, + "execution_count": null, "id": "2caba354", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['b', 'c', 'd'], dtype='object')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"cat_type\"].cat.categories" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5f1fe093", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"cat_type\"].cat.ordered" ] @@ -377,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "097171b8", "metadata": {}, "outputs": [], @@ -395,29 +245,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "4ae6df38", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 NaN\n", - "1 alpha\n", - "2 beta\n", - "3 NaN\n", - "4 gamma\n", - "5 NaN\n", - "6 beta\n", - "Name: cat_type, dtype: category\n", - "Categories (4, object): ['alpha', 'beta', 'gamma', 'delta']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"cat_type\"] = df[\"cat_type\"].cat.add_categories([\"delta\"])\n", "df[\"cat_type\"]" @@ -443,26 +274,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "19f5bdda", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cat_type\n", - "beta 2\n", - "alpha 1\n", - "gamma 1\n", - "delta 0\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"cat_type\"].value_counts()" ] @@ -479,23 +294,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "f52d5d0d", "metadata": {}, - "outputs": 
[ - { - "data": { - "text/plain": [ - "0 beta\n", - "Name: cat_type, dtype: category\n", - "Categories (4, object): ['alpha', 'beta', 'gamma', 'delta']" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"cat_type\"].mode()" ] @@ -510,81 +312,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "4d43e94d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60895/379284818.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", - " pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"M\"), dtype=\"category\"),\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetime
02015-05-31
12015-06-30
22015-07-31
32015-08-31
42015-09-30
\n", - "
" - ], - "text/plain": [ - " datetime\n", - "0 2015-05-31\n", - "1 2015-06-30\n", - "2 2015-07-31\n", - "3 2015-08-31\n", - "4 2015-09-30" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "time_df = pd.DataFrame(\n", " pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"M\"), dtype=\"category\"),\n", @@ -595,26 +326,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "db697f86", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 5\n", - "1 6\n", - "2 7\n", - "3 8\n", - "4 9\n", - "Name: datetime, dtype: int32" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "time_df[\"datetime\"].dt.month" ] @@ -629,26 +344,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "13e7bc66", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 1\n", - "2 2\n", - "3 3\n", - "4 4\n", - "dtype: int8" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "time_df[\"datetime\"].cat.codes" ] diff --git a/command-line.md b/command-line.md index 2cdce9b..f9eeb20 100644 --- a/command-line.md +++ b/command-line.md @@ -99,14 +99,14 @@ To find out "where" you are when you open a terminal, you can use the `pwd` comm The table below shows some useful commands for moving around your computer using the command line. Note that `cd` accepts a location _relative_ to your current directory. 
-| Command | What it does | -| --------------------- | ------------------------------------------------------------ | -| `pwd` | Shows current directory | -| `cd` | Change directory command | -| `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | -| `cd ~` | Go to your home directory | -| `cd -` | Go to the previous directory | -| `cd documents/papers` | Go directly to a directory named 'papers' | + | Command | What it does | + | --------------------- | ------------------------------------------------------------ | + | `pwd` | Shows current directory | + | `cd` | Change directory command | + | `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | + | `cd ~` | Go to your home directory | + | `cd -` | Go to the previous directory | + | `cd documents/papers` | Go directly to a directory named 'papers' | ## Using Python on the command line @@ -134,36 +134,36 @@ which python Now we'll see some useful commands for the terminal. -| Command                                          | What it does | -| --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | -| `man ` | Shows a manual for the given command | -| `touch ` | Creates an empty file named `` | -| `code ` | Open a file in VS Code (creating it, if it does not exist) | -| `mkdir ` | creates a new folder called `foldername` | -| `echo ` | Prints `` | -| `cat ` | Print the full contents of `` | -| `head ` | Print the start of a file | -| `tail ` | Print the end of a file | -| `> ` | Redirects output 
from screen to ``. For example, `echo "Hello World" > hello.txt` | -| `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | -| ` | ` | The pipe symbol: uses output from one command as input into another. For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt | -| `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit. | -| `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. | -| `sort` | Arrange lines in a file in alphabetical order | -| `uniq` | Remove duplicate lines from input, for example `cat | uniq`or`uniq -d` to show duplicate files | -| `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory | -| `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory | -| `rm ` | Permanently remove a file | -| `rmdir ` | Permanently remove an empty directory | -| `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | -| `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | -| `ls` | Basically, this means list stuff (files and folders) in the current directory | -| `ls -a` | List stuff in the current directory even if it's hidden | -| `ls -l` | List stuff in a more readable format and show permissions | -| `ls -S` | List stuff by size | -| `file ` | Give information on the file type of `` | -| `find` | Find specific files on your computer, can be piped into other commands for example `find \*.md -size +5k -type f | xargs wc -l`will count the number of lines`wc -l`of all files,`-type f`, ending in `.md`that are greater than 5 kilobytes in size,`-size +5k`. 
| -| `diff -u ` | Show a single summary of the differences between two files. | + | Command                                          | What it does | + | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | + | `man ` | Shows a manual for the given command | + | `touch ` | Creates an empty file named `` | + | `code ` | Open a file in VS Code (creating it, if it does not exist) | + | `mkdir ` | creates a new folder called `foldername` | + | `echo ` | Prints `` | + | `cat ` | Print the full contents of `` | + | `head ` | Print the start of a file | + | `tail ` | Print the end of a file | + | `> ` | Redirects output from screen to ``. For example, `echo "Hello World" > hello.txt` | + | `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | + | ` | ` | The pipe symbol: uses output from one command as input into another. For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt | + | `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit. | + | `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. 
| + | `sort` | Arrange lines in a file in alphabetical order | + | `uniq` | Remove duplicate lines from input, for example `cat | uniq` or `uniq -d` to show duplicate lines | + | `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory | + | `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory | + | `rm ` | Permanently remove a file | + | `rmdir ` | Permanently remove an empty directory | + | `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | + | `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | + | `ls` | Basically, this means list stuff (files and folders) in the current directory | + | `ls -a` | List stuff in the current directory even if it's hidden | + | `ls -l` | List stuff in a more readable format and show permissions | + | `ls -S` | List stuff by size | + | `file ` | Give information on the file type of `` | + | `find` | Find specific files on your computer, can be piped into other commands for example `find \*.md -size +5k -type f | xargs wc -l` will count the number of lines, `wc -l`, of all files, `-type f`, ending in `.md` that are greater than 5 kilobytes in size, `-size +5k`. | + | `diff -u ` | Show a single summary of the differences between two files. 
| ![More details of the grep command](https://pbs.twimg.com/media/DcPeD_CW0AEkSar?format=jpg&name=small) _More details of the grep command, by [\@b0rk](https://twitter.com/b0rk)._ diff --git a/communicate-plots.ipynb b/communicate-plots.ipynb index f833d44..0f0f567 100644 --- a/communicate-plots.ipynb +++ b/communicate-plots.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "78eeea41", "metadata": {}, "outputs": [], @@ -42,51 +42,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "ae4a818a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import polars as pl\n", @@ -107,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "c36b4cd5", "metadata": {}, "outputs": [], @@ -129,132 +88,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c7574bc6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point())" ] @@ -279,162 +116,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "24b3513e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -462,165 +147,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "6489a6bf", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -652,153 +182,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "683d547c", "metadata": { "tags": [ "remove-cell" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"cty\", y=\"hwy\", color=\"drv\", shape=\"drv\"))\n", @@ -840,40 +231,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "60826a32", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 4)
drvhwydispldrive_type
strf64f64str
"f"28.162.56"front-wheel drive"
"r"21.05.18"rear-wheel drive"
"4"19.174.0"4-wheel drive"
" - ], - "text/plain": [ - "shape: (3, 4)\n", - "┌─────┬───────┬───────┬───────────────────┐\n", - "│ drv ┆ hwy ┆ displ ┆ drive_type │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ f64 ┆ f64 ┆ str │\n", - "╞═════╪═══════╪═══════╪═══════════════════╡\n", - "│ f ┆ 28.16 ┆ 2.56 ┆ front-wheel drive │\n", - "│ r ┆ 21.0 ┆ 5.18 ┆ rear-wheel drive │\n", - "│ 4 ┆ 19.17 ┆ 4.0 ┆ 4-wheel drive │\n", - "└─────┴───────┴───────┴───────────────────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mapping = {\n", " \"4\": \"4-wheel drive\",\n", @@ -902,183 +263,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "6f90c2aa", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -1108,254 +296,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "bdcd79bb", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "potential_outliers = mpg.filter(\n", " (pl.col(\"hwy\") > 40) | ((pl.col(\"hwy\") > 20) & (pl.col(\"displ\") > 5))\n", @@ -1392,21 +336,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "d1e2cc3a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Larger engine sizes tend to\\nhave lower fuel economy.'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import textwrap\n", "\n", @@ -1417,162 +350,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "e8c09f57", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -1704,137 +485,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "a95604d8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -1855,138 +509,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1a852304", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -2013,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "40ac230e", "metadata": {}, "outputs": [], @@ -2033,158 +559,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "1520bb3c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(diamonds, aes(x=\"cut\", y=\"price\"))\n", @@ -2204,42 +582,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "9d1f993a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 6)
rownamesnamestartendpartyid
i64stri32i32stri64
1"Eisenhower"19531961"Republican"34
2"Kennedy"19611963"Democratic"35
3"Johnson"19631969"Democratic"36
4"Nixon"19691974"Republican"37
5"Ford"19741977"Republican"38
" - ], - "text/plain": [ - "shape: (5, 6)\n", - "┌──────────┬────────────┬───────┬──────┬────────────┬─────┐\n", - "│ rownames ┆ name ┆ start ┆ end ┆ party ┆ id │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ i32 ┆ i32 ┆ str ┆ i64 │\n", - "╞══════════╪════════════╪═══════╪══════╪════════════╪═════╡\n", - "│ 1 ┆ Eisenhower ┆ 1953 ┆ 1961 ┆ Republican ┆ 34 │\n", - "│ 2 ┆ Kennedy ┆ 1961 ┆ 1963 ┆ Democratic ┆ 35 │\n", - "│ 3 ┆ Johnson ┆ 1963 ┆ 1969 ┆ Democratic ┆ 36 │\n", - "│ 4 ┆ Nixon ┆ 1969 ┆ 1974 ┆ Republican ┆ 37 │\n", - "│ 5 ┆ Ford ┆ 1974 ┆ 1977 ┆ Republican ┆ 38 │\n", - "└──────────┴────────────┴───────┴──────┴────────────┴─────┘" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "presidential = pl.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/presidential.csv\",\n", @@ -2255,128 +601,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "7d88976d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(presidential, aes(x=\"start\", y=\"id\"))\n", @@ -2403,355 +631,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "52d6e86a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "base = ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"class\"))\n", "\n", @@ -2790,130 +673,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "2c1d3f8d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(\n", @@ -2934,136 +697,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "39b4ef8d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(diamonds, aes(x=\"carat\", y=\"price\"))\n", @@ -3083,272 +720,20 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f06d7e40", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"drv\")))" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "6186b520", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -3377,1235 +762,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "bd347524", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:54.100390\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# | echo: false\n", "cmaps = [\n", @@ -4707,1126 +871,14 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "d6350c71", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:54.622782\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# remove input\n", "for cmap_category, cmap_list in cmaps[3:4]:\n", @@ -5835,1003 +887,14 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "0063a574", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:54.717286\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# remove input\n", "for cmap_category, cmap_list in cmaps[2:3]:\n", @@ -6848,138 +911,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "9751058d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mini_presid = presidential.slice(5)\n", "\n", @@ -7007,115 +942,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "644fd814", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "prng = np.random.default_rng(1837) # prng=probabilistic random number generator\n", "df_rnd = pl.DataFrame(prng.standard_normal((1000, 2)), schema=[\"x\", \"y\"])\n", @@ -7149,147 +979,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "25a29f38", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -7300,147 +993,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "42318a59", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mpg_condition = (\n", " (mpg[\"displ\"] >= 5) & (mpg[\"displ\"] <= 6) & (mpg[\"hwy\"] >= 10) & (mpg[\"hwy\"] <= 25)\n", @@ -7465,153 +1021,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "03001d5e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -7624,153 +1037,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "dc3bb833", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -7791,134 +1061,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "aee538a8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "suv = mpg.filter(mpg[\"class\"] == \"suv\")\n", "compact = mpg.filter(mpg[\"class\"] == \"compact\")\n", @@ -7927,134 +1073,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "a82c8c23", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(compact, aes(x=\"displ\", y=\"hwy\", color=\"drv\")) + geom_point())" ] @@ -8069,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "db6fce43", "metadata": {}, "outputs": [], @@ -8081,73 +1103,10 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "dd9e6606", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(suv, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -8160,73 +1119,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "bdd8b2c5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(compact, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -8276,148 +1172,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "0b2364ca", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -8439,159 +1197,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "67bfa9c8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", color=\"drv\"))\n", @@ -8632,228 +1241,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "a8081df4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "p1 = ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + labs(title=\"Plot 1\")\n", "p2 = ggplot(mpg, aes(x=\"drv\", y=\"hwy\")) + geom_boxplot() + labs(title=\"Plot 2\")\n", @@ -8874,21 +1265,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "710a6a4f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/Users/omagic/Documents/GitHub/python4DSpolars/chart.svg'" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ggsave(p1, \"chart.svg\", path=\".\")" ] @@ -8903,25 +1283,17 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "bc831b1b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: stdout: Broken pipe\r\n" - ] - } - ], + "outputs": [], "source": [ "!ls | grep *.svg" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "9cc10ab7", "metadata": { "tags": [ diff --git a/data-import.ipynb b/data-import.ipynb index 9369dde..9852811 100644 --- a/data-import.ipynb +++ b/data-import.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1cf01bda", "metadata": {}, "outputs": [], @@ -54,30 +54,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "eca85c47", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Student ID,Full Name,favourite.food,mealPlan,AGE\r", - "\r\n", - "1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4\r", - "\r\n", - "2,Barclay Lynn,French fries,Lunch only,5\r", - "\r\n", - "3,Jayendra Lyne,N/A,Breakfast and lunch,7\r", - "\r\n", - "4,Leon Rossini,Anchovies,Lunch only,8\r", - "\r\n", - "5,Chidiegwu 
Dunkel,Pizza,Breakfast and lunch,five\r", - "\r\n", - "6,Güvenç Attila,Ice cream,Lunch only,6" - ] - } - ], + "outputs": [], "source": [ "! cat data/students.csv" ] @@ -92,43 +72,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "232fdfef", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 5)
Student IDFull Namefavourite.foodmealPlanAGE
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only""8"
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" - ], - "text/plain": [ - "shape: (6, 5)\n", - "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", - "│ Student ID ┆ Full Name ┆ favourite.food ┆ mealPlan ┆ AGE │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", - "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", - "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", - "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", - "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", - "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ 8 │\n", - "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", - "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", - "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = pl.read_csv(\"data/students.csv\")\n", "students" @@ -186,43 +133,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "51969364", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only""8"
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" - ], - "text/plain": [ - "shape: (6, 5)\n", - "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", - "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", - "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", - "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", - "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", - "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", - "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ 8 │\n", - "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", - "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", - "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from skimpy import clean_columns\n", "\n", @@ -240,40 +154,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "f3c31e4a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6,)
age
i64
4
5
7
8
5
6
" - ], - "text/plain": [ - "shape: (6,)\n", - "Series: 'age' [i64]\n", - "[\n", - "\t4\n", - "\t5\n", - "\t7\n", - "\t8\n", - "\t5\n", - "\t6\n", - "]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = students.with_columns(pl.col(\"age\").replace(\"five\", 5).cast(pl.Int64))\n", "students[\"age\"]" @@ -297,40 +181,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "678fdd2d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6,)
meal_plan
cat
"Lunch only"
"Lunch only"
"Breakfast and lunch"
"Lunch only"
"Breakfast and lunch"
"Lunch only"
" - ], - "text/plain": [ - "shape: (6,)\n", - "Series: 'meal_plan' [cat]\n", - "[\n", - "\t\"Lunch only\"\n", - "\t\"Lunch only\"\n", - "\t\"Breakfast and lunch\"\n", - "\t\"Lunch only\"\n", - "\t\"Breakfast and lunch\"\n", - "\t\"Lunch only\"\n", - "]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = students.with_columns(pl.col(\"meal_plan\").cast(pl.Categorical))\n", "students[\"meal_plan\"]" @@ -348,25 +202,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "f54108d3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Schema([('student_id', Int32),\n", - " ('full_name', String),\n", - " ('favourite_food', String),\n", - " ('meal_plan', Categorical(ordering='physical')),\n", - " ('age', Int64)])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = students.cast(\n", " {\"student_id\": pl.Int32, \"full_name\": pl.String, \"age\": pl.Int64}\n", @@ -404,48 +243,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "b80b958b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (19, 5)
monthyearbranditemn
stri64i64i64i64
"January"2019112343
"January"2019187219
"January"2019118222
"January"2019233331
"January"2019221569
"March"2019136271
"March"2019188203
"March"2019272531
"March"2019287663
"March"2019282886
" - ], - "text/plain": [ - "shape: (19, 5)\n", - "┌─────────┬──────┬───────┬──────┬─────┐\n", - "│ month ┆ year ┆ brand ┆ item ┆ n │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞═════════╪══════╪═══════╪══════╪═════╡\n", - "│ January ┆ 2019 ┆ 1 ┆ 1234 ┆ 3 │\n", - "│ January ┆ 2019 ┆ 1 ┆ 8721 ┆ 9 │\n", - "│ January ┆ 2019 ┆ 1 ┆ 1822 ┆ 2 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 3333 ┆ 1 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 2156 ┆ 9 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ March ┆ 2019 ┆ 1 ┆ 3627 ┆ 1 │\n", - "│ March ┆ 2019 ┆ 1 ┆ 8820 ┆ 3 │\n", - "│ March ┆ 2019 ┆ 2 ┆ 7253 ┆ 1 │\n", - "│ March ┆ 2019 ┆ 2 ┆ 8766 ┆ 3 │\n", - "│ March ┆ 2019 ┆ 2 ┆ 8288 ┆ 6 │\n", - "└─────────┴──────┴───────┴──────┴─────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "list_of_dataframes = [\n", " pl.read_csv(x)\n", @@ -465,57 +266,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "4a92056c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "List of csvs is:\n", - "['data/03-sales.csv', 'data/02-sales.csv', 'data/01-sales.csv'] \n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (19, 5)
monthyearbranditemn
stri64i64i64i64
"March"2019112343
"March"2019136271
"March"2019188203
"March"2019272531
"March"2019287663
"January"2019118222
"January"2019233331
"January"2019221569
"January"2019239876
"January"2019238276
" - ], - "text/plain": [ - "shape: (19, 5)\n", - "┌─────────┬──────┬───────┬──────┬─────┐\n", - "│ month ┆ year ┆ brand ┆ item ┆ n │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞═════════╪══════╪═══════╪══════╪═════╡\n", - "│ March ┆ 2019 ┆ 1 ┆ 1234 ┆ 3 │\n", - "│ March ┆ 2019 ┆ 1 ┆ 3627 ┆ 1 │\n", - "│ March ┆ 2019 ┆ 1 ┆ 8820 ┆ 3 │\n", - "│ March ┆ 2019 ┆ 2 ┆ 7253 ┆ 1 │\n", - "│ March ┆ 2019 ┆ 2 ┆ 8766 ┆ 3 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ January ┆ 2019 ┆ 1 ┆ 1822 ┆ 2 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 3333 ┆ 1 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 2156 ┆ 9 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 3987 ┆ 6 │\n", - "│ January ┆ 2019 ┆ 2 ┆ 3827 ┆ 6 │\n", - "└─────────┴──────┴───────┴──────┴─────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import glob\n", "\n", @@ -540,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "0bc97749", "metadata": {}, "outputs": [], @@ -558,25 +312,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "542c5223", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Schema([('student_id', Int64),\n", - " ('full_name', String),\n", - " ('favourite_food', String),\n", - " ('meal_plan', String),\n", - " ('age', Int64)])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_csv(\"data/students-clean.csv\").schema" ] @@ -597,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "16c6ca1b", "metadata": {}, "outputs": [], @@ -615,25 +354,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "bfd5104f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Schema([('student_id', Int32),\n", - " ('full_name', String),\n", - " ('favourite_food', String),\n", - " ('meal_plan', 
Categorical(ordering='physical')),\n", - " ('age', Int64)])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_ipc(\"data/students-clean.feather\").schema" ] @@ -652,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "227f7c50", "metadata": { "tags": [ diff --git a/data-tidy.ipynb b/data-tidy.ipynb index 903b0a0..157081f 100644 --- a/data-tidy.ipynb +++ b/data-tidy.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -111,96 +111,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0f9fbf5a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " Unmelted: \n", - " first last job height weight\n", - "0 John Doe Nurse 5.5 130\n", - "1 Mary Bo Economist 6.0 150\n", - "\n", - " Melted: \n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
firstlastquantityvalue
0JohnDoeheight5.5
1MaryBoheight6.0
2JohnDoeweight130.0
3MaryBoweight150.0
\n", - "
" - ], - "text/plain": [ - " first last quantity value\n", - "0 John Doe height 5.5\n", - "1 Mary Bo height 6.0\n", - "2 John Doe weight 130.0\n", - "3 Mary Bo weight 150.0" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -235,71 +149,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "bfa121cf", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
country19992000
0Afghanistan745.02666.0
1Brazil37737.080488.0
2China212258.0213766.0
\n", - "
" - ], - "text/plain": [ - " country 1999 2000\n", - "0 Afghanistan 745.0 2666.0\n", - "1 Brazil 37737.0 80488.0\n", - "2 China 212258.0 213766.0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_tb = pd.read_parquet(\n", " \"https://github.com/aeturrell/python4DS/raw/refs/heads/main/data/who_tb_cases.parquet\"\n", @@ -317,92 +170,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dc03ccd9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countryyearcases
0Afghanistan1999745.0
1Brazil199937737.0
2China1999212258.0
3Afghanistan20002666.0
4Brazil200080488.0
5China2000213766.0
\n", - "
" - ], - "text/plain": [ - " country year cases\n", - "0 Afghanistan 1999 745.0\n", - "1 Brazil 1999 37737.0\n", - "2 China 1999 212258.0\n", - "3 Afghanistan 2000 2666.0\n", - "4 Brazil 2000 80488.0\n", - "5 China 2000 213766.0" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_tb.melt(\n", " id_vars=[\"country\"],\n", @@ -432,83 +203,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "293768c1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
A1970A1980B1970B1980Xid
0ad2.53.22.2900010
1be1.21.3-0.5127471
2cf0.70.1-2.3662972
\n", - "
" - ], - "text/plain": [ - " A1970 A1980 B1970 B1980 X id\n", - "0 a d 2.5 3.2 2.290001 0\n", - "1 b e 1.2 1.3 -0.512747 1\n", - "2 c f 0.7 0.1 -2.366297 2" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -535,107 +233,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "a9ca2fa8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
XAB
idyear
019702.290001a2.5
11970-0.512747b1.2
21970-2.366297c0.7
019802.290001d3.2
11980-0.512747e1.3
21980-2.366297f0.1
\n", - "
" - ], - "text/plain": [ - " X A B\n", - "id year \n", - "0 1970 2.290001 a 2.5\n", - "1 1970 -0.512747 b 1.2\n", - "2 1970 -2.366297 c 0.7\n", - "0 1980 2.290001 d 3.2\n", - "1 1980 -0.512747 e 1.3\n", - "2 1980 -2.366297 f 0.1" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.wide_to_long(df, stubnames=[\"A\", \"B\"], i=\"id\", j=\"year\")" ] @@ -660,109 +261,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "2b791dd1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AB
firstsecond
barone1.3129600.427839
two-0.070985-0.738495
bazone1.042258-0.430793
two0.511735-0.782214
fooone-0.847108-1.179077
two1.0210641.015834
quxone-1.194002-0.313362
two2.226642-0.898217
\n", - "
" - ], - "text/plain": [ - " A B\n", - "first second \n", - "bar one 1.312960 0.427839\n", - " two -0.070985 -0.738495\n", - "baz one 1.042258 -0.430793\n", - " two 0.511735 -0.782214\n", - "foo one -0.847108 -1.179077\n", - " two 1.021064 1.015834\n", - "qux one -1.194002 -0.313362\n", - " two 2.226642 -0.898217" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "tuples = list(\n", " zip(\n", @@ -787,38 +289,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "d25eb012", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "first second \n", - "bar one A 1.312960\n", - " B 0.427839\n", - " two A -0.070985\n", - " B -0.738495\n", - "baz one A 1.042258\n", - " B -0.430793\n", - " two A 0.511735\n", - " B -0.782214\n", - "foo one A -0.847108\n", - " B -1.179077\n", - " two A 1.021064\n", - " B 1.015834\n", - "qux one A -1.194002\n", - " B -0.313362\n", - " two A 2.226642\n", - " B -0.898217\n", - "dtype: float64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.stack()\n", "df" @@ -846,95 +320,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "b6742a54", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
firstbarbazfooqux
second
oneA1.3129601.042258-0.847108-1.194002
B0.427839-0.430793-1.179077-0.313362
twoA-0.0709850.5117351.0210642.226642
B-0.738495-0.7822141.015834-0.898217
\n", - "
" - ], - "text/plain": [ - "first bar baz foo qux\n", - "second \n", - "one A 1.312960 1.042258 -0.847108 -1.194002\n", - " B 0.427839 -0.430793 -1.179077 -0.313362\n", - "two A -0.070985 0.511735 1.021064 2.226642\n", - " B -0.738495 -0.782214 1.015834 -0.898217" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.unstack(level=0)" ] @@ -971,91 +360,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "fa612456", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countryyeartypecount
0Afghanistan1999-01-01cases745
1Afghanistan1999-01-01population19987071
2Afghanistan2000-01-01cases2666
3Afghanistan2000-01-01population20595360
4Brazil1999-01-01cases37737
\n", - "
" - ], - "text/plain": [ - " country year type count\n", - "0 Afghanistan 1999-01-01 cases 745\n", - "1 Afghanistan 1999-01-01 population 19987071\n", - "2 Afghanistan 2000-01-01 cases 2666\n", - "3 Afghanistan 2000-01-01 population 20595360\n", - "4 Brazil 1999-01-01 cases 37737" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_tb_cp = pd.read_parquet(\n", " \"https://github.com/aeturrell/python4DS/raw/refs/heads/main/data/who_tb_case_and_pop.parquet\"\n", @@ -1081,99 +389,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "e584cf37", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
typecountryyearcasespopulation
0Afghanistan1999-01-0174519987071
1Afghanistan2000-01-01266620595360
2Brazil1999-01-0137737172006362
3Brazil2000-01-0180488174504898
4China1999-01-012122581272915272
5China2000-01-012137661280428583
\n", - "
" - ], - "text/plain": [ - "type country year cases population\n", - "0 Afghanistan 1999-01-01 745 19987071\n", - "1 Afghanistan 2000-01-01 2666 20595360\n", - "2 Brazil 1999-01-01 37737 172006362\n", - "3 Brazil 2000-01-01 80488 174504898\n", - "4 China 1999-01-01 212258 1272915272\n", - "5 China 2000-01-01 213766 1280428583" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pivoted = df_tb_cp.pivot(\n", " index=[\"country\", \"year\"], columns=[\"type\"], values=\"count\"\n", @@ -1191,85 +410,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "97c6d139", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datevariablevalue
12000-02-29A0.799993
22000-03-31A0.247382
92000-10-31A0.685868
182000-09-30B-0.911245
42000-05-31A0.874574
\n", - "
" - ], - "text/plain": [ - " date variable value\n", - "1 2000-02-29 A 0.799993\n", - "2 2000-03-31 A 0.247382\n", - "9 2000-10-31 A 0.685868\n", - "18 2000-09-30 B -0.911245\n", - "4 2000-05-31 A 0.874574" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -1295,115 +439,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "04f2bd28", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variableAB
date
2000-01-31NaNNaN
2000-02-290.7116320.869024
2000-03-310.799993-0.276892
2000-04-300.247382-1.172654
2000-05-31-1.545182-1.452367
2000-06-300.874574-2.377642
2000-07-31-0.735886-2.036017
2000-08-310.0138172.211417
2000-09-30-1.401537-0.896416
2000-10-310.063176-0.911245
\n", - "
" - ], - "text/plain": [ - "variable A B\n", - "date \n", - "2000-01-31 NaN NaN\n", - "2000-02-29 0.711632 0.869024\n", - "2000-03-31 0.799993 -0.276892\n", - "2000-04-30 0.247382 -1.172654\n", - "2000-05-31 -1.545182 -1.452367\n", - "2000-06-30 0.874574 -2.377642\n", - "2000-07-31 -0.735886 -2.036017\n", - "2000-08-31 0.013817 2.211417\n", - "2000-09-30 -1.401537 -0.896416\n", - "2000-10-31 0.063176 -0.911245" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.pivot(index=\"date\", columns=\"variable\", values=\"value\").shift(1)" ] diff --git a/data-transform.ipynb b/data-transform.ipynb index 03eed1b..8dfc2c1 100644 --- a/data-transform.ipynb +++ b/data-transform.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "04dcb195", "metadata": {}, "outputs": [], @@ -44,21 +44,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "09eb2e2e", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'1.19.0'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.__version__" ] @@ -77,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "eff283e8", "metadata": {}, "outputs": [], @@ -108,42 +97,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "39f99d76", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"
" - ], - "text/plain": [ - "shape: (5, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.head()" ] @@ -158,39 +115,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "95dea97b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rows: 336776\n", - "Columns: 19\n", - "$ year 2013, 2013, 2013, 2013, 2013\n", - "$ month 1, 1, 1, 1, 1\n", - "$ day 1, 1, 1, 1, 1\n", - "$ dep_time 517, 533, 542, 544, 554\n", - "$ sched_dep_time 515, 529, 540, 545, 600\n", - "$ dep_delay 2, 4, 2, -1, -6\n", - "$ arr_time 830, 850, 923, 1004, 812\n", - "$ sched_arr_time 819, 830, 850, 1022, 837\n", - "$ arr_delay 11, 20, 33, -18, -25\n", - "$ carrier 'UA', 'UA', 'AA', 'B6', 'DL'\n", - "$ flight 1545, 1714, 1141, 725, 461\n", - "$ tailnum 'N14228', 'N24211', 'N619AA', 'N804JB', 'N668DN'\n", - "$ origin 'EWR', 'LGA', 'JFK', 'JFK', 'LGA'\n", - "$ dest 'IAH', 'IAH', 'MIA', 'BQN', 'ATL'\n", - "$ air_time 227, 227, 160, 183, 116\n", - "$ distance 1400, 1416, 1089, 1576, 762\n", - "$ hour 5, 5, 
5, 5, 6\n", - "$ minute 15, 29, 40, 45, 0\n", - "$ time_hour '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T10:00:00Z', '2013-01-01T11:00:00Z'\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "flights.glimpse(max_items_per_column=5)" ] @@ -221,93 +149,20 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "ffb275b0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776,)
time_hour
str
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T10:00:00Z"
"2013-01-01T11:00:00Z"
"2013-09-30T18:00:00Z"
"2013-10-01T02:00:00Z"
"2013-09-30T16:00:00Z"
"2013-09-30T15:00:00Z"
"2013-09-30T12:00:00Z"
" - ], - "text/plain": [ - "shape: (336_776,)\n", - "Series: 'time_hour' [str]\n", - "[\n", - "\t\"2013-01-01T10:00:00Z\"\n", - "\t\"2013-01-01T10:00:00Z\"\n", - "\t\"2013-01-01T10:00:00Z\"\n", - "\t\"2013-01-01T10:00:00Z\"\n", - "\t\"2013-01-01T11:00:00Z\"\n", - "\t…\n", - "\t\"2013-09-30T18:00:00Z\"\n", - "\t\"2013-10-01T02:00:00Z\"\n", - "\t\"2013-09-30T16:00:00Z\"\n", - "\t\"2013-09-30T15:00:00Z\"\n", - "\t\"2013-09-30T12:00:00Z\"\n", - "]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.get_column(\"time_hour\")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "88a8b983", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64datetime[μs, UTC]
201311517515283081911"UA"1545"N14228""EWR""IAH"22714005152013-01-01 10:00:00 UTC
201311533529485083020"UA"1714"N24211""LGA""IAH"22714165292013-01-01 10:00:00 UTC
201311542540292385033"AA"1141"N619AA""JFK""MIA"16010895402013-01-01 10:00:00 UTC
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"18315765452013-01-01 10:00:00 UTC
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"116762602013-01-01 11:00:00 UTC
2013930null1455nullnull1634null"9E"3393null"JFK""DCA"null21314552013-09-30 18:00:00 UTC
2013930null2200nullnull2312null"9E"3525null"LGA""SYR"null1982202013-10-01 02:00:00 UTC
2013930null1210nullnull1330null"MQ"3461"N535MQ""LGA""BNA"null76412102013-09-30 16:00:00 UTC
2013930null1159nullnull1344null"MQ"3572"N511MQ""LGA""CLE"null41911592013-09-30 15:00:00 UTC
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null4318402013-09-30 12:00:00 UTC
" - ], - "text/plain": [ - "shape: (336_776, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬─────────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ datetime[μs, UTC] │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪═════════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01 10:00:00 UTC │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01 10:00:00 UTC │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01 10:00:00 UTC │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01 10:00:00 UTC │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01 11:00:00 UTC │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 213 ┆ 14 ┆ 55 ┆ 2013-09-30 18:00:00 UTC │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 198 ┆ 22 ┆ 0 ┆ 2013-10-01 02:00:00 UTC │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 764 ┆ 12 ┆ 10 ┆ 2013-09-30 16:00:00 UTC │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 419 ┆ 11 ┆ 59 ┆ 2013-09-30 15:00:00 UTC │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 431 ┆ 8 ┆ 40 ┆ 2013-09-30 12:00:00 UTC │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴─────────────────────────┘" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.with_columns(pl.col(\"time_hour\").str.to_datetime())" ] @@ -332,41 +187,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "1b6bd8b1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (4, 5)
col0col1col2col3col4
i64i64i64strstr
000"a""alpha"
000"b""gamma"
000"b""gamma"
000"a""gamma"
" - ], - "text/plain": [ - "shape: (4, 5)\n", - "┌──────┬──────┬──────┬──────┬───────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ str ┆ str │\n", - "╞══════╪══════╪══════╪══════╪═══════╡\n", - "│ 0 ┆ 0 ┆ 0 ┆ a ┆ alpha │\n", - "│ 0 ┆ 0 ┆ 0 ┆ b ┆ gamma │\n", - "│ 0 ┆ 0 ┆ 0 ┆ b ┆ gamma │\n", - "│ 0 ┆ 0 ┆ 0 ┆ a ┆ gamma │\n", - "└──────┴──────┴──────┴──────┴───────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pl.DataFrame(\n", " data={\n", @@ -405,48 +229,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "cb114649", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (365, 4)
yearmonthdayarr_delay
i64i64i64f64
2013102-19.47619
201342630.380952
20131129-25.384615
2013430-9.52381
201381614.05
20135151.095238
20137912.714286
2013531-6.52381
201311235.375
2013416-1.736842
" - ], - "text/plain": [ - "shape: (365, 4)\n", - "┌──────┬───────┬─────┬────────────┐\n", - "│ year ┆ month ┆ day ┆ arr_delay │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ f64 │\n", - "╞══════╪═══════╪═════╪════════════╡\n", - "│ 2013 ┆ 10 ┆ 2 ┆ -19.47619 │\n", - "│ 2013 ┆ 4 ┆ 26 ┆ 30.380952 │\n", - "│ 2013 ┆ 11 ┆ 29 ┆ -25.384615 │\n", - "│ 2013 ┆ 4 ┆ 30 ┆ -9.52381 │\n", - "│ 2013 ┆ 8 ┆ 16 ┆ 14.05 │\n", - "│ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 5 ┆ 15 ┆ 1.095238 │\n", - "│ 2013 ┆ 7 ┆ 9 ┆ 12.714286 │\n", - "│ 2013 ┆ 5 ┆ 31 ┆ -6.52381 │\n", - "│ 2013 ┆ 11 ┆ 23 ┆ 5.375 │\n", - "│ 2013 ┆ 4 ┆ 16 ┆ -1.736842 │\n", - "└──────┴───────┴─────┴────────────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.filter(pl.col(\"dest\") == \"IAH\").group_by([\"year\", \"month\", \"day\"]).agg(\n", " pl.col(\"arr_delay\").mean()\n", @@ -481,43 +267,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "3958ddb5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
012345"apple"
67891011"orange"
121314151617"pineapple"
181920212223"mango"
242526272829"kiwi"
303132333435"lemon"
" - ], - "text/plain": [ - "shape: (6, 7)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -546,21 +299,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "0c2faf83", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(24, 25, 26, 27, 28, 29, 'kiwi')" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Gets the first row of the DataFrame\n", "df.row(0)\n", @@ -579,21 +321,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "9d34599b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(18, 19, 20, 21, 22, 23, 'mango')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.row(by_predicate=pl.col(\"col6\") == \"mango\")" ] @@ -608,27 +339,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "9bf6313b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'col0': 18,\n", - " 'col1': 19,\n", - " 'col2': 20,\n", - " 'col3': 21,\n", - " 'col4': 22,\n", - " 'col5': 23,\n", - " 'col6': 'mango'}" - ] - }, - "execution_count": 13, - "metadata": 
{}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Get the first row of the DataFrame as a dictionary\n", "df.row(0, named=True)\n", @@ -647,39 +361,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "f7e8e892", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (2, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
242526272829"kiwi"
303132333435"lemon"
" - ], - "text/plain": [ - "shape: (2, 7)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════╡\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────┘" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.slice(-2, 2)" ] @@ -696,39 +381,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "54a9d2b1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (2, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
121314151617"pineapple"
242526272829"kiwi"
" - ], - "text/plain": [ - "shape: (2, 7)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.filter((pl.col(\"col6\") == \"kiwi\") | (pl.col(\"col6\") == \"pineapple\"))" ] @@ -743,41 +399,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "7849a962", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (4, 7)
col0col1col2col3col4col5col6
i64i64i64i64i64i64str
121314151617"pineapple"
181920212223"mango"
242526272829"kiwi"
303132333435"lemon"
" - ], - "text/plain": [ - "shape: (4, 7)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╡\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┘" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.filter(pl.col(\"col0\") > 6)" ] @@ -792,48 +417,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "c6dd919f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (4_334, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"
20131523552359-4425442-17"B6"707"N583JB""JFK""SJU"19315982359"2013-01-06T04:00:00Z"
20131523572359-2432437-5"B6"727"N649JB""JFK""BQN"19515762359"2013-01-06T04:00:00Z"
201315null1400nullnull1518null"EV"5712"N827AS""JFK""IAD"null228140"2013-01-05T19:00:00Z"
201315null840nullnull1001null"9E"3422null"JFK""BOS"null187840"2013-01-05T13:00:00Z"
201315null1430nullnull1735null"AA"883"N544AA""EWR""DFW"null13721430"2013-01-05T19:00:00Z"
" - ], - "text/plain": [ - "shape: (4_334, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 1416 ┆ 5 ┆ 29 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 1089 ┆ 5 ┆ 40 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 1576 ┆ 5 ┆ 45 ┆ 2013-01-01T10:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 762 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 1 ┆ 5 ┆ 2355 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2013-01-06T04:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 5 ┆ 2357 ┆ … ┆ 1576 ┆ 23 ┆ 59 ┆ 2013-01-06T04:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 228 ┆ 14 ┆ 0 ┆ 2013-01-05T19:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 187 ┆ 8 ┆ 40 ┆ 2013-01-05T13:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 5 ┆ null ┆ … ┆ 1372 ┆ 14 ┆ 30 ┆ 2013-01-05T19:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Flights that departed on January 1\n", "flights.filter((pl.col(\"month\") == 1) & (pl.col(\"day\") <= 5))" @@ -859,96 +446,20 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "395c9c62", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201312null1540nullnull1747null"EV"4352"N10575""EWR""CVG"null5691540"2013-01-02T20:00:00Z"
201372824002359141134427"B6"1503"N503JB""JFK""SJU"20415982359"2013-07-29T03:00:00Z"
20138102400224575110169"B6"234"N328JB""JFK""BTV"532662245"2013-08-11T02:00:00Z"
20138202400235913543504"B6"745"N708JB""JFK""PSE"20116172359"2013-08-21T03:00:00Z"
20139224002359141134031"B6"839"N828JB""JFK""BQN"21715762359"2013-09-03T03:00:00Z"
2013912240020002402032230213"DL"1147"N910DE""LGA""ATL"101762200"2013-09-13T00:00:00Z"
" - ], - "text/plain": [ - "shape: (336_776, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 2 ┆ null ┆ … ┆ 569 ┆ 15 ┆ 40 ┆ 2013-01-02T20:00:00Z │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 7 ┆ 28 ┆ 2400 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2013-07-29T03:00:00Z │\n", - "│ 2013 ┆ 8 ┆ 10 ┆ 2400 ┆ … ┆ 266 ┆ 22 ┆ 45 ┆ 2013-08-11T02:00:00Z │\n", - "│ 2013 ┆ 8 ┆ 20 ┆ 2400 ┆ … ┆ 1617 ┆ 23 ┆ 59 ┆ 2013-08-21T03:00:00Z │\n", - "│ 2013 ┆ 9 ┆ 2 ┆ 2400 ┆ … ┆ 1576 ┆ 23 ┆ 59 ┆ 2013-09-03T03:00:00Z │\n", - "│ 2013 ┆ 9 ┆ 12 ┆ 2400 ┆ … ┆ 762 ┆ 20 ┆ 0 ┆ 2013-09-13T00:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.sort(\"dep_time\")" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "9d5f4270", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"
20131231232122503146838"B6"2002"N179JB""JFK""BUF"663012250"2014-01-01T03:00:00Z"
2013123123282330-24124093"B6"1389"N651JB""EWR""SJU"19816082330"2014-01-01T04:00:00Z"
20131231233222454758355"B6"486"N334JB""JFK""ROC"602642245"2014-01-01T03:00:00Z"
2013123123552359-4430440-10"B6"1503"N509JB""JFK""SJU"19515982359"2014-01-01T04:00:00Z"
2013123123562359-3436445-9"B6"745"N665JB""JFK""PSE"20016172359"2014-01-01T04:00:00Z"
" - ], - "text/plain": [ - "shape: (336_776, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 1400 ┆ 5 ┆ 15 ┆ 2013-01-01T10:00:00Z │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 12 ┆ 31 ┆ 2321 ┆ … ┆ 301 ┆ 22 ┆ 50 ┆ 2014-01-01T03:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 31 ┆ 2328 ┆ … ┆ 1608 ┆ 23 ┆ 30 ┆ 2014-01-01T04:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 31 ┆ 2332 ┆ … ┆ 264 ┆ 22 ┆ 45 ┆ 2014-01-01T03:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 31 ┆ 2355 ┆ … ┆ 1598 ┆ 23 ┆ 59 ┆ 2014-01-01T04:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 31 ┆ 2356 ┆ … ┆ 1617 ┆ 23 ┆ 59 ┆ 2014-01-01T04:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Sort by multiple columns by passing a list of columns.\n", "flights.sort([\"year\", \"month\", \"day\", \"dep_time\"])\n", @@ -968,96 +479,20 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "483acdc1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201311null1630nullnull1815null"EV"4308"N18120""EWR""RDU"null4161630"2013-01-01T21:00:00Z"
201311null1935nullnull2240null"AA"791"N3EHAA""LGA""DFW"null13891935"2013-01-02T00:00:00Z"
201311null1500nullnull1825null"AA"1925"N3EVAA""LGA""MIA"null1096150"2013-01-01T20:00:00Z"
201311null600nullnull901null"B6"125"N618JB""JFK""FLL"null106960"2013-01-01T11:00:00Z"
201312null1540nullnull1747null"EV"4352"N10575""EWR""CVG"null5691540"2013-01-02T20:00:00Z"
201312917031730-2719471957-10"F9"837"N208FR""LGA""DEN"25016201730"2013-01-29T22:00:00Z"
201311119001930-3022332243-10"DL"1435"N934DL""LGA""TPA"13910101930"2013-01-12T00:00:00Z"
2013111014081440-3215491559-10"EV"5713"N825AS""LGA""IAD"522291440"2013-11-10T19:00:00Z"
20132320222055-3322402338-58"DL"1715"N612DL""LGA""MSY"16211832055"2013-02-04T01:00:00Z"
201312720402123-4340235248"B6"97"N592JB""JFK""DEN"26516262123"2013-12-08T02:00:00Z"
" - ], - "text/plain": [ - "shape: (336_776, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 416 ┆ 16 ┆ 30 ┆ 2013-01-01T21:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1389 ┆ 19 ┆ 35 ┆ 2013-01-02T00:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1096 ┆ 15 ┆ 0 ┆ 2013-01-01T20:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ null ┆ … ┆ 1069 ┆ 6 ┆ 0 ┆ 2013-01-01T11:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 2 ┆ null ┆ … ┆ 569 ┆ 15 ┆ 40 ┆ 2013-01-02T20:00:00Z │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 1 ┆ 29 ┆ 1703 ┆ … ┆ 1620 ┆ 17 ┆ 30 ┆ 2013-01-29T22:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 11 ┆ 1900 ┆ … ┆ 1010 ┆ 19 ┆ 30 ┆ 2013-01-12T00:00:00Z │\n", - "│ 2013 ┆ 11 ┆ 10 ┆ 1408 ┆ … ┆ 229 ┆ 14 ┆ 40 ┆ 2013-11-10T19:00:00Z │\n", - "│ 2013 ┆ 2 ┆ 3 ┆ 2022 ┆ … ┆ 1183 ┆ 20 ┆ 55 ┆ 2013-02-04T01:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 7 ┆ 2040 ┆ … ┆ 1626 ┆ 21 ┆ 23 ┆ 2013-12-08T02:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.sort(\"dep_delay\", descending=True)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "80bf3df7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
201338null1800nullnull1937null"UA"1177null"LGA""ORD"null733180"2013-03-08T23:00:00Z"
201328null1659nullnull1822null"UA"531null"EWR""BOS"null2001659"2013-02-08T21:00:00Z"
2013813null1727nullnull1941null"EV"5892"N16561""EWR""CVG"null5691727"2013-08-13T21:00:00Z"
2013813null1225nullnull1338null"EV"5897"N15973""EWR""ORF"null2841225"2013-08-13T16:00:00Z"
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null431840"2013-09-30T12:00:00Z"
201312917031730-2719471957-10"F9"837"N208FR""LGA""DEN"25016201730"2013-01-29T22:00:00Z"
201311119001930-3022332243-10"DL"1435"N934DL""LGA""TPA"13910101930"2013-01-12T00:00:00Z"
2013111014081440-3215491559-10"EV"5713"N825AS""LGA""IAD"522291440"2013-11-10T19:00:00Z"
20132320222055-3322402338-58"DL"1715"N612DL""LGA""MSY"16211832055"2013-02-04T01:00:00Z"
201312720402123-4340235248"B6"97"N592JB""JFK""DEN"26516262123"2013-12-08T02:00:00Z"
" - ], - "text/plain": [ - "shape: (336_776, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 3 ┆ 8 ┆ null ┆ … ┆ 733 ┆ 18 ┆ 0 ┆ 2013-03-08T23:00:00Z │\n", - "│ 2013 ┆ 2 ┆ 8 ┆ null ┆ … ┆ 200 ┆ 16 ┆ 59 ┆ 2013-02-08T21:00:00Z │\n", - "│ 2013 ┆ 8 ┆ 13 ┆ null ┆ … ┆ 569 ┆ 17 ┆ 27 ┆ 2013-08-13T21:00:00Z │\n", - "│ 2013 ┆ 8 ┆ 13 ┆ null ┆ … ┆ 284 ┆ 12 ┆ 25 ┆ 2013-08-13T16:00:00Z │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 431 ┆ 8 ┆ 40 ┆ 2013-09-30T12:00:00Z │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 1 ┆ 29 ┆ 1703 ┆ … ┆ 1620 ┆ 17 ┆ 30 ┆ 2013-01-29T22:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 11 ┆ 1900 ┆ … ┆ 1010 ┆ 19 ┆ 30 ┆ 2013-01-12T00:00:00Z │\n", - "│ 2013 ┆ 11 ┆ 10 ┆ 1408 ┆ … ┆ 229 ┆ 14 ┆ 40 ┆ 2013-11-10T19:00:00Z │\n", - "│ 2013 ┆ 2 ┆ 3 ┆ 2022 ┆ … ┆ 1183 ┆ 20 ┆ 55 ┆ 2013-02-04T01:00:00Z │\n", - "│ 2013 ┆ 12 ┆ 7 ┆ 2040 ┆ … ┆ 1626 ┆ 21 ┆ 23 ┆ 2013-12-08T02:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.sort([\"dep_delay\", \"arr_delay\"], descending=[True, False])" ] @@ -1073,40 +508,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "a939f3c2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 19)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64str
20131115251530-519341805null"MQ"4525"N719MQ""LGA""XNA"null11471530"2013-01-01T20:00:00Z"
20131117401745-521582020null"MQ"4413"N739MQ""LGA""XNA"null11471745"2013-01-01T22:00:00Z"
20131218481840823332151null"9E"3325"N920XJ""JFK""DFW"null13911840"2013-01-02T23:00:00Z"
" - ], - "text/plain": [ - "shape: (3, 19)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────┬────────┬──────────────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ distance ┆ hour ┆ minute ┆ time_hour │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ str │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════╪════════╪══════════════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 1525 ┆ … ┆ 1147 ┆ 15 ┆ 30 ┆ 2013-01-01T20:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 1740 ┆ … ┆ 1147 ┆ 17 ┆ 45 ┆ 2013-01-01T22:00:00Z │\n", - "│ 2013 ┆ 1 ┆ 2 ┆ 1848 ┆ … ┆ 1391 ┆ 18 ┆ 40 ┆ 2013-01-02T23:00:00Z │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────┴────────┴──────────────────────┘" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " flights.filter((pl.col(\"dep_delay\") <= 10) & (pl.col(\"dep_delay\") >= -10))\n", @@ -1167,43 +572,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "86827cf9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 8)
col0col1col2col3col4col5col6new_column0
i64i64i64i64i64i64stri32
012345"apple"5
67891011"orange"5
121314151617"pineapple"5
181920212223"mango"5
242526272829"kiwi"5
303132333435"lemon"5
" - ], - "text/plain": [ - "shape: (6, 8)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┬─────────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 ┆ new_column0 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str ┆ i32 │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple ┆ 5 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange ┆ 5 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple ┆ 5 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango ┆ 5 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi ┆ 5 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon ┆ 5 │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┴─────────────┘" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.with_columns(new_column0=pl.lit(5))\n", "df" @@ -1219,43 +591,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "0ab01f9d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 8)
col0col1col2col3col4col5col6new_column0
i64i64i64i64i64i64stri64
012345"apple"0
67891011"orange"1
121314151617"pineapple"2
181920212223"mango"3
242526272829"kiwi"4
303132333435"lemon"5
" - ], - "text/plain": [ - "shape: (6, 8)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┬───────────┬─────────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 ┆ col6 ┆ new_column0 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str ┆ i64 │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╪═══════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 ┆ apple ┆ 0 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 ┆ orange ┆ 1 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 ┆ pineapple ┆ 2 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 ┆ mango ┆ 3 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 ┆ kiwi ┆ 4 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 ┆ lemon ┆ 5 │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┴───────────┴─────────────┘" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.with_columns(new_column0=pl.Series([0, 1, 2, 3, 4, 5]))\n", "df" @@ -1275,43 +614,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "fff10e83", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 10)
col0col1col2col3col4col5col6new_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" - ], - "text/plain": [ - "shape: (6, 10)\n", - "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ … ┆ col6 ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", - "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", - "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.with_columns(new_column1=pl.lit(5), new_column2=pl.lit(6))\n", "df" @@ -1327,43 +633,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "82477100", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 10)
col0col1col2col3col4col5col6new_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i64
012345"apple"050
67891011"orange"155
121314151617"pineapple"2510
181920212223"mango"3515
242526272829"kiwi"4520
303132333435"lemon"5525
" - ], - "text/plain": [ - "shape: (6, 10)\n", - "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ … ┆ col6 ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i64 │\n", - "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 0 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 5 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 10 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 15 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 20 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 25 │\n", - "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.with_columns(new_column2=pl.col(\"col0\") - pl.col(\"new_column0\"))" ] @@ -1379,48 +652,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "55645bdd", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 21)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hourgainspeed
i64i64i64i64i64i64i64i64i64stri64strstrstri64i64i64i64stri64f64
201311517515283081911"UA"1545"N14228""EWR""IAH"2271400515"2013-01-01T10:00:00Z"-9370.044053
201311533529485083020"UA"1714"N24211""LGA""IAH"2271416529"2013-01-01T10:00:00Z"-16374.273128
201311542540292385033"AA"1141"N619AA""JFK""MIA"1601089540"2013-01-01T10:00:00Z"-31408.375
201311544545-110041022-18"B6"725"N804JB""JFK""BQN"1831576545"2013-01-01T10:00:00Z"17516.721311
201311554600-6812837-25"DL"461"N668DN""LGA""ATL"11676260"2013-01-01T11:00:00Z"19394.137931
2013930null1455nullnull1634null"9E"3393null"JFK""DCA"null2131455"2013-09-30T18:00:00Z"nullnull
2013930null2200nullnull2312null"9E"3525null"LGA""SYR"null198220"2013-10-01T02:00:00Z"nullnull
2013930null1210nullnull1330null"MQ"3461"N535MQ""LGA""BNA"null7641210"2013-09-30T16:00:00Z"nullnull
2013930null1159nullnull1344null"MQ"3572"N511MQ""LGA""CLE"null4191159"2013-09-30T15:00:00Z"nullnull
2013930null840nullnull1020null"MQ"3531"N839MQ""LGA""RDU"null431840"2013-09-30T12:00:00Z"nullnull
" - ], - "text/plain": [ - "shape: (336_776, 21)\n", - "┌──────┬───────┬─────┬──────────┬───┬────────┬──────────────────────┬──────┬────────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ minute ┆ time_hour ┆ gain ┆ speed │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ str ┆ i64 ┆ f64 │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪════════╪══════════════════════╪══════╪════════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 15 ┆ 2013-01-01T10:00:00Z ┆ -9 ┆ 370.044053 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 29 ┆ 2013-01-01T10:00:00Z ┆ -16 ┆ 374.273128 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 40 ┆ 2013-01-01T10:00:00Z ┆ -31 ┆ 408.375 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 45 ┆ 2013-01-01T10:00:00Z ┆ 17 ┆ 516.721311 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 0 ┆ 2013-01-01T11:00:00Z ┆ 19 ┆ 394.137931 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 55 ┆ 2013-09-30T18:00:00Z ┆ null ┆ null │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 0 ┆ 2013-10-01T02:00:00Z ┆ null ┆ null │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 10 ┆ 2013-09-30T16:00:00Z ┆ null ┆ null │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 59 ┆ 2013-09-30T15:00:00Z ┆ null ┆ null │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ 40 ┆ 2013-09-30T12:00:00Z ┆ null ┆ null │\n", - "└──────┴───────┴─────┴──────────┴───┴────────┴──────────────────────┴──────┴────────────┘" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.with_columns(\n", " (pl.col(\"dep_delay\") - pl.col(\"arr_delay\")).alias(\"gain\"),\n", @@ -1446,40 +681,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "4643b978", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6,)
col0
i64
0
6
12
18
24
30
" - ], - "text/plain": [ - "shape: (6,)\n", - "Series: 'col0' [i64]\n", - "[\n", - "\t0\n", - "\t6\n", - "\t12\n", - "\t18\n", - "\t24\n", - "\t30\n", - "]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"col0\"]" ] @@ -1494,43 +699,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "219852d8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 3)
col0new_column0col2
i64i64i64
002
618
12214
18320
24426
30532
" - ], - "text/plain": [ - "shape: (6, 3)\n", - "┌──────┬─────────────┬──────┐\n", - "│ col0 ┆ new_column0 ┆ col2 │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪═════════════╪══════╡\n", - "│ 0 ┆ 0 ┆ 2 │\n", - "│ 6 ┆ 1 ┆ 8 │\n", - "│ 12 ┆ 2 ┆ 14 │\n", - "│ 18 ┆ 3 ┆ 20 │\n", - "│ 24 ┆ 4 ┆ 26 │\n", - "│ 30 ┆ 5 ┆ 32 │\n", - "└──────┴─────────────┴──────┘" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[[\"col0\", \"new_column0\", \"col2\"]]" ] @@ -1545,43 +717,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "1bc0cd22", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 3)
col1col2col3
i64i64i64
002
618
12214
18320
24426
30532
" - ], - "text/plain": [ - "shape: (6, 3)\n", - "┌──────┬──────┬──────┐\n", - "│ col1 ┆ col2 ┆ col3 │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪══════╪══════╡\n", - "│ 0 ┆ 0 ┆ 2 │\n", - "│ 6 ┆ 1 ┆ 8 │\n", - "│ 12 ┆ 2 ┆ 14 │\n", - "│ 18 ┆ 3 ┆ 20 │\n", - "│ 24 ┆ 4 ┆ 26 │\n", - "│ 30 ┆ 5 ┆ 32 │\n", - "└──────┴──────┴──────┘" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# selecting a single column\n", "df.select(\"col0\")\n", @@ -1603,43 +742,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "ed447fb7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 3)
col0new_column0col2
i64i64i64
024
6316
12428
18540
24652
30764
" - ], - "text/plain": [ - "shape: (6, 3)\n", - "┌──────┬─────────────┬──────┐\n", - "│ col0 ┆ new_column0 ┆ col2 │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪═════════════╪══════╡\n", - "│ 0 ┆ 2 ┆ 4 │\n", - "│ 6 ┆ 3 ┆ 16 │\n", - "│ 12 ┆ 4 ┆ 28 │\n", - "│ 18 ┆ 5 ┆ 40 │\n", - "│ 24 ┆ 6 ┆ 52 │\n", - "│ 30 ┆ 7 ┆ 64 │\n", - "└──────┴─────────────┴──────┘" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.select(pl.col(\"col0\"), pl.col(\"new_column0\") + 2, pl.col(\"col2\") * 2)" ] @@ -1654,39 +760,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "eabfd313", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (2, 3)
col0new_column0col2
i64i64i64
002
618
" - ], - "text/plain": [ - "shape: (2, 3)\n", - "┌──────┬─────────────┬──────┐\n", - "│ col0 ┆ new_column0 ┆ col2 │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪═════════════╪══════╡\n", - "│ 0 ┆ 0 ┆ 2 │\n", - "│ 6 ┆ 1 ┆ 8 │\n", - "└──────┴─────────────┴──────┘" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.select(\"col0\", \"new_column0\", \"col2\").slice(0, 2)" ] @@ -1701,48 +778,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "aed67406", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 14)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delayflightair_timedistancehourminute
i64i64i64i64i64i64i64i64i64i64i64i64i64i64
20131151751528308191115452271400515
20131153352948508302017142271416529
20131154254029238503311411601089540
201311544545-110041022-187251831576545
201311554600-6812837-2546111676260
2013930null1455nullnull1634null3393null2131455
2013930null2200nullnull2312null3525null198220
2013930null1210nullnull1330null3461null7641210
2013930null1159nullnull1344null3572null4191159
2013930null840nullnull1020null3531null431840
" - ], - "text/plain": [ - "shape: (336_776, 14)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────────┬──────┬────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ air_time ┆ distance ┆ hour ┆ minute │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════════╪══════╪════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 227 ┆ 1400 ┆ 5 ┆ 15 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 227 ┆ 1416 ┆ 5 ┆ 29 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 160 ┆ 1089 ┆ 5 ┆ 40 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 183 ┆ 1576 ┆ 5 ┆ 45 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 116 ┆ 762 ┆ 6 ┆ 0 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 213 ┆ 14 ┆ 55 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 198 ┆ 22 ┆ 0 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 764 ┆ 12 ┆ 10 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 419 ┆ 11 ┆ 59 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 431 ┆ 8 ┆ 40 │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────────┴──────┴────────┘" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.select(pl.col(pl.Int64))" ] @@ -1757,48 +796,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "62f578d1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 14)
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delayflightair_timedistancehourminute
i64i64i64i64i64i64i64i64i64i64i64i64i64i64
20131151751528308191115452271400515
20131153352948508302017142271416529
20131154254029238503311411601089540
201311544545-110041022-187251831576545
201311554600-6812837-2546111676260
2013930null1455nullnull1634null3393null2131455
2013930null2200nullnull2312null3525null198220
2013930null1210nullnull1330null3461null7641210
2013930null1159nullnull1344null3572null4191159
2013930null840nullnull1020null3531null431840
" - ], - "text/plain": [ - "shape: (336_776, 14)\n", - "┌──────┬───────┬─────┬──────────┬───┬──────────┬──────────┬──────┬────────┐\n", - "│ year ┆ month ┆ day ┆ dep_time ┆ … ┆ air_time ┆ distance ┆ hour ┆ minute │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪═══════╪═════╪══════════╪═══╪══════════╪══════════╪══════╪════════╡\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 517 ┆ … ┆ 227 ┆ 1400 ┆ 5 ┆ 15 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 533 ┆ … ┆ 227 ┆ 1416 ┆ 5 ┆ 29 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 542 ┆ … ┆ 160 ┆ 1089 ┆ 5 ┆ 40 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 544 ┆ … ┆ 183 ┆ 1576 ┆ 5 ┆ 45 │\n", - "│ 2013 ┆ 1 ┆ 1 ┆ 554 ┆ … ┆ 116 ┆ 762 ┆ 6 ┆ 0 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 213 ┆ 14 ┆ 55 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 198 ┆ 22 ┆ 0 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 764 ┆ 12 ┆ 10 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 419 ┆ 11 ┆ 59 │\n", - "│ 2013 ┆ 9 ┆ 30 ┆ null ┆ … ┆ null ┆ 431 ┆ 8 ┆ 40 │\n", - "└──────┴───────┴─────┴──────────┴───┴──────────┴──────────┴──────┴────────┘" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import polars.selectors as S\n", "\n", @@ -1811,48 +812,10 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "d4e486db", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (336_776, 2)
arr_timearr_delay
i64i64
83011
85020
92333
1004-18
812-25
nullnull
nullnull
nullnull
nullnull
nullnull
" - ], - "text/plain": [ - "shape: (336_776, 2)\n", - "┌──────────┬───────────┐\n", - "│ arr_time ┆ arr_delay │\n", - "│ --- ┆ --- │\n", - "│ i64 ┆ i64 │\n", - "╞══════════╪═══════════╡\n", - "│ 830 ┆ 11 │\n", - "│ 850 ┆ 20 │\n", - "│ 923 ┆ 33 │\n", - "│ 1004 ┆ -18 │\n", - "│ 812 ┆ -25 │\n", - "│ … ┆ … │\n", - "│ null ┆ null │\n", - "│ null ┆ null │\n", - "│ null ┆ null │\n", - "│ null ┆ null │\n", - "│ null ┆ null │\n", - "└──────────┴───────────┘" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Select columns that contain \"delay\" in their name\n", "flights.select(S.contains(\"delay\"))\n", @@ -1881,43 +844,10 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "5e5c0dd0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 10)
col0col1col2lettersnamescol5fruitnew_column0new_column1new_column2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" - ], - "text/plain": [ - "shape: (6, 10)\n", - "┌──────┬──────┬──────┬─────────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ letters ┆ … ┆ fruit ┆ new_column0 ┆ new_column1 ┆ new_column2 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", - "╞══════╪══════╪══════╪═════════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", - "└──────┴──────┴──────┴─────────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.rename({\"col3\": \"letters\", \"col4\": \"names\", \"col6\": \"fruit\"})" ] @@ -1932,43 +862,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "482d301f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 10)
COL0COL1COL2COL3COL4COL5COL6NEW_COLUMN0NEW_COLUMN1NEW_COLUMN2
i64i64i64i64i64i64stri64i32i32
012345"apple"056
67891011"orange"156
121314151617"pineapple"256
181920212223"mango"356
242526272829"kiwi"456
303132333435"lemon"556
" - ], - "text/plain": [ - "shape: (6, 10)\n", - "┌──────┬──────┬──────┬──────┬───┬───────────┬─────────────┬─────────────┬─────────────┐\n", - "│ COL0 ┆ COL1 ┆ COL2 ┆ COL3 ┆ … ┆ COL6 ┆ NEW_COLUMN0 ┆ NEW_COLUMN1 ┆ NEW_COLUMN2 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i32 ┆ i32 │\n", - "╞══════╪══════╪══════╪══════╪═══╪═══════════╪═════════════╪═════════════╪═════════════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ … ┆ apple ┆ 0 ┆ 5 ┆ 6 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ … ┆ orange ┆ 1 ┆ 5 ┆ 6 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ … ┆ pineapple ┆ 2 ┆ 5 ┆ 6 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ … ┆ mango ┆ 3 ┆ 5 ┆ 6 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ … ┆ kiwi ┆ 4 ┆ 5 ┆ 6 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ … ┆ lemon ┆ 5 ┆ 5 ┆ 6 │\n", - "└──────┴──────┴──────┴──────┴───┴───────────┴─────────────┴─────────────┴─────────────┘" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.rename(lambda column_name: column_name.upper())" ] @@ -2005,43 +902,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "b7c0d519", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 6)
col0col1col2col3col4col5
i64i64i64i64i64i64
012345
67891011
121314151617
181920212223
242526272829
303132333435
" - ], - "text/plain": [ - "shape: (6, 6)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", - "│ col0 ┆ col1 ┆ col2 ┆ col3 ┆ col4 ┆ col5 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", - "│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ 5 │\n", - "│ 6 ┆ 7 ┆ 8 ┆ 9 ┆ 10 ┆ 11 │\n", - "│ 12 ┆ 13 ┆ 14 ┆ 15 ┆ 16 ┆ 17 │\n", - "│ 18 ┆ 19 ┆ 20 ┆ 21 ┆ 22 ┆ 23 │\n", - "│ 24 ┆ 25 ┆ 26 ┆ 27 ┆ 28 ┆ 29 │\n", - "│ 30 ┆ 31 ┆ 32 ┆ 33 ┆ 34 ┆ 35 │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┘" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pl.DataFrame(\n", " data=np.reshape(range(36), (6, 6)), schema=[\"col\" + str(i) for i in range(6)]\n", @@ -2051,43 +915,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "3c2029cc", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 6)
col5col3col1col4col2col0
i64i64i64i64i64i64
531420
11971086
171513161412
232119222018
292725282624
353331343230
" - ], - "text/plain": [ - "shape: (6, 6)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", - "│ col5 ┆ col3 ┆ col1 ┆ col4 ┆ col2 ┆ col0 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", - "│ 5 ┆ 3 ┆ 1 ┆ 4 ┆ 2 ┆ 0 │\n", - "│ 11 ┆ 9 ┆ 7 ┆ 10 ┆ 8 ┆ 6 │\n", - "│ 17 ┆ 15 ┆ 13 ┆ 16 ┆ 14 ┆ 12 │\n", - "│ 23 ┆ 21 ┆ 19 ┆ 22 ┆ 20 ┆ 18 │\n", - "│ 29 ┆ 27 ┆ 25 ┆ 28 ┆ 26 ┆ 24 │\n", - "│ 35 ┆ 33 ┆ 31 ┆ 34 ┆ 32 ┆ 30 │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┘" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.select([\"col5\", \"col3\", \"col1\", \"col4\", \"col2\", \"col0\"])\n", "df" @@ -2103,43 +934,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "e7ab5f64", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 6)
col5col4col3col2col1col0
i64i64i64i64i64i64
543210
11109876
171615141312
232221201918
292827262524
353433323130
" - ], - "text/plain": [ - "shape: (6, 6)\n", - "┌──────┬──────┬──────┬──────┬──────┬──────┐\n", - "│ col5 ┆ col4 ┆ col3 ┆ col2 ┆ col1 ┆ col0 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", - "╞══════╪══════╪══════╪══════╪══════╪══════╡\n", - "│ 5 ┆ 4 ┆ 3 ┆ 2 ┆ 1 ┆ 0 │\n", - "│ 11 ┆ 10 ┆ 9 ┆ 8 ┆ 7 ┆ 6 │\n", - "│ 17 ┆ 16 ┆ 15 ┆ 14 ┆ 13 ┆ 12 │\n", - "│ 23 ┆ 22 ┆ 21 ┆ 20 ┆ 19 ┆ 18 │\n", - "│ 29 ┆ 28 ┆ 27 ┆ 26 ┆ 25 ┆ 24 │\n", - "│ 35 ┆ 34 ┆ 33 ┆ 32 ┆ 31 ┆ 30 │\n", - "└──────┴──────┴──────┴──────┴──────┴──────┘" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Alphabetical order\n", "df.select(sorted(df.columns))\n", @@ -2184,48 +982,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "62e540c0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (12, 2)
monthdep_delay
i64f64
721.727787
620.846332
512.986859
1216.576688
210.816843
812.61104
110.036665
313.227076
96.722476
106.243988
" - ], - "text/plain": [ - "shape: (12, 2)\n", - "┌───────┬───────────┐\n", - "│ month ┆ dep_delay │\n", - "│ --- ┆ --- │\n", - "│ i64 ┆ f64 │\n", - "╞═══════╪═══════════╡\n", - "│ 7 ┆ 21.727787 │\n", - "│ 6 ┆ 20.846332 │\n", - "│ 5 ┆ 12.986859 │\n", - "│ 12 ┆ 16.576688 │\n", - "│ 2 ┆ 10.816843 │\n", - "│ … ┆ … │\n", - "│ 8 ┆ 12.61104 │\n", - "│ 1 ┆ 10.036665 │\n", - "│ 3 ┆ 13.227076 │\n", - "│ 9 ┆ 6.722476 │\n", - "│ 10 ┆ 6.243988 │\n", - "└───────┴───────────┘" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.group_by(\"month\").agg(pl.col(\"dep_delay\").mean())" ] @@ -2263,48 +1023,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "af588177", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (12, 3)
monthmean_delaycount_flights
i64f64u32
313.22707627973
1216.57668827110
620.84633227234
96.72247627122
110.03666526483
721.72778728485
210.81684323690
812.6110428841
512.98685928233
115.43536227035
" - ], - "text/plain": [ - "shape: (12, 3)\n", - "┌───────┬────────────┬───────────────┐\n", - "│ month ┆ mean_delay ┆ count_flights │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ f64 ┆ u32 │\n", - "╞═══════╪════════════╪═══════════════╡\n", - "│ 3 ┆ 13.227076 ┆ 27973 │\n", - "│ 12 ┆ 16.576688 ┆ 27110 │\n", - "│ 6 ┆ 20.846332 ┆ 27234 │\n", - "│ 9 ┆ 6.722476 ┆ 27122 │\n", - "│ 1 ┆ 10.036665 ┆ 26483 │\n", - "│ … ┆ … ┆ … │\n", - "│ 7 ┆ 21.727787 ┆ 28485 │\n", - "│ 2 ┆ 10.816843 ┆ 23690 │\n", - "│ 8 ┆ 12.61104 ┆ 28841 │\n", - "│ 5 ┆ 12.986859 ┆ 28233 │\n", - "│ 11 ┆ 5.435362 ┆ 27035 │\n", - "└───────┴────────────┴───────────────┘" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Multiple aggregations using polars' syntactic sugar (shorthand) for mean and count\n", "flights.group_by(\"month\").agg(\n", @@ -2333,48 +1055,10 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "b0e56ff1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (12, 4)
monthyearmean_delaycount_flights
i64i64f64u32
2201310.81684323690
6201320.84633227234
7201321.72778728485
1020136.24398828653
1201310.03666526483
920136.72247627122
3201313.22707627973
1120135.43536227035
12201316.57668827110
4201313.93803827662
" - ], - "text/plain": [ - "shape: (12, 4)\n", - "┌───────┬──────┬────────────┬───────────────┐\n", - "│ month ┆ year ┆ mean_delay ┆ count_flights │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ f64 ┆ u32 │\n", - "╞═══════╪══════╪════════════╪═══════════════╡\n", - "│ 2 ┆ 2013 ┆ 10.816843 ┆ 23690 │\n", - "│ 6 ┆ 2013 ┆ 20.846332 ┆ 27234 │\n", - "│ 7 ┆ 2013 ┆ 21.727787 ┆ 28485 │\n", - "│ 10 ┆ 2013 ┆ 6.243988 ┆ 28653 │\n", - "│ 1 ┆ 2013 ┆ 10.036665 ┆ 26483 │\n", - "│ … ┆ … ┆ … ┆ … │\n", - "│ 9 ┆ 2013 ┆ 6.722476 ┆ 27122 │\n", - "│ 3 ┆ 2013 ┆ 13.227076 ┆ 27973 │\n", - "│ 11 ┆ 2013 ┆ 5.435362 ┆ 27035 │\n", - "│ 12 ┆ 2013 ┆ 16.576688 ┆ 27110 │\n", - "│ 4 ┆ 2013 ┆ 13.938038 ┆ 27662 │\n", - "└───────┴──────┴────────────┴───────────────┘" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "month_year_delay = flights.group_by(\"month\", \"year\").agg(\n", " mean_delay=pl.mean(\"dep_delay\"),\n", diff --git a/data-visualise.ipynb b/data-visualise.ipynb index 0e2feeb..8f3d721 100644 --- a/data-visualise.ipynb +++ b/data-visualise.ipynb @@ -62,51 +62,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a86fb211", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import polars as pl\n", "from lets_plot import *\n", @@ -163,49 +122,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0cf986aa", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (344, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
"Chinstrap""Dream"55.819.8207.04000.0"male"2009
"Chinstrap""Dream"43.518.1202.03400.0"female"2009
"Chinstrap""Dream"49.618.2193.03775.0"male"2009
"Chinstrap""Dream"50.819.0210.04100.0"male"2009
"Chinstrap""Dream"50.218.7198.03775.0"female"2009
" - ], - "text/plain": [ - "shape: (344, 8)\n", - "┌───────────┬───────────┬──────────────┬──────────────┬──────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_ ┆ bill_depth_m ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ mm ┆ m ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ --- ┆ --- ┆ --- ┆ f64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ f64 ┆ f64 ┆ f64 ┆ ┆ ┆ │\n", - "╞═══════════╪═══════════╪══════════════╪══════════════╪══════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ Chinstrap ┆ Dream ┆ 55.8 ┆ 19.8 ┆ 207.0 ┆ 4000.0 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 43.5 ┆ 18.1 ┆ 202.0 ┆ 3400.0 ┆ female ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 49.6 ┆ 18.2 ┆ 193.0 ┆ 3775.0 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 50.8 ┆ 19.0 ┆ 210.0 ┆ 4100.0 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 50.2 ┆ 18.7 ┆ 198.0 ┆ 3775.0 ┆ female ┆ 2009 │\n", - "└───────────┴───────────┴──────────────┴──────────────┴──────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "penguins = pl.from_pandas(load_penguins())\n", "penguins" @@ -221,43 +141,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "23c75ba7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
" - ], - "text/plain": [ - "shape: (5, 8)\n", - "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ --- ┆ f64 ┆ --- ┆ f64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ f64 ┆ ┆ f64 ┆ ┆ ┆ │\n", - "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", - "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "penguins.head()" ] @@ -290,158 +177,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "574fe39f", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -504,120 +247,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "15c3848b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -658,122 +291,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "6b0e1c38", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(\n", @@ -804,136 +325,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "943efd36", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(\n", @@ -961,135 +356,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "9e12b3bf", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -1112,136 +382,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "17d5803b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -1262,154 +406,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "b9b98ec4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -1470,138 +470,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "7c76be4b", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(data=penguins, mapping=aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -1714,119 +590,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "21b45061", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"species\")) + geom_bar())" ] @@ -1843,43 +610,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "4e046bb2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
catstrf64f64f64f64stri64
"Adelie""Torgersen"39.118.7181.03750.0"male"2007
"Adelie""Torgersen"39.517.4186.03800.0"female"2007
"Adelie""Torgersen"40.318.0195.03250.0"female"2007
"Adelie""Torgersen"nullnullnullnullnull2007
"Adelie""Torgersen"36.719.3193.03450.0"female"2007
" - ], - "text/plain": [ - "shape: (5, 8)\n", - "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", - "│ cat ┆ str ┆ --- ┆ f64 ┆ --- ┆ f64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ f64 ┆ ┆ f64 ┆ ┆ ┆ │\n", - "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181.0 ┆ 3750.0 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186.0 ┆ 3800.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195.0 ┆ 3250.0 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ null ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193.0 ┆ 3450.0 ┆ female ┆ 2007 │\n", - "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "penguins = penguins.cast({\"species\": pl.Categorical})\n", "penguins.head()" @@ -1907,120 +641,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "93675336", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\")) + geom_histogram(binwidth=200))" ] @@ -2056,120 +680,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "6a58021f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\")) + geom_density())" ] @@ -2238,141 +752,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "a636947a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"species\", y=\"body_mass_g\")) + geom_boxplot())" ] @@ -2387,123 +770,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "9b85a2df", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"body_mass_g\", color=\"species\")) + geom_density(size=2))" ] @@ -2522,124 +792,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "353189e5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(penguins, aes(x=\"body_mass_g\", color=\"species\", fill=\"species\"))\n", @@ -2677,121 +833,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "e091e211", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"island\", fill=\"species\")) + geom_bar())" ] @@ -2808,122 +853,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "7df8fb7a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"island\", fill=\"species\")) + geom_bar(position=\"fill\"))" ] @@ -2949,120 +882,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "5066527d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\")) + geom_point())" ] @@ -3081,124 +904,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "8ca23d34", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -3222,130 +931,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "00dd36e3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\"))\n", @@ -3415,21 +1004,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "3410634b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/Users/omagic/Documents/GitHub/python4DSpolars/lets-plot-images/penguin-plot.svg'" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "plotted_data = (\n", " ggplot(penguins, aes(x=\"flipper_length_mm\", y=\"body_mass_g\")) + geom_point()\n", @@ -3451,7 +1029,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "852afe51", "metadata": { "tags": [ diff --git a/databases.ipynb b/databases.ipynb index 3c786b9..b183959 100644 --- a/databases.ipynb +++ b/databases.ipynb @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "9e54fbf6", "metadata": {}, "outputs": [], @@ -92,30 +92,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "970d2c19", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(1, 'AC/DC'),\n", - " (2, 'Accept'),\n", - " (3, 'Aerosmith'),\n", - " (4, 'Alanis Morissette'),\n", - " (5, 'Alice In Chains'),\n", - " (6, 'Antônio Carlos Jobim'),\n", - " (7, 'Apocalyptica'),\n", - " (8, 'Audioslave'),\n", - " (9, 'BackBeat'),\n", - " (10, 'Billy Cobham')]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import sqlite3\n", "\n", @@ -136,47 +116,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "c5871b6e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 2)
column_0column_1
i64str
1"AC/DC"
2"Accept"
3"Aerosmith"
4"Alanis Morissette"
5"Alice In Chains"
6"Antônio Carlos Jobim"
7"Apocalyptica"
8"Audioslave"
9"BackBeat"
10"Billy Cobham"
" - ], - "text/plain": [ - "shape: (10, 2)\n", - "┌──────────┬──────────────────────┐\n", - "│ column_0 ┆ column_1 │\n", - "│ --- ┆ --- │\n", - "│ i64 ┆ str │\n", - "╞══════════╪══════════════════════╡\n", - "│ 1 ┆ AC/DC │\n", - "│ 2 ┆ Accept │\n", - "│ 3 ┆ Aerosmith │\n", - "│ 4 ┆ Alanis Morissette │\n", - "│ 5 ┆ Alice In Chains │\n", - "│ 6 ┆ Antônio Carlos Jobim │\n", - "│ 7 ┆ Apocalyptica │\n", - "│ 8 ┆ Audioslave │\n", - "│ 9 ┆ BackBeat │\n", - "│ 10 ┆ Billy Cobham │\n", - "└──────────┴──────────────────────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import polars as pl\n", "\n", @@ -195,21 +138,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "62791eab", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['ArtistId', 'Name']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[i[0] for i in cursor.description]" ] @@ -226,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "0074d4d3", "metadata": {}, "outputs": [], @@ -249,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "8020a73a", "metadata": {}, "outputs": [], @@ -270,21 +202,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8a8354b2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('US', 1.0, 3), ('UK', 0.6, 2), ('France', 0.8, 1)]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "con_new.execute(\"SELECT * FROM test\").fetchall()" ] @@ -299,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "927e804d", "metadata": { "tags": [ @@ -357,30 +278,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": 
"be55f957", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('Bodies', 180035),\n", - " ('Vivo Isolado Do Mundo', 180035),\n", - " ('Elvis Ate America', 180166),\n", - " ('Remote Control', 180297),\n", - " ('Promises', 180401),\n", - " ('Emergency', 180427),\n", - " ('À Vontade (Live Mix)', 180636),\n", - " ('Hyperconectividade', 180636),\n", - " ('On Fire', 180636),\n", - " ('Fascinação', 180793)]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sql_query = \"SELECT name, milliseconds FROM track WHERE milliseconds > 1e3*3*60 ORDER BY milliseconds ASC LIMIT 10;\"\n", "cursor = con.execute(sql_query)\n", @@ -398,25 +299,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "3f894066", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(340, 0.863),\n", - " (345, 1.11065),\n", - " (318, 1.6882166666666667),\n", - " (314, 1.69135),\n", - " (328, 1.8377666666666668)]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sql_groupby = \"SELECT albumid, AVG(milliseconds)/1e3/60 FROM track GROUP BY albumid ORDER BY AVG(milliseconds) ASC LIMIT 5;\"\n", "cursor = con.execute(sql_groupby)\n", @@ -438,25 +324,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "9824b70a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(340, 0.863, \"Liszt - 12 Études D'Execution Transcendante\"),\n", - " (345, 1.11065, \"Monteverdi: L'Orfeo\"),\n", - " (318, 1.6882166666666667, 'SCRIABIN: Vers la flamme'),\n", - " (314, 1.69135, 'English Renaissance'),\n", - " (328, 1.8377666666666668, 'Charpentier: Divertissements, Airs & Concerts')]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sql_join = \"SELECT track.albumid, 
AVG(milliseconds)/1e3/60, album.title FROM track INNER JOIN album ON (track.albumid = album.albumid) GROUP BY album.albumid ORDER BY AVG(milliseconds) ASC LIMIT 5;\"\n", "cursor = con.execute(sql_join)\n", @@ -540,7 +411,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "f5c83909", "metadata": {}, "outputs": [], @@ -577,45 +448,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "fc89c429", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
-       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                                                Milliseconds  Bytes     UnitPrice       ┃\n",
-       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
-       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2) │\n",
-       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼─────────────────┤\n",
-       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson                             343719111703340.99 │\n",
-       "│       2Balls to the Wall                      221NULL34256255104240.99 │\n",
-       "│       3Fast As a Shark                        321F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman                   23061939909940.99 │\n",
-       "│       4Restless and Wild                      321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman25205143317790.99 │\n",
-       "│       5Princess of the Dawn                   321Deaffy & R.A. Smith-Diesel                                            37541862905210.99 │\n",
-       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴─────────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", - "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │\n", - "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼─────────────────┤\n", - "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m343719\u001b[0m │ \u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", - "│ \u001b[1;36m2\u001b[0m │ \u001b[32mBalls to the Wall \u001b[0m │ 
\u001b[1;36m2\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m342562\u001b[0m │ \u001b[1;36m5510424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", - "│ \u001b[1;36m3\u001b[0m │ \u001b[32mFast As a Shark \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman \u001b[0m │ \u001b[1;36m230619\u001b[0m │ \u001b[1;36m3990994\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", - "│ \u001b[1;36m4\u001b[0m │ \u001b[32mRestless and Wild \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman\u001b[0m │ \u001b[1;36m252051\u001b[0m │ \u001b[1;36m4331779\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", - "│ \u001b[1;36m5\u001b[0m │ \u001b[32mPrincess of the Dawn \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mDeaffy & R.A. Smith-Diesel \u001b[0m │ \u001b[1;36m375418\u001b[0m │ \u001b[1;36m6290521\u001b[0m │ \u001b[1;36m0.99\u001b[0m │\n", - "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴─────────────────┘" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import ibis\n", "\n", @@ -635,45 +471,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "5e5a482b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
-       "┃ TrackId  Name                                                      AlbumId  MediaTypeId  GenreId  Composer            Milliseconds  Bytes    UnitPrice        mean_mins_track ┃\n",
-       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
-       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2)float64         │\n",
-       "├─────────┼──────────────────────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────┼──────────────┼─────────┼─────────────────┼─────────────────┤\n",
-       "│    3496Étude 1, In C Major - Preludio (Presto) - Liszt         340424NULL5178022296170.990.863000 │\n",
-       "│    3501L'orfeo, Act 3, Sinfonia (Orchestra)                    345224Claudio Monteverdi6663911890620.991.110650 │\n",
-       "│    3452SCRIABIN: Prelude in B Major, Op. 11, No. 11            318424NULL10129338195350.991.688217 │\n",
-       "│    3448Lamentations of Jeremiah, First Set \\ Incipit Lamentatio314224Thomas Tallis     6919412080800.991.691350 │\n",
-       "│    3492Sing Joyfully                                           314224William Byrd      13376822564840.991.691350 │\n",
-       "└─────────┴──────────────────────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────┴──────────────┴─────────┴─────────────────┴─────────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean_mins_track\u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", - "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │ \u001b[2mfloat64\u001b[0m │\n", - "├─────────┼──────────────────────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────┼──────────────┼─────────┼─────────────────┼─────────────────┤\n", - "│ \u001b[1;36m3496\u001b[0m │ \u001b[32mÉtude 1, In C Major - Preludio (Presto) - Liszt \u001b[0m │ \u001b[1;36m340\u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m51780\u001b[0m │ \u001b[1;36m2229617\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m0.863000\u001b[0m │\n", - "│ \u001b[1;36m3501\u001b[0m │ 
\u001b[32mL'orfeo, Act 3, Sinfonia (Orchestra) \u001b[0m │ \u001b[1;36m345\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mClaudio Monteverdi\u001b[0m │ \u001b[1;36m66639\u001b[0m │ \u001b[1;36m1189062\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.110650\u001b[0m │\n", - "│ \u001b[1;36m3452\u001b[0m │ \u001b[32mSCRIABIN: Prelude in B Major, Op. 11, No. 11 \u001b[0m │ \u001b[1;36m318\u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m101293\u001b[0m │ \u001b[1;36m3819535\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.688217\u001b[0m │\n", - "│ \u001b[1;36m3448\u001b[0m │ \u001b[32mLamentations of Jeremiah, First Set \\ Incipit Lamentatio\u001b[0m │ \u001b[1;36m314\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mThomas Tallis \u001b[0m │ \u001b[1;36m69194\u001b[0m │ \u001b[1;36m1208080\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.691350\u001b[0m │\n", - "│ \u001b[1;36m3492\u001b[0m │ \u001b[32mSing Joyfully \u001b[0m │ \u001b[1;36m314\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m24\u001b[0m │ \u001b[32mWilliam Byrd \u001b[0m │ \u001b[1;36m133768\u001b[0m │ \u001b[1;36m2256484\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m1.691350\u001b[0m │\n", - "└─────────┴──────────────────────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────┴──────────────┴─────────┴─────────────────┴─────────────────┘" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "track.group_by(\"AlbumId\").mutate(\n", " mean_mins_track=track.Milliseconds.mean() / 1e3 / 60\n", @@ -682,57 +483,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "39c80365", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
-       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                   Milliseconds  Bytes     UnitPrice        mean_mins_track ┃\n",
-       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
-       "│ !int64!stringint64!int64int64string!int64int64!decimal(10, 2)float64         │\n",
-       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼───────────────────────────────────────────┼──────────────┼──────────┼─────────────────┼─────────────────┤\n",
-       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.994.000692 │\n",
-       "│       6Put The Finger On You                  111Angus Young, Malcolm Young, Brian Johnson20566267134510.994.000692 │\n",
-       "│       7Let's Get It Up                        111Angus Young, Malcolm Young, Brian Johnson23392676365610.994.000692 │\n",
-       "│       8Inject The Venom                       111Angus Young, Malcolm Young, Brian Johnson21083468528600.994.000692 │\n",
-       "│       9Snowballed                             111Angus Young, Malcolm Young, Brian Johnson20310265994240.994.000692 │\n",
-       "│      10Evil Walks                             111Angus Young, Malcolm Young, Brian Johnson26349786112450.994.000692 │\n",
-       "│      11C.O.D.                                 111Angus Young, Malcolm Young, Brian Johnson19983665663140.994.000692 │\n",
-       "│      12Breaking The Rules                     111Angus Young, Malcolm Young, Brian Johnson26328885968400.994.000692 │\n",
-       "│      13Night Of The Long Knives               111Angus Young, Malcolm Young, Brian Johnson20568867063470.994.000692 │\n",
-       "│      14Spellbound                             111Angus Young, Malcolm Young, Brian Johnson27086388170380.994.000692 │\n",
-       "│        │\n",
-       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴───────────────────────────────────────────┴──────────────┴──────────┴─────────────────┴─────────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean_mins_track\u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", - "│ \u001b[2m!int64\u001b[0m │ \u001b[2m!string\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2m!int64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2m!decimal(10, 2)\u001b[0m │ \u001b[2mfloat64\u001b[0m │\n", - "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼───────────────────────────────────────────┼──────────────┼──────────┼─────────────────┼─────────────────┤\n", - "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m343719\u001b[0m │ \u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ 
\u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m6\u001b[0m │ \u001b[32mPut The Finger On You \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m205662\u001b[0m │ \u001b[1;36m6713451\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m7\u001b[0m │ \u001b[32mLet's Get It Up \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m233926\u001b[0m │ \u001b[1;36m7636561\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m8\u001b[0m │ \u001b[32mInject The Venom \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m210834\u001b[0m │ \u001b[1;36m6852860\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m9\u001b[0m │ \u001b[32mSnowballed \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m203102\u001b[0m │ \u001b[1;36m6599424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m10\u001b[0m │ \u001b[32mEvil Walks \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m263497\u001b[0m │ \u001b[1;36m8611245\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m11\u001b[0m │ \u001b[32mC.O.D. 
\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m199836\u001b[0m │ \u001b[1;36m6566314\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m12\u001b[0m │ \u001b[32mBreaking The Rules \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m263288\u001b[0m │ \u001b[1;36m8596840\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m13\u001b[0m │ \u001b[32mNight Of The Long Knives \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m205688\u001b[0m │ \u001b[1;36m6706347\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[1;36m14\u001b[0m │ \u001b[32mSpellbound \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson\u001b[0m │ \u001b[1;36m270863\u001b[0m │ \u001b[1;36m8817038\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[1;36m4.000692\u001b[0m │\n", - "│ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │\n", - "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴───────────────────────────────────────────┴──────────────┴──────────┴─────────────────┴─────────────────┘" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "track.group_by(\"AlbumId\").mutate(mean_mins_track=track.Milliseconds.mean() / 1e3 / 60)" ] @@ -747,57 +501,10 @@ }, { "cell_type": "code", - "execution_count": 16, + 
"execution_count": null, "id": "02e6602a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n",
-       "┃ TrackId  Name                                     AlbumId  MediaTypeId  GenreId  Composer                                                                Milliseconds  Bytes     UnitPrice       Name_right ┃\n",
-       "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n",
-       "│ int64stringint64int64int64stringint64int64decimal(10, 2)string     │\n",
-       "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼────────────────┼────────────┤\n",
-       "│       1For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson                             343719111703340.99Rock       │\n",
-       "│       2Balls to the Wall                      221NULL34256255104240.99Rock       │\n",
-       "│       3Fast As a Shark                        321F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman                   23061939909940.99Rock       │\n",
-       "│       4Restless and Wild                      321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman25205143317790.99Rock       │\n",
-       "│       5Princess of the Dawn                   321Deaffy & R.A. Smith-Diesel                                            37541862905210.99Rock       │\n",
-       "│       6Put The Finger On You                  111Angus Young, Malcolm Young, Brian Johnson                             20566267134510.99Rock       │\n",
-       "│       7Let's Get It Up                        111Angus Young, Malcolm Young, Brian Johnson                             23392676365610.99Rock       │\n",
-       "│       8Inject The Venom                       111Angus Young, Malcolm Young, Brian Johnson                             21083468528600.99Rock       │\n",
-       "│       9Snowballed                             111Angus Young, Malcolm Young, Brian Johnson                             20310265994240.99Rock       │\n",
-       "│      10Evil Walks                             111Angus Young, Malcolm Young, Brian Johnson                             26349786112450.99Rock       │\n",
-       "│                 │\n",
-       "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴────────────────┴────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mTrackId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mAlbumId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMediaTypeId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mGenreId\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mComposer\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMilliseconds\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBytes\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mUnitPrice\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mName_right\u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n", - "│ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mdecimal(10, 2)\u001b[0m │ \u001b[2mstring\u001b[0m │\n", - "├─────────┼─────────────────────────────────────────┼─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────┼──────────────┼──────────┼────────────────┼────────────┤\n", - "│ \u001b[1;36m1\u001b[0m │ \u001b[32mFor Those About To Rock (We Salute You)\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m343719\u001b[0m │ 
\u001b[1;36m11170334\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m2\u001b[0m │ \u001b[32mBalls to the Wall \u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[2mNULL\u001b[0m │ \u001b[1;36m342562\u001b[0m │ \u001b[1;36m5510424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m3\u001b[0m │ \u001b[32mFast As a Shark \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman \u001b[0m │ \u001b[1;36m230619\u001b[0m │ \u001b[1;36m3990994\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m4\u001b[0m │ \u001b[32mRestless and Wild \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mF. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman\u001b[0m │ \u001b[1;36m252051\u001b[0m │ \u001b[1;36m4331779\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m5\u001b[0m │ \u001b[32mPrincess of the Dawn \u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mDeaffy & R.A. 
Smith-Diesel \u001b[0m │ \u001b[1;36m375418\u001b[0m │ \u001b[1;36m6290521\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m6\u001b[0m │ \u001b[32mPut The Finger On You \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m205662\u001b[0m │ \u001b[1;36m6713451\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m7\u001b[0m │ \u001b[32mLet's Get It Up \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m233926\u001b[0m │ \u001b[1;36m7636561\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m8\u001b[0m │ \u001b[32mInject The Venom \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m210834\u001b[0m │ \u001b[1;36m6852860\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m9\u001b[0m │ \u001b[32mSnowballed \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m203102\u001b[0m │ \u001b[1;36m6599424\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[1;36m10\u001b[0m │ \u001b[32mEvil Walks \u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[32mAngus Young, Malcolm Young, Brian Johnson \u001b[0m │ \u001b[1;36m263497\u001b[0m │ \u001b[1;36m8611245\u001b[0m │ \u001b[1;36m0.99\u001b[0m │ \u001b[32mRock \u001b[0m │\n", - "│ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ \u001b[2m…\u001b[0m │ 
\u001b[2m…\u001b[0m │\n", - "└─────────┴─────────────────────────────────────────┴─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────┴──────────────┴──────────┴────────────────┴────────────┘" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "genre = connection.table(\"genre\")\n", "genre_and_track = track.inner_join(\n", @@ -850,7 +557,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "325c8789", "metadata": { "tags": [ @@ -870,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "9117af26", "metadata": {}, "outputs": [], @@ -897,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "e8277743", "metadata": {}, "outputs": [], @@ -918,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "2097c995", "metadata": {}, "outputs": [], @@ -945,24 +652,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "14c14fc8", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(1, 'Deadpond', 'Dive Wilson', None),\n", - " (2, 'Spider-Boy', 'Pedro Parqueador', None),\n", - " (3, 'Rusty-Man', 'Tommy Sharp', 48),\n", - " (4, 'Ms Amazing', 'Barjabeen Bhabra', 17)]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "con = sqlite3.connect(Path(\"data/hero.db\"))\n", "\n", @@ -981,21 +674,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "afa69365", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name='Deadpond' secret_name='Dive Wilson' id=1 age=None\n", - "name='Spider-Boy' secret_name='Pedro Parqueador' id=2 age=None\n", - "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n", - 
"name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n" - ] - } - ], + "outputs": [], "source": [ "from sqlmodel import select\n", "\n", @@ -1016,21 +698,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "8fae7081", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name='Deadpond' secret_name='Dive Wilson' id=1 age=None\n", - "name='Spider-Boy' secret_name='Pedro Parqueador' id=2 age=None\n", - "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n", - "name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n" - ] - } - ], + "outputs": [], "source": [ "with Session(engine) as session:\n", " statement = select(Hero)\n", @@ -1050,19 +721,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "b2dbde03", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name='Ms Amazing' secret_name='Barjabeen Bhabra' id=4 age=17\n", - "name='Rusty-Man' secret_name='Tommy Sharp' id=3 age=48\n" - ] - } - ], + "outputs": [], "source": [ "with Session(engine) as session:\n", " statement = select(Hero).where(Hero.age < 100).limit(2).order_by(Hero.age)\n", @@ -1082,7 +744,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "349e5d3d", "metadata": { "tags": [ diff --git a/dates-and-times.ipynb b/dates-and-times.ipynb index 1ef38ab..47b53ad 100644 --- a/dates-and-times.ipynb +++ b/dates-and-times.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -74,18 +74,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "84829a6b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2026-04-28 13:36:00.720344\n" - ] - } - ], + "outputs": [], "source": [ "from datetime import datetime\n", "\n", @@ -129,18 
+121,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "fc224a47", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2019-11-28 00:00:00\n" - ] - } - ], + "outputs": [], "source": [ "specific_datetime = datetime(2019, 11, 28)\n", "print(specific_datetime)" @@ -166,21 +150,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "4558d476", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "datetime.datetime(2002, 2, 16, 0, 0)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date_string = \"16 February in 2002\"\n", "datetime.strptime(date_string, \"%d %B in %Y\")" @@ -196,21 +169,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "fb90ac84", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "datetime.datetime(2002, 2, 16, 0, 0)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date_string = \"16 Feb in 2002\"\n", "datetime.strptime(date_string, \"%d %b in %Y\")" @@ -226,19 +188,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "7a568f5b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2002-02-03 00:00:00\n", - "2002-02-03 00:00:00\n" - ] - } - ], + "outputs": [], "source": [ "from dateutil.parser import parse\n", "\n", @@ -258,21 +211,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d6b5e3c3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Tuesday, 04, 2026'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "now.strftime(\"%A, %m, %Y\")" ] @@ -331,21 +273,10 @@ }, { "cell_type": "code", - "execution_count": 
8, + "execution_count": null, "id": "39ba17e6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "now > specific_datetime" ] @@ -360,18 +291,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "fda57a44", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2309 days, 13:36:00.720344\n" - ] - } - ], + "outputs": [], "source": [ "time_diff = now - datetime(year=2020, month=1, day=1)\n", "print(time_diff)" @@ -387,21 +310,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "e1e25736", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "datetime.timedelta" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type(time_diff)" ] @@ -449,21 +361,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "ed526fbc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array('2020-01-01', dtype='datetime64[D]')" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -481,29 +382,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "537895c5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',\n", - " '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',\n", - " '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',\n", - " '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',\n", - " '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',\n", - " '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',\n", - " '2020-01-25', '2020-01-26', '2020-01-27', 
'2020-01-28',\n", - " '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01'],\n", - " dtype='datetime64[D]')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date + range(32)" ] @@ -520,21 +402,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "cd7a15e3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "np.datetime64('2020-01-01T09:00')" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.datetime64(\"2020-01-01 09:00\")" ] @@ -565,21 +436,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "48f0e9c6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timestamp('2020-02-16 00:00:00')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -609,24 +469,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "462b26da", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatetimeIndex(['2020-02-16', '2020-02-17', '2020-02-18', '2020-02-19',\n", - " '2020-02-20', '2020-02-21', '2020-02-22', '2020-02-23',\n", - " '2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27'],\n", - " dtype='datetime64[ns]', freq=None)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date + pd.to_timedelta(np.arange(12), \"D\")" ] @@ -643,23 +489,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "10e71325", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n", - " '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],\n", - " dtype='datetime64[ns]', freq='D')" - ] - }, - 
"execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.date_range(start=\"2018/1/1\", end=\"2018/1/8\")" ] @@ -674,23 +507,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "291ace2c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',\n", - " '2018-01-01 02:00:00'],\n", - " dtype='datetime64[ns]', freq='h')" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.date_range(\"2018-01-01\", periods=3, freq=\"h\")" ] @@ -705,23 +525,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "6703682c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',\n", - " '2017-12-31 18:00:00-08:00'],\n", - " dtype='datetime64[ns, US/Pacific]', freq='h')" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dti = pd.date_range(\"2018-01-01\", periods=3, freq=\"h\").tz_localize(\"UTC\")\n", "dti.tz_convert(\"US/Pacific\")" @@ -737,79 +544,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "dd00df7f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateVacancies (ICT), thousands
02001 MAY568
12001 JUN563
22001 JUL554
32001 AUG554
42001 SEP536
\n", - "
" - ], - "text/plain": [ - " date Vacancies (ICT), thousands\n", - "0 2001 MAY 568\n", - "1 2001 JUN 563\n", - "2 2001 JUL 554\n", - "3 2001 AUG 554\n", - "4 2001 SEP 536" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import requests\n", "\n", @@ -834,26 +572,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "247d9725", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 281 entries, 0 to 280\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 date 281 non-null object\n", - " 1 Vacancies (ICT), thousands 281 non-null int64 \n", - "dtypes: int64(1), object(1)\n", - "memory usage: 4.5+ KB\n" - ] - } - ], + "outputs": [], "source": [ "df.info()" ] @@ -868,34 +590,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "b90f8038", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/3535541307.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. 
To ensure parsing is consistent and as-expected, please specify a format.\n", - " df[\"date\"] = pd.to_datetime(df[\"date\"])\n" - ] - }, - { - "data": { - "text/plain": [ - "0 2001-05-01\n", - "1 2001-06-01\n", - "2 2001-07-01\n", - "3 2001-08-01\n", - "4 2001-09-01\n", - "Name: date, dtype: datetime64[ns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"date\"] = pd.to_datetime(df[\"date\"])\n", "df[\"date\"].head()" @@ -913,23 +611,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "05d056ae", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1, '19, 22\n", - "1 1, '19, 23\n", - "Name: date, dtype: object" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "small_df = pd.DataFrame({\"date\": [\"1, '19, 22\", \"1, '19, 23\"], \"values\": [\"1\", \"2\"]})\n", "small_df[\"date\"]" @@ -945,23 +630,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "514c9052", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2019-01-22\n", - "1 2019-01-23\n", - "Name: date, dtype: datetime64[ns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.to_datetime(small_df[\"date\"], format=\"%m, '%y, %d\")" ] @@ -978,79 +650,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "ac3addbc", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateVacancies (ICT), thousands
02001-05-31568
12001-06-30563
22001-07-31554
32001-08-31554
42001-09-30536
\n", - "
" - ], - "text/plain": [ - " date Vacancies (ICT), thousands\n", - "0 2001-05-31 568\n", - "1 2001-06-30 563\n", - "2 2001-07-31 554\n", - "3 2001-08-31 554\n", - "4 2001-09-30 536" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"date\"] = df[\"date\"] + pd.offsets.MonthEnd()\n", "df.head()" @@ -1076,38 +679,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "a6c3d2d9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using `dt.day_name()`\n", - "0 Thursday\n", - "1 Saturday\n", - "2 Tuesday\n", - "3 Friday\n", - "4 Sunday\n", - "Name: date, dtype: object\n", - "Using `dt.isocalendar()`\n", - " year week day\n", - "0 2001 22 4\n", - "1 2001 26 6\n", - "2 2001 31 2\n", - "3 2001 35 5\n", - "4 2001 39 7\n", - "Using `dt.month`\n", - "0 5\n", - "1 6\n", - "2 7\n", - "3 8\n", - "4 9\n", - "Name: date, dtype: int32\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Using `dt.day_name()`\")\n", "print(df[\"date\"].dt.day_name().head())\n", @@ -1129,78 +704,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "e0a4f68d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-05-31568
2001-06-30563
2001-07-31554
2001-08-31554
2001-09-30536
\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-05-31 568\n", - "2001-06-30 563\n", - "2001-07-31 554\n", - "2001-08-31 554\n", - "2001-09-30 536" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.set_index(\"date\")\n", "df.head()" @@ -1216,23 +723,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "acf1ae60", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatetimeIndex(['2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',\n", - " '2001-09-30'],\n", - " dtype='datetime64[ns]', name='date', freq=None)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.index[:5]" ] @@ -1247,31 +741,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "9146c99d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/2067773505.py:1: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", - " df = df.asfreq(\"M\")\n" - ] - }, - { - "data": { - "text/plain": [ - "DatetimeIndex(['2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',\n", - " '2001-09-30'],\n", - " dtype='datetime64[ns]', name='date', freq='ME')" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.asfreq(\"M\")\n", "df.index[:5]" @@ -1321,1352 +794,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "b4c5f841", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:36:02.756256\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "df.plot();" ] @@ -2687,181 +818,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "e56ba5c4", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60965/311401334.py:1: FutureWarning: 'A' is deprecated and will be removed in a future version, please use 'YE' instead.\n", - " df.resample(\"A\").mean()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-12-31540.625000
2002-12-31517.500000
2003-12-31504.166667
2004-12-31551.916667
2005-12-31544.666667
2006-12-31529.500000
2007-12-31576.333333
2008-12-31544.583333
2009-12-31402.750000
2010-12-31424.166667
2011-12-31413.250000
2012-12-31423.916667
2013-12-31480.250000
2014-12-31592.416667
2015-12-31655.166667
2016-12-31671.250000
2017-12-31704.750000
2018-12-31742.666667
2019-12-31734.166667
2020-12-31487.500000
2021-12-31843.416667
2022-12-311092.083333
2023-12-31894.500000
2024-12-31767.888889
\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-12-31 540.625000\n", - "2002-12-31 517.500000\n", - "2003-12-31 504.166667\n", - "2004-12-31 551.916667\n", - "2005-12-31 544.666667\n", - "2006-12-31 529.500000\n", - "2007-12-31 576.333333\n", - "2008-12-31 544.583333\n", - "2009-12-31 402.750000\n", - "2010-12-31 424.166667\n", - "2011-12-31 413.250000\n", - "2012-12-31 423.916667\n", - "2013-12-31 480.250000\n", - "2014-12-31 592.416667\n", - "2015-12-31 655.166667\n", - "2016-12-31 671.250000\n", - "2017-12-31 704.750000\n", - "2018-12-31 742.666667\n", - "2019-12-31 734.166667\n", - "2020-12-31 487.500000\n", - "2021-12-31 843.416667\n", - "2022-12-31 1092.083333\n", - "2023-12-31 894.500000\n", - "2024-12-31 767.888889" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.resample(\"A\").mean()" ] @@ -2876,94 +836,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "fbbbcdff", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
meanstd
date
2001-12-31540.62500022.398581
2006-12-31529.55000020.434621
2011-12-31472.21666777.919796
2016-12-31564.60000099.829210
2021-12-31702.500000164.019480
\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands \n", - " mean std\n", - "date \n", - "2001-12-31 540.625000 22.398581\n", - "2006-12-31 529.550000 20.434621\n", - "2011-12-31 472.216667 77.919796\n", - "2016-12-31 564.600000 99.829210\n", - "2021-12-31 702.500000 164.019480" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.resample(\"5YE\").agg([\"mean\", \"std\"]).head()" ] @@ -2978,111 +854,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "9a48a45f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-05-31568.0
2001-06-01NaN
2001-06-02NaN
2001-06-03NaN
2001-06-04NaN
......
2024-09-26NaN
2024-09-27NaN
2024-09-28NaN
2024-09-29NaN
2024-09-30727.0
\n", - "

8524 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-05-31 568.0\n", - "2001-06-01 NaN\n", - "2001-06-02 NaN\n", - "2001-06-03 NaN\n", - "2001-06-04 NaN\n", - "... ...\n", - "2024-09-26 NaN\n", - "2024-09-27 NaN\n", - "2024-09-28 NaN\n", - "2024-09-29 NaN\n", - "2024-09-30 727.0\n", - "\n", - "[8524 rows x 1 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.resample(\"D\").asfreq()" ] @@ -3097,83 +872,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "d3ac1789", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-05-31568.000000
2001-06-01567.833333
2001-06-02567.666667
2001-06-03567.500000
2001-06-04NaN
2001-06-05NaN
\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-05-31 568.000000\n", - "2001-06-01 567.833333\n", - "2001-06-02 567.666667\n", - "2001-06-03 567.500000\n", - "2001-06-04 NaN\n", - "2001-06-05 NaN" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.resample(\"D\").interpolate(method=\"linear\", limit_direction=\"forward\", limit=3)[:6]" ] @@ -3188,26 +890,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "51647c56", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r", - "[*********************100%***********************] 1 of 1 completed" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "# Get stock market data\n", "import yfinance as yf\n", @@ -3219,1640 +905,20 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "ab1efae3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
'color'
'#bc80bd'
'#fb8072'
'#b3de69'
'#fdb462'
'#fccde5'
'#8dd3c7'
'#ffed6f'
'#bebada'
'#80b1d3'
'#ccebc5'
'#d9d9d9'
" - ], - "text/plain": [ - "cycler('color', ['#bc80bd', '#fb8072', '#b3de69', '#fdb462', '#fccde5', '#8dd3c7', '#ffed6f', '#bebada', '#80b1d3', '#ccebc5', '#d9d9d9'])" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "plt.rcParams[\"axes.prop_cycle\"]" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "fa0c9973", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:36:08.720742\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from itertools import cycle\n", "\n", @@ -4891,111 +957,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "1ddc4fb2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-05-31NaN
2001-06-30565.5
2001-07-31558.5
2001-08-31554.0
2001-09-30545.0
......
2024-05-31776.5
2024-06-30760.0
2024-07-31748.0
2024-08-31737.0
2024-09-30729.5
\n", - "

281 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-05-31 NaN\n", - "2001-06-30 565.5\n", - "2001-07-31 558.5\n", - "2001-08-31 554.0\n", - "2001-09-30 545.0\n", - "... ...\n", - "2024-05-31 776.5\n", - "2024-06-30 760.0\n", - "2024-07-31 748.0\n", - "2024-08-31 737.0\n", - "2024-09-30 729.5\n", - "\n", - "[281 rows x 1 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.rolling(2).mean()" ] @@ -5018,111 +983,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "0ea9c8ce", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousands
date
2001-05-31568.000000
2001-06-30565.222222
2001-07-31560.622951
2001-08-31558.379404
2001-09-30551.722037
......
2024-05-31813.183347
2024-06-30801.346677
2024-07-31789.477342
2024-08-31777.981873
2024-09-30767.785499
\n", - "

281 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands\n", - "date \n", - "2001-05-31 568.000000\n", - "2001-06-30 565.222222\n", - "2001-07-31 560.622951\n", - "2001-08-31 558.379404\n", - "2001-09-30 551.722037\n", - "... ...\n", - "2024-05-31 813.183347\n", - "2024-06-30 801.346677\n", - "2024-07-31 789.477342\n", - "2024-08-31 777.981873\n", - "2024-09-30 767.785499\n", - "\n", - "[281 rows x 1 columns]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.ewm(alpha=0.2).mean()" ] @@ -5137,3078 +1001,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "0af7b5e6", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:36:08.899458\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "roll_num = 28\n", @@ -8235,2366 +1031,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "134199ae", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:36:08.991604\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
"\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "roll = xf[\"Close\"].rolling(50, center=True)\n", "\n", @@ -10617,92 +1057,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "3078fbb4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Vacancies (ICT), thousandslead (12 months)lag (3 months)
date
2001-05-31568518.0NaN
2001-06-30563514.0NaN
2001-07-31554517.0NaN
2001-08-31554517.0568.0
2001-09-30536519.0563.0
\n", - "
" - ], - "text/plain": [ - " Vacancies (ICT), thousands lead (12 months) lag (3 months)\n", - "date \n", - "2001-05-31 568 518.0 NaN\n", - "2001-06-30 563 514.0 NaN\n", - "2001-07-31 554 517.0 NaN\n", - "2001-08-31 554 517.0 568.0\n", - "2001-09-30 536 519.0 563.0" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "lead = 12\n", "lag = 3\n", @@ -10714,1831 +1072,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "18b9afb3", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:36:09.071401\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "df.iloc[100:300, :].plot();" ] diff --git a/exploratory-data-analysis.ipynb b/exploratory-data-analysis.ipynb index b08137e..071a96b 100644 --- a/exploratory-data-analysis.ipynb +++ b/exploratory-data-analysis.ipynb @@ -28,51 +28,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a3377aa6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "from lets_plot import *\n", @@ -85,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -143,127 +102,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "069caa7c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyz
00.23IdealESI261.555.03263.953.982.43
10.21PremiumESI159.861.03263.893.842.31
20.23GoodEVS156.965.03274.054.072.31
30.29PremiumIVS262.458.03344.204.232.63
40.31GoodJSI263.358.03354.344.352.75
\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z\n", - "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", - "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", - "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", - "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", - "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds = pd.read_csv(\n", " \"https://github.com/mwaskom/seaborn-data/raw/master/diamonds.csv\"\n", @@ -289,135 +131,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "97900f58", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(diamonds, aes(x=\"carat\")) + geom_histogram(binwidth=0.5))" ] @@ -455,135 +172,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "20d75550", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "smaller_diamonds = diamonds.query(\"carat < 3\").copy()\n", "\n", @@ -633,135 +225,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "d9d7e995", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(diamonds, aes(x=\"y\")) + geom_histogram(binwidth=0.5))" ] @@ -777,140 +244,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "ea8f8bf3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(diamonds, aes(x=\"y\"))\n", @@ -932,123 +269,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "e81ffb55", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xyzprice
119630.000.00.005139
159510.000.00.006381
240678.0958.98.0612210
245200.000.00.0012800
262430.000.00.0015686
274290.000.00.0018034
491895.1531.85.122075
495560.000.00.002130
495570.000.00.002130
\n", - "
" - ], - "text/plain": [ - " x y z price\n", - "11963 0.00 0.0 0.00 5139\n", - "15951 0.00 0.0 0.00 6381\n", - "24067 8.09 58.9 8.06 12210\n", - "24520 0.00 0.0 0.00 12800\n", - "26243 0.00 0.0 0.00 15686\n", - "27429 0.00 0.0 0.00 18034\n", - "49189 5.15 31.8 5.12 2075\n", - "49556 0.00 0.0 0.00 2130\n", - "49557 0.00 0.0 0.00 2130" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "unusual = diamonds.query(\"y < 3 or y > 20\").loc[:, [\"x\", \"y\", \"z\", \"price\"]]\n", "unusual" @@ -1120,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "ecf345a7", "metadata": {}, "outputs": [], @@ -1140,135 +364,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "15a43255", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(diamonds2, aes(x=\"x\", y=\"y\")) + geom_point())" ] @@ -1286,195 +385,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "0a4ea922", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
0201311517.05152.0830.081911.0UA1545N14228EWRIAH227.014005152013-01-01T10:00:00Z
1201311533.05294.0850.083020.0UA1714N24211LGAIAH227.014165292013-01-01T10:00:00Z
2201311542.05402.0923.085033.0AA1141N619AAJFKMIA160.010895402013-01-01T10:00:00Z
3201311544.0545-1.01004.01022-18.0B6725N804JBJFKBQN183.015765452013-01-01T10:00:00Z
4201311554.0600-6.0812.0837-25.0DL461N668DNLGAATL116.0762602013-01-01T11:00:00Z
\n", - "
" - ], - "text/plain": [ - " year month day dep_time sched_dep_time dep_delay arr_time \\\n", - "0 2013 1 1 517.0 515 2.0 830.0 \n", - "1 2013 1 1 533.0 529 4.0 850.0 \n", - "2 2013 1 1 542.0 540 2.0 923.0 \n", - "3 2013 1 1 544.0 545 -1.0 1004.0 \n", - "4 2013 1 1 554.0 600 -6.0 812.0 \n", - "\n", - " sched_arr_time arr_delay carrier flight tailnum origin dest air_time \\\n", - "0 819 11.0 UA 1545 N14228 EWR IAH 227.0 \n", - "1 830 20.0 UA 1714 N24211 LGA IAH 227.0 \n", - "2 850 33.0 AA 1141 N619AA JFK MIA 160.0 \n", - "3 1022 -18.0 B6 725 N804JB JFK BQN 183.0 \n", - "4 837 -25.0 DL 461 N668DN LGA ATL 116.0 \n", - "\n", - " distance hour minute time_hour \n", - "0 1400 5 15 2013-01-01T10:00:00Z \n", - "1 1416 5 29 2013-01-01T10:00:00Z \n", - "2 1089 5 40 2013-01-01T10:00:00Z \n", - "3 1576 5 45 2013-01-01T10:00:00Z \n", - "4 762 6 0 2013-01-01T11:00:00Z " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "url = \"https://raw.githubusercontent.com/byuidatascience/data4python4ds/master/data-raw/flights/flights.csv\"\n", "flights = pd.read_csv(url)\n", @@ -1483,164 +397,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "6849f4d9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights2 = flights.assign(\n", " cancelled=lambda x: pd.isna(x[\"dep_time\"]),\n", @@ -1692,138 +452,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "e1719d8f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(diamonds, aes(x=\"price\"))\n", @@ -1844,140 +476,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "9388e24b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(diamonds, aes(x=\"price\"))\n", @@ -1997,163 +499,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "a3f333a6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(diamonds, aes(x=\"cut\", y=\"price\")) + geom_boxplot())" ] @@ -2172,150 +521,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "6949db81", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mpg = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv\", index_col=0\n", @@ -2335,168 +544,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "a5b1ed09", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg) + geom_boxplot(aes(as_discrete(\"class\", order_by=\"..middle..\"), \"hwy\")))" ] @@ -2511,172 +562,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "920a4268", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg)\n", @@ -2719,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "68d330d2", "metadata": {}, "outputs": [], @@ -2741,112 +630,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "e858cd22", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(ct_cut_color, aes(x=\"color\", y=\"cut\")) + geom_tile(aes(fill=\"value\")))" ] @@ -2880,135 +667,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "2afe2535", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(smaller_diamonds, aes(x=\"carat\", y=\"price\")) + geom_point())" ] @@ -3026,136 +688,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "b55707a9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(smaller_diamonds, aes(x=\"carat\", y=\"price\")) + geom_point(alpha=1 / 20))" ] @@ -3184,152 +720,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "13079065", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratdepthtablepricexyz
count53940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.000000
mean0.79794061.74940557.4571843932.7997225.7311575.7345263.538734
std0.4740111.4326212.2344913989.4397381.1217611.1421350.705699
min0.20000043.00000043.000000326.0000000.0000000.0000000.000000
25%0.40000061.00000056.000000950.0000004.7100004.7200002.910000
50%0.70000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.04000062.50000059.0000005324.2500006.5400006.5400004.040000
max5.01000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", - "
" - ], - "text/plain": [ - " carat depth table price x \\\n", - "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n", - "mean 0.797940 61.749405 57.457184 3932.799722 5.731157 \n", - "std 0.474011 1.432621 2.234491 3989.439738 1.121761 \n", - "min 0.200000 43.000000 43.000000 326.000000 0.000000 \n", - "25% 0.400000 61.000000 56.000000 950.000000 4.710000 \n", - "50% 0.700000 61.800000 57.000000 2401.000000 5.700000 \n", - "75% 1.040000 62.500000 59.000000 5324.250000 6.540000 \n", - "max 5.010000 79.000000 95.000000 18823.000000 10.740000 \n", - "\n", - " y z \n", - "count 53940.000000 53940.000000 \n", - "mean 5.734526 3.538734 \n", - "std 1.142135 0.705699 \n", - "min 0.000000 0.000000 \n", - "25% 4.720000 2.910000 \n", - "50% 5.710000 3.530000 \n", - "75% 6.540000 4.040000 \n", - "max 58.900000 31.800000 " - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds.describe()" ] @@ -3344,142 +738,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "b4144440", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratdepthtablepricexyz
count53940.053940.053940.053940.053940.053940.053940.0
mean0.861.757.53932.85.75.73.5
std0.51.42.23989.41.11.10.7
min0.243.043.0326.00.00.00.0
25%0.461.056.0950.04.74.72.9
50%0.761.857.02401.05.75.73.5
75%1.062.559.05324.26.56.54.0
max5.079.095.018823.010.758.931.8
\n", - "
" - ], - "text/plain": [ - " carat depth table price x y z\n", - "count 53940.0 53940.0 53940.0 53940.0 53940.0 53940.0 53940.0\n", - "mean 0.8 61.7 57.5 3932.8 5.7 5.7 3.5\n", - "std 0.5 1.4 2.2 3989.4 1.1 1.1 0.7\n", - "min 0.2 43.0 43.0 326.0 0.0 0.0 0.0\n", - "25% 0.4 61.0 56.0 950.0 4.7 4.7 2.9\n", - "50% 0.7 61.8 57.0 2401.0 5.7 5.7 3.5\n", - "75% 1.0 62.5 59.0 5324.2 6.5 6.5 4.0\n", - "max 5.0 79.0 95.0 18823.0 10.7 58.9 31.8" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sum_table = diamonds.describe().round(1)\n", "sum_table" @@ -3495,139 +757,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "cd2f8772", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
carat53940.00.80.50.20.40.71.05.0
depth53940.061.71.443.061.061.862.579.0
table53940.057.52.243.056.057.059.095.0
price53940.03932.83989.4326.0950.02401.05324.218823.0
x53940.05.71.10.04.75.76.510.7
y53940.05.71.10.04.75.76.558.9
z53940.03.50.70.02.93.54.031.8
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% 75% max\n", - "carat 53940.0 0.8 0.5 0.2 0.4 0.7 1.0 5.0\n", - "depth 53940.0 61.7 1.4 43.0 61.0 61.8 62.5 79.0\n", - "table 53940.0 57.5 2.2 43.0 56.0 57.0 59.0 95.0\n", - "price 53940.0 3932.8 3989.4 326.0 950.0 2401.0 5324.2 18823.0\n", - "x 53940.0 5.7 1.1 0.0 4.7 5.7 6.5 10.7\n", - "y 53940.0 5.7 1.1 0.0 4.7 5.7 6.5 58.9\n", - "z 53940.0 3.5 0.7 0.0 2.9 3.5 4.0 31.8" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sum_table = sum_table.T\n", "sum_table" @@ -3647,110 +780,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "5afcacbc", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_58745/1534868389.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", - " diamonds.groupby([\"cut\", \"color\"])[\"price\"]\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Sale price (thousands)
colorDEFGHIJ
cut       
Fair4.293.683.834.245.144.694.98
Good3.413.423.504.124.285.084.57
Very Good3.473.213.783.874.545.265.10
Premium3.633.544.324.505.225.956.29
Ideal2.632.603.373.723.894.454.92
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " diamonds.groupby([\"cut\", \"color\"])[\"price\"]\n", @@ -3777,203 +810,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "21e65189", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"]).style.background_gradient(cmap=\"plasma\")" ] @@ -3996,223 +836,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "bb0162ba", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"])\n", @@ -4231,106 +858,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "5d19072c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cutFairGoodVery GoodPremiumIdeal
color     
D163662151316032834
E224933240023373903
F312909216423313826
G314871229929244884
H303702182423603115
I175522120414282093
J119307678808896
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.crosstab(diamonds[\"color\"], diamonds[\"cut\"]).style.highlight_max().format(\"{:.0f}\")" ] @@ -4355,165 +886,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "b479d5b1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pickupdropoffpassengersdistancefaretiptollstotalcolorpaymentpickup_zonedropoff_zonepickup_boroughdropoff_borough
02019-03-23 20:21:092019-03-23 20:27:2411.607.02.150.012.95yellowcredit cardLenox Hill WestUN/Turtle Bay SouthManhattanManhattan
12019-03-04 16:11:552019-03-04 16:19:0010.795.00.000.09.30yellowcashUpper West Side SouthUpper West Side SouthManhattanManhattan
22019-03-27 17:53:012019-03-27 18:00:2511.377.52.360.014.16yellowcredit cardAlphabet CityWest VillageManhattanManhattan
32019-03-10 01:23:592019-03-10 01:49:5117.7027.06.150.036.95yellowcredit cardHudson SqYorkville WestManhattanManhattan
42019-03-30 13:27:422019-03-30 13:37:1432.169.01.100.013.40yellowcredit cardMidtown EastYorkville WestManhattanManhattan
\n", - "
" - ], - "text/plain": [ - " pickup dropoff passengers distance fare tip \\\n", - "0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 \n", - "1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 \n", - "2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 \n", - "3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 6.15 \n", - "4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 1.10 \n", - "\n", - " tolls total color payment pickup_zone \\\n", - "0 0.0 12.95 yellow credit card Lenox Hill West \n", - "1 0.0 9.30 yellow cash Upper West Side South \n", - "2 0.0 14.16 yellow credit card Alphabet City \n", - "3 0.0 36.95 yellow credit card Hudson Sq \n", - "4 0.0 13.40 yellow credit card Midtown East \n", - "\n", - " dropoff_zone pickup_borough dropoff_borough \n", - "0 UN/Turtle Bay South Manhattan Manhattan \n", - "1 Upper West Side South Manhattan Manhattan \n", - "2 West Village Manhattan Manhattan \n", - "3 Yorkville West Manhattan Manhattan \n", - "4 Yorkville West Manhattan Manhattan " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "taxis = pd.read_csv(\"https://github.com/mwaskom/seaborn-data/raw/master/taxis.csv\")\n", "# turn the pickup time column into a datetime\n", @@ -4536,1289 +912,20 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "ee971c9c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 6433 entries, 0 to 6432\n", - "Data columns (total 14 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 pickup 6433 non-null datetime64[ns]\n", - " 1 dropoff 6433 non-null datetime64[ns]\n", - " 2 passengers 6433 non-null int64 \n", - " 3 distance 6433 non-null float64 \n", - " 4 fare 6433 non-null float64 \n", - " 5 tip 6433 non-null float64 \n", - " 6 tolls 6433 non-null float64 \n", - " 7 total 6433 non-null float64 
\n", - " 8 color 6433 non-null category \n", - " 9 payment 6389 non-null category \n", - " 10 pickup_zone 6407 non-null string \n", - " 11 dropoff_zone 6388 non-null string \n", - " 12 pickup_borough 6407 non-null category \n", - " 13 dropoff_borough 6388 non-null category \n", - "dtypes: category(4), datetime64[ns](2), float64(5), int64(1), string(2)\n", - "memory usage: 528.5 KB\n" - ] - } - ], + "outputs": [], "source": [ "taxis.info()" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "2015b1dc", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.401430\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "(\n", " taxis.set_index(\"pickup\")\n", @@ -5842,1476 +949,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "51e86185", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.501760\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "(\n", " taxis.set_index(\"pickup\")\n", @@ -7344,888 +985,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "79ceca92", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.567782\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "taxis.value_counts(\"payment\").sort_index().plot.bar(title=\"Counts\", rot=0);" ] @@ -8240,1011 +1003,10 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "5efc5817", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.648471\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "taxis[\"tip\"].plot.hist(bins=30, title=\"Tip\");" ] @@ -9259,2055 +1021,10 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "0b735d15", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.718910\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "(taxis[[\"fare\", \"tolls\", \"tip\"]].plot.box());" ] @@ -11322,7210 +1039,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "66adada2", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:34:43.811014\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "taxis.plot.scatter(x=\"fare\", y=\"tip\", alpha=0.7, ylim=(0, None));" ] @@ -18554,132 +1071,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "32796b5f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n",
-       "│          Data Summary                Data Types               Categories                                        │\n",
-       "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ ┏━━━━━━━━━━━━━━━━━━━━━━━┓                                │\n",
-       "│ ┃ Dataframe          Values ┃ ┃ Column Type  Count ┃ ┃ Categorical Variables ┃                                │\n",
-       "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ ┡━━━━━━━━━━━━━━━━━━━━━━━┩                                │\n",
-       "│ │ Number of rows    │ 6433   │ │ float64     │ 5     │ │ color                 │                                │\n",
-       "│ │ Number of columns │ 14     │ │ category    │ 4     │ │ payment               │                                │\n",
-       "│ └───────────────────┴────────┘ │ datetime64  │ 2     │ │ pickup_borough        │                                │\n",
-       "│                                │ string      │ 2     │ │ dropoff_borough       │                                │\n",
-       "│                                │ int64       │ 1     │ └───────────────────────┘                                │\n",
-       "│                                └─────────────┴───────┘                                                          │\n",
-       "│                                                     number                                                      │\n",
-       "│ ┏━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓  │\n",
-       "│ ┃ column          NA    NA %    mean       sd       p0     p25     p50      p75    p100    hist    ┃  │\n",
-       "│ ┡━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩  │\n",
-       "│ │ passengers       0     0    1.539  1.204    0     1      1    2     6 █▁  ▁  │  │\n",
-       "│ │ distance         0     0    3.025  3.828    0  0.98   1.64 3.21  36.7  █▁    │  │\n",
-       "│ │ fare             0     0    13.09  11.55    1   6.5    9.5   15   150  █▁    │  │\n",
-       "│ │ tip              0     0    1.979  2.449    0     0    1.7  2.8  33.2 │  │\n",
-       "│ │ tolls            0     0   0.3253  1.415    0     0      0    0 24.02 │  │\n",
-       "│ │ total            0     0    18.52  13.82  1.3  10.8  14.16 20.3 174.8  █▁    │  │\n",
-       "│ └────────────────┴──────┴────────┴───────────┴─────────┴───────┴────────┴─────────┴───────┴────────┴─────────┘  │\n",
-       "│                                                    category                                                     │\n",
-       "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓  │\n",
-       "│ ┃ column                         NA      NA %                                ordered         unique      ┃  │\n",
-       "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩  │\n",
-       "│ │ color                             0                                 0False                   2 │  │\n",
-       "│ │ payment                          44                0.6839732628633608False                   3 │  │\n",
-       "│ │ pickup_borough                   26                0.4041660189647132False                   5 │  │\n",
-       "│ │ dropoff_borough                  45                 0.699518109746619False                   6 │  │\n",
-       "│ └───────────────────────────────┴────────┴────────────────────────────────────┴────────────────┴─────────────┘  │\n",
-       "│                                                    datetime                                                     │\n",
-       "│ ┏━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓  │\n",
-       "│ ┃ column       NA    NA %     first                         last                          frequency     ┃  │\n",
-       "│ ┡━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩  │\n",
-       "│ │ pickup        0      0    2019-02-28 23:29:03         2019-03-31 23:43:45     None          │  │\n",
-       "│ │ dropoff       0      0    2019-02-28 23:32:35         2019-04-01 00:13:58     None          │  │\n",
-       "│ └─────────────┴──────┴─────────┴──────────────────────────────┴──────────────────────────────┴───────────────┘  │\n",
-       "│                                                     string                                                      │\n",
-       "│ ┏━━━━━━━━━━━┳━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓  │\n",
-       "│ ┃                                                                     chars     words per  total    ┃  │\n",
-       "│ ┃ column     NA  NA %       shortest  longest    min       max        per row   row        words    ┃  │\n",
-       "│ ┡━━━━━━━━━━━╇━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩  │\n",
-       "│ │ pickup_zo260.4041660SoHo    RiverdaleAllertonYorkville    16.2      2.5   15791 │  │\n",
-       "│ │ ne        │    │ 189647132 │          │ /North   /Pelham West      │          │           │          │  │\n",
-       "│ │           │    │           │          │ RiverdaleGardens  │           │          │           │          │  │\n",
-       "│ │           │    │           │          │ /Fieldsto │          │           │          │           │          │  │\n",
-       "│ │           │    │           │          │ n         │          │           │          │           │          │  │\n",
-       "│ │ dropoff_z450.6995181SoHo    RiverdaleAllertonYorkville    16.3      2.5   15851 │  │\n",
-       "│ │ one       │    │  09746619 │          │ /North   /Pelham West      │          │           │          │  │\n",
-       "│ │           │    │           │          │ RiverdaleGardens  │           │          │           │          │  │\n",
-       "│ │           │    │           │          │ /Fieldsto │          │           │          │           │          │  │\n",
-       "│ │           │    │           │          │ n         │          │           │          │           │          │  │\n",
-       "│ └───────────┴────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┘  │\n",
-       "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", - "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m \u001b[3m Categories \u001b[0m │\n", - "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ ┏━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", - "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mDataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mCategorical Variables\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", - "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ ┡━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", - "│ │ Number of rows │ 6433 │ │ float64 │ 5 │ │ color │ │\n", - "│ │ Number of columns │ 14 │ │ category │ 4 │ │ payment │ │\n", - "│ └───────────────────┴────────┘ │ datetime64 │ 2 │ │ pickup_borough │ │\n", - "│ │ string │ 2 │ │ dropoff_borough │ │\n", - "│ │ int64 │ 1 │ └───────────────────────┘ │\n", - "│ └─────────────┴───────┘ │\n", - "│ \u001b[3m number \u001b[0m │\n", - "│ ┏━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", - "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist 
\u001b[0m\u001b[1m \u001b[0m┃ │\n", - "│ ┡━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", - "│ │ \u001b[38;5;141mpassengers \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.539\u001b[0m │ \u001b[36m 1.204\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 6\u001b[0m │ \u001b[32m █▁ ▁ \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mdistance \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.025\u001b[0m │ \u001b[36m 3.828\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0.98\u001b[0m │ \u001b[36m 1.64\u001b[0m │ \u001b[36m 3.21\u001b[0m │ \u001b[36m 36.7\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mfare \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 13.09\u001b[0m │ \u001b[36m 11.55\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 6.5\u001b[0m │ \u001b[36m 9.5\u001b[0m │ \u001b[36m 15\u001b[0m │ \u001b[36m 150\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mtip \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.979\u001b[0m │ \u001b[36m 2.449\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.7\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 33.2\u001b[0m │ \u001b[32m █ \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mtolls \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0.3253\u001b[0m │ \u001b[36m 1.415\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 24.02\u001b[0m │ \u001b[32m █ \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mtotal \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 18.52\u001b[0m │ \u001b[36m 13.82\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 10.8\u001b[0m │ \u001b[36m 14.16\u001b[0m │ \u001b[36m 20.3\u001b[0m │ \u001b[36m 174.8\u001b[0m │ \u001b[32m █▁ \u001b[0m │ │\n", - "│ 
└────────────────┴──────┴────────┴───────────┴─────────┴───────┴────────┴─────────┴───────┴────────┴─────────┘ │\n", - "│ \u001b[3m category \u001b[0m │\n", - "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ │\n", - "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mordered \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1munique \u001b[0m\u001b[1m \u001b[0m┃ │\n", - "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ │\n", - "│ │ \u001b[38;5;141mcolor \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 2\u001b[0m │ │\n", - "│ │ \u001b[38;5;141mpayment \u001b[0m │ \u001b[36m 44\u001b[0m │ \u001b[36m 0.6839732628633608\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 3\u001b[0m │ │\n", - "│ │ \u001b[38;5;141mpickup_borough \u001b[0m │ \u001b[36m 26\u001b[0m │ \u001b[36m 0.4041660189647132\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 5\u001b[0m │ │\n", - "│ │ \u001b[38;5;141mdropoff_borough \u001b[0m │ \u001b[36m 45\u001b[0m │ \u001b[36m 0.699518109746619\u001b[0m │ \u001b[38;5;45mFalse \u001b[0m │ \u001b[36m 6\u001b[0m │ │\n", - "│ └───────────────────────────────┴────────┴────────────────────────────────────┴────────────────┴─────────────┘ │\n", - "│ \u001b[3m datetime \u001b[0m │\n", - "│ ┏━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ │\n", - "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfirst \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mlast 
\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfrequency \u001b[0m\u001b[1m \u001b[0m┃ │\n", - "│ ┡━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │\n", - "│ │ \u001b[38;5;141mpickup \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[31m 2019-02-28 23:29:03 \u001b[0m │ \u001b[31m 2019-03-31 23:43:45 \u001b[0m │ \u001b[38;5;141mNone \u001b[0m │ │\n", - "│ │ \u001b[38;5;141mdropoff \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[31m 2019-02-28 23:32:35 \u001b[0m │ \u001b[31m 2019-04-01 00:13:58 \u001b[0m │ \u001b[38;5;141mNone \u001b[0m │ │\n", - "│ └─────────────┴──────┴─────────┴──────────────────────────────┴──────────────────────────────┴───────────────┘ │\n", - "│ \u001b[3m string \u001b[0m │\n", - "│ ┏━━━━━━━━━━━┳━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓ │\n", - "│ ┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mchars \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal \u001b[0m\u001b[1m \u001b[0m┃ │\n", - "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mshortest\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mlongest \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmin \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmax \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mper row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mrow \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords \u001b[0m\u001b[1m \u001b[0m┃ │\n", - "│ 
┡━━━━━━━━━━━╇━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩ │\n", - "│ │ \u001b[38;5;141mpickup_zo\u001b[0m │ \u001b[36m26\u001b[0m │ \u001b[36m0.4041660\u001b[0m │ \u001b[38;5;141mSoHo \u001b[0m │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mAllerton\u001b[0m │ \u001b[38;5;141mYorkville\u001b[0m │ \u001b[36m 16.2\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[36m 15791\u001b[0m │ │\n", - "│ │ \u001b[38;5;141mne \u001b[0m │ │ \u001b[36m189647132\u001b[0m │ │ \u001b[38;5;141m/North \u001b[0m │ \u001b[38;5;141m/Pelham \u001b[0m │ \u001b[38;5;141mWest \u001b[0m │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mGardens \u001b[0m │ │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141m/Fieldsto\u001b[0m │ │ │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141mn \u001b[0m │ │ │ │ │ │ │\n", - "│ │ \u001b[38;5;141mdropoff_z\u001b[0m │ \u001b[36m45\u001b[0m │ \u001b[36m0.6995181\u001b[0m │ \u001b[38;5;141mSoHo \u001b[0m │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mAllerton\u001b[0m │ \u001b[38;5;141mYorkville\u001b[0m │ \u001b[36m 16.3\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[36m 15851\u001b[0m │ │\n", - "│ │ \u001b[38;5;141mone \u001b[0m │ │ \u001b[36m 09746619\u001b[0m │ │ \u001b[38;5;141m/North \u001b[0m │ \u001b[38;5;141m/Pelham \u001b[0m │ \u001b[38;5;141mWest \u001b[0m │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141mRiverdale\u001b[0m │ \u001b[38;5;141mGardens \u001b[0m │ │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141m/Fieldsto\u001b[0m │ │ │ │ │ │ │\n", - "│ │ │ │ │ │ \u001b[38;5;141mn \u001b[0m │ │ │ │ │ │ │\n", - "│ └───────────┴────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┴───────────┴──────────┘ │\n", - "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "skim(taxis)" ] diff --git 
a/functions.ipynb b/functions.ipynb index 0f2cce3..5ed6b2a 100644 --- a/functions.ipynb +++ b/functions.ipynb @@ -48,18 +48,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0450ad6c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hello Ada, and welcome!\n" - ] - } - ], + "outputs": [], "source": [ "def welcome_message(name):\n", " return f\"Hello {name}, and welcome!\"\n", @@ -81,19 +73,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "dd49bee5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('Hello student', 4.8)\n", - "('Hello Ada', 4.8)\n" - ] - } - ], + "outputs": [], "source": [ "def score_message(score, name=\"student\"):\n", " \"\"\"This is a doc-string, a string describing a function.\n", @@ -138,28 +121,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "50dfff24", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Help on function score_message in module __main__:\n", - "\n", - "score_message(score, name='student')\n", - " This is a doc-string, a string describing a function.\n", - " Args:\n", - " score (float): Raw score\n", - " name (str): Name of student\n", - " Returns:\n", - " str: A hello message.\n", - " float: A normalised score.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "help(score_message)" ] @@ -194,47 +159,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "43d10e40", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 4)
abcd
f64f64f64f64
0.4736410.3145550.780770.683499
1.00.1921670.8502790.186199
0.4507660.4572280.5157880.0
0.3617790.2195290.2433410.934585
0.028510.00.00.948462
0.4747280.1190681.00.556861
0.1679770.6302890.6544850.727404
0.279770.5057040.9460620.527484
0.4111420.8182760.1750241.0
0.00.1704820.3733090.45716
" - ], - "text/plain": [ - "shape: (10, 4)\n", - "┌──────────┬──────────┬──────────┬──────────┐\n", - "│ a ┆ b ┆ c ┆ d │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞══════════╪══════════╪══════════╪══════════╡\n", - "│ 0.473641 ┆ 0.314555 ┆ 0.78077 ┆ 0.683499 │\n", - "│ 1.0 ┆ 0.192167 ┆ 0.850279 ┆ 0.186199 │\n", - "│ 0.450766 ┆ 0.457228 ┆ 0.515788 ┆ 0.0 │\n", - "│ 0.361779 ┆ 0.219529 ┆ 0.243341 ┆ 0.934585 │\n", - "│ 0.02851 ┆ 0.0 ┆ 0.0 ┆ 0.948462 │\n", - "│ 0.474728 ┆ 0.119068 ┆ 1.0 ┆ 0.556861 │\n", - "│ 0.167977 ┆ 0.630289 ┆ 0.654485 ┆ 0.727404 │\n", - "│ 0.27977 ┆ 0.505704 ┆ 0.946062 ┆ 0.527484 │\n", - "│ 0.411142 ┆ 0.818276 ┆ 0.175024 ┆ 1.0 │\n", - "│ 0.0 ┆ 0.170482 ┆ 0.373309 ┆ 0.45716 │\n", - "└──────────┴──────────┴──────────┴──────────┘" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import polars as pl\n", @@ -313,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "406648b6", "metadata": {}, "outputs": [], @@ -347,76 +275,20 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "74834d90", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3,)
f64
0.0
0.5
1.0
" - ], - "text/plain": [ - "shape: (3,)\n", - "Series: '' [f64]\n", - "[\n", - "\t0.0\n", - "\t0.5\n", - "\t1.0\n", - "]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rescale(pl.Series([-10, 0, 10]))" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8a0e643e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5,)
f64
0.0
0.25
0.5
null
1.0
" - ], - "text/plain": [ - "shape: (5,)\n", - "Series: '' [f64]\n", - "[\n", - "\t0.0\n", - "\t0.25\n", - "\t0.5\n", - "\tnull\n", - "\t1.0\n", - "]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rescale(pl.Series([1, 2, 3, None, 5]))" ] @@ -434,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "ce066e4f", "metadata": {}, "outputs": [], @@ -462,39 +334,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "f8b80894", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5,)
f64
0.0
0.0
0.0
NaN
0.0
" - ], - "text/plain": [ - "shape: (5,)\n", - "Series: '' [f64]\n", - "[\n", - "\t0.0\n", - "\t0.0\n", - "\t0.0\n", - "\tNaN\n", - "\t0.0\n", - "]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rescale(pl.Series([1, 2, 3, float(\"inf\"), 5], strict=False))" ] @@ -509,39 +352,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "cd63ddea", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5,)
f64
0.0
0.25
0.5
null
1.0
" - ], - "text/plain": [ - "shape: (5,)\n", - "Series: '' [f64]\n", - "[\n", - "\t0.0\n", - "\t0.25\n", - "\t0.5\n", - "\tnull\n", - "\t1.0\n", - "]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def rescale(x):\n", " x = x.cast(pl.Float64).replace(float(\"inf\"), None)\n", @@ -627,18 +441,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "647da443", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hello World!\n" - ] - } - ], + "outputs": [], "source": [ "def var_func():\n", " str_variable = \"Hello World!\"\n", @@ -661,19 +467,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "c5fb049d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "y is inside a function: I'm a global variable\n", - "y is outside a function: I'm a global variable\n" - ] - } - ], + "outputs": [], "source": [ "y = \"I'm a global variable\"\n", "\n", diff --git a/introduction.ipynb b/introduction.ipynb index 2a70fd3..04c2ff9 100644 --- a/introduction.ipynb +++ b/introduction.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -33,116 +33,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "209ef434", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "cluster_0\n", - "\n", - "Understand\n", - "\n", - "\n", - "\n", - "Import\n", - "\n", - "Import\n", - "\n", - "\n", - "\n", - "Clean\n", - "\n", - "Clean\n", - "\n", - "\n", - "\n", - "Import->Clean\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Analyse\n", - "\n", - "Analyse\n", - "\n", - "\n", - "\n", - "Clean->Analyse\n", - "\n", - "\n", - "\n", - "\n", - 
"\n", - "Visualise\n", - "\n", - "Visualise\n", - "\n", - "\n", - "\n", - "Visualise->Analyse\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Transform\n", - "\n", - "Transform\n", - "\n", - "\n", - "\n", - "Analyse->Transform\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Communicate\n", - "\n", - "Communicate\n", - "\n", - "\n", - "\n", - "Analyse->Communicate\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Transform->Visualise\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# | echo: false\n", "import graphviz\n", @@ -240,22 +138,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "26faf349", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compiled with Python version: 3.12.13 (main, Mar 3 2026, 12:39:30) [Clang 17.0.0 (clang-1700.6.3.2)]\n" - ] - } - ], + "outputs": [], "source": [ "# | echo: false\n", "import sys\n", diff --git a/iteration.ipynb b/iteration.ipynb index fd727cd..b44f79a 100644 --- a/iteration.ipynb +++ b/iteration.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -68,21 +68,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a2bbd41c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Lovelace\n", - "Smith\n", - "Pigou\n", - "Babbage\n" - ] - } - ], + "outputs": [], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Pigou\", \"Babbage\"]\n", "\n", @@ -117,19 +106,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "835ebda7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hello\n", - "---end entry---\n" - ] - } - ], + 
"outputs": [], "source": [ "for entry in [\"hello\"]:\n", " print(entry)\n", @@ -146,27 +126,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "2a19ac2e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "h\n", - "---end entry---\n", - "e\n", - "---end entry---\n", - "l\n", - "---end entry---\n", - "l\n", - "---end entry---\n", - "o\n", - "---end entry---\n" - ] - } - ], + "outputs": [], "source": [ "for entry in \"hello\":\n", " print(entry)\n", @@ -187,21 +150,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "239e133f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The name in position 0 is Lovelace\n", - "The name in position 1 is Smith\n", - "The name in position 2 is Hopper\n", - "The name in position 3 is Babbage\n" - ] - } - ], + "outputs": [], "source": [ "name_list = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", "\n", @@ -219,21 +171,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b66c5c53", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The name in position 1 is Lovelace\n", - "The name in position 2 is Smith\n", - "The name in position 3 is Hopper\n", - "The name in position 4 is Babbage\n" - ] - } - ], + "outputs": [], "source": [ "for i, name in enumerate(name_list, start=1):\n", " print(f\"The name in position {i} is {name}\")" @@ -249,21 +190,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "010239fe", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In Paris, the temperature is 28 degrees C today.\n", - "In London, the temperature is 22 degrees C today.\n", - "In Seville, the temperature is 36 degrees C today.\n", - "In Wellesley, the temperature is 29 degrees C today.\n" - ] - } - ], + 
"outputs": [], "source": [ "cities_to_temps = {\"Paris\": 28, \"London\": 22, \"Seville\": 36, \"Wellesley\": 29}\n", "\n", @@ -287,21 +217,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "8ea3efc5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ada Lovelace\n", - "Adam Smith\n", - "Grace Hopper\n", - "Charles Babbage\n" - ] - } - ], + "outputs": [], "source": [ "first_names = [\"Ada\", \"Adam\", \"Grace\", \"Charles\"]\n", "last_names = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", @@ -338,21 +257,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "7efed381", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[51, 52, 53, 54, 55, 56, 57, 58, 59, 60]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "num_list = range(50, 60)\n", "[1 + num for num in num_list]" @@ -378,18 +286,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "722fda21", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39]\n" - ] - } - ], + "outputs": [], "source": [ "number_list = range(1, 40)\n", "divide_list = [x for x in number_list if x % 3 == 0]\n", @@ -408,18 +308,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "b6e80d6b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Adam Smith', 'leonara smith']\n" - ] - } - ], + "outputs": [], "source": [ "names_list = [\"Joe Bloggs\", \"Adam Smith\", \"Sandra Noone\", \"leonara smith\"]\n", "smith_list = [x for x in names_list if \"smith\" in x.lower()]\n", @@ -438,18 +330,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "f348bfb6", "metadata": {}, - "outputs": [ 
- { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Not Smith!', 'Adam Smith', 'Not Smith!', 'leonara smith']\n" - ] - } - ], + "outputs": [], "source": [ "names_list = [\"Joe Bloggs\", \"Adam Smith\", \"Sandra Noone\", \"leonara smith\"]\n", "smith_list = [x if \"smith\" in x.lower() else \"Not Smith!\" for x in names_list]\n", @@ -466,18 +350,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "74e4fcc7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Ada Lovelace', 'Adam Smith', 'Grace Hopper', 'Charles Babbage']\n" - ] - } - ], + "outputs": [], "source": [ "first_names = [\"Ada\", \"Adam\", \"Grace\", \"Charles\"]\n", "last_names = [\"Lovelace\", \"Smith\", \"Hopper\", \"Babbage\"]\n", @@ -495,18 +371,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "2c82cf1f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[['Ada Lovelace', 'Adam Lovelace'], ['Ada Smith', 'Adam Smith']]\n" - ] - } - ], + "outputs": [], "source": [ "first_names = [\"Ada\", \"Adam\"]\n", "last_names = [\"Lovelace\", \"Smith\"]\n", @@ -526,21 +394,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "acef16ae", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Ada': 'Lovelace', 'Adam': 'Smith'}" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "{key: value for key, value in zip(first_names, last_names)}" ] @@ -563,28 +420,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "3e47ba02", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10\n", - "9\n", - "8\n", - "7\n", - "6\n", - "5\n", - "4\n", - "3\n", - "2\n", - "1\n", - "execution complete\n" - ] - } - ], + "outputs": [], "source": [ "n = 
10\n", "while n > 0:\n", @@ -637,43 +476,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "b3116809", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 4)
abcd
f64f64f64f64
-0.871189-0.681725-1.562824-0.560553
-1.2694980.148269-0.217629-0.221637
-0.555918-1.4479222.4705161.641996
0.35463-1.664464-0.0148610.030461
-0.8667982.2567060.968797-0.078444
0.5406780.8744340.286314-0.138539
" - ], - "text/plain": [ - "shape: (6, 4)\n", - "┌───────────┬───────────┬───────────┬───────────┐\n", - "│ a ┆ b ┆ c ┆ d │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞═══════════╪═══════════╪═══════════╪═══════════╡\n", - "│ -0.871189 ┆ -0.681725 ┆ -1.562824 ┆ -0.560553 │\n", - "│ -1.269498 ┆ 0.148269 ┆ -0.217629 ┆ -0.221637 │\n", - "│ -0.555918 ┆ -1.447922 ┆ 2.470516 ┆ 1.641996 │\n", - "│ 0.35463 ┆ -1.664464 ┆ -0.014861 ┆ 0.030461 │\n", - "│ -0.866798 ┆ 2.256706 ┆ 0.968797 ┆ -0.078444 │\n", - "│ 0.540678 ┆ 0.874434 ┆ 0.286314 ┆ -0.138539 │\n", - "└───────────┴───────────┴───────────┴───────────┘" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import polars as pl\n", @@ -692,81 +498,20 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "ac909c2f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (1, 4)
abcd
f64f64f64f64
-0.711358-0.2667280.135727-0.108492
" - ], - "text/plain": [ - "shape: (1, 4)\n", - "┌───────────┬───────────┬──────────┬───────────┐\n", - "│ a ┆ b ┆ c ┆ d │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞═══════════╪═══════════╪══════════╪═══════════╡\n", - "│ -0.711358 ┆ -0.266728 ┆ 0.135727 ┆ -0.108492 │\n", - "└───────────┴───────────┴──────────┴───────────┘" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.select(pl.all().median())" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "96426002", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 1)
row_median
f64
-0.776457
-0.219633
0.543039
0.0078
0.445177
0.413496
" - ], - "text/plain": [ - "shape: (6, 1)\n", - "┌────────────┐\n", - "│ row_median │\n", - "│ --- │\n", - "│ f64 │\n", - "╞════════════╡\n", - "│ -0.776457 │\n", - "│ -0.219633 │\n", - "│ 0.543039 │\n", - "│ 0.0078 │\n", - "│ 0.445177 │\n", - "│ 0.413496 │\n", - "└────────────┘" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.select(pl.concat_list(pl.all()).list.median().alias(\"row_median\"))" ] @@ -781,18 +526,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "060b6815", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "294 μs ± 4.83 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" - ] - } - ], + "outputs": [], "source": [ "# Do not do this!\n", "\n", @@ -816,18 +553,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "1a48ae52", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "56.5 μs ± 929 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" - ] - } - ], + "outputs": [], "source": [ "%timeit df + 5" ] @@ -852,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f7391dc5", "metadata": {}, "outputs": [], @@ -874,43 +603,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "31adcb3f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 6)
abcdnew_aresult
f64f64f64f64f64f64
40554.12881140554.31827540553.43717640554.43944740559.128811-4.545293
40553.73050240555.14826940554.78237140554.77836340558.730502-5.458877
40554.44408240553.55207840557.47051640556.64199640559.444082-9.030254
40555.3546340553.33553640554.98513940555.03046140560.35463-5.850543
40554.13320240557.25670640555.96879740554.92155640559.133202-4.134134
40555.54067840555.87443440555.28631440554.86146140560.540678-3.426466
" - ], - "text/plain": [ - "shape: (6, 6)\n", - "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬───────────┐\n", - "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ result │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪═══════════╡\n", - "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ -4.545293 │\n", - "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ -5.458877 │\n", - "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ -9.030254 │\n", - "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ -5.850543 │\n", - "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ -4.134134 │\n", - "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ -3.426466 │\n", - "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴───────────┘" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Don't do this (slow, row-wise)\n", "mean_new_a = df.select(pl.col(\"new_a\").mean()).item()\n", @@ -942,43 +638,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "8d9defd3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 6)
abcdnew_aratio
f64f64f64f64f64f64
40554.12881140554.31827540553.43717640554.43944740559.1288110.999877
40553.73050240555.14826940554.78237140554.77836340558.7305020.999877
40554.44408240553.55207840557.47051640556.64199640559.4440820.999877
40555.3546340553.33553640554.98513940555.03046140560.354630.999877
40554.13320240557.25670640555.96879740554.92155640559.1332020.999877
40555.54067840555.87443440555.28631440554.86146140560.5406780.999877
" - ], - "text/plain": [ - "shape: (6, 6)\n", - "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────┐\n", - "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ ratio │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╡\n", - "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ 0.999877 │\n", - "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ 0.999877 │\n", - "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ 0.999877 │\n", - "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ 0.999877 │\n", - "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ 0.999877 │\n", - "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ 0.999877 │\n", - "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┘" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.with_columns((pl.col(\"a\") / pl.col(\"new_a\")).alias(\"ratio\"))\n", "df" @@ -994,43 +657,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "f301c8cb", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 7)
abcdnew_aratioa_gt_0.5
f64f64f64f64f64f64bool
40554.12881140554.31827540553.43717640554.43944740559.1288110.999877true
40553.73050240555.14826940554.78237140554.77836340558.7305020.999877true
40554.44408240553.55207840557.47051640556.64199640559.4440820.999877true
40555.3546340553.33553640554.98513940555.03046140560.354630.999877true
40554.13320240557.25670640555.96879740554.92155640559.1332020.999877true
40555.54067840555.87443440555.28631440554.86146140560.5406780.999877true
" - ], - "text/plain": [ - "shape: (6, 7)\n", - "┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────┬──────────┐\n", - "│ a ┆ b ┆ c ┆ d ┆ new_a ┆ ratio ┆ a_gt_0.5 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ bool │\n", - "╞══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════╪══════════╡\n", - "│ 40554.128811 ┆ 40554.318275 ┆ 40553.437176 ┆ 40554.439447 ┆ 40559.128811 ┆ 0.999877 ┆ true │\n", - "│ 40553.730502 ┆ 40555.148269 ┆ 40554.782371 ┆ 40554.778363 ┆ 40558.730502 ┆ 0.999877 ┆ true │\n", - "│ 40554.444082 ┆ 40553.552078 ┆ 40557.470516 ┆ 40556.641996 ┆ 40559.444082 ┆ 0.999877 ┆ true │\n", - "│ 40555.35463 ┆ 40553.335536 ┆ 40554.985139 ┆ 40555.030461 ┆ 40560.35463 ┆ 0.999877 ┆ true │\n", - "│ 40554.133202 ┆ 40557.256706 ┆ 40555.968797 ┆ 40554.921556 ┆ 40559.133202 ┆ 0.999877 ┆ true │\n", - "│ 40555.540678 ┆ 40555.874434 ┆ 40555.286314 ┆ 40554.861461 ┆ 40560.540678 ┆ 0.999877 ┆ true │\n", - "└──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────┴──────────┘" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = df.with_columns((pl.col(\"a\") > 0.5).alias(\"a_gt_0.5\"))\n", "df" diff --git a/joins.ipynb b/joins.ipynb index a1b57a3..d494d84 100644 --- a/joins.ipynb +++ b/joins.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -64,98 +64,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f5ef4f37", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " county pop\n", - "0 Los Angeles 9878554\n", - "1 Orange 2997033\n", - "2 Ventura 798364\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countypop
ca0Los Angeles9878554
1Orange2997033
2Ventura798364
il0Cook5285107
1DeKalb103729
2Will673586
\n", - "
" - ], - "text/plain": [ - " county pop\n", - "ca 0 Los Angeles 9878554\n", - " 1 Orange 2997033\n", - " 2 Ventura 798364\n", - "il 0 Cook 5285107\n", - " 1 DeKalb 103729\n", - " 2 Will 673586" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -218,93 +130,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "53c66d5d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
key1key2ABCD
0K0K0A0B0C0D0
1K1K0A2B2C1D1
2K1K0A2B2C2D2
3K2K0NaNNaNC3D3
\n", - "
" - ], - "text/plain": [ - " key1 key2 A B C D\n", - "0 K0 K0 A0 B0 C0 D0\n", - "1 K1 K0 A2 B2 C1 D1\n", - "2 K1 K0 A2 B2 C2 D2\n", - "3 K2 K0 NaN NaN C3 D3" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "left = pd.DataFrame(\n", " {\n", @@ -338,83 +167,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "5e73608f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
key1key2ABCD
0K0K0A0B0C0D0
1K1K0A2B2C1D1
2K1K0A2B2C2D2
\n", - "
" - ], - "text/plain": [ - " key1 key2 A B C D\n", - "0 K0 K0 A0 B0 C0 D0\n", - "1 K1 K0 A2 B2 C1 D1\n", - "2 K1 K0 A2 B2 C2 D2" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.merge(left, right, on=[\"key1\", \"key2\"], how=\"inner\")" ] @@ -431,120 +187,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "5d209fb9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
key1key2ABCD_merge
0K0K0A0B0C0D0both
1K0K1A1B1NaNNaNleft_only
2K1K0A2B2C1D1both
3K1K0A2B2C2D2both
4K2K0NaNNaNC3D3right_only
5K2K1A3B3NaNNaNleft_only
\n", - "
" - ], - "text/plain": [ - " key1 key2 A B C D _merge\n", - "0 K0 K0 A0 B0 C0 D0 both\n", - "1 K0 K1 A1 B1 NaN NaN left_only\n", - "2 K1 K0 A2 B2 C1 D1 both\n", - "3 K1 K0 A2 B2 C2 D2 both\n", - "4 K2 K0 NaN NaN C3 D3 right_only\n", - "5 K2 K1 A3 B3 NaN NaN left_only" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.merge(left, right, on=[\"key1\", \"key2\"], how=\"outer\", indicator=True)" ] diff --git a/missing-values.ipynb b/missing-values.ipynb index 08aed7c..67e95ef 100644 --- a/missing-values.ipynb +++ b/missing-values.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -61,68 +61,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "535ef959", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
numbers
05.0
127.3
2NaN
3-16.0
\n", - "
" - ], - "text/plain": [ - " numbers\n", - "0 5.0\n", - "1 27.3\n", - "2 NaN\n", - "3 -16.0" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -141,73 +83,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "bf317bce", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
numbers
0<NA>
127.3
2NaN
3-16
4None
\n", - "
" - ], - "text/plain": [ - " numbers\n", - "0 \n", - "1 27.3\n", - "2 NaN\n", - "3 -16\n", - "4 None" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "numbers = pd.DataFrame([pd.NA, 27.3, np.nan, -16, None], columns=[\"numbers\"])\n", "numbers" @@ -223,78 +102,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "b1c864d9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
fruit
0orange
1NaN
2apple
3None
4banana
5<NA>
\n", - "
" - ], - "text/plain": [ - " fruit\n", - "0 orange\n", - "1 NaN\n", - "2 apple\n", - "3 None\n", - "4 banana\n", - "5 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fruits = pd.DataFrame(\n", " [\"orange\", np.nan, \"apple\", None, \"banana\", pd.NA], columns=[\"fruit\"]\n", @@ -312,78 +123,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "cbcfe630", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
fruit
0False
1True
2False
3True
4False
5True
\n", - "
" - ], - "text/plain": [ - " fruit\n", - "0 False\n", - "1 True\n", - "2 False\n", - "3 True\n", - "4 False\n", - "5 True" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fruits.isna()" ] @@ -408,83 +151,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c96f89e6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.0NaNNaN<NA>
3NaN3.0NaN4
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 NaN 2.0 NaN 0\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 NaN NaN \n", - "3 NaN 3.0 NaN 4" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df = pd.DataFrame(\n", " [\n", @@ -509,91 +179,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "a6bc5fe2", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/4054961691.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " nan_df.fillna(0)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
00.02.00.00
13.04.00.01
25.00.00.00
30.03.00.04
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 0.0 2.0 0.0 0\n", - "1 3.0 4.0 0.0 1\n", - "2 5.0 0.0 0.0 0\n", - "3 0.0 3.0 0.0 4" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.fillna(0)" ] @@ -608,91 +197,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "e65b67c2", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2397886090.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3})\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
00.02.02.00
13.04.02.01
25.01.02.03
30.03.02.04
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 0.0 2.0 2.0 0\n", - "1 3.0 4.0 2.0 1\n", - "2 5.0 1.0 2.0 3\n", - "3 0.0 3.0 2.0 4" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3})" ] @@ -707,186 +215,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "2a19e196", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1353804149.py:1: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", - " nan_df.fillna(method=\"ffill\")\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1353804149.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " nan_df.fillna(method=\"ffill\")\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.04.0NaN1
35.03.0NaN4
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 NaN 2.0 NaN 0\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 4.0 NaN 1\n", - "3 5.0 3.0 NaN 4" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.fillna(method=\"ffill\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "8b5b001e", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2505504399.py:1: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", - " nan_df.fillna(method=\"bfill\")\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/2505504399.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " nan_df.fillna(method=\"bfill\")\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
03.02.0NaN0
13.04.0NaN1
25.03.0NaN4
3NaN3.0NaN4
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 3.0 2.0 NaN 0\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 3.0 NaN 4\n", - "3 NaN 3.0 NaN 4" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.fillna(method=\"bfill\")" ] @@ -909,91 +251,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "558e7a23", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/1730877720.py:1: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3}, limit=1)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
00.02.02.00
13.04.0NaN1
25.01.0NaN3
3NaN3.0NaN4
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 0.0 2.0 2.0 0\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 1.0 NaN 3\n", - "3 NaN 3.0 NaN 4" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.fillna(value={\"A\": 0, \"B\": 1, \"C\": 2, \"D\": 3}, limit=1)" ] @@ -1008,84 +269,20 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "0e3a81c1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1 3.0\n", - "2 5.0\n", - "Name: A, dtype: float64" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df[\"A\"].dropna(axis=0) # on a single column" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "9c1e312e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
1
2
3
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: [0, 1, 2, 3]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.dropna(axis=1)" ] @@ -1100,83 +297,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "3296ea35", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0NaN2.0NaN0
13.04.0NaN1
25.0NaNNaN<NA>
3NaN3.0NaN4
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 NaN 2.0 NaN 0\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 NaN NaN \n", - "3 NaN 3.0 NaN 4" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df.dropna(how=\"all\")" ] @@ -1199,67 +323,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "d3e1af81", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
13.04.0NaN1
25.0NaNNaN<NA>
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "1 3.0 4.0 NaN 1\n", - "2 5.0 NaN NaN " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "nan_df[nan_df[\"A\"].notna()]" ] @@ -1278,71 +345,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "45668d12", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
0345
1-74-99
2-9965
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 3 4 5\n", - "1 -7 4 -99\n", - "2 -99 6 5" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stata_df = pd.DataFrame([[3, 4, 5], [-7, 4, -99], [-99, 6, 5]], columns=list(\"ABC\"))\n", "\n", @@ -1359,71 +365,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "c43070f4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
0345
1-74<NA>
2<NA>65
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 3 4 5\n", - "1 -7 4 \n", - "2 6 5" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stata_df.replace({-99: pd.NA})" ] @@ -1438,71 +383,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "12cc567a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
0345
1<NA>4<NA>
2<NA>65
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 3 4 5\n", - "1 4 \n", - "2 6 5" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stata_df.replace({-99: pd.NA, -7: pd.NA})" ] @@ -1529,99 +413,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "057c5203", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearqtrprice
0202011.88
1202020.59
2202030.35
320204NaN
4202120.92
5202130.17
6202142.66
\n", - "
" - ], - "text/plain": [ - " year qtr price\n", - "0 2020 1 1.88\n", - "1 2020 2 0.59\n", - "2 2020 3 0.35\n", - "3 2020 4 NaN\n", - "4 2021 2 0.92\n", - "5 2021 3 0.17\n", - "6 2021 4 2.66" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stocks = pd.DataFrame(\n", " {\n", @@ -1667,75 +462,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "87ab37d0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qtr1234
year
20201.880.590.35NaN
2021NaN0.920.172.66
\n", - "
" - ], - "text/plain": [ - "qtr 1 2 3 4\n", - "year \n", - "2020 1.88 0.59 0.35 NaN\n", - "2021 NaN 0.92 0.17 2.66" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stocks.pivot(columns=\"qtr\", values=\"price\", index=\"year\")" ] @@ -1762,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "92a914e3", "metadata": {}, "outputs": [], @@ -1787,78 +517,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "cc904247", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namesmokerage
0Ikaiano34
1Olettano88
2Leriahpreviously75
3Dashayno47
\n", - "
" - ], - "text/plain": [ - " name smoker age\n", - "0 Ikaia no 34\n", - "1 Oletta no 88\n", - "2 Leriah previously 75\n", - "3 Dashay no 47" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "health_cut = health.iloc[:-1, :]\n", "health_cut" @@ -1874,25 +536,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "546bc395", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "smoker\n", - "no 3\n", - "previously 1\n", - "yes 0\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "health_cut[\"smoker\"].value_counts()" ] @@ -1907,33 +554,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "efb6c6ab", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61211/3998383890.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", - " health_cut.groupby(\"smoker\")[\"age\"].mean()\n" - ] - }, - { - "data": { - "text/plain": [ - "smoker\n", - "no 56.333333\n", - "previously 75.000000\n", - "yes NaN\n", - "Name: age, dtype: float64" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "health_cut.groupby(\"smoker\")[\"age\"].mean()" ] diff --git a/numbers.ipynb b/numbers.ipynb index 129d361..0b96fbb 100644 --- a/numbers.ipynb +++ b/numbers.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "6c89ca3d", "metadata": {}, "outputs": [], @@ -62,21 +62,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "18f1ee4c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "np.int64(336776)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights[\"dest\"].count()" ] @@ -91,33 +80,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "161a24ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dest\n", - "ORD 17283\n", - "ATL 17215\n", - "LAX 16174\n", - "BOS 15508\n", - "MCO 14082\n", - " ... \n", - "MTJ 15\n", - "SBN 10\n", - "ANC 8\n", - "LEX 1\n", - "LGA 1\n", - "Name: count, Length: 105, dtype: int64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights[\"dest\"].value_counts()" ] @@ -132,124 +98,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "8554277a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mean_delaycount_flights
dest
ORD13.57048417283
ATL12.50982417215
LAX9.40134416174
BOS8.73061315508
MCO11.27599814082
.........
MTJ17.64285715
SBN21.10000010
ANC12.8750008
LGANaN1
LEX-9.0000001
\n", - "

105 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " mean_delay count_flights\n", - "dest \n", - "ORD 13.570484 17283\n", - "ATL 12.509824 17215\n", - "LAX 9.401344 16174\n", - "BOS 8.730613 15508\n", - "MCO 11.275998 14082\n", - "... ... ...\n", - "MTJ 17.642857 15\n", - "SBN 21.100000 10\n", - "ANC 12.875000 8\n", - "LGA NaN 1\n", - "LEX -9.000000 1\n", - "\n", - "[105 rows x 2 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " flights.groupby([\"dest\"])\n", @@ -271,111 +123,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "061decae", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
miles
tailnum
D942DN3418
N0EGMQ250866
N10156115966
N102UW25722
N103US24619
......
N997DL54669
N998AT15432
N998DL66052
N999DN54623
N9EAMQ167317
\n", - "

4043 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " miles\n", - "tailnum \n", - "D942DN 3418\n", - "N0EGMQ 250866\n", - "N10156 115966\n", - "N102UW 25722\n", - "N103US 24619\n", - "... ...\n", - "N997DL 54669\n", - "N998AT 15432\n", - "N998DL 66052\n", - "N999DN 54623\n", - "N9EAMQ 167317\n", - "\n", - "[4043 rows x 1 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(flights.groupby(\"tailnum\").agg(miles=(\"distance\", \"sum\")))" ] @@ -390,111 +141,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "ecdb5630", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
n_cancelled
dest
ABQ0
ACK0
ALB20
ANC0
ATL317
......
TPA59
TUL16
TVC5
TYS52
XNA25
\n", - "

105 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " n_cancelled\n", - "dest \n", - "ABQ 0\n", - "ACK 0\n", - "ALB 20\n", - "ANC 0\n", - "ATL 317\n", - "... ...\n", - "TPA 59\n", - "TUL 16\n", - "TVC 5\n", - "TYS 52\n", - "XNA 25\n", - "\n", - "[105 rows x 1 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(flights.groupby(\"dest\").agg(n_cancelled=(\"dep_time\", lambda x: x.isnull().sum())))" ] @@ -538,21 +188,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "a473cd56", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "np.int64(4983)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights[\"distance\"].max()" ] @@ -567,67 +206,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "96285702", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
013
152
27<NA>
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 1 3\n", - "1 5 2\n", - "2 7 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame({\"x\": [1, 5, 7], \"y\": [3, 2, pd.NA]})\n", "df" @@ -643,24 +225,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "5bae5499", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 7\n", - "dtype: object" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.min(axis=1)" ] @@ -677,23 +245,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "3af6ce86", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "divided by 3 gives\n", - "remainder:\n", - "[1, 2, 0, 1, 2, 0, 1, 2, 0, 1]\n", - "divisions:\n", - "[0, 0, 1, 1, 1, 2, 2, 2, 3, 3]\n" - ] - } - ], + "outputs": [], "source": [ "print([x for x in range(1, 11)])\n", "print(\"divided by 3 gives\")\n", @@ -713,348 +268,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "6be57e34", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmonthdaydep_timesched_dep_timedep_delayarr_timesched_arr_timearr_delaycarrierflighttailnumorigindestair_timedistancehourminutetime_hour
0201311517.05152.0830.081911.0UA1545N14228EWRIAH227.014005152013-01-01T10:00:00Z
1201311533.05294.0850.083020.0UA1714N24211LGAIAH227.014165292013-01-01T10:00:00Z
2201311542.05402.0923.085033.0AA1141N619AAJFKMIA160.010895402013-01-01T10:00:00Z
3201311544.0545-1.01004.01022-18.0B6725N804JBJFKBQN183.015765452013-01-01T10:00:00Z
4201311554.0600-6.0812.0837-25.0DL461N668DNLGAATL116.0762602013-01-01T11:00:00Z
............................................................
3367712013930NaN1455NaNNaN1634NaN9E3393NaNJFKDCANaN21314552013-09-30T18:00:00Z
3367722013930NaN2200NaNNaN2312NaN9E3525NaNLGASYRNaN1982202013-10-01T02:00:00Z
3367732013930NaN1210NaNNaN1330NaNMQ3461N535MQLGABNANaN76412102013-09-30T16:00:00Z
3367742013930NaN1159NaNNaN1344NaNMQ3572N511MQLGACLENaN41911592013-09-30T15:00:00Z
3367752013930NaN840NaNNaN1020NaNMQ3531N839MQLGARDUNaN4318402013-09-30T12:00:00Z
\n", - "

336776 rows × 19 columns

\n", - "
" - ], - "text/plain": [ - " year month day dep_time sched_dep_time dep_delay arr_time \\\n", - "0 2013 1 1 517.0 515 2.0 830.0 \n", - "1 2013 1 1 533.0 529 4.0 850.0 \n", - "2 2013 1 1 542.0 540 2.0 923.0 \n", - "3 2013 1 1 544.0 545 -1.0 1004.0 \n", - "4 2013 1 1 554.0 600 -6.0 812.0 \n", - "... ... ... ... ... ... ... ... \n", - "336771 2013 9 30 NaN 1455 NaN NaN \n", - "336772 2013 9 30 NaN 2200 NaN NaN \n", - "336773 2013 9 30 NaN 1210 NaN NaN \n", - "336774 2013 9 30 NaN 1159 NaN NaN \n", - "336775 2013 9 30 NaN 840 NaN NaN \n", - "\n", - " sched_arr_time arr_delay carrier flight tailnum origin dest \\\n", - "0 819 11.0 UA 1545 N14228 EWR IAH \n", - "1 830 20.0 UA 1714 N24211 LGA IAH \n", - "2 850 33.0 AA 1141 N619AA JFK MIA \n", - "3 1022 -18.0 B6 725 N804JB JFK BQN \n", - "4 837 -25.0 DL 461 N668DN LGA ATL \n", - "... ... ... ... ... ... ... ... \n", - "336771 1634 NaN 9E 3393 NaN JFK DCA \n", - "336772 2312 NaN 9E 3525 NaN LGA SYR \n", - "336773 1330 NaN MQ 3461 N535MQ LGA BNA \n", - "336774 1344 NaN MQ 3572 N511MQ LGA CLE \n", - "336775 1020 NaN MQ 3531 N839MQ LGA RDU \n", - "\n", - " air_time distance hour minute time_hour \n", - "0 227.0 1400 5 15 2013-01-01T10:00:00Z \n", - "1 227.0 1416 5 29 2013-01-01T10:00:00Z \n", - "2 160.0 1089 5 40 2013-01-01T10:00:00Z \n", - "3 183.0 1576 5 45 2013-01-01T10:00:00Z \n", - "4 116.0 762 6 0 2013-01-01T11:00:00Z \n", - "... ... ... ... ... ... 
\n", - "336771 NaN 213 14 55 2013-09-30T18:00:00Z \n", - "336772 NaN 198 22 0 2013-10-01T02:00:00Z \n", - "336773 NaN 764 12 10 2013-09-30T16:00:00Z \n", - "336774 NaN 419 11 59 2013-09-30T15:00:00Z \n", - "336775 NaN 431 8 40 2013-09-30T12:00:00Z \n", - "\n", - "[336776 rows x 19 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flights.assign(\n", " hour=lambda x: x[\"sched_dep_time\"] // 100,\n", @@ -1074,79 +291,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "a3ffc085", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmoney
02001105.000000
12002110.250000
22003115.762500
32004121.550625
42005127.628156
\n", - "
" - ], - "text/plain": [ - " year money\n", - "0 2001 105.000000\n", - "1 2002 110.250000\n", - "2 2003 115.762500\n", - "3 2004 121.550625\n", - "4 2005 127.628156" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -1168,821 +316,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "aaad2abb", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:35:12.580055\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "money.plot(x=\"year\", y=\"money\");" ] @@ -1997,767 +334,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "bbcf4d7c", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:35:12.673392\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "money.plot(x=\"year\", y=\"money\", logy=True);" ] @@ -2786,79 +366,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "ec1a09ca", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmoney
02001105.00
12002110.25
22003115.76
32004121.55
42005127.63
\n", - "
" - ], - "text/plain": [ - " year money\n", - "0 2001 105.00\n", - "1 2002 110.25\n", - "2 2003 115.76\n", - "3 2004 121.55\n", - "4 2005 127.63" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money.head().round(2)" ] @@ -2873,79 +384,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "9a306519", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmoney
02001105.0
12002110.2
22003115.8
32004121.6
42005127.6
\n", - "
" - ], - "text/plain": [ - " year money\n", - "0 2001 105.0\n", - "1 2002 110.2\n", - "2 2003 115.8\n", - "3 2004 121.6\n", - "4 2005 127.6" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money.head().round({\"year\": 0, \"money\": 1})" ] @@ -2960,79 +402,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "fb9dfb59", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearmoney
452046900.0
4620471000.0
4720481000.0
4820491100.0
4920501100.0
\n", - "
" - ], - "text/plain": [ - " year money\n", - "45 2046 900.0\n", - "46 2047 1000.0\n", - "47 2048 1000.0\n", - "48 2049 1100.0\n", - "49 2050 1100.0" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money.tail().round({\"year\": 0, \"money\": -2})" ] @@ -3047,26 +420,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "f0290c0a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 100.0\n", - "1 110.0\n", - "2 120.0\n", - "3 120.0\n", - "4 130.0\n", - "Name: money, dtype: float64" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money[\"money\"].head().apply(lambda x: float(f'{float(f\"{x:.2g}\"):g}'))" ] @@ -3081,21 +438,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "6a6a4644", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([2., 2., 1.])" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.round([1.5, 2.5, 1.4])" ] @@ -3110,22 +456,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "6270c3cc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([27.15101204, 15.41426421, 76.7650241 , 70.51407739, 88.22482077,\n", - " 51.56875497, 63.03191713, 86.16710762, 72.35185576, 87.16963342])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "real_nums = 100 * np.random.random(size=10)\n", "real_nums" @@ -3133,42 +467,20 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "dc3608d6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([28., 16., 77., 71., 89., 52., 64., 87., 73., 88.])" - ] - }, - "execution_count": 22, - "metadata": {}, 
- "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.ceil(real_nums)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "d577bb21", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([27., 15., 76., 70., 88., 51., 63., 86., 72., 87.])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.floor(real_nums)" ] @@ -3183,26 +495,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "dca417b7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 105.0\n", - "1 111.0\n", - "2 116.0\n", - "3 122.0\n", - "4 128.0\n", - "Name: money, dtype: float64" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money[\"money\"].head().apply(np.ceil)" ] @@ -3219,26 +515,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "c5d77818", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "45 943.425818\n", - "46 1934.022928\n", - "47 2974.149892\n", - "48 4066.283205\n", - "49 5213.023184\n", - "Name: money, dtype: float64" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money[\"money\"].tail().cumsum()" ] @@ -3267,134 +547,20 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "db1272a3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
013
152
27<NA>
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 1 3\n", - "1 5 2\n", - "2 7 " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "11ee0bac", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
01.02.0
12.01.0
23.0NaN
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 1.0 2.0\n", - "1 2.0 1.0\n", - "2 3.0 NaN" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.rank()" ] @@ -3409,67 +575,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "edc7bd81", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
00.3333331.0
10.6666670.5
21.000000NaN
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 0.333333 1.0\n", - "1 0.666667 0.5\n", - "2 1.000000 NaN" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.rank(pct=True)" ] @@ -3488,1078 +597,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "9e499d1b", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:35:12.825403\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "money[\"money_lag_5\"] = money[\"money\"].shift(5)\n", "money[\"money_lead_10\"] = money[\"money\"].shift(-10)\n", @@ -4602,21 +643,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "81389031", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "np.float64(190.92197566022773)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money[\"money\"].quantile(0.25)" ] @@ -4631,25 +661,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "9eb35866", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.00 105.000000\n", - "0.25 190.921976\n", - "0.50 347.101381\n", - "0.75 630.945970\n", - "Name: money, dtype: float64" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "money[\"money\"].quantile([0, 0.25, 0.5, 0.75])" ] @@ -4669,71 +684,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "e2efca3a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
distance_sdcount
origindest
EWREGE1.0110
JFKEGE1.0103
\n", - "
" - ], - "text/plain": [ - " distance_sd count\n", - "origin dest \n", - "EWR EGE 1.0 110\n", - "JFK EGE 1.0 103" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " flights.groupby([\"origin\", \"dest\"])\n", @@ -4759,3173 +713,20 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "b5ac4861", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:35:13.026671\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "flights[\"dep_delay\"].plot.hist(bins=50, title=\" Distribution: length of delay\");" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "d4e76051", "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2026-04-28T13:35:13.184191\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.10.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "flights.query(\"dep_delay <= 120\")[\"dep_delay\"].plot.hist(\n", " bins=50, title=\" Distribution: length of delay\"\n", diff --git a/rectangling.ipynb b/rectangling.ipynb index 4c0ff4f..2ddf375 100644 --- a/rectangling.ipynb +++ b/rectangling.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -56,18 +56,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "4d1c3fa4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[10, 1.23, 'like this', True, None]\n" - ] - } - ], + "outputs": [], "source": [ "list_example = [10, 1.23, \"like this\", True, None]\n", "print(list_example)" @@ -83,18 +75,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "96c4b86b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[10, 1.23, 'like this', True, None, 'one more entry']\n" - ] - } - ], + "outputs": [], "source": [ "list_example.append(\"one more entry\")\n", "print(list_example)" @@ -110,19 +94,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "138ac0e0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10\n", - "one more entry\n" - ] - } - ], + "outputs": [], "source": [ "print(list_example[0])\n", "print(list_example[-1])" @@ -148,19 +123,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ef390263", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[10, 1.23, 'like this']\n", - "[True, None, 'one more entry']\n" - ] - } - ], + "outputs": [], "source": [ "print(list_example[:3])\n", "print(list_example[-3:])" @@ -176,18 +142,10 @@ }, { "cell_type": 
"code", - "execution_count": 6, + "execution_count": null, "id": "13584953", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2, 4, 6, 8]\n" - ] - } - ], + "outputs": [], "source": [ "list_of_numbers = list(range(1, 11))\n", "start = 1\n", @@ -206,18 +164,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "f2bc8926", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n" - ] - } - ], + "outputs": [], "source": [ "print(list_of_numbers[::-1])" ] @@ -242,28 +192,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "703fb0b2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[3.1415,\n", - " 16,\n", - " ['five', 4, 3],\n", - " (91, 93, 90),\n", - " 'Hello World!',\n", - " True,\n", - " None,\n", - " {'key': 'value', 'key2': 'value2'}]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "wacky_list = [\n", " 3.1415,\n", @@ -290,21 +222,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "28fe13d2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "multilayer_list = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]\n", "multilayer_list" @@ -320,21 +241,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "73a71803", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[x for little_list in multilayer_list for x in 
little_list]" ] @@ -359,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "007dadfb", "metadata": {}, "outputs": [], @@ -377,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "aeb70f97", "metadata": {}, "outputs": [], @@ -397,41 +307,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "fa1d828c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (4, 3)
alphabetagamma
stri64list[str]
"["0,1,2"]"1["a", "b", "c"]
"foo"1null
"[]"1[]
"["3,4"]"1["d", "e"]
" - ], - "text/plain": [ - "shape: (4, 3)\n", - "┌───────────┬──────┬─────────────────┐\n", - "│ alpha ┆ beta ┆ gamma │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ str ┆ i64 ┆ list[str] │\n", - "╞═══════════╪══════╪═════════════════╡\n", - "│ [\"0,1,2\"] ┆ 1 ┆ [\"a\", \"b\", \"c\"] │\n", - "│ foo ┆ 1 ┆ null │\n", - "│ [] ┆ 1 ┆ [] │\n", - "│ [\"3,4\"] ┆ 1 ┆ [\"d\", \"e\"] │\n", - "└───────────┴──────┴─────────────────┘" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pl.DataFrame(\n", " {\n", @@ -454,44 +333,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "0854bcf3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (7, 3)
alphabetagamma
stri64str
"["0,1,2"]"1"a"
"["0,1,2"]"1"b"
"["0,1,2"]"1"c"
"foo"1null
"[]"1null
"["3,4"]"1"d"
"["3,4"]"1"e"
" - ], - "text/plain": [ - "shape: (7, 3)\n", - "┌───────────┬──────┬───────┐\n", - "│ alpha ┆ beta ┆ gamma │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ str ┆ i64 ┆ str │\n", - "╞═══════════╪══════╪═══════╡\n", - "│ [\"0,1,2\"] ┆ 1 ┆ a │\n", - "│ [\"0,1,2\"] ┆ 1 ┆ b │\n", - "│ [\"0,1,2\"] ┆ 1 ┆ c │\n", - "│ foo ┆ 1 ┆ null │\n", - "│ [] ┆ 1 ┆ null │\n", - "│ [\"3,4\"] ┆ 1 ┆ d │\n", - "│ [\"3,4\"] ┆ 1 ┆ e │\n", - "└───────────┴──────┴───────┘" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.explode(\"gamma\")" ] @@ -539,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "285ade5e", "metadata": {}, "outputs": [], @@ -562,21 +407,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "75399bf0", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type(json_data)" ] @@ -593,36 +427,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "8cc4f408", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'date': '1971 FEB',\n", - " 'value': '3.8',\n", - " 'label': '1971 JAN-MAR',\n", - " 'year': '1971',\n", - " 'month': 'February',\n", - " 'quarter': '',\n", - " 'sourceDataset': 'LMS',\n", - " 'updateDate': '2015-10-13T23:00:00.000Z'},\n", - " {'date': '1971 MAR',\n", - " 'value': '3.9',\n", - " 'label': '1971 FEB-APR',\n", - " 'year': '1971',\n", - " 'month': 'March',\n", - " 'quarter': '',\n", - " 'sourceDataset': 'LMS',\n", - " 'updateDate': '2015-10-13T23:00:00.000Z'}]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "json_data[\"months\"][:2]" ] @@ -639,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": 
"05069731", "metadata": {}, "outputs": [], @@ -665,24 +473,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "eca7982f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'food': 'doughnut',\n", - " 'good_with': ['coffee', 'tea'],\n", - " 'flavour': None,\n", - " 'toppings': [{'id': 0, 'type': 'glazed'}, {'id': 1, 'type': 'sugar'}]}" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import json\n", "\n", @@ -700,21 +494,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "ea6f887d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'{\"food\": \"doughnut\", \"good_with\": [\"coffee\", \"tea\"], \"flavour\": null, \"toppings\": [{\"id\": 0, \"type\": \"glazed\"}, {\"id\": 1, \"type\": \"sugar\"}]}'" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "json_stream = json.dumps(result)\n", "json_stream" @@ -763,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "1dbfd7c3", "metadata": {}, "outputs": [], @@ -783,40 +566,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "b3ea0e0b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 6)
idname.firstname.lastname.givenname.familyname
i64strstrstrstrstr
1"Coleen""Volk"nullnullnull
nullnullnull"Mark""Regner"null
2nullnullnullnull"Faye Raker"
" - ], - "text/plain": [ - "shape: (3, 6)\n", - "┌──────┬────────────┬───────────┬────────────┬─────────────┬────────────┐\n", - "│ id ┆ name.first ┆ name.last ┆ name.given ┆ name.family ┆ name │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str ┆ str │\n", - "╞══════╪════════════╪═══════════╪════════════╪═════════════╪════════════╡\n", - "│ 1 ┆ Coleen ┆ Volk ┆ null ┆ null ┆ null │\n", - "│ null ┆ null ┆ null ┆ Mark ┆ Regner ┆ null │\n", - "│ 2 ┆ null ┆ null ┆ null ┆ null ┆ Faye Raker │\n", - "└──────┴────────────┴───────────┴────────────┴─────────────┴────────────┘" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data = [\n", " {\"id\": 1, \"name\": {\"first\": \"Coleen\", \"last\": \"Volk\"}},\n", @@ -836,40 +589,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "9349a3a7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 2)
idname
i64str
1"{"first": "Coleen", "last": "V…
null"{"given": "Mark", "family": "R…
2"Faye Raker"
" - ], - "text/plain": [ - "shape: (3, 2)\n", - "┌──────┬─────────────────────────────────┐\n", - "│ id ┆ name │\n", - "│ --- ┆ --- │\n", - "│ i64 ┆ str │\n", - "╞══════╪═════════════════════════════════╡\n", - "│ 1 ┆ {\"first\": \"Coleen\", \"last\": \"V… │\n", - "│ null ┆ {\"given\": \"Mark\", \"family\": \"R… │\n", - "│ 2 ┆ Faye Raker │\n", - "└──────┴─────────────────────────────────┘" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.json_normalize(data, max_level=0)" ] diff --git a/regex.ipynb b/regex.ipynb index d8cb9d0..520a8c8 100644 --- a/regex.ipynb +++ b/regex.ipynb @@ -23,30 +23,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:4: SyntaxWarning: invalid escape sequence '\\w'\n", - "<>:4: SyntaxWarning: invalid escape sequence '\\w'\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/970224743.py:4: SyntaxWarning: invalid escape sequence '\\w'\n", - " re.findall(\"string \\w+\\s\", text)\n" - ] - }, - { - "data": { - "text/plain": [ - "['string cleaning ', 'string editing ']" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import re\n", "\n", @@ -78,30 +57,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:2: SyntaxWarning: invalid escape sequence '\\w'\n", - "<>:2: SyntaxWarning: invalid escape sequence '\\w'\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/3231031485.py:2: SyntaxWarning: invalid escape sequence '\\w'\n", - " re.sub(\"string \\w+\\s\", new_text, text)\n" - ] - }, - { - "data": { - "text/plain": [ - "'It is true that new text here! is a topic in this chapter. new text here! 
is another.'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "new_text = \"new text here! \"\n", "re.sub(\"string \\w+\\s\", new_text, text)" @@ -183,18 +141,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The greedy match is stackoverflo\n", - "The lazy match is stacko\n" - ] - } - ], + "outputs": [], "source": [ "test_string = \"stackoverflow\"\n", "greedy_regex = \"s.*o\"\n", @@ -222,30 +171,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:2: SyntaxWarning: invalid escape sequence '\\$'\n", - "<>:2: SyntaxWarning: invalid escape sequence '\\$'\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60848/3729052804.py:2: SyntaxWarning: invalid escape sequence '\\$'\n", - " re.findall(\"\\$(\\d{2}.\\d{2})\", text)\n" - ] - }, - { - "data": { - "text/plain": [ - "['45.34', '50.00']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "text = \"Product 1 was $45.34, while product 2 came in at $50.00 however it was assessed that the $4.66 difference did not make up for the higher quality of product 2.\"\n", "re.findall(\"\\$(\\d{2}.\\d{2})\", text)" @@ -264,20 +192,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['30500.00 to 35000']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sal_r_per = r\"\\b([0-9]{1,6}(?:\\.)?(?:[0-9]{1,2})?(?:\\s?-\\s?|\\s?to\\s?)[0-9]{1,6}(?:\\.)?(?:[0-9]{1,2})?)(?:\\s?per)\\b\"\n", "text = \"This job pays gbp 30500.00 to 35000 per year. 
Apply at number 100 per the below address.\"\n", diff --git a/spreadsheets.ipynb b/spreadsheets.ipynb index 4f5ebfd..6d62900 100644 --- a/spreadsheets.ipynb +++ b/spreadsheets.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -68,43 +68,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "bf00d4c6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 5)
Student IDFull Namefavourite.foodmealPlanAGE
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" - ], - "text/plain": [ - "shape: (6, 5)\n", - "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", - "│ Student ID ┆ Full Name ┆ favourite.food ┆ mealPlan ┆ AGE │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", - "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", - "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", - "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", - "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", - "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", - "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", - "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", - "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import polars as pl\n", "\n", @@ -126,43 +93,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "ecce26ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""five"
6"Güvenç Attila""Ice cream""Lunch only""6"
" - ], - "text/plain": [ - "shape: (6, 5)\n", - "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", - "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", - "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", - "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", - "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", - "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", - "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", - "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ five │\n", - "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", - "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students.columns = [\n", " \"student_id\",\n", @@ -184,43 +118,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "22e45cc2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (6, 5)
student_idfull_namefavourite_foodmeal_planage
i64strstrstrstr
1"Sunil Huffmann""Strawberry yoghurt""Lunch only""4"
2"Barclay Lynn""French fries""Lunch only""5"
3"Jayendra Lyne""N/A""Breakfast and lunch""7"
4"Leon Rossini""Anchovies""Lunch only"null
5"Chidiegwu Dunkel""Pizza""Breakfast and lunch""5"
6"Güvenç Attila""Ice cream""Lunch only""6"
" - ], - "text/plain": [ - "shape: (6, 5)\n", - "┌────────────┬──────────────────┬────────────────────┬─────────────────────┬──────┐\n", - "│ student_id ┆ full_name ┆ favourite_food ┆ meal_plan ┆ age │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str ┆ str │\n", - "╞════════════╪══════════════════╪════════════════════╪═════════════════════╪══════╡\n", - "│ 1 ┆ Sunil Huffmann ┆ Strawberry yoghurt ┆ Lunch only ┆ 4 │\n", - "│ 2 ┆ Barclay Lynn ┆ French fries ┆ Lunch only ┆ 5 │\n", - "│ 3 ┆ Jayendra Lyne ┆ N/A ┆ Breakfast and lunch ┆ 7 │\n", - "│ 4 ┆ Leon Rossini ┆ Anchovies ┆ Lunch only ┆ null │\n", - "│ 5 ┆ Chidiegwu Dunkel ┆ Pizza ┆ Breakfast and lunch ┆ 5 │\n", - "│ 6 ┆ Güvenç Attila ┆ Ice cream ┆ Lunch only ┆ 6 │\n", - "└────────────┴──────────────────┴────────────────────┴─────────────────────┴──────┘" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = pl.read_excel(\"data/students.xlsx\")\n", "students.columns = [\n", @@ -244,25 +145,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "f67490d3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Schema([('student_id', Int64),\n", - " ('full_name', String),\n", - " ('favourite_food', String),\n", - " ('meal_plan', Categorical(ordering='physical')),\n", - " ('age', Int64)])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "students = students.with_columns(\n", " [\n", @@ -300,43 +186,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "e37f9e3d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Torgersen"39.118.71813750"male"2007
"Adelie""Torgersen"39.517.41863800"female"2007
"Adelie""Torgersen"40.318.01953250"female"2007
"Adelie""Torgersen"nullnullnullnull"NA"2007
"Adelie""Torgersen"36.719.31933450"female"2007
" - ], - "text/plain": [ - "shape: (5, 8)\n", - "┌─────────┬───────────┬───────────────┬───────────────┬──────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_m ┆ bill_depth_mm ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ m ┆ --- ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ --- ┆ f64 ┆ --- ┆ i64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ f64 ┆ ┆ i64 ┆ ┆ ┆ │\n", - "╞═════════╪═══════════╪═══════════════╪═══════════════╪══════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181 ┆ 3750 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186 ┆ 3800 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195 ┆ 3250 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ NA ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193 ┆ 3450 ┆ female ┆ 2007 │\n", - "└─────────┴───────────┴───────────────┴───────────────┴──────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_excel(\n", " \"data/penguins.xlsx\",\n", @@ -356,18 +209,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "0f3a3b78", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Torgersen Island', 'Biscoe Island', 'Dream Island']\n" - ] - } - ], + "outputs": [], "source": [ "penguins_dict = pl.read_excel(\n", " \"data/penguins.xlsx\",\n", @@ -386,50 +231,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "15495426", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biscoe Island\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Biscoe"37.818.31743400"female"2007
"Adelie""Biscoe"37.718.71803600"male"2007
"Adelie""Biscoe"35.919.21893800"female"2007
"Adelie""Biscoe"38.218.11853950"male"2007
"Adelie""Biscoe"38.817.21803800"male"2007
" - ], - "text/plain": [ - "shape: (5, 8)\n", - "┌─────────┬────────┬────────────────┬───────────────┬────────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_mm ┆ bill_depth_mm ┆ flipper_length ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ _mm ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ f64 ┆ f64 ┆ --- ┆ i64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ ┆ ┆ i64 ┆ ┆ ┆ │\n", - "╞═════════╪════════╪════════════════╪═══════════════╪════════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Biscoe ┆ 37.8 ┆ 18.3 ┆ 174 ┆ 3400 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Biscoe ┆ 37.7 ┆ 18.7 ┆ 180 ┆ 3600 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Biscoe ┆ 35.9 ┆ 19.2 ┆ 189 ┆ 3800 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Biscoe ┆ 38.2 ┆ 18.1 ┆ 185 ┆ 3950 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Biscoe ┆ 38.8 ┆ 17.2 ┆ 180 ┆ 3800 ┆ male ┆ 2007 │\n", - "└─────────┴────────┴────────────────┴───────────────┴────────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "print(list(penguins_dict.keys())[1])\n", "list(penguins_dict.values())[1].head()" @@ -445,49 +250,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "151ba846", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (344, 8)
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
strstrf64f64i64i64stri64
"Adelie""Torgersen"39.118.71813750"male"2007
"Adelie""Torgersen"39.517.41863800"female"2007
"Adelie""Torgersen"40.318.01953250"female"2007
"Adelie""Torgersen"nullnullnullnull"NA"2007
"Adelie""Torgersen"36.719.31933450"female"2007
"Chinstrap""Dream"55.819.82074000"male"2009
"Chinstrap""Dream"43.518.12023400"female"2009
"Chinstrap""Dream"49.618.21933775"male"2009
"Chinstrap""Dream"50.819.02104100"male"2009
"Chinstrap""Dream"50.218.71983775"female"2009
" - ], - "text/plain": [ - "shape: (344, 8)\n", - "┌───────────┬───────────┬──────────────┬──────────────┬──────────────┬─────────────┬────────┬──────┐\n", - "│ species ┆ island ┆ bill_length_ ┆ bill_depth_m ┆ flipper_leng ┆ body_mass_g ┆ sex ┆ year │\n", - "│ --- ┆ --- ┆ mm ┆ m ┆ th_mm ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ --- ┆ --- ┆ --- ┆ i64 ┆ str ┆ i64 │\n", - "│ ┆ ┆ f64 ┆ f64 ┆ i64 ┆ ┆ ┆ │\n", - "╞═══════════╪═══════════╪══════════════╪══════════════╪══════════════╪═════════════╪════════╪══════╡\n", - "│ Adelie ┆ Torgersen ┆ 39.1 ┆ 18.7 ┆ 181 ┆ 3750 ┆ male ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 39.5 ┆ 17.4 ┆ 186 ┆ 3800 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 40.3 ┆ 18.0 ┆ 195 ┆ 3250 ┆ female ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ null ┆ null ┆ null ┆ null ┆ NA ┆ 2007 │\n", - "│ Adelie ┆ Torgersen ┆ 36.7 ┆ 19.3 ┆ 193 ┆ 3450 ┆ female ┆ 2007 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ Chinstrap ┆ Dream ┆ 55.8 ┆ 19.8 ┆ 207 ┆ 4000 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 43.5 ┆ 18.1 ┆ 202 ┆ 3400 ┆ female ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 49.6 ┆ 18.2 ┆ 193 ┆ 3775 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 50.8 ┆ 19.0 ┆ 210 ┆ 4100 ┆ male ┆ 2009 │\n", - "│ Chinstrap ┆ Dream ┆ 50.2 ┆ 18.7 ┆ 198 ┆ 3775 ┆ female ┆ 2009 │\n", - "└───────────┴───────────┴──────────────┴──────────────┴──────────────┴─────────────┴────────┴──────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "penguins = pl.concat(penguins_dict.values())\n", "penguins" @@ -519,49 +285,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "29987b9d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (14, 6)
Lots of people__UNNAMED__1__UNNAMED__2__UNNAMED__3__UNNAMED__4__UNNAMED__5
strstrstrstrdatestr
"David Bowie""musician""69""true"1947-01-08"2016-01-10 00:00:00"
"Carrie Fisher""actor""60""true"1956-10-21"2016-12-27 00:00:00"
"Chuck Berry""musician""90""true"1926-10-18"2017-03-18 00:00:00"
"Bill Paxton""actor""61""true"1955-05-17"2017-02-25 00:00:00"
"Prince""musician""57""true"1958-06-07"2016-04-21 00:00:00"
"George Michael""musician""53""false"1963-06-25"2016-12-25 00:00:00"
"Some"nullnullnullnullnull
null"also like to write stuff"nullnullnullnull
nullnull"at the""bottom,"nullnull
nullnullnullnullnull"too!"
" - ], - "text/plain": [ - "shape: (14, 6)\n", - "┌────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬─────────────────────┐\n", - "│ Lots of people ┆ __UNNAMED__1 ┆ __UNNAMED__2 ┆ __UNNAMED__3 ┆ __UNNAMED__4 ┆ __UNNAMED__5 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ str ┆ str ┆ date ┆ str │\n", - "╞════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪═════════════════════╡\n", - "│ David Bowie ┆ musician ┆ 69 ┆ true ┆ 1947-01-08 ┆ 2016-01-10 00:00:00 │\n", - "│ Carrie Fisher ┆ actor ┆ 60 ┆ true ┆ 1956-10-21 ┆ 2016-12-27 00:00:00 │\n", - "│ Chuck Berry ┆ musician ┆ 90 ┆ true ┆ 1926-10-18 ┆ 2017-03-18 00:00:00 │\n", - "│ Bill Paxton ┆ actor ┆ 61 ┆ true ┆ 1955-05-17 ┆ 2017-02-25 00:00:00 │\n", - "│ Prince ┆ musician ┆ 57 ┆ true ┆ 1958-06-07 ┆ 2016-04-21 00:00:00 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ George Michael ┆ musician ┆ 53 ┆ false ┆ 1963-06-25 ┆ 2016-12-25 00:00:00 │\n", - "│ Some ┆ null ┆ null ┆ null ┆ null ┆ null │\n", - "│ null ┆ also like to ┆ null ┆ null ┆ null ┆ null │\n", - "│ ┆ write stuff ┆ ┆ ┆ ┆ │\n", - "│ null ┆ null ┆ at the ┆ bottom, ┆ null ┆ null │\n", - "│ null ┆ null ┆ null ┆ null ┆ null ┆ too! │\n", - "└────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴─────────────────────┘" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_excel(\n", " \"data/deaths.xlsx\",\n", @@ -579,47 +306,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "9d7a3db3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 6)
Lots of people__UNNAMED__1__UNNAMED__2__UNNAMED__3__UNNAMED__4__UNNAMED__5
strstri64booldatedate
"David Bowie""musician"69true1947-01-082016-01-10
"Carrie Fisher""actor"60true1956-10-212016-12-27
"Chuck Berry""musician"90true1926-10-182017-03-18
"Bill Paxton""actor"61true1955-05-172017-02-25
"Prince""musician"57true1958-06-072016-04-21
"Alan Rickman""actor"69false1946-02-212016-01-14
"Florence Henderson""actor"82true1934-02-142016-11-24
"Harper Lee""author"89false1926-04-282016-02-19
"Zsa Zsa Gábor""actor"99true1917-02-062016-12-18
"George Michael""musician"53false1963-06-252016-12-25
" - ], - "text/plain": [ - "shape: (10, 6)\n", - "┌────────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐\n", - "│ Lots of people ┆ __UNNAMED__1 ┆ __UNNAMED__2 ┆ __UNNAMED__3 ┆ __UNNAMED__4 ┆ __UNNAMED__5 │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ i64 ┆ bool ┆ date ┆ date │\n", - "╞════════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡\n", - "│ David Bowie ┆ musician ┆ 69 ┆ true ┆ 1947-01-08 ┆ 2016-01-10 │\n", - "│ Carrie Fisher ┆ actor ┆ 60 ┆ true ┆ 1956-10-21 ┆ 2016-12-27 │\n", - "│ Chuck Berry ┆ musician ┆ 90 ┆ true ┆ 1926-10-18 ┆ 2017-03-18 │\n", - "│ Bill Paxton ┆ actor ┆ 61 ┆ true ┆ 1955-05-17 ┆ 2017-02-25 │\n", - "│ Prince ┆ musician ┆ 57 ┆ true ┆ 1958-06-07 ┆ 2016-04-21 │\n", - "│ Alan Rickman ┆ actor ┆ 69 ┆ false ┆ 1946-02-21 ┆ 2016-01-14 │\n", - "│ Florence Henderson ┆ actor ┆ 82 ┆ true ┆ 1934-02-14 ┆ 2016-11-24 │\n", - "│ Harper Lee ┆ author ┆ 89 ┆ false ┆ 1926-04-28 ┆ 2016-02-19 │\n", - "│ Zsa Zsa Gábor ┆ actor ┆ 99 ┆ true ┆ 1917-02-06 ┆ 2016-12-18 │\n", - "│ George Michael ┆ musician ┆ 53 ┆ false ┆ 1963-06-25 ┆ 2016-12-25 │\n", - "└────────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_excel(\n", " \"data/deaths.xlsx\",\n", @@ -666,40 +356,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "15963e18", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 2)
itemquantity
cati64
"brownie"10
"cupcake"5
"cookie"8
" - ], - "text/plain": [ - "shape: (3, 2)\n", - "┌─────────┬──────────┐\n", - "│ item ┆ quantity │\n", - "│ --- ┆ --- │\n", - "│ cat ┆ i64 │\n", - "╞═════════╪══════════╡\n", - "│ brownie ┆ 10 │\n", - "│ cupcake ┆ 5 │\n", - "│ cookie ┆ 8 │\n", - "└─────────┴──────────┘" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "bake_sale = pl.DataFrame(\n", " {\n", @@ -720,21 +380,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "1fc17141", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "bake_sale.write_excel(\"data/bake_sale.xlsx\")" ] @@ -759,21 +408,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "27e128f9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Schema([('item', String), ('quantity', Int64)])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_excel(\"data/bake_sale.xlsx\").schema" ] diff --git a/strings.ipynb b/strings.ipynb index 10d93ae..b1a00fe 100644 --- a/strings.ipynb +++ b/strings.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "478a847a", "metadata": {}, "outputs": [], @@ -49,21 +49,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d7f4ea2d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type(string_one)" ] @@ -78,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "01379fe7", "metadata": {}, "outputs": [], @@ -88,21 +77,10 @@ }, { "cell_type": "code", - 
"execution_count": 4, + "execution_count": null, "id": "d88f7928", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'ban'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "var[:3]" ] @@ -117,21 +95,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "e03d95d1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'aaa'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "var[1::2]" ] @@ -148,21 +115,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "83ab201b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(var)" ] @@ -177,21 +133,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "7801bd5d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"This is a string. If I want to include a 'quote' inside a string, I use double quotes on the outside.\"" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "string_one + \". \" + string_two + \".\"" ] @@ -206,21 +151,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "138cef18", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"This is a string. If I want to include a 'quote' inside a string, I use double quotes on the outside\"" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\". 
\".join([string_one, string_two])" ] @@ -235,18 +169,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "e11896f8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['INPUT TEXT', 'input text', 'Input Text']\n" - ] - } - ], + "outputs": [], "source": [ "var = \"input TEXT\"\n", "var_list = [var.upper(), var.lower(), var.title()]\n", @@ -275,20 +201,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "bf0aadec", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INPUT TEXT; and \n", - "input text; and \n", - "Input Text\n" - ] - } - ], + "outputs": [], "source": [ "print(*var_list, sep=\"; and \\n\")" ] @@ -303,21 +219,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "a96f048c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'A boolean is either True or False, there are only 2 options.'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " \"A boolean is either \"\n", @@ -350,18 +255,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "9dddf0da", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You scored 15.32399\n" - ] - } - ], + "outputs": [], "source": [ "variable = 15.32399\n", "print(f\"You scored {variable}\")" @@ -377,18 +274,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "795e7c07", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You scored 234.8246695201\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"You scored {variable**2}\")" ] @@ -411,18 +300,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1f3d3806", "metadata": {}, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You scored +15.32\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"You scored {variable:+.2f}\")" ] @@ -471,21 +352,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "0ccd65aa", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import string\n", "\n", @@ -502,21 +372,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "16205c36", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "string.ascii_letters" ] @@ -531,21 +390,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "0c67f5cd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0123456789'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "string.digits" ] @@ -568,20 +416,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "16e9904a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Here is a \n", - " new line\n", - "Here is an \\n escaped new line \n" - ] - } - ], + "outputs": [], "source": [ "print(\"Here is a \\n new line\")\n", "print(\"Here is an \\\\n escaped new line \")" @@ -614,19 +452,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "af423bd1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a\tb\n", - "A\tB\n" - ] - } - ], + "outputs": [], "source": [ 
"print(\"a\\tb\\nA\\tB\")" ] @@ -643,18 +472,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "c2b9c689", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a\\tb\\nA\\tB\n" - ] - } - ], + "outputs": [], "source": [ "print(r\"a\\tb\\nA\\tB\")" ] @@ -675,21 +496,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "229ada3a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Value is subjective'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"Value is objective\".replace(\"objective\", \"subjective\")" ] @@ -704,21 +514,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "79f754dc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Value is subjective'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "text = \"Value is objective\"\n", "old_substr = \"objective\"\n", @@ -750,21 +549,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "99675fee", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'a': '', 'e': '', 'i': '', 'o': '', 'u': ''}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "example_text = \"Much recent work has focused on the influence of social capital on innovative outcomes. Little research has been done on disadvantaged groups who were often restricted from participation in social networks that provide information necessary for invention and innovation. Unique new data on African American inventors and patentees between 1843 and 1930 permit an empirical investigation of the relation between social capital and economic outcomes. 
I find that African Americans used both traditional, i.e., occupation-based, and nontraditional, i.e., civic, networks to maximize inventive output and that laws constraining social-capital formation are most negatively correlated with economically important inventive activity.\"\n", "vowels = \"aeiou\"\n", @@ -782,21 +570,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "e48763cb", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Mch rcnt wrk hs fcsd n th nflnc f scl cptl n nnvtv tcms. Lttl rsrch hs bn dn n dsdvntgd grps wh wr ftn rstrctd frm prtcptn n scl ntwrks tht prvd nfrmtn ncssry fr nvntn nd nnvtn. Unq nw dt n Afrcn Amrcn nvntrs nd ptnts btwn 1843 nd 1930 prmt n mprcl nvstgtn f th rltn btwn scl cptl nd cnmc tcms. I fnd tht Afrcn Amrcns sd bth trdtnl, .., ccptn-bsd, nd nntrdtnl, .., cvc, ntwrks t mxmz nvntv tpt nd tht lws cnstrnng scl-cptl frmtn r mst ngtvly crrltd wth cnmclly mprtnt nvntv ctvty.'" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "translator = example_text.maketrans(translation_dict)\n", "example_text.translate(translator)" @@ -822,21 +599,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "ac758b38", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Example string with excess punctuation'" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"\".join(\n", " [\n", @@ -859,21 +625,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "122619bf", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'This is a sentence'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"This is a sentence and we will split it at character 18\"[:18]" ] @@ -888,21 +643,10 @@ 
}, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "9fc432ed", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['This is a sentence', ' And another sentence', ' And a third sentence']" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"This is a sentence. And another sentence. And a third sentence\".split(\".\")" ] @@ -917,21 +661,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "6904e486", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['This is a ', '. And another ', '. And a third ', '']" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"This is a sentence. And another sentence. And a third sentence\".split(\"sentence\")" ] @@ -956,18 +689,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "22f94993", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The word \"coffee\" appears 2 times.\n" - ] - } - ], + "outputs": [], "source": [ "text = \"At six o'clock we were waiting for coffee, \\n waiting for coffee and the charitable crumb \\n that was going to be served from a certain balcony \\n --like kings of old, or like a miracle. \\n It was still dark. 
One foot of the sun \\n steadied itself on a long ripple in the river.\"\n", "word = \"coffee\"\n", @@ -984,21 +709,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "a351a11b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "35" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "text.find(word)" ] @@ -1013,21 +727,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "8e0a7020", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'coffee'" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "text[text.find(word) : text.find(word) + len(word)]" ] @@ -1042,21 +745,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "e18f64a3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "57" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "text.rfind(word)" ] @@ -1083,21 +775,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "bbc3eb7b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Ada', 'Adam', 'Elinor', 'Grace', 'Jean']" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[name.capitalize() for name in [\"ada\", \"adam\", \"elinor\", \"grace\", \"jean\"]]" ] @@ -1112,26 +793,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "c8a7f68b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 ada lovelace\n", - "1 adam smith\n", - "2 elinor ostrom\n", - "3 grace hopper\n", - "4 jean bartik\n", - "dtype: string" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + 
"outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -1152,26 +817,10 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "7cf149b5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Ada Lovelace\n", - "1 Adam Smith\n", - "2 Elinor Ostrom\n", - "3 Grace Hopper\n", - "4 Jean Bartik\n", - "dtype: string" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dfs.str.title()" ] @@ -1186,26 +835,10 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "26dc9a7b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Ada Lovelace\n", - "1 Adam Smith\n", - "2 Elinor Ostrom\n", - "3 Grace Hopper\n", - "4 Jean Bartik\n", - "Name: names, dtype: string" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame(dfs, columns=[\"names\"])\n", "df[\"names\"].str.title()" @@ -1259,26 +892,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "d7a29663", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 [ada, lovelace]\n", - "1 [adam, smith]\n", - "2 [elinor, ostrom]\n", - "3 [grace, hopper]\n", - "4 [jean, bartik]\n", - "Name: names, dtype: object" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"names\"].str.split(\" \")" ] @@ -1293,79 +910,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "85a5cd2c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01
0adalovelace
1adamsmith
2elinorostrom
3gracehopper
4jeanbartik
\n", - "
" - ], - "text/plain": [ - " 0 1\n", - "0 ada lovelace\n", - "1 adam smith\n", - "2 elinor ostrom\n", - "3 grace hopper\n", - "4 jean bartik" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"names\"].str.split(\" \", n=2, expand=True)" ] @@ -1384,36 +932,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "2e8781ba", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:1: SyntaxWarning: invalid escape sequence '\\w'\n", - "<>:1: SyntaxWarning: invalid escape sequence '\\w'\n", - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_60781/3354940721.py:1: SyntaxWarning: invalid escape sequence '\\w'\n", - " df[\"names\"].str.extract(\"(\\w+)\", expand=False)\n" - ] - }, - { - "data": { - "text/plain": [ - "0 ada\n", - "1 adam\n", - "2 elinor\n", - "3 grace\n", - "4 jean\n", - "Name: names, dtype: string" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"names\"].str.extract(\"(\\w+)\", expand=False)" ] @@ -1445,26 +967,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "ba13d894", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 lovelace\n", - "1 smith\n", - "2 ostrom\n", - "3 hopper\n", - "4 bartik\n", - "Name: names, dtype: object" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"names\"].str.split().str.get(-1)" ] @@ -1479,79 +985,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "056147d6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namestags
0ada lovelaceuk; cs
1adam smithuk; econ
2elinor ostromusa; econ
3grace hopperusa; cs
4jean bartikusa; cs
\n", - "
" - ], - "text/plain": [ - " names tags\n", - "0 ada lovelace uk; cs\n", - "1 adam smith uk; econ\n", - "2 elinor ostrom usa; econ\n", - "3 grace hopper usa; cs\n", - "4 jean bartik usa; cs" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame(\n", " {\n", @@ -1578,91 +1015,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "a5cbc10f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cseconukusa
01010
10110
20101
31001
41001
\n", - "
" - ], - "text/plain": [ - " cs econ uk usa\n", - "0 1 0 1 0\n", - "1 0 1 1 0\n", - "2 0 1 0 1\n", - "3 1 0 0 1\n", - "4 1 0 0 1" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df[\"tags\"].str.get_dummies(\";\")" ] diff --git a/vis-layers.ipynb b/vis-layers.ipynb index 062b285..6a92e84 100644 --- a/vis-layers.ipynb +++ b/vis-layers.ipynb @@ -42,51 +42,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a86fb211", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "from lets_plot import *\n", @@ -108,156 +67,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "39a6d993", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
manufacturermodeldisplyearcyltransdrvctyhwyflclass
rownames
1audia41.819994auto(l5)f18.029.0pcompact
2audia41.819994manual(m5)f21.029.0pcompact
3audia42.020084manual(m6)f20.031.0pcompact
4audia42.020084auto(av)f21.030.0pcompact
5audia42.819996auto(l5)f16.026.0pcompact
\n", - "
" - ], - "text/plain": [ - " manufacturer model displ year cyl trans drv cty hwy fl \\\n", - "rownames \n", - "1 audi a4 1.8 1999 4 auto(l5) f 18.0 29.0 p \n", - "2 audi a4 1.8 1999 4 manual(m5) f 21.0 29.0 p \n", - "3 audi a4 2.0 2008 4 manual(m6) f 20.0 31.0 p \n", - "4 audi a4 2.0 2008 4 auto(av) f 21.0 30.0 p \n", - "5 audi a4 2.8 1999 6 auto(l5) f 16.0 26.0 p \n", - "\n", - " class \n", - "rownames \n", - "1 compact \n", - "2 compact \n", - "3 compact \n", - "4 compact \n", - "5 compact " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mpg = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv\", index_col=0\n", @@ -304,262 +117,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "fe77349a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"class\")) + geom_point())" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e77b5640", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", shape=\"class\")) + geom_point())" ] @@ -574,262 +145,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ef221330", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", size=\"class\")) + geom_point())" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "d042255e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", alpha=\"class\")) + geom_point())" ] @@ -852,130 +181,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "618edcb4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(color=\"blue\"))" ] @@ -1041,263 +250,20 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "277a4c0f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(size=4))" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "07247ba9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_smooth(method=\"loess\", size=2))" ] @@ -1332,207 +298,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "4b20c825", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", line=\"drv\")) + geom_smooth(method=\"loess\"))" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "84df3e78", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\", linetype=\"drv\")) + geom_smooth(method=\"loess\"))" ] @@ -1551,146 +330,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "c9e8d92f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -1719,143 +362,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "b3916558", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(aes(color=\"class\")) + geom_smooth())" ] @@ -1872,222 +382,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "38870eb5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -2144,149 +442,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "ae75c5c1", "metadata": { "tags": [ "remove-cell" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\", color=\"drv\"))\n", @@ -2307,136 +470,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "cb651300", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + facet_wrap(\"cyl\"))" ] @@ -2451,138 +488,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "61481052", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point() + facet_grid(\"drv\", \"cyl\"))" ] @@ -2599,139 +508,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "adcd9079", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(mpg, aes(x=\"displ\", y=\"hwy\"))\n", @@ -2742,137 +522,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "ceb2a354", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg) + geom_point(aes(x=\"displ\", y=\"hwy\")) + facet_wrap(\"class\", nrow=2))" ] @@ -2962,141 +615,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "f379e31b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
caratcutcolorclaritydepthtablepricexyz
rownames
10.23IdealESI261.555.03263.953.982.43
20.21PremiumESI159.861.03263.893.842.31
30.23GoodEVS156.965.03274.054.072.31
40.29PremiumIVS262.458.03344.204.232.63
50.31GoodJSI263.358.03354.344.352.75
\n", - "
" - ], - "text/plain": [ - " carat cut color clarity depth table price x y z\n", - "rownames \n", - "1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", - "2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", - "3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", - "4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", - "5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "diamonds = pd.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv\",\n", @@ -3111,130 +633,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "d8faf1ab", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(diamonds, aes(x=\"cut\")) + geom_bar())" ] @@ -3277,108 +679,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "ca772dd5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " ggplot(\n", @@ -3402,258 +706,20 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "f8da7d91", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", color=\"drv\")) + geom_bar())" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "088e7550", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"drv\")) + geom_bar())" ] @@ -3669,130 +735,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "181c70d2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar())" ] @@ -3812,132 +758,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "a8e9c378", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(alpha=0.5, position=\"identity\"))" ] @@ -3955,131 +779,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "14205000", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(position=\"fill\"))" ] @@ -4095,131 +798,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "c33c4a03", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"drv\", fill=\"class\")) + geom_bar(position=\"dodge\"))" ] @@ -4236,129 +818,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "ba4161de", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point())" ] @@ -4380,130 +843,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "414ce7af", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"displ\", y=\"hwy\")) + geom_point(position=\"jitter\"))" ] @@ -4535,129 +878,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "9bc38aef", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(ggplot(mpg, aes(x=\"cty\", y=\"hwy\")) + geom_point())" ] diff --git a/webscraping-and-apis.ipynb b/webscraping-and-apis.ipynb index 4bd60c2..802151d 100644 --- a/webscraping-and-apis.ipynb +++ b/webscraping-and-apis.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a7f62293", "metadata": {}, "outputs": [], @@ -101,48 +101,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "06108a4d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 14)
rownamesnameyearmonthdayhourlatlongstatuscategorywindpressuretropicalstorm_force_diameterhurricane_force_diameter
i64stri64i64i64i64f64f64stri64i64i64strstr
1"Amy"1975627027.5-79.0"tropical depression"null251013nullnull
2"Amy"1975627628.5-79.0"tropical depression"null251013nullnull
3"Amy"19756271229.5-79.0"tropical depression"null251013nullnull
4"Amy"19756271830.5-79.0"tropical depression"null251013nullnull
5"Amy"1975628031.5-78.8"tropical depression"null251012nullnull
6"Amy"1975628632.4-78.7"tropical depression"null251012nullnull
7"Amy"19756281233.3-78.0"tropical depression"null251011nullnull
8"Amy"19756281834.0-77.0"tropical depression"null301006nullnull
9"Amy"1975629034.4-75.8"tropical storm"null351004nullnull
10"Amy"1975629634.0-74.8"tropical storm"null401002nullnull
" - ], - "text/plain": [ - "shape: (10, 14)\n", - "┌──────────┬──────┬──────┬───────┬───┬──────┬──────────┬─────────────────────┬─────────────────────┐\n", - "│ rownames ┆ name ┆ year ┆ month ┆ … ┆ wind ┆ pressure ┆ tropicalstorm_force ┆ hurricane_force_dia │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ _diameter ┆ meter │\n", - "│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ --- ┆ --- │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ str ┆ str │\n", - "╞══════════╪══════╪══════╪═══════╪═══╪══════╪══════════╪═════════════════════╪═════════════════════╡\n", - "│ 1 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", - "│ 2 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", - "│ 3 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", - "│ 4 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1013 ┆ null ┆ null │\n", - "│ 5 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1012 ┆ null ┆ null │\n", - "│ 6 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1012 ┆ null ┆ null │\n", - "│ 7 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 25 ┆ 1011 ┆ null ┆ null │\n", - "│ 8 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 30 ┆ 1006 ┆ null ┆ null │\n", - "│ 9 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 35 ┆ 1004 ┆ null ┆ null │\n", - "│ 10 ┆ Amy ┆ 1975 ┆ 6 ┆ … ┆ 40 ┆ 1002 ┆ null ┆ null │\n", - "└──────────┴──────┴──────┴───────┴───┴──────┴──────────┴─────────────────────┴─────────────────────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pl.read_csv(\n", " \"https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/storms.csv\", n_rows=10\n", @@ -174,113 +136,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "6107093c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "url = \"https://api.beta.ons.gov.uk/v1/data?uri=/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/jp9z/lms/previous/v108\"\n", "\n", @@ -343,60 +202,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "380ca743", "metadata": { "lines_to_next_cell": 2 }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/j1/0d3dmg7d6gnbc_zwrh8t9gcw0000gn/T/ipykernel_61971/1219974464.py:18: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.\n", - " .with_columns(pl.col(\"country\").map_elements(lambda x: textwrap.fill(x, 10)))\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 3)
economycountryEN.GHG.ALL.PC.CE.AR5
strstrf64
"USA""United\n", - "States"18.921098
"CHN""China"9.982534
"ECS""Europe &\n", - "Central\n", - "Asia"9.171446
"EAS""East Asia\n", - "& Pacific"8.435052
"IND""India"2.621464
" - ], - "text/plain": [ - "shape: (5, 3)\n", - "┌─────────┬───────────┬──────────────────────┐\n", - "│ economy ┆ country ┆ EN.GHG.ALL.PC.CE.AR5 │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ f64 │\n", - "╞═════════╪═══════════╪══════════════════════╡\n", - "│ USA ┆ United ┆ 18.921098 │\n", - "│ ┆ States ┆ │\n", - "│ CHN ┆ China ┆ 9.982534 │\n", - "│ ECS ┆ Europe & ┆ 9.171446 │\n", - "│ ┆ Central ┆ │\n", - "│ ┆ Asia ┆ │\n", - "│ EAS ┆ East Asia ┆ 8.435052 │\n", - "│ ┆ & Pacific ┆ │\n", - "│ IND ┆ India ┆ 2.621464 │\n", - "└─────────┴───────────┴──────────────────────┘" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# World Bank CO2 equivalent emissions (metric tons per capita)\n", "# https://data.worldbank.org/indicator/EN.GHG.ALL.PC.CE.AR5\n", @@ -424,170 +235,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "00379b93", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "lp.LetsPlot.setup_html()\n", "\n", @@ -735,21 +386,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "073d89e3", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n\\n\\n'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "url = \"http://aeturrell.com/research\"\n", "page = requests.get(url)\n", @@ -769,35 +409,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "22f96be0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">\n", - "
\n", - " \n", - " \n", - " \n", - " <\n" - ] - } - ], + "outputs": [], "source": [ "soup = BeautifulSoup(page.text, \"html.parser\")\n", "print(soup.prettify()[60000:60500])" @@ -814,21 +429,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "d82775de", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "

Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766

" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Get all paragraphs\n", "all_paras = soup.find_all(\"p\")\n", @@ -847,21 +451,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "11321154", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "all_paras[1].text" ] @@ -877,40 +470,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "feac1fdd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. \"Pay transparency and gender equality.\" American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766',\n", - " 'Botta, Federico, Robin Lovelace, Laura Gilbert, and Arthur Turrell. \"Packaging code and data for reproducible research: A case study of journey time statistics.\" Environment and Planning B: Urban Analytics and City Science (2024): 23998083241267331. doi: 10.1177/23998083241267331',\n", - " 'Kalamara, Eleni, Arthur Turrell, Chris Redl, George Kapetanios, and Sujit Kapadia. \"Making text count: economic forecasting using newspaper text.\" Journal of Applied Econometrics 37, no. 5 (2022): 896-919. doi: 10.1002/jae.2907',\n", - " 'Turrell, A., Speigner, B., Copple, D., Djumalieva, J. and Thurgood, J., 2021. Is the UK’s productivity puzzle mostly driven by occupational mismatch? An analysis using big data on job vacancies. Labour Economics, 71, p.102013. doi: 10.1016/j.labeco.2021.102013',\n", - " 'Haldane, Andrew G., and Arthur E. Turrell. 
\"Drawing on different disciplines: macroeconomic agent-based models.\" Journal of Evolutionary Economics 29 (2019): 39-66. doi: 10.1007/s00191-018-0557-5',\n", - " 'Haldane, Andrew G., and Arthur E. Turrell. \"An interdisciplinary model for macroeconomics.\" Oxford Review of Economic Policy 34, no. 1-2 (2018): 219-251. doi: 10.1093/oxrep/grx051',\n", - " 'Braun-Munzinger, Karen, Z. Liu, and A. E. Turrell. \"An agent-based model of corporate bond trading.\" Quantitative Finance 18, no. 4 (2018): 591-608. doi: 10.1080/14697688.2017.1380310',\n", - " 'Turrell, A. E., M. Sherlock, and S. J. Rose. \"Efficient evaluation of collisional energy transfer terms for plasma particle simulations.\" Journal of Plasma Physics 82, no. 1 (2016): 905820107. doi: 10.1017/S0022377816000131',\n", - " 'Turrell, A. E., M. Sherlock, and S. J. Rose. \"Ultrafast collisional ion heating by electrostatic shocks.\" Nature Communications 6, no. 1 (2015): 8905. doi: 10.1038/ncomms9905',\n", - " 'Turrell, Arthur E., Mark Sherlock, and Steven J. Rose. \"Self-consistent inclusion of classical large-angle Coulomb collisions in plasma Monte Carlo simulations.\" Journal of Computational Physics 299 (2015): 144-155. doi: 10.1016/j.jcp.2015.06.034',\n", - " 'Turrell, Arthur E., Mark Sherlock, and Steven J. Rose. \"A Monte Carlo algorithm for degenerate plasmas.\" Journal of Computational Physics 249 (2013): 13-21. doi: 10.1016/j.jcp.2013.03.052',\n", - " 'Turrell, Arthur. \"Cutting through Complexity: How Data Science Can Help Policymakers Understand the World.\" In The Economy as a Complex Evolving System, Part IV. Sante Fe Institute, 2025. doi: https://doi.org/10.37911/9781947864665.11',\n", - " 'Duchini, Emma, Stefania Simion, and Arthur Turrell. \"A Review of the Effects of Pay Transparency.\" In Oxford Research Encyclopedia of Economics and Finance, Oxford University Press, 2024. 
doi: 10.1093/acrefore/9780190625979.013.860',\n", - " 'Turrell, Arthur, Bradley Speigner, Jyldyz Djumalieva, David Copple, and James Thurgood. \"6. Transforming Naturally Occurring Text Data into Economic Statistics.\" In Big Data for Twenty-First-Century Economic Statistics, pp. 173-208. University of Chicago Press, 2022. doi: 10.7208/chicago/9780226801391-008',\n", - " 'Turrell, Arthur. \"Agent-based models: understanding the economy from the bottom up\" In Quarterly Bulletin, Q4. Bank of England, 2016.',\n", - " 'Cohen, Samuel N., Giulia Mantoan, Lars Nesheim, Áureo de Paula, Arthur Turrell, and Lingyi Yang. Nowcasting using regression on signatures arXiv preprint arXiv:2305.10256v2 (2025).',\n", - " 'Van Dijcke, David, Marcus Buckmann, Arthur Turrell, and Tomas Key. \"Vacancy Posting, Firm Balance Sheets, and Pandemic Policy Interventions.\" Bank of England Staff Working Paper Series 1033 (2022).',\n", - " 'Draca, Mirko, Emma Duchini, Roland Rathelot, Arthur Turrell, and Giulia Vattuone. Revolution in Progress? The Rise of Remote Work in the UK. University of Warwick, Department of Economics, 2022.',\n", - " 'Hill, Edward, Marco Bardoscia, and Arthur Turrell. \"Solving heterogeneous general equilibrium economic models with deep reinforcement learning.\" arXiv arXiv:2103.16977 (2021).',\n", - " 'Turrell, Arthur, James Thurgood, David Copple, Jyldyz Djumalieva, and Bradley Speigner. 
\"Using online job vacancies to understand the UK labour market from the bottom-up.\" Bank of England Staff Working Papers 742 (2018).']" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "projects = soup.find_all(\"div\", class_=\"project-content listing-pub-info\")\n", "projects = [x.text.strip() for x in projects]\n", @@ -955,27 +518,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "0ada9ce7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape: (3, 4)\n", - "┌─────┬────────────┬───────────┬──────────┐\n", - "│ # ┆ First Name ┆ Last Name ┆ Username │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ str ┆ str ┆ str │\n", - "╞═════╪════════════╪═══════════╪══════════╡\n", - "│ 1 ┆ Mark ┆ Otto ┆ @mdo │\n", - "│ 2 ┆ Jacob ┆ Thornton ┆ @fat │\n", - "│ 3 ┆ Larry ┆ the Bird ┆ @twitter │\n", - "└─────┴────────────┴───────────┴──────────┘\n" - ] - } - ], + "outputs": [], "source": [ "import polars as pl\n", "\n", diff --git a/whole-game.ipynb b/whole-game.ipynb index 5498d0e..c0b6e5f 100644 --- a/whole-game.ipynb +++ b/whole-game.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51a55374", "metadata": { "tags": [ @@ -29,116 +29,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "209ef434", "metadata": { "tags": [ "remove-input" ] }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "cluster_0\n", - "\n", - "Understand\n", - "\n", - "\n", - "\n", - "Import\n", - "\n", - "Import\n", - "\n", - "\n", - "\n", - "Clean\n", - "\n", - "Clean\n", - "\n", - "\n", - "\n", - "Import->Clean\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Analyse\n", - "\n", - "Analyse\n", - "\n", - "\n", - "\n", - "Clean->Analyse\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"Visualise\n", - "\n", - "Visualise\n", - "\n", - "\n", - "\n", - "Visualise->Analyse\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Transform\n", - "\n", - "Transform\n", - "\n", - "\n", - "\n", - "Analyse->Transform\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Communicate\n", - "\n", - "Communicate\n", - "\n", - "\n", - "\n", - "Analyse->Communicate\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Transform->Visualise\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# | echo: false\n", "import graphviz\n", diff --git a/workflow-basics.ipynb b/workflow-basics.ipynb index 27b2168..64621de 100644 --- a/workflow-basics.ipynb +++ b/workflow-basics.ipynb @@ -26,19 +26,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "23465996", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.15\n", - "44.666666666666664\n" - ] - } - ], + "outputs": [], "source": [ "print(1 / 200 * 30)\n", "print((59 + 73 + 2) / 3)" @@ -55,18 +46,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "bdd3c2c0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.0\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -84,18 +67,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "cead1264", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12\n" - ] - } - ], + "outputs": [], "source": [ "x = 3 * 4\n", "print(x)" @@ -112,18 +87,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "91a44d83", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1, 2, 3, 5, 7, 11, 13]\n" - ] - } - ], + "outputs": [], 
"source": [ "primes = [1, 2, 3, 5, 7, 11, 13]\n", "print(primes)" @@ -140,21 +107,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "1a526124", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[3, 6, 9, 15, 21, 33, 39]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[element * 3 for element in primes]" ] @@ -198,21 +154,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c061f9cc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[2, 4, 6, 10, 14, 22, 26]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# define primes\n", "primes = [1, 2, 3, 5, 7, 11, 13]\n", @@ -248,21 +193,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "e77540e0", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 2, 3, 5, 7, 11, 13]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "primes" ] @@ -278,21 +212,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "9dd0fd49", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type(primes)" ] @@ -350,21 +273,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "a5998cb5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 2, 3, 5, 7, 11, 13]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "primes" ] @@ -379,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "ae55d78e", "metadata": {}, "outputs": 
[], @@ -401,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "038c7d52", "metadata": {}, "outputs": [], @@ -457,21 +369,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "97af119d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "42" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sum(primes)" ] @@ -487,21 +388,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "14ce9b99", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "52" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sum(primes, start=10)" ] @@ -517,26 +407,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "f5e45616", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Help on built-in function sum in module builtins:\n", - "\n", - "sum(iterable, /, start=0)\n", - " Return the sum of a 'start' value (default: 0) plus an iterable of numbers\n", - "\n", - " When the iterable is empty, return the start value.\n", - " This function is intended specifically for use with numeric values and may\n", - " reject non-numeric types.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "help(sum)" ] diff --git a/workflow-style.ipynb b/workflow-style.ipynb index 7a1a2a4..3de114b 100644 --- a/workflow-style.ipynb +++ b/workflow-style.ipynb @@ -148,40 +148,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f0f5bb37", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 4)
col3col4col1col2
strstru32f64
"a""alpha"10.0
"b""gamma"20.0
"a""gamma"10.0
" - ], - "text/plain": [ - "shape: (3, 4)\n", - "┌──────┬───────┬──────┬──────┐\n", - "│ col3 ┆ col4 ┆ col1 ┆ col2 │\n", - "│ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ u32 ┆ f64 │\n", - "╞══════╪═══════╪══════╪══════╡\n", - "│ a ┆ alpha ┆ 1 ┆ 0.0 │\n", - "│ b ┆ gamma ┆ 2 ┆ 0.0 │\n", - "│ a ┆ gamma ┆ 1 ┆ 0.0 │\n", - "└──────┴───────┴──────┴──────┘" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import polars as pl\n", "\n",