From e68b920fb7b2282f590f10295ba16c06a7c41b9e Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Fri, 18 Dec 2020 19:08:59 +0300 Subject: [PATCH 01/19] Add csv info and example. --- source/week-4/csv-files-jupyter/README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/source/week-4/csv-files-jupyter/README.md b/source/week-4/csv-files-jupyter/README.md index 7ec63e7..56c22d1 100644 --- a/source/week-4/csv-files-jupyter/README.md +++ b/source/week-4/csv-files-jupyter/README.md @@ -1,6 +1,27 @@ # CSV Files and Jupyter Notebooks -CSV files are comma separated variable file. CSV files are frequently used to store data. In order to access the data in a CSV file from a Jupyter Notebook you must upload the file. +The so-called **CSV** (Comma Separated Values) format is the most common import and export format for spreadsheets and databases. The CSV format was used for many years prior to attempts to describe the format in a standardized way. + +Python has an in-built csv module which implements classes to read and write tabular data in CSV format. + +```python +# format example +>>> import csv +>>> with open('./airports.csv') as file: +... data = csv.reader(file) +... for row in data: +... print(*row) # * is used to unpack lists +Name City Country +Seattle-Tacoma Seattle USA +Dulles Washington USA +Heathrow London United Kingdom +Schiphol Amsterdam Netherlands +Changi Singapore Singapore +Pearson Toronto Canada +Narita Tokyo Japan +``` + +This module has a lot more features; check out [more details](https://docs.python.org/3/library/csv.html). 
## Microsoft Learn Resources From 69d0855577e42695fbc75552558d156d08387fa0 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Fri, 18 Dec 2020 19:25:59 +0300 Subject: [PATCH 02/19] Markdowon to comments, uppercases --- .../03 - Pandas Series and DataFrame.ipynb | 248 ++++-------------- source/week-4/intro-to-pandas/README.md | 4 +- 2 files changed, 53 insertions(+), 199 deletions(-) diff --git a/source/week-4/intro-to-pandas/03 - Pandas Series and DataFrame.ipynb b/source/week-4/intro-to-pandas/03 - Pandas Series and DataFrame.ipynb index e1802f1..0c84909 100644 --- a/source/week-4/intro-to-pandas/03 - Pandas Series and DataFrame.ipynb +++ b/source/week-4/intro-to-pandas/03 - Pandas Series and DataFrame.ipynb @@ -1,35 +1,53 @@ { "cells": [ { - "cell_type": "markdown", - "metadata": {}, "source": [ - "# pandas Series and DataFrame" - ] + "# Pandas Series and DataFrame" + ], + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## pandas\n", - "**pandas** is an open source library providing data structures and data analysis tools for Python programmers" + "**Pandas** is an open source library providing data structures and data analysis tools for Python programmers. 
\n", + "The pandas **Series** is a one dimensional array, similar to a Python list" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: pandas in /home/tim/.local/lib/python3.8/site-packages (1.1.4)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/lib/python3/dist-packages (from pandas) (2019.3)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /home/tim/.local/lib/python3.8/site-packages (from pandas) (2.8.1)\n", + "Requirement already satisfied: numpy>=1.15.4 in /home/tim/.local/lib/python3.8/site-packages (from pandas) (1.19.4)\n", + "Requirement already satisfied: six>=1.5 in /home/tim/.local/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n", + "\u001b[33mWARNING: You are using pip version 20.3.2; however, version 20.3.3 is available.\n", + "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], "source": [ - "import pandas as pd" + "# install pandas\n", + "! 
pip install pandas" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 2, "metadata": {}, + "outputs": [], "source": [ - "## Series\n", - "The pandas **Series** is a one dimensional array, similar to a Python list" + "# load pandas into notebook\n", + "import pandas as pd" ] }, { @@ -38,6 +56,7 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "0 Seattle-Tacoma\n", @@ -50,9 +69,8 @@ "dtype: object" ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -72,60 +90,42 @@ "airports" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can reference an individual value in a Series using it's index" - ] - }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'London Heathrow'" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ + "# You can reference an individual value in a Series using it's index\n", "airports[2]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use a loop to iterate through all the values in a Series" - ] - }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Seattle-Tacoma\n", - "Dulles\n", - "London Heathrow\n", - "Schiphol\n", - "Changi\n", - "Pearson\n", - "Narita\n" + "Seattle-Tacoma\nDulles\nLondon Heathrow\nSchiphol\nChangi\nPearson\nNarita\n" ] } ], "source": [ + "# You can use a loop to iterate through all the values in a Series\n", "for value in airports:\n", " print(value) " ] @@ -135,9 +135,9 @@ "metadata": {}, "source": [ "## DataFrame\n", - "Most of the time when we are working with pandas we are dealing with two-dimensional arrays\n", "\n", - "The pandas **DataFrame** can store two dimensional 
arrays" + "Most of the time when we are working with pandas we are dealing with two-dimensional arrays. \n", + "The pandas **DataFrame** can store two dimensional arrays. " ] }, { @@ -146,78 +146,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
012
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " 0 1 2\n", "0 Seatte-Tacoma Seattle USA\n", @@ -227,11 +157,11 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
012
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -248,91 +178,14 @@ "airports" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use the **columns** parameter to specify names for the columns when you create the DataFrame" - ] - }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", @@ -342,14 +195,15 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], "source": [ + "# Use the **columns** parameter to specify names for the columns when you create the DataFrame\n", "airports = pd.DataFrame([\n", " ['Seatte-Tacoma', 'Seattle', 'USA'],\n", " ['Dulles', 'Washington', 'USA'],\n", @@ -382,9 +236,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-4/intro-to-pandas/README.md b/source/week-4/intro-to-pandas/README.md index cee037f..2184315 100644 --- a/source/week-4/intro-to-pandas/README.md +++ b/source/week-4/intro-to-pandas/README.md @@ -1,6 +1,6 @@ -# pandas +# Pandas -[pandas](https://pandas/pydata.org​) is an open source Python library contains a number of high performance data structures and tools for data analysis. +[Pandas](https://pandas/pydata.org​) is an open source Python library contains a number of high performance data structures and tools for data analysis. ## Documentation From 24a011774637767cb583324fcd7987e9edd77a4f Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Fri, 18 Dec 2020 19:29:22 +0300 Subject: [PATCH 03/19] Embed notebook screenshot. --- source/week-4/jupyter-notebooks/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/week-4/jupyter-notebooks/README.md b/source/week-4/jupyter-notebooks/README.md index 363489c..3581879 100644 --- a/source/week-4/jupyter-notebooks/README.md +++ b/source/week-4/jupyter-notebooks/README.md @@ -2,6 +2,8 @@ Jupyter Notebooks are an open source web application that allows you to create and share Python code. They are frequently used for data science. The code samples in this course are completed using Jupyter Notebooks which have a .ipynb file extension. 
+![Jupyter Notebook](https://miro.medium.com/max/2544/1*ezJx8ZEu1Va14iscq_h5Gg.png) + ## Documentation - [Jupyter](https://jupyter.org/) to install Jupyter so you can run Jupyter Notebooks locally on your computer From 1acb432e54b531326afebf38acc8146e70095427 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Fri, 18 Dec 2020 21:59:44 +0300 Subject: [PATCH 04/19] Add the describe function --- source/week-4/panda-dataframe-content/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/week-4/panda-dataframe-content/README.md b/source/week-4/panda-dataframe-content/README.md index aa35bb3..2662b3c 100644 --- a/source/week-4/panda-dataframe-content/README.md +++ b/source/week-4/panda-dataframe-content/README.md @@ -5,6 +5,7 @@ The pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/ap ## Common functions - [head](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html) returns the first *n* rows from the DataFrame +- [info](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html) provides a summary of the DataFrame content including column names, their datatypes, and number of rows containing non-null values +- [describe](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html) generates descriptive statistics, including those that summarize the central tendency, dispersion and shape of a dataset’s distribution, excluding *NaN* values - [tail](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tail.html) returns the last *n* rows from the DataFrame - [shape](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shape.html) returns the dimensions of the DataFrame (e.g. 
number of rows and columns) -- [info](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html) provides a summary of the DataFrame content including column names, their datatypes, and number of rows containing non-null values From c538e699054d85c55a40cee8d66d9cf6c2e9e57f Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Sat, 19 Dec 2020 20:26:51 +0300 Subject: [PATCH 05/19] Add the describe function, edit markdown. --- ... Exploring pandas DataFrame contents.ipynb | 288 +++++------------- 1 file changed, 82 insertions(+), 206 deletions(-) diff --git a/source/week-4/panda-dataframe-content/04 - Exploring pandas DataFrame contents.ipynb b/source/week-4/panda-dataframe-content/04 - Exploring pandas DataFrame contents.ipynb index 0f50838..7900e35 100644 --- a/source/week-4/panda-dataframe-content/04 - Exploring pandas DataFrame contents.ipynb +++ b/source/week-4/panda-dataframe-content/04 - Exploring pandas DataFrame contents.ipynb @@ -5,8 +5,8 @@ "metadata": {}, "source": [ "# Examining pandas DataFrame contents\n", - "It's useful to be able to quickly examine the contents of a DataFrame. \n", "\n", + "It's useful to be able to quickly examine the contents of a DataFrame. \n", "Let's start by importing the pandas library and creating a DataFrame populated with information about airports" ] }, @@ -25,78 +25,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", @@ -106,11 +36,11 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ @@ -134,9 +64,10 @@ "metadata": {}, "source": [ "## Returning first *n* rows\n", - "If you have thousands of rows, you might just want to look at the first few rows\n", "\n", - "* **head**(*n*) returns the top *n* rows " + "If you have thousands of rows, you might just want to look at the first few rows\n", + "- **head**(*n*) returns the top *n* rows\n", + "- by default *i* is 5" ] }, { @@ -145,68 +76,24 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", "1 Dulles Washington USA\n", - "2 Heathrow London United Kingdom" - ] + "2 Heathrow London United Kingdom\n", + "3 Schiphol Amsterdam Netherlands\n", + "4 Changi Singapore Singapore" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
\n
" }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ - "airports.head(3)" + "airports.head()" ] }, { @@ -214,8 +101,10 @@ "metadata": {}, "source": [ "## Returning last *n* rows\n", + "\n", "Looking at the last rows in a DataFrame can be a good way to check that all your data loaded correctly\n", - "* **tail**(*n*) returns the last *n* rows" + "- **tail**(*n*) returns the last *n* rows\n", + "- by default i is 5" ] }, { @@ -224,68 +113,24 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ - " Name City Country\n", - "4 Changi Singapore Singapore\n", - "5 Pearson Toronto Canada\n", - "6 Narita Tokyo Japan" - ] + " Name City Country\n", + "2 Heathrow London United Kingdom\n", + "3 Schiphol Amsterdam Netherlands\n", + "4 Changi Singapore Singapore\n", + "5 Pearson Toronto Canada\n", + "6 Narita Tokyo Japan" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ - "airports.tail(3)" + "airports.tail()" ] }, { @@ -293,9 +138,9 @@ "metadata": {}, "source": [ "## Checkign number of rows and columns in DataFrame\n", - "Sometimes you just need to know how much data you have in the DataFrame\n", "\n", - "* **shape** returns the number of rows and columns" + "Sometimes you just need to know how much data you have in the DataFrame\n", + "- **shape** returns the number of rows and columns" ] }, { @@ -304,14 +149,14 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(7, 3)" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], "source": [ @@ -322,14 +167,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Getting mroe detailed information about DataFrame contents\n", - "\n", - "* **info**() returns more detailed information about the DataFrame\n", + "## Getting detailed information about DataFrame contents\n", "\n", + "**DataFrame.info**() returns more detailed information about the DataFrame \n", "Information returned includes:\n", - "* The number of rows, and the range of index values\n", - "* The number of columns\n", - "* For each column: column name, number of non-null values, the datatype\n" + "- The number of rows, and the range of index values\n", + "- The number of columns\n", + "- For each column: column name, number of non-null values, the datatype\n" ] }, { @@ -338,23 +182,55 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "RangeIndex: 7 entries, 0 to 6\n", - "Data columns (total 3 columns):\n", - "Name 7 non-null object\n", - "City 7 non-null object\n", - "Country 7 non-null object\n", - "dtypes: object(3)\n", - "memory usage: 148.0+ bytes\n" + "\nRangeIndex: 7 entries, 0 to 6\nData columns (total 3 columns):\n # Column 
Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Name 7 non-null object\n 1 City 7 non-null object\n 2 Country 7 non-null object\ndtypes: object(3)\nmemory usage: 296.0+ bytes\n" ] } ], "source": [ "airports.info()" ] + }, + { + "source": [ + "**DataFrame.describe()** returns statistical analyses about the DataFrame \n", + "Information returned might include:\n", + "- Count number of non-NA/null observations.\n", + "- Mean and Standard Deviation\n", + "- Minimum and Maximum values by column\n", + "- Percentiles (25%, 50%, 75%)\n", + "\n", + "and many other values according to the DataFrame.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Name City Country\n", + "count 7 7 7\n", + "unique 7 7 6\n", + "top Changi Amsterdam USA\n", + "freq 1 1 2" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
count777
unique776
topChangiAmsterdamUSA
freq112
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "airports.describe()" + ] } ], "metadata": { @@ -373,9 +249,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From fb1c91929afb248736277d36aa07dcc15903f670 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Sat, 19 Dec 2020 21:00:52 +0300 Subject: [PATCH 06/19] Markdown updates --- .../05 - Querying DataFrames.ipynb | 565 ++---------------- .../week-4/panda-dataframe-querry/README.md | 4 +- 2 files changed, 66 insertions(+), 503 deletions(-) diff --git a/source/week-4/panda-dataframe-querry/05 - Querying DataFrames.ipynb b/source/week-4/panda-dataframe-querry/05 - Querying DataFrames.ipynb index 95e8021..0124d51 100644 --- a/source/week-4/panda-dataframe-querry/05 - Querying DataFrames.ipynb +++ b/source/week-4/panda-dataframe-querry/05 - Querying DataFrames.ipynb @@ -1,15 +1,14 @@ { "cells": [ { - "cell_type": "markdown", - "metadata": {}, "source": [ "# Query a pandas DataFrame \n", "\n", - "Returning a portion of the data in a DataFrame is called slicing or dicing the data\n", - "\n", + "Returning a portion of the data in a DataFrame is called *slicing* or *dicing* the data. \n", "There are many different ways to query a pandas DataFrame, here are a few to get you started" - ] + ], + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", @@ -22,82 +21,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", @@ -107,11 +36,11 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ @@ -134,16 +63,18 @@ "metadata": {}, "source": [ "## Return one column\n", + "\n", "Specify the name of the column you want to return\n", - "* *DataFrameName*['*columnName*']\n" + "- `DataFrame['columnName']`\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "0 Seattle\n", @@ -156,9 +87,8 @@ "Name: City, dtype: object" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -170,80 +100,19 @@ "metadata": {}, "source": [ "## Return multiple columns\n", + "\n", "Provide a list of the columns you want to return\n", - "* *DataFrameName*[['*FirstColumnName*','*SecondColumnName*',...]]" + "- `DataFrame[['FirstColumn','SecondColumn']]`" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n", - "
" - ], "text/plain": [ " Name Country\n", "0 Seatte-Tacoma USA\n", @@ -253,11 +122,11 @@ "4 Changi Singapore\n", "5 Pearson Canada\n", "6 Narita Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n
" }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ @@ -268,33 +137,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Using *iloc* to specify rows and columns to return\n", - "**iloc**[*rows*,*columns*] allows you to access a group of rows or columns by row and column index positions." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You specify the specific row and column you want returned\n", - "* First row is row 0\n", - "* First column is column 0" + "## Using *iloc*\n", + "\n", + "`iloc[row, column]` allows you to access a group of rows or columns by row and column index positions. \n", + "You specify the specific row and column you want returned:\n", + "- First row is row 0\n", + "- First column is column 0" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'Seatte-Tacoma'" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], "source": [ @@ -304,18 +168,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'United Kingdom'" ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -323,91 +187,14 @@ "airports.iloc[2,2]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A value of *:* returns all rows or all columns" - ] - }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", @@ -417,14 +204,15 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
2London HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], "source": [ + "# Using : returns all rows or all columns\n", "airports.iloc[:,:]" ] }, @@ -433,66 +221,26 @@ "metadata": {}, "source": [ "You can request a range of rows or a range of columns\n", - "* [x:y] will return rows or columns x through y" + "- `[x:y]` will return rows or columns x through y" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seatte-Tacoma Seattle USA\n", "1 Dulles Washington USA" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seatte-TacomaSeattleUSA
1DullesWashingtonUSA
\n
" }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ @@ -502,74 +250,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCity
0Seatte-TacomaSeattle
1DullesWashington
2London HeathrowLondon
3SchipholAmsterdam
4ChangiSingapore
5PearsonToronto
6NaritaTokyo
\n", - "
" - ], "text/plain": [ " Name City\n", "0 Seatte-Tacoma Seattle\n", @@ -579,11 +265,11 @@ "4 Changi Singapore\n", "5 Pearson Toronto\n", "6 Narita Tokyo" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCity
0Seatte-TacomaSeattle
1DullesWashington
2London HeathrowLondon
3SchipholAmsterdam
4ChangiSingapore
5PearsonToronto
6NaritaTokyo
\n
" }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "execution_count": 9 } ], "source": [ @@ -596,79 +282,17 @@ "metadata": {}, "source": [ "You can request a list of rows or a list of columns\n", - "* [x,y,z] will return rows or columns x,y, and z" + "- `[x,y,z]` will return rows or columns x, y & z" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n", - "
" - ], "text/plain": [ " Name Country\n", "0 Seatte-Tacoma USA\n", @@ -678,11 +302,11 @@ "4 Changi Singapore\n", "5 Pearson Canada\n", "6 Narita Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n
" }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "execution_count": 10 } ], "source": [ @@ -694,79 +318,18 @@ "metadata": {}, "source": [ "## Using *loc* to specify columns by name\n", + "\n", "If you want to list the column names instead of the column positions use **loc** instead of **iloc**" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n", - "
" - ], "text/plain": [ " Name Country\n", "0 Seatte-Tacoma USA\n", @@ -776,11 +339,11 @@ "4 Changi Singapore\n", "5 Pearson Canada\n", "6 Narita Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCountry
0Seatte-TacomaUSA
1DullesUSA
2London HeathrowUnited Kingdom
3SchipholNetherlands
4ChangiSingapore
5PearsonCanada
6NaritaJapan
\n
" }, - "execution_count": 13, "metadata": {}, - "output_type": "execute_result" + "execution_count": 11 } ], "source": [ @@ -804,9 +367,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-4/panda-dataframe-querry/README.md b/source/week-4/panda-dataframe-querry/README.md index ea22708..2f12810 100644 --- a/source/week-4/panda-dataframe-querry/README.md +++ b/source/week-4/panda-dataframe-querry/README.md @@ -1,11 +1,11 @@ # Query a pandas DataFrame -The pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) is a structure for storing two-dimensional tabular data. +The Pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) is a structure for storing two-dimensional tabular data. ## Common properties - [loc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html) returns specific rows and columns by specifying column names -- [iloc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html) returns specific rows and columns by specifying column positions +- [iloc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html) returns specific rows and columns by specifying column positions(index) ## Microsoft Learn Resources From 45ed9799e053df995c294d8a6215be8a3facb508 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Sun, 20 Dec 2020 18:50:42 +0300 Subject: [PATCH 07/19] Mark-up updates in notebook. 
--- .../07 - Read write CSV files.ipynb | 658 +++--------------- .../read-write-csv-pandas/MyNewCSVFile.csv | 8 + .../MyNewCSVFileNoIndex.csv | 8 + 3 files changed, 97 insertions(+), 577 deletions(-) create mode 100644 source/week-4/read-write-csv-pandas/MyNewCSVFile.csv create mode 100644 source/week-4/read-write-csv-pandas/MyNewCSVFileNoIndex.csv diff --git a/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb b/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb index 9e164e3..318f96b 100644 --- a/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb +++ b/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb @@ -23,18 +23,17 @@ "metadata": {}, "source": [ "## Reading a CSV file into a pandas DataFrame\n", - "**read_csv** allows you to read the contents of a csv file into a DataFrame\n", "\n", - "airports.csv contains the following: \n", + "`read_csv` allows you to read the contents of a csv file into a DataFrame.\n", "\n", - "Name,City,Country \n", - "Seattle-Tacoma,Seattle,USA \n", - "Dulles,Washington,USA \n", - "Heathrow,London,United Kingdom \n", - "Schiphol,Amsterdam,Netherlands \n", - "Changi,Singapore,Singapore \n", - "Pearson,Toronto,Canada \n", - "Narita,Tokyo,Japan" + "*airports.csv* contains the following:\n", + "\n", + ">Washington,USA \n", + ">Heathrow,London,United Kingdom \n", + ">Schiphol,Amsterdam,Netherlands \n", + ">Changi,Singapore,Singapore \n", + ">Pearson,Toronto,Canada \n", + ">Narita,Tokyo,Japan" ] }, { @@ -43,97 +42,25 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", "1 Dulles Washington USA\n", "2 Heathrow London United Kingdom\n", "3 Schiphol Amsterdam Netherlands\n", - "4 Changi Singapore Singapore\n", - "5 Pearson Toronto Canada\n", - "6 Narita Tokyo Japan" - ] + "4 Changi Singapore Singapore" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
\n
" }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ - "airports_df = pd.read_csv('Data/airports.csv')\n", - "airports_df" + "airports_df = pd.read_csv('./airports.csv')\n", + "airports_df.head()" ] }, { @@ -141,10 +68,10 @@ "metadata": {}, "source": [ "## Handling rows with errors\n", - "By default rows with an extra , or other issues cause an error\n", - "\n", - "Note the extra , in the row for Heathrow London in airportsInvalidRows.csv: \n", "\n", + "By default rows with an extra , or other issues cause an error. \n", + "Note the extra , in the row for Heathrow London in `airportsInvalidRows.csv`: \n", + ">\n", "Name,City,Country \n", "Seattle-Tacoma,Seattle,USA \n", "Dulles,Washington,USA \n", @@ -161,23 +88,21 @@ "metadata": {}, "outputs": [ { - "ename": "ParserError", - "evalue": "Error tokenizing data. C error: Expected 3 fields in line 4, saw 4\n", "output_type": "error", + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'Data/airportsInvalidRows.csv'", "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mParserError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mairports_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Data/airportsInvalidRows.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mairports_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, 
true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m 683\u001b[0m )\n\u001b[0;32m 684\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 685\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 686\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 687\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 461\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 462\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 463\u001b[1;33m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 464\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 465\u001b[0m 
\u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, nrows)\u001b[0m\n\u001b[0;32m 1152\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1153\u001b[0m \u001b[0mnrows\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_validate_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"nrows\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1154\u001b[1;33m \u001b[0mret\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1155\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1156\u001b[0m \u001b[1;31m# May alter columns / col_dict\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, nrows)\u001b[0m\n\u001b[0;32m 2046\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2047\u001b[0m 
\u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2048\u001b[1;33m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2049\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2050\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_low_memory\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[1;34m()\u001b[0m\n", - "\u001b[1;31mParserError\u001b[0m: Error tokenizing data. 
C error: Expected 3 fields in line 4, saw 4\n" + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mairports_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Data/airportsInvalidRows.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mairports_df\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 686\u001b[0m )\n\u001b[1;32m 687\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 688\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 689\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 690\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/parsers.py\u001b[0m in 
\u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 454\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 455\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 946\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 947\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 948\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 949\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1178\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"c\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"c\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1180\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1181\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"python\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 2008\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"usecols\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2009\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2010\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2011\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2012\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Data/airportsInvalidRows.csv'" ] } ], @@ -190,7 +115,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Specify **error_bad_lines=False** to skip any rows with errors" + "Specify `error_bad_lines=False` to skip any rows with errors" ] }, { @@ -199,79 +124,15 @@ "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "b'Skipping line 4: expected 3 fields, saw 4\\n'\n" ] }, { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2SchipholAmsterdamNetherlands
3ChangiSingaporeSingapore
4PearsonTorontoCanada
5NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -280,30 +141,25 @@ "3 Changi Singapore Singapore\n", "4 Pearson Toronto Canada\n", "5 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2SchipholAmsterdamNetherlands
3ChangiSingaporeSingapore
4PearsonTorontoCanada
5NaritaTokyoJapan
\n
" }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ - "airports_df = pd.read_csv(\n", - " 'Data/airportsInvalidRows.csv', \n", - " error_bad_lines=False\n", - " )\n", + "airports_df = pd.read_csv('./airportsInvalidRows.csv', error_bad_lines=False)\n", "airports_df" ] }, { - "cell_type": "markdown", - "metadata": {}, "source": [ "## Handling files which do not contain column headers\n", - "If your file does not have the column headers in the first row by default, the first row of data is treated as headers\n", - "\n", - "airportsNoHeaderRows.csv contains airport data but does not have a row specifying the column headers:\n", "\n", + "If your file does not have the column headers in the first row by default, the first row of data is treated as headers. \n", + "`airportsNoHeaderRows.csv` contains airport data but does not have a row specifying the column headers:\n", + ">\n", "Seattle-Tacoma,Seattle,USA \n", "Dulles,Washington,USA \n", "Heathrow,London,United Kingdom \n", @@ -311,7 +167,9 @@ "Changi,Singapore,Singapore \n", "Pearson,Toronto,Canada \n", "Narita,Tokyo,Japan " - ] + ], + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", @@ -319,72 +177,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Seattle-TacomaSeattleUSA
0DullesWashingtonUSA
1HeathrowLondonUnited Kingdom
2SchipholAmsterdamNetherlands
3ChangiSingaporeSingapore
4PearsonTorontoCanada
5NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Seattle-Tacoma Seattle USA\n", "0 Dulles Washington USA\n", @@ -393,15 +187,15 @@ "3 Changi Singapore Singapore\n", "4 Pearson Toronto Canada\n", "5 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Seattle-TacomaSeattleUSA
0DullesWashingtonUSA
1HeathrowLondonUnited Kingdom
2SchipholAmsterdamNetherlands
3ChangiSingaporeSingapore
4PearsonTorontoCanada
5NaritaTokyoJapan
\n
" }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], "source": [ - "airports_df = pd.read_csv('Data/airportsNoHeaderRows.csv')\n", + "airports_df = pd.read_csv('./airportsNoHeaderRows.csv')\n", "airports_df" ] }, @@ -409,7 +203,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Specify **header=None** if you do not have a Header row to avoid having the first row of data treated as a header row" + "Specify `header=None` if you do not have a Header row to avoid having the first row of data treated as a header row" ] }, { @@ -418,78 +212,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
012
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " 0 1 2\n", "0 Seattle-Tacoma Seattle USA\n", @@ -499,18 +223,15 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
012
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ - "airports_df = pd.read_csv(\n", - " 'Data/airportsNoHeaderRows.csv', \n", - " header=None\n", - " )\n", + "airports_df = pd.read_csv('./airportsNoHeaderRows.csv', header=None)\n", "airports_df" ] }, @@ -527,78 +248,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -608,19 +259,15 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholAmsterdamNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], "source": [ - "airports_df = pd.read_csv(\n", - " 'Data/airportsNoHeaderRows.csv', \n", - " header=None, \n", - " names=['Name', 'City', 'Country']\n", - " )\n", + "airports_df = pd.read_csv('./airportsNoHeaderRows.csv', names=['Name', 'City', 'Country'])\n", "airports_df" ] }, @@ -629,10 +276,10 @@ "metadata": {}, "source": [ "## Missing values in Data files\n", - "Missing values appear in DataFrames as **NaN**\n", - "\n", - "There is no city listed for Schiphol airport in airportsBlankValues.csv :\n", "\n", + "Missing values appear in DataFrames as `NaN` \n", + "There is no city listed for Schiphol airport in `airportsBlankValues.csv` :\n", + ">\n", "Name,City,Country \n", "Seattle-Tacoma,Seattle,USA \n", "Dulles,Washington,USA \n", @@ -649,78 +296,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholNaNNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -730,15 +307,15 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholNaNNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ - "airports_df = pd.read_csv('Data/airportsBlankValues.csv')\n", + "airports_df = pd.read_csv('./airportsBlankValues.csv')\n", "airports_df" ] }, @@ -747,7 +324,8 @@ "metadata": {}, "source": [ "## Writing DataFrame contents to a CSV file\n", - "**to_csv** will write the contents of a pandas DataFrame to a CSV file" + "\n", + "`to_csv` will write the contents of a pandas DataFrame to a CSV file." ] }, { @@ -756,78 +334,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholNaNNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -837,11 +345,11 @@ "4 Changi Singapore Singapore\n", "5 Pearson Toronto Canada\n", "6 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2HeathrowLondonUnited Kingdom
3SchipholNaNNetherlands
4ChangiSingaporeSingapore
5PearsonTorontoCanada
6NaritaTokyoJapan
\n
" }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "execution_count": 9 } ], "source": [ @@ -854,16 +362,15 @@ "metadata": {}, "outputs": [], "source": [ - "airports_df.to_csv('Data/MyNewCSVFile.csv')" + "airports_df.to_csv('./MyNewCSVFile.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The index column is written to the csv file\n", - "\n", - "Specify **index=False** if you do not want the index column to be included in the csv file" + "The index column is written to the csv file. \n", + "Specify `index=False` if you do not want the index column to be included in the csv file." ] }, { @@ -872,10 +379,7 @@ "metadata": {}, "outputs": [], "source": [ - "airports_df.to_csv(\n", - " 'Data/MyNewCSVFileNoIndex.csv', \n", - " index=False\n", - " )" + "airports_df.to_csv('./MyNewCSVFileNoIndex.csv', index=False)" ] } ], @@ -895,9 +399,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-4/read-write-csv-pandas/MyNewCSVFile.csv b/source/week-4/read-write-csv-pandas/MyNewCSVFile.csv new file mode 100644 index 0000000..fb9d05b --- /dev/null +++ b/source/week-4/read-write-csv-pandas/MyNewCSVFile.csv @@ -0,0 +1,8 @@ +,Name,City,Country +0,Seattle-Tacoma,Seattle,USA +1,Dulles,Washington,USA +2,Heathrow,London,United Kingdom +3,Schiphol,,Netherlands +4,Changi,Singapore,Singapore +5,Pearson,Toronto,Canada +6,Narita,Tokyo,Japan diff --git a/source/week-4/read-write-csv-pandas/MyNewCSVFileNoIndex.csv b/source/week-4/read-write-csv-pandas/MyNewCSVFileNoIndex.csv new file mode 100644 index 0000000..19ff4c4 --- /dev/null +++ b/source/week-4/read-write-csv-pandas/MyNewCSVFileNoIndex.csv @@ -0,0 +1,8 @@ +Name,City,Country +Seattle-Tacoma,Seattle,USA +Dulles,Washington,USA +Heathrow,London,United Kingdom +Schiphol,,Netherlands +Changi,Singapore,Singapore 
+Pearson,Toronto,Canada +Narita,Tokyo,Japan From f849c2922470a460765421fc23cae5522dde2d1e Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Sun, 20 Dec 2020 18:57:51 +0300 Subject: [PATCH 08/19] Fix indentation in notebook. --- .../07 - Read write CSV files.ipynb | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb b/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb index 318f96b..99c81c6 100644 --- a/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb +++ b/source/week-4/read-write-csv-pandas/07 - Read write CSV files.ipynb @@ -71,15 +71,14 @@ "\n", "By default rows with an extra , or other issues cause an error. \n", "Note the extra , in the row for Heathrow London in `airportsInvalidRows.csv`: \n", - ">\n", - "Name,City,Country \n", - "Seattle-Tacoma,Seattle,USA \n", - "Dulles,Washington,USA \n", - "Heathrow,London,,United Kingdom \n", - "Schiphol,Amsterdam,Netherlands \n", - "Changi,Singapore,Singapore \n", - "Pearson,Toronto,Canada \n", - "Narita,Tokyo,Japan " + ">Name,City,Country \n", + ">Seattle-Tacoma,Seattle,USA \n", + ">Dulles,Washington,USA \n", + ">Heathrow,London,,United Kingdom \n", + ">Schiphol,Amsterdam,Netherlands \n", + ">Changi,Singapore,Singapore \n", + ">Pearson,Toronto,Canada \n", + ">Narita,Tokyo,Japan " ] }, { @@ -159,14 +158,13 @@ "\n", "If your file does not have the column headers in the first row by default, the first row of data is treated as headers. 
\n", "`airportsNoHeaderRows.csv` contains airport data but does not have a row specifying the column headers:\n", - ">\n", - "Seattle-Tacoma,Seattle,USA \n", - "Dulles,Washington,USA \n", - "Heathrow,London,United Kingdom \n", - "Schiphol,Amsterdam,Netherlands \n", - "Changi,Singapore,Singapore \n", - "Pearson,Toronto,Canada \n", - "Narita,Tokyo,Japan " + ">Seattle-Tacoma,Seattle,USA \n", + ">Dulles,Washington,USA \n", + ">Heathrow,London,United Kingdom \n", + ">Schiphol,Amsterdam,Netherlands \n", + ">Changi,Singapore,Singapore \n", + ">Pearson,Toronto,Canada \n", + ">Narita,Tokyo,Japan " ], "cell_type": "markdown", "metadata": {} @@ -279,15 +277,14 @@ "\n", "Missing values appear in DataFrames as `NaN` \n", "There is no city listed for Schiphol airport in `airportsBlankValues.csv` :\n", - ">\n", - "Name,City,Country \n", - "Seattle-Tacoma,Seattle,USA \n", - "Dulles,Washington,USA \n", - "Heathrow,London,United Kingdom \n", - "Schiphol,,Netherlands \n", - "Changi,Singapore,Singapore \n", - "Pearson,Toronto,Canada \n", - "Narita,Tokyo,Japan" + ">Name,City,Country \n", + ">Seattle-Tacoma,Seattle,USA \n", + ">Dulles,Washington,USA \n", + ">Heathrow,London,United Kingdom \n", + ">Schiphol,,Netherlands \n", + ">Changi,Singapore,Singapore \n", + ">Pearson,Toronto,Canada \n", + ">Narita,Tokyo,Japan" ] }, { From 3ccb73f586f707a6583105e0244e447dfc01a9ec Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Mon, 21 Dec 2020 13:12:59 +0300 Subject: [PATCH 09/19] Slice dataframe for demo, refactor markdown. 
--- .../15 - Visualizing correlations.ipynb | 94 +++++++++++-------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/source/week-5/data-visualization-matplotlib/15 - Visualizing correlations.ipynb b/source/week-5/data-visualization-matplotlib/15 - Visualizing correlations.ipynb index b5e35ca..a9b1818 100644 --- a/source/week-5/data-visualization-matplotlib/15 - Visualizing correlations.ipynb +++ b/source/week-5/data-visualization-matplotlib/15 - Visualizing correlations.ipynb @@ -2,29 +2,17 @@ "cells": [ { "cell_type": "markdown", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ - "# Visualizing data with matplotlib" - ] - }, - { - "cell_type": "markdown", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Somtimes graphs provide the best way to visualize data\n", - "\n", - "The **matplotlib** library allows you to draw graphs to help with visualization\n", + "# Visualizing data with matplotlib\n", "\n", - "If we want to visualize data, we will need to load some data into a DataFrame" + "Somtimes graphs provide the best way to visualize data. The **matplotlib** library allows you to draw graphs to help with visualization. \n", + "If we want to visualize data, we will need to load some data into a DataFrame first." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -33,26 +21,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load our data from the csv file\n", - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv') " + "delays_df = pd.read_csv('./Lots_of_flight_data.csv') " ] }, { "cell_type": "markdown", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "In order to display plots we need to import the **matplotlib** library" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -61,30 +47,50 @@ }, { "cell_type": "markdown", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ - "A common plot used in data science is the scatter plot for checking the relationship between two columns\n", - "If you see dots scattered everywhere, there is no correlation between the two columns\n", - "If you see somethign resembling a line, there is a correlation between the two columns\n", + "A common plot used in data science is the scatter plot for checking the relationship between two columns. \n", + "If you see dots scattered everywhere, there is no correlation between the two columns. \n", + "If you see somethign resembling a line, there is a correlation between the two columns. 
\n", "\n", "You can use the plot method of the DataFrame to draw the scatter plot\n", - "* kind - the type of graph to draw\n", - "* x - value to plot as x\n", - "* y - value to plot as y\n", - "* color - color to use for the graph points\n", - "* alpha - opacity - useful to show density of points in a scatter plot\n", - "* title - title of the graph" + "- `kind` - the type of graph to draw\n", + "- `x` - value to plot as x\n", + "- `y` - value to plot as y\n", + "- `color` - color to use for the graph points\n", + "- `alpha` - opacity - useful to show density of points in a scatter plot\n", + "- `title` - title of the graph" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Slice the DataFrame to reduce the visual processing time and reduce the data points for visibility\n", + "delays_df = delays_df.iloc[0:1000, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2020-12-21T12:58:36.089031\n image/svg+xml\n \n \n Matplotlib v3.3.3, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAABPrUlEQVR4nO2deZycVZX3f6eX6iXdSXfSna2zEwgkLAEiBNnDYsAoiCOLDCIuOO+I2yvzistH1HHUmXdw1MHxFZQRUREVBSZiAFkkEQJJgATCkoTsnU56T3en0t3p7vv+8Xuu9+lK1VNV3bV11fl+PvWpqme9z32e55xzzzn3XjHGQFEURVFiUZTtAiiKoii5jSoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIGoolAURVECUUWhpAQR+bCIrBnF/n8SkRtTWaYEz/tNEWkVkf1pPEePiMxLwXGMiMxPRZninCepeykiO0XkYu/3l0TkJ+krnZINVFHkESLyQRFZ7wmmJk/4npPtckUiIl8TkV/4lxljLjPG3JvhcswC8HkAC40xU9N1HmNMlTFme7qOn0sYY75ljPlYvO1E5BkRibudkhuoosgTROR/A/gegG8BmAJgFoD/AnDFCI5VksiyPGAWgDZjTPNoDySkKGJZPtaZUogYY/Qzxj8AJgDoAfCBgG3KQEWyz/t8D0CZt+4CAHsBfAHAfgD3AfgagN8B+AWALgAf887zUwBNABoBfBNAsXeMDwNY4zvf9wHs8fbdAOBcb/lyAP0Ajnhl3ugtfwbAx7zfRQC+AmAXgGYAPwcwwVs3B4ABcCOA3QBaAXw5Tt38HECLd7yveMe/GMBhAENeOX4WZd9aACu9fTu83zN8658B8C8A/uoda75Xtk8C2Apgh7ed8dad6dVvse8Y7wOwyft9BoDnAXR6dXwngJBvWwNgfozrvAnAGwC6AWwH8AnfOnt/P+/VZxOAm3zrJwF4xLtXLwL4Z/+9jHKuG7y6bAPwZQA7AVzsrfsagF94v8vB56fNu6Z1oBHzLwAGAfR6dX9n0DPjO+5vvHvZDWAzgCW+9TMB/N67V232mN66j3h10wHgMQCzs/3OjrVP1gugnxTcRArfAQAlAdt8A8BaAJMB1AN4DsA/e+su8Pb/V1ChVHgv5hEAV4KCtQLAHwD8GMA47zgvWoGEoxXF33sCqMQTUPsBlHvr/iZMfNs/A6coPgJgG4B5AKo8AXCft24OKDDv9sp0CoA+ACfEuO6fA3gYQLW37xYAH/Vd996AOpsE4P0AKr39fwvgoYgy7wawyLvOUq9sTwCYCKDC2+5vAh7A2wAu8R3jtwBu836fDmCpd6w5nnD7rG/bIEXxbgDHABAA5wMIAzgt4v5+wyvj5d76Wm/9r0EhPA7AiaAREFVRAFgICvfzvGflu96xoymKTwD4H6/+ir3rGx95v5N4Znq9shcD+DaAtd66YgAbAfyHdw3lAM7x1l0BPksneMf9CoDnsv3OjrVP1gugnxTcROB6APvjbPM2gMt9/98FYKf3+wLQyi/3rf8agGd9/6eAArnCt+w6AE97vz8cS7h46zsAnOI7dpCieBLAP/rWLQCVlhWgBsMt+xcBXBvlnMXedS30LfsEgGd81x1TUUQ53mIAHRFl/kbENgbAsijLrKL4JoB7vN/VAA4hhoUL4LMA/hDtOAmU9SEAn/Fd52H4DAmwZbHUq6MjAI73rftWrHsJ4KsAfu37P86r42iK4iOgQXJylOP87X4n8cz82bduIYDD3u+zwJbEUYYSgD/BMwy8/0Wgkoxa5/qJ/tEYRX7QBqAujk98OugusOzylllajDG9Efvs8f2eDVqjTSLSKSKdYOticrSTicitIvKGiBz0tp0AoC6Ri4lR1hJQWVn8WUphsOURSZ1X5shjNSRSCBGp
FJEfi8guEekC8CyAGhEp9m22J8qu0ZZZfgXgKhEpA3AVgJeMMbu88x0nIitFZL93vm8hwToTkctEZK2ItHv1fXnEvm3GmAHff1tn9WDd+svsr69Ipvu3NcYcAp+/aNwHunp+LSL7ROTfRKQ04BriPTOR97zce+ZnAtgVcX2W2QC+73tm28FWV0LPgEJUUeQHz4PW/pUB2+wDXxrLLG+ZxUTZx79sj3eOOmNMjfcZb4xZFLmTiJwL4P8AuBp0b9QAOAi+oLHOFa+sAwAOxNkvklbQWo48VmOC+38ebM2caYwZD7pbAHcdQPx6G77CmNdBQXwZgA+CisPyIwBvAjjWO9+XIs4VFU/pPAjg3wFM8er70UT2BS3xAVDYWmYFbN/k31ZEKkF30VEYY44YY75ujFkI4J0AVgD4kF0dcQ3xnpkg9gCYFcNQ2gO6R2t8nwpjzHMJHFfxUEWRBxhjDoIugR+KyJWeJVzqWZn/5m12P4CviEi9iNR52/8i1jGjnKMJwOMA7hCR8SJSJCLHiMj5UTavBoVPC4ASEfkqgPG+9QcAzInMEvJxP4DPichcEakCLesHYliMQWUeBH3v/yIi1SIyG8D/RuLXXQ26bDpFZCKA25M5fwC/AvAZUPH8NuJ8XQB6ROR4AP8rweOFwHhBC4ABEbkMwKWJ7OjV0e8BfM17bhaCiQKx+B2AFSJyjoiEwLhH1PsoIheKyEleC6wLVNpD3uoDYAzKEu+ZCeJFUIF9R0TGiUi5iJztrft/AL4oIou8Mk0QkQ8keFzFQxVFnmCMuQMUgl8BX7Y9AG4BfdUAfePrAWwC8CqAl7xlyfAhUCi9DvqPfwdgWpTtHgOwCgwc7wKDkH7XhhWObSLyUpT97wHdFs8C2OHt/6kky2r5FBgH2A5gDSik70lw3++BAfNWMBFg1QjLEMn9YMD5KWNMq2/5rWAroxsM1j+QyMGMMd0APg0qxQ7vGI8kUZ5bQDfUfgA/A/DfAefaDGZ1/QoUzh1gRlU0poLPSBcYmP8LeF8BZjj9nYh0iMgPEP+ZiYmn7N4DZpbt9spzjbfuD2CSxq89d95rYGtOSQLxAjyKoiiKEhVtUSiKoiiBqKJQFEVRAlFFoSiKogSiikJRFEUJZMwPWlZXV2fmzJmT7WIoiqKMKTZs2NBqjKlPZNsxryjmzJmD9evXZ7sYiqIoYwoRCeqBPwx1PSmKoiiBqKJQFEVRAlFFoSiKogSiikJRFEUJRBWFoiiKEogqCiUh+vuBzk5+K4pSWIz59Fgl/TQ2AqtWAQMDQEkJsHw50KDTvihKwaAtCiWQ/n4qiaoqYPp0fq9apS0LRSkkVFEogYTDbElUVvJ/ZSX/h8PZLZeiKJlDFYUSSGUl3U1WMYTD/G8Vh6Io+Y8qCiWQUIgxiZ4eYN8+fi9fzuWKohQGGsxW4tLQANxwA1sTlZWqJBSl0FBFoSREKKQKQlEKFXU9KYqiKIGoolAURVECUUWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIGoolAURVECUUWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQNKqKERkpog8LSKvi8hmEfmMt3yiiDwhIlu971pvuYjID0Rkm4hsEpHT0lk+RVEUJT7pblEMAPi8MWYhgKUAPikiCwHcBuBJY8yxAJ70/gPAZQCO9T43A/hRmsunKIqixCGtisIY02SMecn73Q3gDQANAK4AcK+32b0ArvR+XwHg54asBVAjItPSWUZFURQlmIzFKERkDoBTAbwAYIoxpslbtR/AFO93A4A9vt32essij3WziKwXkfUtLS3pK3Se0N8PdHbyW1EUJVlKMnESEakC8CCAzxpjukTkb+uMMUZETDLHM8bcBeAuAFiyZElS+xYajY3AqlXAwABQUgIsXw40HKV6FUVRYpP2FoWIlIJK4pfGmN97iw9Yl5L33ewt
bwQw07f7DG+ZMgL6+6kkqqqA6dP5vWqVtiwURUmOdGc9CYCfAnjDGPNd36pHANzo/b4RwMO+5R/ysp+WAjjoc1EpSRIOsyVRWcn/lZX8Hw5nt1yKoowt0u16OhvADQBeFZFXvGVfAvAdAL8RkY8C2AXgam/dowAuB7ANQBjATWkuX15TWUl3UzjM3+Ew/1vFoSiKkghpVRTGmDUAJMbqi6JsbwB8Mp1lKiRCIcYkVq1iMNvGKEKhbJdMUZSxREaC2Ur2aGgAbrjBtSpUSSiKkiyqKAqAUEgVhKIoI0fHelIURVECUUWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIGoolAURVECUUWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIGoolAURVECUUWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIGoolAURVECUUWhKIqiBKKKQlEURQkkrYpCRO4RkWYRec237Gsi0igir3ify33rvigi20TkLRF5VzrLpiiKoiRGulsUPwOwPMry/zDGLPY+jwKAiCwEcC2ARd4+/yUixWkun6IoihKHtCoKY8yzANoT3PwKAL82xvQZY3YA2AbgjLQVTlEURUmIbMUobhGRTZ5rqtZb1gBgj2+bvd6yoxCRm0VkvYisb2lpSXdZFUVRCppsKIofATgGwGIATQDuSPYAxpi7jDFLjDFL6uvrU1w8RVEUxU/GFYUx5oAxZtAYMwTgbjj3UiOAmb5NZ3jLFEVRlCyScUUhItN8f98HwGZEPQLgWhEpE5G5AI4F8GKmy6coiqIMpySdBxeR+wFcAKBORPYCuB3ABSKyGIABsBPAJwDAGLNZRH4D4HUAAwA+aYwZTGf5FEVRlPiIMSbbZRgVS5YsMevXr892MRRFUcYUIrLBGLMkkW21Z7aiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIEEKgoRWZqpgiiKkvv09wOdnfxWCod4/Sj+S0TWAfiCMaYzA+VRFCVHaWwEVq0CBgaAkhJg+XKgIepobEq+Ec/1tATAGwBeFJEbMlAeRVFykP5+KomqKmD6dH6vWqUti0IhUFEYY4aMMd8DcCWAO0WkW0S67HcmCpjv5HJTPttly/b5FUc4zJZEZSX/V1byfzic3XIpmSHuEB4i8lEAtwH4MoAfmrHelTuHyOWmfLbLlu3zK8OprOR9CIf5Oxzmf6s4lPwmXjD7OQAXATjXGHOnX0mISGm6C5fP5HJTPttly/b5laMJhaise3qAffv4vXw5lyv5T7wWxVeNMX+2f0REACwD8EEAKwBMSWPZ8ppoTfnOTi7P9ss3mrL19zurc6TXkct1U8g0NAA33DD6+6uMPQIVhVUSXprsB8FYxUQAnwRwa7oLl8/kclN+pGVLlbsol+um0AmFVEEUIvFcT98Ska0A/gXAJgCnAmgxxtxrjOnIRAHzDRugBXK3KT8SN0Mq3UX2/J2dwLZt/M6VulGUQiSe6+ljALaA05f+jzGmT0Q0mD1ColncudqUT9bNkEvuolS4v3L5fIqSaeIpimkALgFwHYDvicjTACpEpMQYM5D20uURfovbulNWraIwrqnJdumik4ybIZXuIltXNTVsnfjrKl55Mp0tpdlZSiEQrx/FoDFmlTHmRgDHAHgIwF8BNIrIrzJQvrwh3/PQU5kVM9K6ynS2lGZnKYVCwlOhGmP6ADwI4EERGQ/girSVKg8phABtqrJiRlpXmXZ/5ZK7TVHSyYhGjzXGdIEBbiVBCiUPPRSiy2g01zXSuvIrGCD9yjjT51OUbDHiObNFZI8xZmaKy5M0Y23ObA18Jo4/QyxR5aMxCkVJjGTmzE7Y9RQFzX5SRkSiyrKlJXkhnOlOYdoJTSkEAhWFiPwnoisEAVCTjgLlM2p9
Jl4HQVli8YRxpjuFaSc0Jd+J16II8umMHX9PDjAawZcvJFMHGihWlNwh3hAe98ZaJyKjcVsVHCr4kquDQsgSU5SxQrwhPNb4ft8XsfrFtJQoT9EMmeTqoFCyxBRlLBCvVTDO93tRxDpJcVnyGiv4Vq2iFW3984Uk+JKtAw0UK0puEE9RBGU2adZTkqjgS74ONFCsKNknnqKoEZH3gS6qGhG5ylsuACaktWRK3hJP+GtfE0XJLeIpir8AeK/v93t8655NS4nyGE2PjY/WkaLkHvGynm4azcFF5B5wJrxmY8yJ3rKJAB4AMAfATgBXG2M6vNnzvg/gcgBhAB82xrw0mvPnEpoeGx+tI0XJTeKO9SQiC0TkDhH5o/f5dxE5LsHj/wzA8ohltwF40hhzLIAnvf8AcBmAY73PzeAcGHlDvo8eC7ghN0Y6emoh1JGijEXipceeBeAZAD0A7gJwN4BDAJ7xpkcNxBjzLID2iMVXALD9M+4Fp1e1y39uyFowJjItscvIffI9PbaxEbjvPuCBB/jd2Jj8MfK9jhRlrBKvRfFVANcZY243xjxsjHnIGHM7OJHR7SM85xRjTJP3ez+AKd7vBgB7fNvt9ZYdhYjcLCLrRWR9S0vLCIuRWfK5X0Cq5mXI5zpSlLFMvGD2McaYZyIXGmP+IiJ3jfbkxhgzkqlVjTF3gS0cLFmyZMyk6TY0ANdcA7S1AZMmUaDmA6nsda4pxIqSe8RTFN0B6w6N8JwHRGSaMabJcy01e8sbAfiHLZ/hLcsb8jWjJ9XDbWjfCUXJLeIpipki8oMoywUx3EIJ8AiAGwF8x/t+2Lf8FhH5NYAzARz0uajGPPmc0ZPOXufap0JRsk88RfFPAevijh4rIvcDuABAnYjsBeMa3wHwGxH5KIBdAK72Nn8UTI3dBqbHjio1N9fI90EBU+kyssqhowN46qn8a4HlI6rQ85sRjx7rR0T+0xjzqSj7Xxdjl4uibGsAfDKR841FCmE01FS4jKx7rrcXWLcOOOMMYObM/GqB5Rv56lJVHCOaMzsKZ6foOHmLZvTEx++emzgRKC0F3nwTOHJE+1TkKqnKeFNyG51TIoNoRk8wfvfckSNAeTmX9fXxf761wPKBfHepKiRVLQolQUIhoKZGX6Jo+N1zpaXAwoUUQu3t2gLLVbSTZGGQqhaFzk2hjJrI7KlQCLj9dqC2VltguYrOs1IYjFhRiMgsY8xu7+/3U1SevEezQ4JR99zYQ+9Z/hNXUXjjPTUAeNYY0ywiJ4MD+Z0Lr4OcMeZn6SxkvqDZIYmhHe7GHnrP8pt4gwL+XwD3AHg/gD+KyDcBPA7gBXCUVyUGkSOpanaIoihjlXgtincDONUY0ysiteCgfScaY3amvWRjgFhupGgth3HjNDtEUZSxSTxF0WuM6QUAb3KhraokSCw3UqyhOq65Jj0d7jTmoShKuomnKOaJyCO+/3P9/40x742yT94TNG5TrLzygYHUZ4dozENRlEwQT1FcEfH/jnQVZCwR1MkoaKiOmprUjoeU64MMamtHyST6vKWPeGM9/SXWOhHJy2E7EnnYgpRBvLzyeNkhiT7sud4jVls7SibR5y29BCoKESkGR3dtALDKGPOaiKwA8CUAFQBOTX8RM0eiD1s8ZTDSvPJkHvZcHmRwLLR2lPxBn7f0E8/19FOwr8SLAH4gIvsALAFwmzHmoTSXLaMk+7DFUwbJ5pUne/5c7hGb660dJb/Q5y39xFMUSwCcbIwZEpFycI7rY4wxbekvWmYZycOWyk5GIzl/rvaIzeXWjpJ/6POWfuINCthvjBkCAC9Ndns+Kgkg+4ObjfT8uTjIoA6prmQSfd7Sj3C+oBgrRcLgjHMAB/47xvsvAIaMMaekvYRxWLJkiVm/Pu5kewmR7YBYts+fatKRhaKZLUos9NlIDhHZYIxZktC2cRTF7GiLwbjFF40xl4+siKkjlYoCyP7Dlu3z5zL5pkgVJZskoyjipcfu8h30VAAf
BPABADsAPDiaQuYqseIOmRLgOrhadDSzRVGyR7z02OMAXOd9WgE8ALZCLsxA2XIGtWSzj2a2KEr2iBfMfhPAMgArjDHnGGP+E8Bg+ouVO+ior7lBtpMNFKWQiacorgLQBOBpEblbRC5Cgc1mF82SHRhwAkvJDJrZoijZI16M4iEAD4nIOHDcp88CmCwiPwLwB2PM42kvYZbRHO3cIVf7jShKvhOvRQEAMMYcMsb8yhjzHgAzALwM4AtpLVmOoJZsMJETNKWbXOw3kgtk+j4ohUVgeuxYINXpsbHQtNWj0SB/bqD3QRkJyaTHJtSiUEZvyeabxadB/txA74OSCeKN9aSkgHy0+DRdNTfQ+6BkAm1RxGG0LYF8tfg0XTU30PugZIKstShEZCeAbrBfxoAxZomITAQ79c0BsBPA1caYjmyVMRUtgXy1+HJ5mPNCQu+Dkgmy7Xq60BjT6vt/G4AnjTHfEZHbvP9Zya5K1ZAR+ZxeG5SumsvB/1wu20jQtGEl3WRbUURyBYALvN/3AngGWVIUqWoJ5JrFl2ohGW1sqlyOyYymbLmsYHSMMCWdZFNRGACPi4gB8GNjzF0Aphhjmrz1+wFMyVbhUtkSyBWLLxMCPNmWWCaF72haibms/BQl3WQzmH2OMeY0AJcB+KSInOdfadjBI2onDxG5WUTWi8j6lpaWlBbKBq+B1Ha0y3ZHsUwF1ZMZ8qSxEbjvPuCBB/jd2JjaskTS2Ql0dQGlpfHL5idfExIUJVGy1qIwxjR6380i8gcAZwA4ICLTjDFNIjINQHOMfe8CcBfADnepKlM0qzEXWgKpIFNB9URbYpkeNryxEVi5EnjpJeDNN4GlS11Z47US8zUhQVESJSstChEZJyLV9jeASwG8BuARADd6m90I4OFMlSmW1Qgk3xLIxc51mUqjTHTIk0wOtmjvbU0NcNFFXPbUU7xHibQS/XV35AhgG7H5kJCgKImQrRbFFAB/EBFbhl8ZY1aJyDoAvxGRjwLYBeDqTBUoVVajbZX09gJDQ8CKFcDcuWkpclJkMqieSEwmVsujpITlS2ULzn9vKyuBZcuAt98G3v3uxOIMtu7uvx/YuJHLTjmFCkPjFEohkBVFYYzZDuCo+baNMW0ALsp8iVITvLaWa38/3Ru9vcCGDcDtt+eGsshkUD1eFk40xXXKKYxXpDpg7L+34TCwdi2///hHKvJEzlFfD1RXAxdcANTWsmWhM+wphYL2zPZIxSix4TCVw+uvAxUVwNSpFFArV+aOGypaUD1brjKruK65hp+NG+MHjEdSVntvOzvpcgLogqqpSTwobV1ikyczGK7zkiiFRK71o8gqo7W4KyvpburtpRDq6+OyoqLcDXxmO+3Ttjw6O+O7/kZT1oYG4H3v4/Fmz3aZT4m6F/O546SixENbFBGMJo01FKIr48gRoKmJCuP444Hy8sQFSqas+/5+oLmZrZ1cSPuMF2xPRYpqTQ0wfjzvT7RzWKLdA52XRClktEWRYubOZUxi5Uq2JMrKEhcombLu7Xm6upguetFFLtCbrbTPeMH2VCQbJBLQD7oHudJxUlEyjSqKNNDQQJ87kHjrJB39Cvy9ngFnQdvz1NYy6P7888C73kVLO9PuFH8ZgwRxqlw//nOUlFAh9PfzXIncAx0qQylEVFHEwQoyK1TiWZIjbRWkulOXvxzd3VxWXc3rOXSIQVmAHc+eegrYtYtumUy6U4Lqyrqg/AJ6JOm90YYICYWY2hp57nHj4t+DXB7vSVHShSqKAKwga2mh5X388UyTjCX8R9MqSGWw1F+O0lJg/Xq2Fi6/nOs2bABmzqRiqKwEzjmHgd5MDjESq66WLaPiSoXrJ5YiinXua64JvgfZDvwrSrbQYHYMrDApL2fwcsIECoqysthB1NH0Nk5FsNQGYf0ZRE1NwLZtwI4dwJ//zOXHHw+0t7vzrFjBFkYmLeRoddXbGz+4nmiyQVDwO9Z9GhiIfg+A3Ar8K0qm0RZFDKww
KSri96RJFK7FxU74Rwqr0bYKErGYY7k+/NYuQHdTVxf7dBQXc9uqKsYjTjsNuP76xFxp6SJaXQ0Nsb5T4X4LcuUF3aeamuH3oKWFAxbawP+yZdkP/Ctjk7HstixoRRF54/z/rTAZGuJ3VxeF2OBgbOGfimEyIoOl/jJF86vHcqV0d9MK7uykmwmgxR4OA+eey23j1Uc6iVZXK1bQ7ZQK91uQMoh3n+w98NerDfyvXZu9wL8ydhnrbsuCVRSRN+6UU9gz2H8jrTCZPt3FKPr6nFCJJliT9aMHCedorYSZM4+Of1jrubSU7pKyMgauL7mEx58yhfGIzk4KuPnz49dHvAc5FUolWl2lajyqeMdK5D5FtkrOOgt48snsBP7TSaoMhLFsMaeTTI+UnA4KUlFE3riuLuDOO4GLL6av3n8jI1Mp7UsQJFj9uf/+/5EEHSOyjM3NVGTz5nF9pCuluxtYt84de+pU4IknAGOAZ55xgfgVK44uT7IPciqto8gWVCr7KsQ7VrxU18hWSUVFdgL/6SRV93KsW8zpJB+GqS/IYHbkjSsqoqVdXMz//iC0DZ5WVTnhEK+XcCIT8sQ7RmQZa2v5bSdVinTLdHYCW7fys3kz8Ne/8noWLaICHDeOWT3RXt5YwV0bGPcHbDMxiU8qJ3kabU/7yOB2ooH/XBxqPpJU3Uud2CmYeKMOjAUKskURaSkODdFtMzjI9fFuZJCFAARb57Z53t8fbGVElvHIEbrHjhyh0PK7Upqb6Q45/XTGMV54gd/9/cBVVwHHHce+E9aFFa8+bIzjD3/ger+FmA/WUTLYzpNtbUxoiBbbiWSsWNepupeF9kwkS5AbdKy46wpSUUS7cbfcQtdOpBCORlCgNOil8QejAQrjWIFbfxlbW6nM3v9+uo9iCa2hIfaZ6O2ly6mtDfj974Gbbw5WfJH1YampOVrZ+a+9tNRtP9L+Hrn+kowkdjNW/NGp6ruTyj5A+Uo0N+hYMSiAAlUUQPQbd/LJiQmueIHSWBPyRMtMsu6daMqpoYHpmHbcqAcf5PLq6uEPVk0NWxvr1lE5jBtHF4B9GBsbgRtvDL4mf33097M1EU3Z1dSkZhKfVLwk6VY0IxH6Y8m6TlXyQCqTEEbCWDA4gOExsbFkUAAFrCiA6MHM/n5+4vm1YwVKY700tkOXdSMNDTE4+r73OVdT5DARJSVMF7VzIDz2GNfb9Ez/g3XddVy2di0fvjlz+L+oiO6nRGdys83hIAtxtJP4pOIlyYQ1NhKhP9as61QlD2RrwMSxZJX7GUsGBVDgisJPY+PRVvJ11w3PQoqW/RTtpkZ7aazw3bOHqbb2Qbn8cjfuki2HffD94zL19DA4LcIU3aqq4Q9WQwPwqU+xRfHCC3RXtbdzNNu//IWCPdEXKJ6FaGMxtbUsS1nZ0Z0Qg6y80b4kmbLGRiL0s21dj4REs/QSOU4mr3OsWeV+xppBUbCKInJk1ZUrmTE0bRr/b93KZTfd5GILiY75FI1QiG6kr3+drYOKCmDhQrYY/IHulSu5vr6e5bPjMlVUuGB7WVn0B6uqCvjc54Df/c6lxJ53HrdJ9gUKCuLadNyXXnKK89hjXVniWXmjfUkyZY2NVOiPteHI1SrPPGPNoChIRdHYSIFsX+Rzz3XCqqyM2xQXc1lnJ29mcTGwfTv9//v2UXjbgeQih8KIPL6dl7m2FnjHO4CJE3me0lIeyz7Y27YBa9a4GMSpp7pxmUIhCmOACivag9Xfz/K9//10bc2ezeV9fa5XdipdO3Y2vyJfknVPDwPoEyeyU1o0Ky+RlyRai8TfqsuUNTZSoZ9p63qk5IpVPpI4w1izyiMZSwZFwSmK/n66mLZuddZwRweF88AAhSpA672yktvv3k3BuXUrX6jqagrJlhbgv/+bN9gKu/r6o4/f0kKFYvez6bh+odfcDDz9NM9prfe1a48elwmI/mBF9uIeHAT27z/azVVTE1w3tkyRwmPlStfRzB4vFKJr
TIQKY9s29lxeu5atkFNPBerqXMDexmIA168j2nhTkddy7rlsUflHlbU96TNhjY0VoT8ScsEqH2mLZqxZ5dEYK89WwSmKzk4KmGnTaNX39bGD2q23AocPD49RnH468PDDwOOPU1ANDbG/wpEjzv1z5ZXOcl65kj13X3oJmDGD2+zcCdxzDzvAdXayhRAOM8Nq6lSe54EH3KBzp57KkV6HhrhdtHGZEulZ3dHBAQDLy492cwFHK5tYsRG77Zo1/B4/nmV68022JNrauO3mzVQYs2ezNSECvPwycOaZw/tk+OfGsC+2X3n5ryUcptJ59lme66yz2JILh3mfYimaTDJWMm5ikW2rfLQtmrFklY9lCk5RxGLaNODTnx7eL+CBByhkjz2W1vmuXRT+NTVUFnaAwJ4eftasAQ4coKAfN47H2LCBbqvOTgrVP/2JwrSvD/jnfwaee47HsONJvfUWpybt7qYQnD8/+kx1/pcimlVYVkZlZBWWdXO98YYTvOXlFNQTJgx3F3V1sdwzZvBYa9dy/8pKXstjjzGravVq/i8v5/W+9BIwa5az9tvb2VICWGelpdwGiJ65Za+lt5f/161j2QYHWQdvvknlaq3egYHgFlIqiaYQRmIJ55piybZVnooWzVixyscyBacobJ+DrVsp5AYH+d+mw1rXin2A6+vdEB6HDgEnnsjjnHceheuf/kRB+eqr3HbOHFriTz/N/Q4coPW8c6cTKDU1VBq/+AWwdy//FxUBCxbQCt+1i8dcsWJ4J71o1nhDQ3Sr0L54g4NUSraV8e//zmOXl7OVcf/9rIf164e7i44/ntu3tDBTq6ODSiwUAs44g0pl1iz+3759uFKYPp29wQG6q/74R5anp4flNCZ65hbA86xbx3Lv3ctylJW5FlxfX+ZHbo2mEOrrk7eEczVonE2rPNstGksmFHiuGQnJUHCKwvY5sMHmUAi48EKuizZaa00Nhefq1a6PxXnnUdgVFfGh3rcP2LKFgq2picc1hscQoSAuLua+fX1cJ0JlNWECrXGAgnjBAh7TxlKGhlwQ+/nnaZVfdtnR1ni0IbubmjjYoe1PMWUKBe7UqSzHa69RYZ15JsvhdxfV19O109xMhWbTYA8e5ARIP/kJ93/7bS4vL2errK2NrZING4Dbb6fStILAptEC0TO3+vvpHjvzTCrewUHOp3HDDbyGF16gQrItoUy8bLFcI+95D1s+tl9MPEs4V4LGsciWVZ7tFg2QGQWeq0ZCohScogB4g266icHX1avdJ3IYb9tzuqODgvId76CgPHCAwv3007n997/PB3twkK0B20qZNo1CJBx2iqK0lMHxqioqjHe+k0JxYIAWtzGMlTzxBM/f3g588IMU2Dt2sPzHHQccc8zwvguRViHAB3PpUl5DWRkt9ZoaJ/TfftuVJRymMunr47muvprLm5p4Pf6MpcFBrrv1VuDb36Z7budO1s/hw/zu62OWl18Q2DRaIHrmlm3FzZhBpbZoEV1z3d1UPrffzmNGWmTptNRiuUaamlifpaWudeYP1id6nLGQypluUtGiGekzkAkFnutGQiIUpKKwPP+8G88o2jDe1dXAu9/NzKbKSlrc1oK8/nrgt7+loG1v53E6OrhvOMwOb4ODdFeddBKweDGH4OjpobU+YwaF6eHDFPxFRVRC3d20qg8fpnI5dIgj0L7nPVQi+/cDjzxCl8yCBcMFk98qbG5mbGH1agooEb6Qp59OZdbayhZQfT2vobeXx54/f/jDW1HBz8SJrq9HezuXTZtG67+sjOUwhr8rKlxdAdGVWLSX2rakDh5kC6eujq23oGG9022pRXONAKzbM85wWWUvvEBFlq4e27EEYS64M+xIucDoRuodaflH8wxkQoHng5FQsIoi8ub5h/G2nd1KvNrZsmV4ltSWLfTbd3czMLxvHwW9nSa1r4/CeN8+7tPZyVZDWRmt5LPO4nl/9Svgu9+lgC0tpfLo6KDPf/ZsbjNxIoX+7t3u
PAcO0H0UiRUaHR2Mnzz0EK+hvJzKbdMm4Oyz2QpqbaXV3tJCwTw46MpRXu4snsmTOUz5s8+681x8MY/3wAO81o0bGZfYt48KrLf3aPdBpCCIfEFs35PWVuDFF1lPtbXMsIolfKylZhXV4GBqLTVbn+eeywC+TQA491wq4JkznRuvvd09Q9EYjYslliDMBXdGvBENRks8RThaaz0TMZJ0nSOTRkLBKorIm3fkCIXTwYO05q0fvLSU2w8OUgAeOcLvRx+lEJ8+nQKirY3xhL4+fh8+7Czs5mb2yq6r4/+33mJLobGRAeGWFlr3PT10Re3Ywf9z5/L4hw9TSM2Zwxeiu5uB54oKZ+FaN9rAAF0iU6fyGmwWUU0N/2/cSGWxdy//V1fTet+8mftu3kxFs3Qp3SvTpgH/8A883sGDdLmdfz5ThmtqqEhmzqSgvPVWN+d1ss1/2/fEYpMAVq9myy+aEAyHXbDdutNs+uxoXxwrhFtaWCezZ7Oezj6b9yzy2Skvj//ij8TFEksQXnNNZtwZQcLIjiQQa0SDVN2DIEU4Wms9EzGSWOcA+H8kgj7TRkLOKQoRWQ7g+wCKAfzEGPOddJwn8uZ1d7PCQyEK+mXLKBissFy92qXB1tXR6j/uOH5PmMCU0qoqCsyBASqJoSH+b2118YFwmALXdljbuZPbWou+pYVW+datFH6HDlF5tLTwtzHACSc4hXTgAF1RL77I4590Eo//5pu8rsOHeV12Yqbqaiq24mJXPpvdNGGCc7H9/vdscdjRYj/+caeMnniCweqLL+b248c7V9a8ebEtP7/A8f+3fVuqqniMw4fZ7+SjH+XxhoaiC8GSEpajo8Mds7nZtQRHir+lsmULn4EtW6gEn3uO9XH66SyzHQI+2syBsZ67ZIRCLEHY1pZ+d0Y8YWSzA+2IBnbgy+ZmPn/z5iU2f0c04rUUUtlLPxNZX5HnaGmhS3kkgj4bMY+cUhQiUgzghwAuAbAXwDoRecQY83o6zmdvXmcnO4T551+45x4K8d5epo52dPD3wACFQ2UlBXJLC7cfGqIQBvjbZkgBFMpvvEGFEw5TAPf10RIWYatlaIjKZt8+Kq3ubu5vDIPOAwPcZsYMLp86lYHhz32OyqKzk62M559nrAGg8rGfvj6ex3aU276dAnBggIJZhMt7e53SmDDBzVZ2zTU89uAg66WxEbj3XsYPWluZDmw7+N1yC7BkiavnaPOTb9jA6y0poStncJAtKX9a76pVDBKXlbFlZYWgFRI9PdxOhIpQhP/D4eECKtkmur/n+Y4dvH+7d7s41FlnUUmceSZblsXFtKJXrGArMJJ4/WCCiOW2mDSJ311dvG9DQ6l1mSQijCor+bHvxI4dfH6ffprvTG3t0c9CogS1FPwp46nqpZ+JrC97jtEK+mzEPHJKUQA4A8A2Y8x2ABCRXwO4AkBaFIV9gS32prW28sYddxwtt6Yml9JaVETLvrycArO21rmampqin2dwkJZ7by/3PXKEAsffkhChQGxtpZIoK+NNt30nJk2iYGxpYV+O2lrgy1+mwDeGwmrvXh6vqIjH6+/nN+Cmez1wgC4zex0iVFwHDrhMLKv4+vuHW7C9vXSbjRvHuvnzn4E77uD2s2c74XXnnfxUVR39Uhw8CPzrv/L8trzPPku32p49vI6+Ph6nqIitFdvZbmiI6bKrV/OawmGn5Gwd2uu1jKSJboXz4cP82GC/LdeWLXRxPfkkz9vcTBecTQn2Kwv/+WP1gwkimtti2TIeb+FC4Mc/5n0tLaVQzmQANhSicuzo4AgGIu5ZGBzkc3LnnYzDJeuSjKUgow0vk4pe+pn0949W0Gej70muKYoGAHt8//cCODNyIxG5GcDNADBr1qwRnci+wL29rn/Dvn3sQNfTQ6FvBZAxR8//29tLwTV+vBu7qbOT+wwNHX0+m+E0NERh2d9PoThvHm9wUxMfksZGvlT2nFaZDA7Soh0aopB++GEep6/PjbV05IgT
mBarROx84J2dFNDhMLc7fJhKoKKC5w6FqKTq613PaOu62rqV5Zw5k8Ly0CGW8fBhKqzDh9mr/NAhBvMbGo6e8tUY7tvd7cp+4AD9/jNnOvefjcV0d7Nup09nC2bdOq4/5xwqy+ZmCt5QiOexAslm4qxcGX2mvqAX0grn3/2Oz8LAAOvHdvQ7dIjJCePGse5sZ8KKCp7vE5842nJMpFd6LPwt3337GB/q7WWm1ZIlbKkODlJgnnxy/OP19MSf2jVRYdTQAHzkI+49efRRNyx+RQXLe/fdvAepGMfJP6+LLedoe+lHGhPLlkVPw04VoxX0odDwCc0y0a8o1xRFQhhj7gJwFwAsWbLEJLu/fYH7+mghh8MUXi+/7KxwO62oFZKRWKG8fXtk2YLPW1TEl8cqkwMH2O+gt5dC0U5qZAcntPs1N/NBqKigsNy9m8LHxkP882EPDQ0f0dUqGos9lrXau7vdXBezZ9OlsnMns6QWLuR5li+ngOzooCDo7nb9Pqy7p7GRgqK2FvjmN6nQJk6ke8QOaXLggIu3lJdz/yNHqLxuu41pp/39tM7PO49CsK+Pw6aPG8frLi9n8Pu976VC6+mhkqqoYHrv7t10g9nxs5Yt47lLS7mss3P4HCCR9ygc5vfQEI/f0sJydnc7RXfBBfzf20tFZd1oRUXOMvRbjnY+kaEh1mFt7dFzeATR0sIstmefdZ04d+9mfV57LV2R/pGIY7F+Pfv99PayHv/xHxnzihSKyQR5Dx3ie2Sn3y0t5b06dIjP2tSpfA6SdbFEix309AxPoR6tNR3Z4t27l1MBvOMdTgCnOkhs63blSt6zysrEY1wA37OnnnIux2XL0p/tlmuKohHATN//Gd6ylGIzgd56yz3UO3a4TmL2gbdWUjT8gjgRBgZoqVsBVFrqOtcNDvKF/cIXKOyA4YoC4DaHD3O/tjY31lQodPS2Q0Oxyz00NPylGxri73HjnJXjP97Bgwy62Z7bInSj+Vs7/vK1tLhMrp07ecyqKuCVV7id391mP4ODvJ7Zs3n9Dz7IDoWbNrmh0ydPZhC9vZ31WF1NhXnggAvCDwxQiD/9NBMOamvpslq7lpb3+vVO6Nih3/34W5nr1jELbmiIymfHDrrojAE+8AEed9YsusGsYr7gguHZT37LsayMymTPHlcP/jk8grBZYZs3U7B0dLhWUn8/y3r++fEFZk8P3X4dHSxPUxPdVR/6EO9RpFBMJMhre9OfcYbLDtuyxSVVnHcelYStj9GM42Tvz6FDNCT888KM1Jq2ytx6BF57jfVo3bPpDBLb+GDQsaMlgVjFNnky1/nntEkXSYq7tLMOwLEiMldEQgCuBfBIqk9iX7DGRgaZN2xw/nrr9wb4v7r6aL834LZJNMOmpISulVDIWe/2xu7aRUEaClHQhELDz1lczBe7uJgCdd48ug2OHOEDYzu42f2s6yoWFRW08ubP5wtRUcHytbdTkP3P/7hYSF+fc//09jo3yoQJRyvLoiIKZ2v19/RQyIfDLhX3sstYD7Y1cegQj7tzJxMKPv5xuv9Wr6ayGDcOuOIK13I75hgK5uZmHt/GRewwJbYnvBWYixYx7mOnkb3oIgrZVauGuxP9L+DEibzu3/+eSrm1lXU/fToF4uzZrLu33qKwmjyZLp+336Z15/fhL1/uXJl9fbz2aEaGdZVFujgBlxVWW8v6GBig4urqoqLcv5/3LprA9B+3qYn3xhoFhw5RmJeVuaSFaOcPwgraigreT9up85ZbGJ+YP5/n6Onh90itf3t/ystZ/xde6IaqH401XVnJ5/vRR+nSe+UVGi62b45t9aUSq/g3bWILZtMm/o+s+8ZGGmkPPMDvxsbo8Y10lDGSnGpRGGMGROQWAI+B6bH3GGM2p/o81vXx8ssUetOmUXh0dbkKt64hG6eIhd/lE4RNG7TpqraPRXk5raL9+13w1LqTLH7XUW8vLeTiYj4kDQ0UPK++6qxZG3eJFiuxCrK52XVS
swJ7cJDn7u1lPGLiROdWevVVZ/2Xlbl+HENDbhRdq0wACs2hIacw7PAW5eUUqoODVEo2XffEE+muKi9n3OfwYQqGSy91L5EVttOmUUiccgrTVXfvdrEZ28dhzx5a2tu2sfxTp7J3vB1JN9Lt438Bw2Fax3a4d3vNg4Pc15+AUFPDz+LFPG9kpztrldtEB+tKKytzGXORWTyx3B0iVFabNnHbceMoNI8cYSsnshNmpO/95JOdkiku5jNgB1yMZu3HmoDLjxVYa9e6LLmSEir288/nPbLjjY0m4G7rad8+dz3Tpyf+/gXR2cnnfWiIRkF9vTtnOoLE0aY6sFlb1iUa1H8mG4Mo5pSiAABjzKMAHk3X8e0NqK+nO+LAAT7Exx1H7W7dKzaY3N6eunN3dTmfvGX6dC7bvXv4pEOxMMZ1Risvd7EN6/+2wn9wMLqiCIW4rriYwriujg/n1q1OyNvOhS+/7IKH/jINDNCy37jRtc4mTXLCyyqUgQFe8zHHcL9Dh3j+E090neSmTaOPfGiI623wHKCg2rABuOoqHr+2ltcO8PqXL6fVD7AcfX180c8/H/iP/2ArpaKC53/9dfaEX7QoutvH7yZqa+OzUFzshhU5coT11dVFy7i0lM/J7t383r6draVoL2woxOu0PeTt2Fqxsngi3R01NcwKe+YZN16YTdXds4f73X03g8r+Od6tBW592S++yHttRzK2yRjRBI61em3sQYTP2ac/fXQs49xzOcS+jdGcdRbvhRWIl1ziyrBhA5V5skN9lJTQ3TdhAp+Fri7+H22fmc5OtuhPP93F6zZtoqEzcWL2JkKKlRk1MJCdQRRzTlGkG2uZ7NzJT3c3hVt9PV+iOXMoJHft4sNob9Boqa4ernTKyig0bOe7khLXJyCoFWOzl7q62Pyur+cL+PzzXGeDfJFxC4s9h42XtLbyJdmyxcU2bBO4q4vC2ioWu//AAOtk6VIKVSsUxo/nsc86i9vs389l06fz+P39HDersZHn2r+f1z1vHo9hOwTu2eOGFbFpubbfhBU4dXXM/PKPRltf71wgxx7LY3Z1Uei3trJebH+L7m6X1mr94MuXUzg++aSbEwNwzfsVK7jvaafRlbV7N4V1cbHLioqG9TMvW0b3hj+AaRVxaSmvr6yM35Ed1vzp2YDr0VtezuvavXt4j+hIC9w+g6efTmXS1eVSkfft433yC5zOTj5Thw654fjb2xk4t0rNbjt/PrPQbIvKxs8Annv8eP5ubR0+AVYygeKBAbr5GhtZjqIi/k9FiwJwnQaLiljvV17J1loqBbDNNquudlMdlJTwGuxUB5agzKiamswPC19wisL25m1rc4Pw9ffzoSsupgVkZ5cDkvfZxsJa/BYryG0arrX0453PZmRZ4VJSQgE5frzrQGezdqJhX6yuLrfsueeGv9w2xmGtfOBoxdXeznNPmOAUrr2mLVu4/NRT3VAg48fz3GvW8Pjz57tsmb17aWVeeCGFU2uru9adO4Gf/Yy/Z87kOWbM4BwXkydT2djYjY3/rFnD2NO6dRTgdv7z+npmcW3aRIvs9dfpjvn7v2ffh/p6vsQXXujmxOjrc2m8jzzCenj1Vbq0QiEqpMOHeX4706H/xY3sR2GTGizWR75unUui2Lt3eOfFWbNouLzjHbwnq1fz+mwSQE2Nu0Z7fr8FXlpKV1p3N5W2de8BFFDXX3+0hd/fTwViRwDu6+O9uP9+7ut3kdn+FHbIE7vOpsSGwyzD2rW8XusuS3ZMJjuYplVcfX2jd7n456exQvvUU1OvJNavH+6Cu/pqPmt+t14yWWeZ6CDop+AURTjsrOrSUj58RUW0tB5/nNskm9GUCHZk2UisgPP35A7Cb0HZIUV27eL1jBtHYRAt+O7HWuU2W8fO2mcD+raHs+18Z5WTv/Oe7d1thbO/BWOtz/37WcdPP+2C73YYksZGHq+ujm6Z44+nEHj+eQoiYyjkbT+N1lbX+3ztWl73okWuZXDw
IIVadTWPuXQpj9XR4Xzng4MU8o2NrnPjqlVUbN/4Bvfv6KAPv7aWLRvb2rMz/02ezDJs2cJ6t8HgOXNcHw5LIv0orrmG12VbXNu3UzHMm8e6uvNO4Ktf5X7FxS6eVF3tEhHa2ljfNlZkn5Pjj+c1vP46y3jcca4Fcfzx3K6kJLobKBTicvtsDQ3x/BMmuBEG/II+VoaUFXY2/nfRRawLm2U0kjGZ/LGcSOWWrJUdCg2fnybZVNVE6OnhfZwwwT1Hv/lN/I6IuTTNa8EpCoACddYs53oZGKAbw2bR9PXxdyoJUgL+daWlsV0YfuyLNmcOX3xr9VZUuIysWPg7BY4fTwHS3U0BFA7z4R0YoOU5aZILSNsOZ5MmURh3dQUruO3bKXRbW7mPTTlsb3fDbpSXc/m4cVxWW+uUQ2kpr8u6AwcGKBQ7O10cpKLCzfJ3/fX0ww8NOVfUwYMuQGh7mNsMnPJy1wJbuZL7v/mms9bsYIfHHutG7m1p4TY9PVxfVcXraG6mAvC/zJH9KGwSg392v6Ymtg5sFlhREbft7XVjaNn5TbZuZd0MDrJOKiupCNra3MjFAAVffT0/NoNr/HiXhVZby9bJxIkuoB4phCorWcYpU5wh0NNDYWfXR+upHXkcf2fBykqX/j3SMZmuuSZ6Z8HRDJJn56dJl0C26ey2FWfva1ubC5zHItMth1gUnKKwTc0NG/hwHTrEl6e93XWGCsL66kWS849aKz6R7eJlWtnz28HwioooPBYsoKAMUhQ2w8UOJWJdC0VFdEu0tbnOcHPnUqD89a9OqA4MuL4mdmTcWGXt62Nq38AAfej19e5cXV2u/uzQJtOn8yXv7OSLZHuVd3fzmmzPbWMo6K3LC6CFPGECBelf/sL6aWtzbr2hIQa1Tz+dbis7nPqhQ+7eW9fMmjVcZmNT27e7HvU24Anwevwd8yKfh8h+FNFm9yspoRKqq2MrYccOnndwkHVks/Ks1dvVxU9FhSt/dTX7K4RCw0dvtZ267OCS73gHXSAAlUTQlLIDA6yrHTvc89DeTmWRbLZNKERlbd1TIw3CxlIGqRgkL50CedIk19nTtij6++nBsM9Ars94V3CKIhQC3v9++oQnT6bimDSJL0Qo5PLxrR80EpvZASSnKBJREgDPW1oa3w115Iizmt9+m8LGCnlriUfD5s8PDVHYzJ1Lobx/P4O0J55IH74xPLYdTHDpUgrdPXv4sNtRWm2dRbs+K9CtIu7qolKYPNmNNWV7oVtheuWVwC9/6YZuty6gqioqxunTXWumpsZ1BLQumQULaCUPDFCA9vQ4QXfCCRzxdsMGWvEDAzyedT9WV7MebR+J8nIX4D94kOezAzhai9+6MQ8eZAzDP/FTpLsk2ux+dngSm548cyYF4t69LM8tt/Daq6qc1ft3f0chY11OVvnboVpsrMJaynb+DP+0utFmGPRjldzgIOsBYHxnYIB1NBLXz2hcKUHKIBWD5KVzrKeqKtevpLWVz9qCBXyOR6rYMk3BKQqAwued73S+6/Jy+q5FnOXsz0Iyhv8bGhj8tML45ZcpMBPF5uBHClV7rrIy10fgrbfcS2oH6fNTXs4HbdcuHtev2GwaZjThbQPQdr1tYe3YQQv9qadcpsqcOXyB5sxxne+KixnI3bePnZTKylwQ0A5RIsJlQ0O0XG3fETuw3vjxXF5XR+H/wQ9y35ISBhHPOos+e5va2ttLZWFTUktKqGw+/nHXQXHfPva5eOopKry2NhdUP+UUlmXfPrrZli+n4tuyxQXtV6xwmTSbNzt/v+1Z3t1NIWuFUns7hfmMGS6ovXnz0cODxJvdr7+f1/vWWyxHQwPLcvXVfBb87hVr9dbUUAG88QbPabOv5s51KdL2XKEQBfz8+SMbwdYqHzvW2PXXRx98L8j1EymER9qPIpYyGO3YSZmY22HJEioK6yb84x9Hp9gyTUEqCut/XbyY/mbbGco+XLavAuB85729tBrPOYcv8tSpnKjn
gQcS7xVpUzOLilyPZZuWZ4fSmDOHH2st2swm28IoLXXuIus7Hxriw15eTkFshTXgxgUCnCVsA9ZTplCIXnklhe6ECS6g29joBlp7+WWXcRIOswU2bhz3ty6s4mLX0vK3ysrLnUvn8GGWr7fXKQ/bLLdWa2urGwLdDrJYUsLzTZjAF9h2VisqcnN8jB9PwVpSwtbRxo08T08Pj1NbSyVQVORcIFVVTknMncs6rq+ni2b1aqd4S0udm62qitc1bRrLNncunxkbW4hGpHCM/B0tmJqIoFq/ngoPoJJ/5RX2Yo8WjI1VhliWdDjMFs2yZbx/1dWxB98LsvYT7UwYjyBlMJqxkzI5t4NtGVqjKNOd5kZDQSoKv0tgwQK+4A0NzA558003dHVZ2fAhrOfN4xzaCxdSaHR301XQ3U3FEtRRzmYX9fZS0Mydy+0PHuR/a7VWVNCVZF0tdjwkG6iuq2NZTzmFisT6zefM4QtsR3G1fnBrMVdWujkz6usp/GxqsPVjHzjAurCdyt54g52lzjiDKbQnn0yBvGIFr2nPHgoom7ll01etBWr7iRQXu9FybR3Y3uglJcxO+sxnqDS+/nXXAbCqyvV/OHSIgwCeeCKvw6YC+90gVVUUbF//uhuifNEilu2001imoDx0+1w89BCP1dbmMrBOOMEF30Mh4PLL2XqxYwMBbtrWZBlJMNXfT6KqiuWbOpWd7mINeBhJkCVt03Zfesndz1hjU8Wy9js7UyeE46WLjpRUuK2SJV3Xkk4KUlEAw10CJSX0i+/c6fzFHR18gKybxvZwfeYZ56u22LRBaylHC+6OH8/9Dh6kZXvTTdx+zRoKP/uy//Snzt1w0kkU0FbQTpxIq/6YY6isrr+e7pMXXuCooq2tbPHY4UJKS+kmsq2Y7m7XYrLjNtkxjCZNciPb1tfzmuvr2Zo46SQKZ382VkMD8KUv0fVl3SGbNlH4L1vG83Z0UMBY3/+0aTyHFfynnUYX4MSJjBssW8YcdtvCeuMN/p4xg/W3Y4frB2AzeyKFqz+jp6eHLsW2Nl77VVfFz0NvaOBwGM3Nbnra6moq4vJyfiZP5v2oqnLXZIfzGCnJuGT6+1muzZt5nZMn02jYvj3xnsrJWNLx0q1jWftAaoVwrBiHvZaaGjfBVaIKabRuq5GSS6mviVCwigIY/nJeeikn4rHj8Nihtq3FMXMmhVNdnct/nz+fWUYdHS44LOICyTZGMG0aBaJ159jhuM8+G/jOd5y7Z+VKCm6bvmsH/KuocMHOt97i9itWUEgsXUqBe8kl7K9gXVThMIX2rFn0YXd28jgLFrhhPubNo7CdNIktpYceogCaONGNi3TwIJVVVRUVnL+jlO1/0tjo5vWuq+OLtngxz3vKKVReP/whr6usjJbvI4+wPFu3snX18stsybz4Ivffu5fCb8IE1smkSTzmihWsT7/A92N7KxcV8TyVlTzOTTclPi1nTY0bDNLOWlhezhbNY4/Rkh8a4rXbvgn+sZvS+dLbVoDtl2CzxPbupTL95S+pEOO5d+JZ0tb1dNFFzvXU0RH9+mJZyP4Od6kSwtEU6mhaBdm07nMl9TURClpR+Kmt5TDRNtd53Di6Vk44gemhixa5HHD7EE6aRIXy0kvOT19T4zqaAdzm2mvp87aT7Jx11vA+AACH1rbWoR1iwvqEQyEep7iYgvzkk4cLgsiAZUkJYycTJ9KdNn06f//DP9BKty+uDf6Gw3QrfOhDfGGWL3cuIfvynH/+0R2l/L1lrXvs8GGu37GDZV+8mOsvvNAN6f7EEy6fX4R9WI47joHsjRtdJy9/x0A7f4JfSUQj2ot/1VUjm7vZH8jt7KQitmW69FK2BiPHbkqnJepvBdgh1K2/+4QTWLaJExOzpuNZ0n7Xk01WCBoWPV6Hu2SEcLIZSKNtFYw16z4bqKLwsJZoKOQetqoqtgT27HFDXNi5iQEK/WuvZcvg1Vdd6tstt1BwVlVROYTDbtIbGx+wAWob
CLeZRqeeSuu6r49CdtIkumWqq1mGxYuHzx8NDH+xrPvDvqDHHz88YLt06fAXIvJFvu02Cuu+PrZUzjyTrq1oHaUi0z8XLOD3q6/SPXbmmdzm8cedUrKdA9/7XioEO8z44sWs3wkTqPAGBqg81q51w5Afd1xiL3FQx6xEsNb0u97lkgNWrmQLzNb9mjV0lT31VOYs0UjL+ayzWPd22tFTT3Ud6+JZ08lY0vH69PiPGavDXaJCeCQZSKloFYwl6z4bqKLwiPWwVVVFHyrZNqvt/BB1dW7mOH9+tLWGTzyRwq6zk/GEujqXMQM4i6iujgLWuktaWjiEgwjLcuKJLm8eiP1ixXpBI1+IaNudfPLw//X1sV/CyP07O+n+qK5242Z1dTHQ6++hW1Pj+nDYyYlsiq0Nem/bxm3sAH9btlApp0PY+LF1a2eis50wrRK211lbm1lLNNJyrqhgvfb3syVqx5pK1JoOEuKRynI0rrVEhfBoMpC0VZBeVFH4iPaw9fdT4F18sXND2LmJIxXL1VfzOP5ltrPTuecym8m2TKZMcS2TIHdJVRXHIbLz45aVOUEd78VK9AWNljoZT5nE2t/OvGbn2wYosFevZkvB30PXCvLrruPYN0eO8JoWLHADNB57LH+ffbZrOQVdUyrSHVta2ALcuJH/Fy2isWANhchWVaaEUrTn5MoruW7VKjdMSCpaNlYpZdK1NtoMJG0VpA9VFBFEPmz24fWnHNq5iWMJ0MgOVs8/zxbBiSe6jmeXXjr8JQgSxnPnAp/4xNHrMpnal4zSsfMT2Pkcli4dLuQjM84eeGD4nAU9PQyuT55Mq7mmxnUQjCeoRlsnVtHMnMmWYmcnz21dZ9lOZ0zkmUu0XEEtr2wEebOVgaTERxVFHOI9vNEEaOQy2xnIHuPcc7k88iUIEsbR1uXqi2XnJ7AT7EQT8vZ6bMDeJhEAVBRVVbSWI4euTne6Y6Siqa93w23nimsjkWcuHom0vDLtzhmL/QsKBVUUcUjFwxs55k5fH91Po30JcvXFCoWiz08QrVxBgn0kE7SMtk7i9QDOdt2mikRbXpm+Zo015CZiEk1pyFGWLFli1tshMdNIqgYNS8fgY+kc0Gw0JFqudIy1M5o6ycTYP9mmvx+4777hLYqentwemE5JLSKywRizJKFtVVEouUCuKbtcK086KASFqMQmGUWhriclJ8g1t06ulScdqJtHSRRVFIpSwBSCQlRGTxpmh1YURVHyCVUUiqIoSiCqKBRFUZRAVFEoiqIogaiiUBRFUQIZ8/0oRKQFwK5slyML1AFozXYhcgCtB6L14NC6IPHqYbYxpj6RA415RVGoiMj6RDvL5DNaD0TrwaF1QVJZD+p6UhRFUQJRRaEoiqIEoopi7HJXtguQI2g9EK0Hh9YFSVk9aIxCURRFCURbFIqiKEogqigURVGUQFRR5CgislNEXhWRV0Rkvbdsoog8ISJbve9ab7mIyA9EZJuIbBKR07Jb+tEhIveISLOIvOZblvS1i8iN3vZbReTGbFzLaIhRD18TkUbvuXhFRC73rfuiVw9vici7fMuXe8u2ichtmb6O0SIiM0XkaRF5XUQ2i8hnvOUF9UwE1EP6nwljjH5y8ANgJ4C6iGX/BuA27/dtAP7V+305gD8BEABLAbyQ7fKP8trPA3AagNdGeu0AJgLY7n3Xer9rs31tKaiHrwG4Ncq2CwFsBFAGYC6AtwEUe5+3AcwDEPK2WZjta0uyHqYBOM37XQ1gi3e9BfVMBNRD2p8JbVGMLa4AcK/3+14AV/qW/9yQtQBqRGRaFsqXEowxzwJoj1ic7LW/C8ATxph2Y0wHgCcALE974VNIjHqIxRUAfm2M6TPG7ACwDcAZ3mebMWa7MaYfwK+9bccMxpgmY8xL3u9uAG8AaECBPRMB9RCLlD0TqihyFwPgcRHZICI3e8umGGOavN/7AUzxfjcA2OPbdy+CH6CxSLLXns91covnUrnHultQIPUgInMA
nArgBRTwMxFRD0CanwlVFLnLOcaY0wBcBuCTInKef6Vh27Igc5sL+doB/AjAMQAWA2gCcEdWS5NBRKQKwIMAPmuM6fKvK6RnIko9pP2ZUEWRoxhjGr3vZgB/AJuLB6xLyftu9jZvBDDTt/sMb1k+key152WdGGMOGGMGjTFDAO4Gnwsgz+tBREpB4fhLY8zvvcUF90xEq4dMPBOqKHIQERknItX2N4BLAbwG4BEANlPjRgAPe78fAfAhL9tjKYCDviZ5vpDstT8G4FIRqfWa4pd6y8Y0EbGn94HPBcB6uFZEykRkLoBjAbwIYB2AY0VkroiEAFzrbTtmEBEB8FMAbxhjvutbVVDPRKx6yMgzke1Ivn6iZjfMAzMRNgLYDODL3vJJAJ4EsBXAnwFM9JYLgB+CmQyvAliS7WsY5fXfDzahj4D+04+O5NoBfAQM4G0DcFO2rytF9XCfd52bvJd7mm/7L3v18BaAy3zLLwczZN62z9JY+gA4B3QrbQLwive5vNCeiYB6SPszoUN4KIqiKIGo60lRFEUJRBWFoiiKEogqCkVRFCUQVRSKoihKIKooFEVRlEBUUSgFjYgMeiNubhaRjSLyeREp8tZdICIrvd9TRGSlt83rIvKoiJzkG7GzXUR2eL//7O2zWESMiCyPOKcRkTt8/28Vka/5/n9IRF4Tjh78sojc6i3/me8cr4jIcxmoIkVBSbYLoChZ5rAxZjEAiMhkAL8CMB7A7RHbfQMcUO773rYnG2NeBYdNgIj8DMBKY8zvfPtcB2CN973Kt7wPwFUi8m1jTKv/JCJyGYDPArjUGLNPRMoAfMi3yT9FnENR0o62KBTFw3C4lJvBAdYkYvU0sNOb3XZT0LG8/T8A4MMALhGRct/qAXA+489F2fWL4JDR+7zz9Blj7k7yUhQlpaiiUBQfxpjt4Hj9kyNW/RDAT4UTx3xZRKbHOdQ7AewwxrwN4BkA745yvOtFZELE8hMBbAg47v/1uZ5+GacMipISVFEoSgIYYx4Dh1a5G8DxAF4WkfqAXa4Dx/mH931dxPG6APwcwKeTLMo/GWMWe5/rk9xXUUaEKgpF8SEi8wAMwo1E+jcMJ7z5lTHmBnBgtfMit/GOUQzg/QC+KiI7AfwngOV2oEcf3wPHbxrnW7YZwOmjvAxFSSmqKBTFw2sh/D8Ad5qIQdBEZJmIVHq/q8Hx/3fHONRFADYZY2YaY+YYY2aDQ0O/z7+RMaYdwG9AZWH5NuhemuqdKyQiHxv91SnKyFFFoRQ6FTY9FhyB9HEAX4+y3ekA1ovIJgDPA/iJMWZdjGNeB84h4udBRLifPO4AUGf/GGMeBXAngD97ZXoJzMKy+GMUr3jDRCtKWtHRYxVFUZRAtEWhKIqiBKKKQlEURQlEFYWiKIoSiCoKRVEUJRBVFIqiKEogqigURVGUQFRRKIqiKIH8f1c1aBTCx1+uAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], "source": [ "#Check if there is a relationship between the distance of a flight and how late the flight arrives\n", "delays_df.plot(\n", @@ -100,9 +106,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2020-12-21T12:58:37.421442\n image/svg+xml\n \n \n Matplotlib v3.3.3, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEXCAYAAABRWhj0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAxuElEQVR4nO3de5zcdX3v8ddnb0k295BNTDaEhAhGkAoYESsgFy+Bk5agVcSeSKl90NMDVVqrUvUU6rF9eHq0xVblCBVF2+INBIqagtypBQxXucolBLIJZAnZJJtNstndz/nj8/s5v0xmdn97mdnLvJ+Pxz5m5nf9/mZmf5/53s3dERERyaNutBMgIiLjh4KGiIjkpqAhIiK5KWiIiEhuChoiIpKbgoaIiOSmoFFDzOwPzOyeYez/MzM7dyTTlPO8XzCzV83s5Qqeo9PMDh2B47iZvX4k0jTAeQb1WZrZC2b2rkqmaawazHtlZt82sy9UOk3jmYJGlZnZh81sXXKT2pzciE8Y7XQVM7NLzexfssvc/XR3v7rK6VgMfAI4wt1fV6nzuPs0d3++UsevZdUKpFIdChpVZGZ/DlwG/C0wH1gMfB04cwjHasizbAJYDGx19y3DPZCFuqJlE/E9GxNG6r01s/qROI6MDAWNKjGzmcDngQvc/Tp33+Xu+9z93939k8k2k8zsMjPblPxdZmaTknUnm9lGM/t0UkzzrSQ38CMz+xcz2wH8gZnNNLNvJrmYtqRop+Q/nZl9xcxeMrMdZvaAmZ2YLF8JfAY4O8kRPZIsv8PM/ih5XmdmnzOzDWa2xcy+k1wjZrYk+XV5rpm9mBQtfba/9ybZvz053ueS478LuAVYmKTj2yX2nW1mNyX7bkueL8qsv8PM/sbM/hPoAg5N0naBmT0DPJNs52b2ejN7m5m9nH3PzOwsM3s0eX6cmf2XmXUk7/FXzawp53fgPDN70sx2mtnzZvbHmXXp5/uJ5P3cbGbnZdYfZGY3Jp/V/cCyAc61Jnkvtxa/98l7e7GZPZes/4GZzUnWpZ/d+cl3cLOZ/UVm336vv/i9NbO7klWPJJ/h2VaiuMgyuRGLIqLLzeynZrYLOMXMFprZtcnnvN7MPtbPtff7XpnZcjO7xcxeM7OnzeyDZY5T9rtlZh8wsweKtv9zM7uhXLomDHfXXxX+gJVAD9DQzzafB+4F5gEtwC+A/52sOznZ//8Ak4ApwKXAPmA18QNgCvBj4BvA1OQ49wN/nBzjD4B7Muf778BBQANRBPQyMDlZdynwL0XpuwP4o+T5HwLPAocC04DrgO8m65YADlyZpOnNwF7gjWWu+zvADcD0ZN9fAx/NXPfGft6zg4D3A83J/j8Eri9K84vAkcl1NiZpuwWYA0xJtnPg9cnz54B3Z47xQ+Di5PlbgOOTYy0BngQuymz7m+OUSOt/I25gBryTCGLHFn2+n0/SeEayfnay/nvAD5LP9U1AW/azLDrPEUAncFLyXfn75NjvStZ/nPieLUrWfwO4puizuyY511FAe2bfPNdf9r0t9T0s8f5/G9gOvIP4XjcDDwB/BTQR37nngfeWuf6y71Wy7CXgvOQajgFeJYo/03N/YaDvVvK+vUbmOw08BLx/tO81Fb+XjXYCauUP+H3g5QG2eQ44I/P6vcALyfOTgW6Sm3qy7FLgrszr+cTNeUpm2TnA7cnzA/5Zi86/DXhz5tj9BY1bgf+ZWfcGIoClNxMHFmXW3w98qMQ565PrOiKz7I+BOzLXXTZolDje0cC2ojR/vmgbB04tsSy9aX0BuCp5Ph3YBRxS5nwXAT8udZwcab0e+HjmOneT+VEBbCFu0PXJe7s8s+5vy32WxM31e5nXU5P3OL3xPwmcllm/
oMRnlz3X3wHfHMT1l31vy30POTBofCez7m3Ai0Xb/yXwrTLfp7LvFXA2cHfRPt8ALsmc+ws5v1uXA3+TPD+S+P+ZlPe7Ol7/VJ5bPVuBuWbW4O49ZbZZCGzIvN6QLEu1u/ueon1eyjw/hPiVutnM0mV1Rdv8RlLs8NHkHA7MAOYOfCll09pABK5UtrVTF5EjKTY3SXPxsVrzJMLMmoF/IHJys5PF082s3t17k9elrr/ke5L4N+AXZvYnwPuAB919Q3K+w4lf7iuIX6ANxK/gPGk9HbgEOJzCL+hfZTbZWvTdSN+zluQ82TRn369iC7PbuvsuM9uaWX8I8GMz68ss62X/z674XEcl15Dn+vt7b/Mq/l4vNLOOzLJ64O4S+w30Xh0CvK3oWA3Ad4sPlOO7dTVwjZl9DlgD/MDd9w58aeOb6jSq57+IXMDqfrbZRHypU4uTZalSQxJnl72UnGOuu89K/ma4+5HFO1nUX3wK+CBRBDKLKBJIo81Awx+XSmsP8MoA+xV7lfhlWHystpz7f4LI5bzN3WcQRTJQuA4Y+H3bf4X7E8SN5nTgw0QQSV0OPAUclpzvM0XnKsmibupa4EvA/OT9/mmefYnioR7g4Myyxf1svzm7bXLzOyiz/iXg9Mx3ZJa7T3b37HtefK70e5jn+gf67uwiAk6avlKt4oq/1+uL0jvd3c8osd9A79VLwJ1Fx5rm7n9S4lj9frfc/V4iB3ci8T05IPBMRAoaVeLu24lig6+Z2WozazazRjM73cz+LtnsGuBzZtZiZnOT7f+l3DFLnGMzcDPwZTObkVR4LjOzd5bYfDrxz9UONJjZXxE5jdQrwBIram2UcQ3wZ2a21MymEUUA3+8nF1Uuzb1E+fPfmNl0MzsE+HPyX/d0olinI6nMvWQw5+/HvxFl/ycRZdnZ8+0AOs1sOVDqZlNKE1EO3g70JLmO9+TZMXmPrgMuTb43RwDn9rPLj4BVZnZCUkn9efb/X/9/xPt9CEDyfTuz6Bj/KznXkUT5//eT5UO5/leIeojUI8CRZna0mU0mikL7cz+w06IRyBQzqzezN5nZW4s3zPFe3QQcnjQUaEz+3mpmbyxx3jzfre8AXwX2ufuQ+0CNJwoaVeTuXyZuiJ8jbh4vARcSZdsQZenrgEeJYosHk2WD8RHiBvUEUcb6I6LMuth/AGuJSucNwB72z9KnN8qtZvZgif2vIn5Z3QWsT/b/00GmNfWnxK/P54F7iBv2VTn3vYyobH+VqNxdO8Q0FLuGqKy+zd1fzSz/C+JX5U6iov/7JfY9gLvvBD5GBMhtyTFuHER6LiSKql4myt2/1c+5HgcuIN7Hzcn5NmY2+Upy7pvNbCfxvr2t6DB3Eg0dbgW+5O43J8uHcv2XAlcnLa4+6O6/JgLZz4nWa/3ebJNAsIqoU1hPfNb/DMwss0vZ9yr5HN4DfIjIPb1MoXFJscsY+Lv1XaKyPfePu/HOkkocERHMbAlxY24cbK6xFpnZFKLBwrHu/sxop6calNMQERm6PwF+WSsBA1DrKRGRoTCzF4hK8dWjm5LqUvGUiIjkpuIpERHJbdwXT82dO9eXLFky2skQERlXHnjggVfdvWWw+437oLFkyRLWrVs32skQERlXzKy/UQXKUvGUiIjkpqAhIiK5KWiIiEhuChoiIpKbgoaIiOSmoCEiMg51d0NHRzxW07hvcisiUmva2mDtWujpgYYGWLkSWnNNWzZ8ymmIiIwj3d0RMKZNg4UL43Ht2urlOBQ0RETGka6uyGE0J3MfNjfH666u6pxfQUNEZBxpbo4iqTRIdHXF6+bm/vcbKQoaIiLjSFNT1GF0dsKmTfG4cmUsrwZVhIuIjDOtrbBmTeQympurFzBAQUNEZFxqaqpusEipeEpERHJT0BARkdwUNEREJDcFDRERyU1BQ0REclPQEBGR3BQ0REQkNwUNERHJTUFDRERyU9AQEZHcFDRERCQ3BQ0REclNQUNERHJT0BARkdwU
NEREJDcFDRERya2iQcPMDjaz283sCTN73Mw+niyfY2a3mNkzyePsZLmZ2T+a2bNm9qiZHVvJ9ImIyOBUOqfRA3zC3Y8AjgcuMLMjgIuBW939MODW5DXA6cBhyd/5wOUVTp+IiAxCRYOGu2929weT5zuBJ4FW4Ezg6mSzq4HVyfMzge94uBeYZWYLKplGERHJr2p1Gma2BDgGuA+Y7+6bk1UvA/OT563AS5ndNibLio91vpmtM7N17e3tlUu0iExo3d3Q0RGPkk9DNU5iZtOAa4GL3H2Hmf1mnbu7mflgjufuVwBXAKxYsWJQ+4qIALS1wdq10NMDDQ2wciW0HvATVYpVPKdhZo1EwPhXd78uWfxKWuyUPG5JlrcBB2d2X5QsExEZMd3dETCmTYOFC+Nx7VrlOPKodOspA74JPOnuf59ZdSNwbvL8XOCGzPKPJK2ojge2Z4qxRERGRFdX5DCam+N1c3O87uoa3XSNB5UunnoHsAb4lZk9nCz7DPBF4Adm9lFgA/DBZN1PgTOAZ4Eu4LwKp09EalBzcxRJdXXF866ueJ0GESmvokHD3e8BrMzq00ps78AFlUyTiEhTU9RhrF0bFeFpnUZT02inbOyrSkW4iMhY09oKa9YUchsKGPkoaIhIzWpqUrAYLI09JSIiuSloiIhIbgoaIiKSm4KGiIjkpqAhIiK5KWiIiEhuChoiIpKbgoaIiOSmoCEiIrkpaIiISG4KGiIikpuChoiI5KagISIiuSloiIhIbgoaIiKSm4KGiIjkpqAhIiK5KWiIiEhuChoiIpKbgoaIiOSmoCEiIrkpaIiISG4KGiIikpuChoiI5KagISIiuSloiIhIbgoaIiKSW0WDhpldZWZbzOyxzLJLzazNzB5O/s7IrPtLM3vWzJ42s/dWMm0iIjJ4lc5pfBtYWWL5P7j70cnfTwHM7AjgQ8CRyT5fN7P6CqdPREQGoaJBw93vAl7LufmZwPfcfa+7rweeBY6rWOJERGTQRqtO40IzezQpvpqdLGsFXspsszFZdgAzO9/M1pnZuvb29kqnVUREEqMRNC4HlgFHA5uBLw/2AO5+hbuvcPcVLS0tI5w8EREpp+pBw91fcfded+8DrqRQBNUGHJzZdFGyTERExoiqBw0zW5B5eRaQtqy6EfiQmU0ys6XAYcD91U6fiIiU11DJg5vZNcDJwFwz2whcApxsZkcDDrwA/DGAuz9uZj8AngB6gAvcvbeS6RMRkcExdx/tNAzLihUrfN26daOdDBGRccXMHnD3FYPdTz3CRUQkNwUNERHJTUFDRERyU9AQEZHcFDRERCS3foOGmR1frYSIiMjYN1BO4+tm9g0zm1WNxIjUuu5u6OiIR5GxaKDOfSuAjwH3m9n/dvfvViFNIjWprQ3WroWeHmhogJUrobXkkJ0io6ffnIa797n7ZcBq4KtmttPMdqSP1UigSLVU6ld+nuN2d0fAmDYNFi6Mx7VrleOQsWfAYUTM7KPAxcBnga/5eO9CLlJCpX7lD3Tc7m7o6orHnh5obo7lzc0RaLq6oKlp+OkQGSn9Bg0z+wUxPtSJ7v5y0bpGd99XwbSJVEX2V35zc9yo166FNWuGd8Me6LjZgAKwc2dsk27b0FAIIiJjxUAV4X/l7h9OA4aF08zsm8QkSSLjXlfXgb/ye3pieaWOW1wcNWtWbNPRAZs2QWdn5EqUy5Cxpt+chrv/HH7T9PbDRN3GHOAC4C8qnTgRKBThNDdX5iba3By/6kf6V35/xy0VUKZPh7POimus1LWKDNdAxVN/C3wAeBG4BvhrYJ27X12FtIlUpUVRU1Mcd+3a+KWfnme4N+2BjlsqoMya1f958wTQSgdZqW39Do1uZluAXwOXAf/u7nvN7Hl3P7RK6RuQhkafuLq74bvf3b9OoLNz+HUN/Z2vEjfbcsctDoinngqzZ5c/f54Aqma7ktdQh0YfqPXUAuDdwDnAZWZ2OzDFzBrcvWcI6RTJrVQRTiVbFDU1
Vfe4ra0RALu6YNs2uO22/ltZrV0LkydDXR309R1YWV+pCn2RrIHqNHqBtcBaM5sErAKmAG1mdqu7f7gKaZQaVam6hrEkvZlfe23/N/uuLmhvj0ryNLAsXLh/AK12kJXalHvAQnff6+7XuvvvEfN3/6xyyRIp1Al0dk7sFkV5Wm81NMBTT4EZHHRQPD71VCxPZYNsetyJFmRl9A1pjnB332FmfwNoWBGpqGwRzkSt2M2To+rpgeXLo87itdeiiGr58kIfD6hchb5I1pCCRsJGLBUi/ahUXcNYkb3Zv/pq1FesWrX/NTc3Q0sLLFoE9fXQ2wt79x6Yi6iFICujazjzaWg4ERnXqjWibJ7ztLZG66m+vshF3HZb5CpSaWDZuxd27IjHcrmIpqaBm+6KDNVA/TT+idLBwYBZlUiQSDVUq2lq3vN0d0egmDevfGW4chEyFgxUPNVfBwh1jpBxqVpNUwdznrwtnyZ6UZ2MfQM1uS3b89vMhlMfIjJqqtU0dTDnqYXmxTIxDDTd6z2Z58Utpe6vSIpEKqxaTVMHc55aaV4s499AuYWpmedHFq1T6ykZl6rVNHWw51GdhYwHAwWN/lpIqfWUjFvVukEP9jyqs5CxbqCgMcvMziKKsWaZ2fuS5QbMrGjKRCqsUjfo4gEKh3MejVgrY81AQeNO4Hczz38ns+6uiqRIZBwbyaa8GrFWxqKBWk+dN5yDm9lVxCCHW9z9TcmyOcD3gSXEVLIfdPdtZmbAV4AzgC7gD9z9weGcX6SaRrIpr0aslbFqwB7hZvYGM/uymf0k+fuSmR2e8/jfBlYWLbsYuNXdDwNuTV4DnE4MhHgYcD5wec5ziFRU3p7jIzltbKWmoBUZroGa3L4duAPoBK4ArgR2AXckU8D2y93vAl4rWnwmkPb/uJqYQjZd/h0P9xJ1KAvyXYZIZbS1xURQ3/9+PGaH9ig2kk15NWKtjFUD5TT+CjjH3S9x9xvc/Xp3v4SYlOmSIZ5zvrtvTp6/DMxPnrcCL2W225gsO4CZnW9m68xsXXt7+xCTIdK/bBHRwoXxuHZt+RzHSPa1UL8NGasGqghf5u53FC909zvN7Irhntzd3cwG3XTX3a8gcj6sWLFCTX+lIobSc7ylBX4naS4y3EED1W9DxqKBgsbOftbtGuI5XzGzBe6+OSl+2pIsbwMOzmy3KFkmMioGO7RHJVo7qd+GjDUDBY2DzewfSyw3yhQd5XAjcC7wxeTxhszyC83se8DbgO2ZYiyRIRtqX4fB9OhWayepFQMFjU/2s27AUW7N7BrgZGCumW0k6kG+CPzAzD4KbAA+mGz+U6K57bNEk9thNfcVgeH/+i9X3FQciDQ/t9SKIY9ym2Vm/+Tuf1pi/3PK7HJaiW0duCDP+UTyGO6v/3IBp9TylhaNUiu1YTgz92W9Y4SOIzJihtPXoVzLqc7O0stBrZ2kNmhODJmwhjNHRbnipq1byxdDqbWT1IKRymmIjDnD6etQrnPdQQf13+lO83PLRDdSOQ3NrSFj0lB//ZdrOTVtWnXm4hAZq4YcNMxssbu/mLz8ygilR2TEFfd1yNsEt1zAUTGU1LIBg0Yy/lQrcJe7bzGz3yIGGTyRpDOeu3+7kokUGSmDbYJbrnOdOt1JrRpowML/C1wFvB/4iZl9AbgZuI8YjVZk3OjuhptuArNoIjvQWFIicqCBchr/DTjG3feY2WxiQME3ufsLFU+Z1KxKzVb37LNwzz0wfXrkMo45ptAEV7kGkXwGChp73H0PQDJR0jMKGFJJlZqtrrsb7r47AtG0aXH8O++Et75VHfBEBmOgoHGomd2Yeb00+9rdf7fEPiJDUsnxm9Imsm9/O9x1F6xfH8uWLYP2dk2jKpLXQEHjzKLXX65UQkQqOX5T2u+ioQGmTIE3vAHq6uDQQzWwoMhgDDT21J3l1pmZhg6REVXcg3vHjsh9NGS+pcX1HXnrP5qa4NRT4Yc/jF7d8+ZFncaMGdHpT/UaIvn0
GzTMrJ4YhbYVWOvuj5nZKuAzwBTgmMonUWpFtkPd+vXw1FOwfHlMtboymWk+W9/x5jfDI4/kq/9oa4PbboNJkyKH8frXw9y5GlhQZLAsBpcts9Ls20RfjPuJOS42ASuAi939+iqkb0ArVqzwdesGHKVdxpHOTvjWt2D2bJg5M27sHR2xbtasuMFv3w4//zm8+92RW+jqiv1KFTN1d8f83mldycaNcN99UQk+efLIVbaLjCdm9oC7rxjsfgPVaawAfsvd+8xsMjGn9zJ33zqURIrk0dMTN/6ZM+N1c3OMHQUxsixAfT3s2xe5hnSbcvUfxXUlixZBXx+sWgULFqhYSmQwBhqwsNvd+wCSprfPK2BIpZUaLLC5udCiCqC3Fxob4+afblOumKnU8SZPVsAQGYqBchrLzezR5LkBy5LXBvS5+5srmjqpSaUGC1y1KtZll114YdRpdHb2P3DgYKZtFZH+DVSncUipxUQ9x1+6+xmVSlheqtOYuEq1lMrWbQym9VSp44nUsorUabj7hswJjgE+DHwAWA9cO9iTSW0YqZtzdlDAcj3FBzNwoAYZFBm+gZrcHg6ck/y9CnyfyJ2cUoW0yThUiWFAKtlTXEQGZ6CK8KeAU4FV7n6Cu/8T0Fv5ZMl4VG5e7eGOIjucub5FZGQNFDTeB2wGbjezK83sNDRLn5RRqZt7ualX1SFPpPr6DRrufr27fwhYDtwOXATMM7PLzew9VUifjCOVurkPZ65vERlZ/baeKrlDzKvxAeBsdz+tIqkaBLWeGltGsk6juLUUqPWTyEgZauupQQeNsUZBY+wZidZTbW1wzTXRDwNinKlzztFwHyIjZahBY6A6DZFBa2oq9KMYSJqbyFaWp9OyPvNM9NpesCCe33STpmYVGW0D9QgXqZhyRVldXfFXXx+j0kKhrkRDmIuMLuU0pGJK5SKy68o1z03Hmerthb174y9tlaUWUyKja9RyGmb2ArCT6PfR4+4rzGwO0YFwCfAC8EF33zZaaZShG6hCvL9Z+mbNirGmdu7cv05j1SrlMkRG22gXT53i7q9mXl8M3OruXzSzi5PXnx6dpMlQ5enBnQaLzZtj+lWz/ZvntrbCxz524FhTIjK6RjtoFDsTODl5fjVwBwoa405XF+zZUwgYfX3xOlsf0d4OL70Et94a6xcvhk9/ev/A0NQU07KKyNgxmkHDgZvNzIFvuPsVwHx335ysfxmYP2qpk1xKNa/dtg1++cuoi9iyJW78kybBGWdEjiFtHdXeDr/92zGZUlcXPPAA/NZvKUchMpaNZtA4wd3bzGwecIuZPZVd6e6eBJQDmNn5wPkAixcvrnxKpaT16+H666NuYsaMqHNoaYm5uN/ylljnHoFj9epYvmbN/q2j0uKo3l61jhIZD0YtaLh7W/K4xcx+DBwHvGJmC9x9s5ktALaU2fcK4AqIzn3VSrMUrF8Pn/oUvPxy1EW0tkbF9Uc+EkFk7lw47LCo19i5M17v2LH/LHxp6yhQ6yiR8WJUmtya2VQzm54+B94DPAbcCJybbHYucMNopG886K85a6XP2dkZuYhXXomcxezZ8Oqr8NBDsU1DQ2E61n37omiqr69Q0d3UFLmSww6LivDNm+O5WkeJjH2jldOYD/zYzNI0/Ju7rzWzXwI/MLOPAhuAD45S+sa0wYzvVGr2u/5e5zlndzds3RrFS1CYq7u3d/+pVVtb4amnYPnyqAjPDjKo1lEi49OoBA13fx44YH5xd98KjPogiGPZYCYkKg4ub35z9Hso97pc8Ck+5/btcP/98LrXwWuvRb3F3r1w9NGFm39ad9HQUCh6Kk6fWkeJjD/qET7O5J2zorjH9eTJ8NWvRlHRwoXx+NWvxvKWlugnUW5sp1LnPPRQOPzwqKuYMwdOP33/wJWOPzVtmnIRIhPJWOunIQPIzlmR5jRKzVlRfKOvq4v6hbRIqb4+Xnd0wH33xbY7d8KJJ8IRR5Q/5+7d8F//Fc+PPx7e974Y
UFCBQaQ2KKcxzuSdkKh4QqS+vqh76E0m6+3tjUDy8MOR20iLnm6/PZrIFuc43v72qOy++eYoijrppMhlrFungCFSS5TTGIdaWwt1BuUqsLMV0h0dEUAuvDDqMDZtitd/+Idw1VWwa1cEkDe8Ae69NwLGjBmxPxTqRbZsidzIwoXwq1/BMccUisYUNERqg4LGONXUNPCNuji4QAzXAYWZ8H796wgg06fHkB7NzREUdu2CH/0ocijz5sU2d90VuY1ly+Lc994Lxx6rvhUitURBowpGYia7wZyruBlrU1P5ZrqrVsXytrZI41FHRc/tzs5oLnvQQdFKascOeOKJqAe5997IlfT1RR2IchkitUNBo8JGcs7sPOcqNUVqS8uBzXRvugnOOivWrVkTgWbXLvjJT6Lu48UXo95jxw7YsCGeNzfHiLTTpkUz2+OPh9e/vjLXIiJjkyrCK6i/iYZG4tjZHuH9TZHa0bF/S6rdu+HOO+Gf/xm+9a0YOHDWrAgEfX2xTWdnBIj58wvBY+bMCDL79kVl+CmnKJchUmuU06ig/iYaStcPpciqVO5l6tRC89u0OW16Dii0pGpshFtuiWNMnx65iHTMqDlzYpTZPXtiTKm6ukjf4sWx3ZFHxnm2boW3vlW5DJFapKBRxkjUQ5TrU7FtG1x77dCKrNLcy+TJcVPv64vXZ59d6K393HNx7N274eCDIxdx6qmR6+jshOefj6KrlpbIMTzySBx38mR405uiZVQa4ObPjxxIc3PkNnbtin1Xr1YuQ6QWKWiUMFL1EKWavZ56alQ05xkGpJSursgFrF8fxUZTpkROoKcH3vMe+PnPY2iP7dsjV/DggzFPxRNPRJBxj4rtqVMjqLgfmNYjj4zirTSn0dwcx546NbZVvwyR2qWgUWQwYzvlUdzstaMjfrHPnh3rs0VWAx2/uzvqKX70o8gxTJoU9Qzt7RGQZs+Oyuk0mDQ3R9+KL34xKr3nzInr+sUvYuyouqRG66STDhwzKi1Sq1arLxEZHxQ0Mrq7Y5juPXsKA+kN5qZeTrbZ6003xa//xx+PznGzZpUeBqRYOuHRzTdHy6ampsgpdHdHWru6oujo/vvh0UcjODQ0RB3G+vVwzz2F9Pf0wNKlsc4sAk9xWrOvRURSChqJtEhqz56YqrSuLsrz0z4Pw+3A1tkJ110Xv/aPOQZuuCHqDg4/HD7+8f5vzuvXw1//dVRuP/NM3OwnT45jpfUk3d2FGfPWrYtOeO3tsGRJ1KGYRZ+L9vYoulqzJq5x0qRYpl7dIpKHggb7F0nNmxc32NtuK9xEDzsMnnwS3vjGeD3YIpu2tggY994bOYuurri579gBxx0XFdHZubGzlfAQuZOGhggSjY2FwQd3747HI46IfXt64JBDItjt2FFoPjtjRqxrb4+WVQsXRgV4S0v5AQ9FREpR0ODAprGve13chJcvh2efjeE1fvYz+O3fjhvy7Nn5K8jTgDR7dtz09+2DF16I51OmxKB/2V/6xZXwb397oUIa4nydnYWb/ezZkY60mGvXrsKMeGnrqd27I0g0NkaAWbo00pGOQVVqwEMRkVLUuY8DR4RNi6Ta2mJK0+bmKOK5/vroMV1fn7+jXhqQZs6M3EVdXSzbujVe79tX+KVfqjPg3XfH+uXLC4Ftz55Yf/DBMTT5E0/EuVaujOP19UWO4/d+L4qxli2L3IZ71Ke8//1w3nnRTHfNmsr1UBeRiUc5DQ5sGgtxk37mmbhBr18fN+x0StP774/t84zwmg1IBx0UuZWWlriZb94cj6tXx7YbNkRz2eKWVW97WwSP1tbIpZxwAixaFAGovb1Qt9HaGsHgxBNj+02boljt8MMj0B11VBx39ux8Ax6KiBRT0EgUN41ta4PPfQ5eeikCRUtLVC53dMQv9m3b8tUFNDUVOtbV1RVaPN13XwSNJUsKU6Y+/3wEqGXL4F3viuKrnTtj2x074KGHovhp06bYZ+rUOP/SpYV0NDVFEdTi
xXDlldHnYs6cWPfooxqVVkSGR0EjI/vre+lS+OQn4TOfieHD9+6NG3x3d9zI0850aZFWU1OhAjs7L3Z7e1Sq19XF+i1bYqKjtraocN+1K5rQNjdHH4uZM6O46ZZbIlcCERweeywC1+OPR2X9li0RzHp6Ym7u9vb9i5nS87/jHRFs0lyRRqUVkeFQ0EiUuuHPnBlFSkceGb2w085xn/501BHcfHOheOqIIyK4bNsWN/ZDD42b+86dsd+MGfH81lsj15C2gtqyJfaZOTNyDy0tUR+xaBGcfDLccUc0kU0HFZw2rdBSauHC2G7hwgM7IKbFYs3NcZyOjqjv0HhRIjIcChoUWiy1t8ccEsuXR7n/zp3RzPYnP4miqc2b4QMfiABw7bX713ns2BHBZe/euEFv3BiB4Omno4VUfX0MzbFjR/x1d0dRlVnkGNwjQNTVRUCYPz9u8g89FC24XnstAs306ZEL2r07chjuEUyK+1pk62nSllirVimXISLDU/NBI22xNGlSBI+ZM6POYM6cuGFPnVoYsC+dgGjSpMhVLFoU+9TVxT51dVEHcuSREQiefDLWu0cdQ9rbPB2epLc3buZNTbHNli1xg58+PQLEHXfEsn37CmNFpU12W1vjWMcfv38LrKw808KKiAxGzQeNrq64+ULcfFtaojksxE3+1Vfjb9KkQrPVG24otFzq7Y1cQFdXtH7avj16lKeTFdXVRfHTtm1xEz/88MgZPPhg9LdobIxjNzbGNpMmRe5j6tQ4f09PvE6HPJ87N3IQZ5xRGJ22r698Xwu1khKRkVTzQWPbtsKwIRs2RODo7ITLL48hxtOio1mzotJ68+Z43dQUz3t7Y5/GxtivtzcCyN698bq+PvZLt5s3D1asKDTj7eiINDQ3F3I1UBhMMD1GWteyfXvUsxx7bPwpFyEi1VTTQSMdr+m446IuY968aKW0dWt06jOL7fbsiZt7R0fcnFta4maeTnTU11cIDA0NhUCzd2+hyKi+Ph63b4/6kJdfjjqTgw6KOo7e3sjRmMU5tm6NprdpDqS7O5ZPmwbvfOf+dRciItVS00Ej7a198MExdMiuXTHY3w9/GDfnzs648bvHTT2ttN6zJwJFXV0hsPT1xbF6euJ1mlNwj0r1tKf2c89Fh7602e62bVGHkVZiNzZG0da8efDZz8LXvlYoFjOLALNw4ei8XyIiNR00sr21u7qikvvFF+OGvndvoQc4xC/9hoZCsVNqypRCRXZatASF/Xp6IteQfV1fH/s0NEQw2bo1nk+fHrmYpqao21i8OEbEfeaZQvFUOpy6iMhoqOmgkTZLvemmmG+iszMqtffuLRQ9ZaW5iKy0h3da4Zwdi2ry5DhOT08hV1JfH8GoqSlyLGlF95Qp0WJrx444P0QwOeecSF9ad6FmsyIymsZc0DCzlcBXgHrgn939i5U8X2trzGr32mvR9Dad8W779vzH6Osr9LnI2rOnUEyVBou0vqOjo1B/0dgYxWNbt8b2U6dGX4x7742xpM47TxXeIjI2jKmgYWb1wNeAdwMbgV+a2Y3u/kQlzztrVuQyNmyInMKOHYPbPy2WyhZPpdI5LUqNhuseOZHZswu5jfr6CBwrVsQ2XV2ak1tExo6xNjT6ccCz7v68u3cD3wPOrMaJ6+sjWHR2lr75V/K8e/bEuefMiXqMk06KCnPQ4IIiMraMtaDRCryUeb0xWbYfMzvfzNaZ2br29vZhn/TJJ6Oj3JQphQrrakiLs7q7IzdxzjkxhEljowYXFJGxaUwVT+Xl7lcAVwCsWLFiWPmC9evhS1+KfhP79sVNOh25diSYlc65pMt7e6PC/JRTYliSpUs1uKCIjF1jLafRBhyceb0oWVYR3d3RMmny5Bj8L+0NPpImTz5wmVnUWzQ1xbAg550Hf/Znce729ggmaiUlImPRWMtp/BI4zMyWEsHiQ8CHK3Wyrq64eff2wgMPFDr7jZTGxv2LuxoaoqPe1KnRWirNTVx0UeQw
NLigiIx1YypouHuPmV0I/AfR5PYqd3+8UudLO/els+FBBJG0xdNwpb3IUz090XFwxowYuLCvDy69NAIGaHBBERn7xlTQAHD3nwI/rca5mpqisvk//iM61JVqFjtU9fURNCZNiufpHON9fVF38Za3xNzgacAQERkPxlqdRtV0d0eF8+LFMf9FR8fIHdssiqbSx0mTYoiQSZOiWOrUU+H3f18BQ0TGnzGX06iGdKa+dEa7ke4L4R59LyZPjma86eCE9fXRF2PKFI0fJSLjU80FjXSmvmnTIlhs3AjXXTfy52lsjKBhFrMBdnfHaLpHHRWBSkRkPKq521faQiqdvvWRR2Jk25HU0BA5iUmTog5j9epo0jt5crScKp7PW0RkvKi5Oo3scOh798ac3q+9NjLHzs6tsXt3FFM1NsL8+dFiatas8vN5i4iMBzUXNNLh0Ds7YcuWyGWMVN+Mxsb9X+/dC298I5x9dpxv06Z4LDeft4jIWFdzxVMQw6GvWRNzfD/1VEzxOlx1dTGBknvkKqZNi+Wf+hS84Q3RUkod90RkvKvJoAFx416wAJYsiRxCqUmXBmPaNDj22Gha+973xnwcdXXwjncUzqdgISLjXc0VT2U1NcE73xl1DsMxeXLkMnp6oqntLbdE0dTq1QoUIjKx1GxOI3X44dHhrqNj8IMVNjRELuWkk2DbNnjf+6IfxrZtEUBaWiqSZBGRUVPTOQ2IIqRTTommsIMxaVLUTzQ3x76nnRY5lsZGmDcvthnJIdZFRMaCmg8azc1Rt3HwwYXK6zymTo0e3pdcAldeCcuWFYJEV5ea1YrIxFTzQSMdtHDPnsg95Akcy5ZFU9olSyJA7N5daMarZrUiMpHVfJ1GWxvcd1+0enr66f5v9HWZEDt5cmEAwrVrowmv5sMQkYmupnMa3d1wzTXwn/8ZPcPTntz19fvPgwGxrKEhAkdnZ1R0n3FGVHz39BSGBZk1SwFDRCaums5pdHTAQw/F/ODuEQD27o2b/syZ0SnvlVfg8ccjWNTXR/3H0qXwgQ9EhbfqL0SkltR00ICYKKm3t9B8tqcnchl9fTHvxX33RRPahQsjaBxyCBx2WASLTZtiP9VfiEitqOmgMWtWjD7b1hYBY8+eyG1Mnx7Boa8vlp9wQmFokPb2yIWcd15htFwFDBGpFTUdNJqaovIa4Be/iLGoXve6GPrj7LOjWe2MGRE8nngCXn01AsWqVYNrnisiMlGYu492GoZlxYoVvm7dumEdI536tbv7wMrsdJa/PXsieKxapWlaRWT8M7MH3H3FYPer6ZxGqqmp0Iu7WDoirprSiogoaOSiEWpFREJN99MQEZHBUdAQEZHcFDRERCQ3BQ0REclNQUNERHIb9/00zKwd2FDl084FXq3yOUdbrV1zrV0v6JprRXrNh7j7oOcXHfdBYzSY2bqhdIoZz2rtmmvtekHXXCuGe80qnhIRkdwUNEREJDcFjaG5YrQTMApq7Zpr7XpB11wrhnXNqtMQEZHclNMQEZHcFDRERCQ3BY1BMLOVZva0mT1rZhePdnoqxcxeMLNfmdnDZrYuWTbHzG4xs2eSx9mjnc7hMLOrzGyLmT2WWVbyGi38Y/K5P2pmx45eyoeuzDVfamZtyWf9sJmdkVn3l8k1P21m7x2dVA+PmR1sZreb2RNm9riZfTxZPiE/636ud+Q+Z3fXX44/oB54DjgUaAIeAY4Y7XRV6FpfAOYWLfs74OLk+cXA/xntdA7zGk8CjgUeG+gagTOAnwEGHA/cN9rpH8FrvhT4ixLbHpF8xycBS5Pvfv1oX8MQrnkBcGzyfDrw6+TaJuRn3c/1jtjnrJxGfscBz7r78+7eDXwPOHOU01RNZwJXJ8+vBlaPXlKGz93vAl4rWlzuGs8EvuPhXmCWmS2oSkJHUJlrLudM4Hvuvtfd1wPPEv8D44q7b3b3B5PnO4EngVYm6Gfdz/WWM+jPWUEjv1bgpczrjfT/YYxnDtxsZg+Y2fnJ
svnuvjl5/jIwf3SSVlHlrnGif/YXJkUxV2WKHSfcNZvZEuAY4D5q4LMuul4Yoc9ZQUNKOcHdjwVOBy4ws5OyKz3ytRO6rXYtXGPicmAZcDSwGfjyqKamQsxsGnAtcJG778ium4ifdYnrHbHPWUEjvzbg4MzrRcmyCcfd25LHLcCPiezqK2k2PXncMnoprJhy1zhhP3t3f8Xde929D7iSQtHEhLlmM2skbqD/6u7XJYsn7Gdd6npH8nNW0Mjvl8BhZrbUzJqADwE3jnKaRpyZTTWz6elz4D3AY8S1nptsdi5ww+iksKLKXeONwEeSljXHA9szRRvjWlF5/VnEZw1xzR8ys0lmthQ4DLi/2ukbLjMz4JvAk+7+95lVE/KzLne9I/o5j3Zt/3j6I1pW/JpoYfDZ0U5Pha7xUKI1xSPA4+l1AgcBtwLPAD8H5ox2Wod5ndcQ2fR9RDnuR8tdI9GS5mvJ5/4rYMVop38Er/m7yTU9mtxAFmS2/2xyzU8Dp492+od4zScQRU+PAg8nf2dM1M+6n+sdsc9Zw4iIiEhuKp4SEZHcFDRERCQ3BQ0REclNQUNERHJT0BARkdwUNEREJDcFDakpZtabDA39uJk9YmafMLO6ZN3JZrY9M3z0w2b2rqL9HjOzH5pZcwXO0dnPMS9LhrauM7PJZvaUmR2VWf9JM/vGSL1PIuU0jHYCRKpst7sfDWBm84B/A2YAlyTr73b3VQPs96/A/wD+vsR2wzlHSUnAOYsYWO6d7n67mV0EfD0ZF2xhkp4VeY8pMlTKaUjN8hhb63xi9E8bxK53A6+v8DmyTiZ6518OnJMcdy3Ru/sjwD8Al7r7tiEeXyQ35TSkprn782ZWD8xLFp1oZg9nNnm/uz+XvjCzBmL037WVOkcJ5xBDgNwA/K2ZNbr7PuAiYpygZ9z9u3nTIzIcChoi+ytXdDQlc6O/mxgUbqTPcYBkcMwzgD93951mdh/wXuAmd99kZrcBNw0jLSKDoqAhNc3MDgV6iaGx39jPpr+pp6jgOUp5LzAL+FVSutUM7KYQKPqSP5GqUNCQmmVmLcD/A77q7j70KoeKnuMc4I/c/ZrkeFOB9WbW7O5dI5takYEpaEitSYuZGoEeYsjobCuo4vqGL7j7j6p0jmYz25hZ/nVgJdEyCgB332Vm9wC/A3x/kOkSGTYNjS4iIrmpya2IiOSm4imRITCzdOa3Yqe5+9Zqp0ekWlQ8JSIiual4SkREclPQEBGR3BQ0REQkNwUNERHJ7f8D+SzdU+UnvbYAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], "source": [ "#Check if there is a relationship between the how late the flight leaves and how late the flight arrives\n", "delays_df.plot(\n", @@ -118,9 +136,7 @@ }, { "cell_type": "markdown", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "The scatter plot allows us to see there is no correlation between distance and arrival delay but there is a strong correlation between departure delay and arrival delay.\n" ] @@ -142,7 +158,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.8.5-final" } }, "nbformat": 4, From 
afa6185302dd9fd472605a5a0429999a2e482e2a Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Mon, 21 Dec 2020 16:06:49 +0300 Subject: [PATCH 10/19] Update markdown, refactor code. --- .../09 - Removing rows.ipynb | 410 ++---------------- 1 file changed, 36 insertions(+), 374 deletions(-) diff --git a/source/week-5/handling-duplicates-and-rows-with-missing-values/09 - Removing rows.ipynb b/source/week-5/handling-duplicates-and-rows-with-missing-values/09 - Removing rows.ipynb index 49272da..5559655 100644 --- a/source/week-5/handling-duplicates-and-rows-with-missing-values/09 - Removing rows.ipynb +++ b/source/week-5/handling-duplicates-and-rows-with-missing-values/09 - Removing rows.ipynb @@ -6,8 +6,7 @@ "source": [ "# Handling duplicate rows and rows with missing values\n", "\n", - "Most machine learning algorithms will return an error if they encounter a missing value. So, you often have to remove rows with missing values from your DataFrame.\n", - "\n", + "Most machine learning algorithms will return an error if they encounter a missing value. So, you often have to remove rows with missing values from your DataFrame. \n", "To learn how, we need to create a pandas DataFrame and load it with data." ] }, @@ -33,144 +32,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903.0-2.014501433.0-17.0225210.0197.01670
12018-10-01WNN8329B3744ABQBWI15001458.0-2.020452020.0-25.0225202.0191.01670
22018-10-01WNN920WN1019ABQDAL18001802.02.020452032.0-13.010590.080.0580
32018-10-01WNN480WN1499ABQDAL950947.0-3.012351223.0-12.010596.081.0580
42018-10-01WNN227WN3635ABQDAL11501151.01.014301423.0-7.010092.080.0580
\n", - "
" - ], "text/plain": [ " FL_DATE OP_UNIQUE_CARRIER TAIL_NUM OP_CARRIER_FL_NUM ORIGIN DEST \\\n", "0 2018-10-01 WN N221WN 802 ABQ BWI \n", @@ -192,15 +55,15 @@ "2 105 90.0 80.0 580 \n", "3 105 96.0 81.0 580 \n", "4 100 92.0 80.0 580 " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903.0-2.014501433.0-17.0225210.0197.01670
12018-10-01WNN8329B3744ABQBWI15001458.0-2.020452020.0-25.0225202.0191.01670
22018-10-01WNN920WN1019ABQDAL18001802.02.020452032.0-13.010590.080.0580
32018-10-01WNN480WN1499ABQDAL950947.0-3.012351223.0-12.010596.081.0580
42018-10-01WNN227WN3635ABQDAL11501151.01.014301423.0-7.010092.080.0580
\n
" }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv')\n", + "delays_df = pd.read_csv('./Lots_of_flight_data.csv')\n", "delays_df.head()" ] }, @@ -208,7 +71,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**info** will tell us how many rows are in the DataFrame and for each column how many of those rows contain non-null values. From this we can determine which columns (if any) contain null/missing values" + "`info` will tell us how many rows are in the DataFrame and for each column how many of those rows contain non-null values. \n", + "From this we can determine which columns (if any) contain null/missing values" ] }, { @@ -217,30 +81,10 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "RangeIndex: 300000 entries, 0 to 299999\n", - "Data columns (total 16 columns):\n", - "FL_DATE 300000 non-null object\n", - "OP_UNIQUE_CARRIER 300000 non-null object\n", - "TAIL_NUM 299660 non-null object\n", - "OP_CARRIER_FL_NUM 300000 non-null int64\n", - "ORIGIN 300000 non-null object\n", - "DEST 300000 non-null object\n", - "CRS_DEP_TIME 300000 non-null int64\n", - "DEP_TIME 296825 non-null float64\n", - "DEP_DELAY 296825 non-null float64\n", - "CRS_ARR_TIME 300000 non-null int64\n", - "ARR_TIME 296574 non-null float64\n", - "ARR_DELAY 295832 non-null float64\n", - "CRS_ELAPSED_TIME 300000 non-null int64\n", - "ACTUAL_ELAPSED_TIME 295832 non-null float64\n", - "AIR_TIME 295832 non-null float64\n", - "DISTANCE 300000 non-null int64\n", - "dtypes: float64(6), int64(5), object(5)\n", - "memory usage: 30.9+ MB\n" + "\nRangeIndex: 300000 entries, 0 to 299999\nData columns (total 16 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 FL_DATE 300000 non-null object \n 1 OP_UNIQUE_CARRIER 300000 non-null object \n 2 TAIL_NUM 299660 non-null object \n 3 
OP_CARRIER_FL_NUM 300000 non-null int64 \n 4 ORIGIN 300000 non-null object \n 5 DEST 300000 non-null object \n 6 CRS_DEP_TIME 300000 non-null int64 \n 7 DEP_TIME 296825 non-null float64\n 8 DEP_DELAY 296825 non-null float64\n 9 CRS_ARR_TIME 300000 non-null int64 \n 10 ARR_TIME 296574 non-null float64\n 11 ARR_DELAY 295832 non-null float64\n 12 CRS_ELAPSED_TIME 300000 non-null int64 \n 13 ACTUAL_ELAPSED_TIME 295832 non-null float64\n 14 AIR_TIME 295832 non-null float64\n 15 DISTANCE 300000 non-null int64 \ndtypes: float64(6), int64(5), object(5)\nmemory usage: 36.6+ MB\n" ] } ], @@ -252,16 +96,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "TAIL_NUM, DEP_TIME, DEP_DELAY, ARR_TIME, ARR_DELAY, ACTUAL_ELAPSED_TIME, and AIR_TIME all have rows with missing values." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are many techniques to deal with missing values, the simplest is to delete the rows with missing values.\n", + "## Null Values\n", "\n", - "**dropna** will delete rows containing null/missing values" + "`TAIL_NUM`, `DEP_TIME`, `DEP_DELAY`, `ARR_TIME`, `ARR_DELAY`, `ACTUAL_ELAPSED_TIME`, and `AIR_TIME` all have rows with missing values. \n", + "There are many techniques to deal with missing values, the simplest is to delete the rows with missing values. 
\n", + "`dropna` will delete rows containing null/missing values" ] }, { @@ -272,30 +111,10 @@ }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "Int64Index: 295832 entries, 0 to 299999\n", - "Data columns (total 16 columns):\n", - "FL_DATE 295832 non-null object\n", - "OP_UNIQUE_CARRIER 295832 non-null object\n", - "TAIL_NUM 295832 non-null object\n", - "OP_CARRIER_FL_NUM 295832 non-null int64\n", - "ORIGIN 295832 non-null object\n", - "DEST 295832 non-null object\n", - "CRS_DEP_TIME 295832 non-null int64\n", - "DEP_TIME 295832 non-null float64\n", - "DEP_DELAY 295832 non-null float64\n", - "CRS_ARR_TIME 295832 non-null int64\n", - "ARR_TIME 295832 non-null float64\n", - "ARR_DELAY 295832 non-null float64\n", - "CRS_ELAPSED_TIME 295832 non-null int64\n", - "ACTUAL_ELAPSED_TIME 295832 non-null float64\n", - "AIR_TIME 295832 non-null float64\n", - "DISTANCE 295832 non-null int64\n", - "dtypes: float64(6), int64(5), object(5)\n", - "memory usage: 32.7+ MB\n" + "\nInt64Index: 295832 entries, 0 to 299999\nData columns (total 16 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 FL_DATE 295832 non-null object \n 1 OP_UNIQUE_CARRIER 295832 non-null object \n 2 TAIL_NUM 295832 non-null object \n 3 OP_CARRIER_FL_NUM 295832 non-null int64 \n 4 ORIGIN 295832 non-null object \n 5 DEST 295832 non-null object \n 6 CRS_DEP_TIME 295832 non-null int64 \n 7 DEP_TIME 295832 non-null float64\n 8 DEP_DELAY 295832 non-null float64\n 9 CRS_ARR_TIME 295832 non-null int64 \n 10 ARR_TIME 295832 non-null float64\n 11 ARR_DELAY 295832 non-null float64\n 12 CRS_ELAPSED_TIME 295832 non-null int64 \n 13 ACTUAL_ELAPSED_TIME 295832 non-null float64\n 14 AIR_TIME 295832 non-null float64\n 15 DISTANCE 295832 non-null int64 \ndtypes: float64(6), int64(5), object(5)\nmemory usage: 38.4+ MB\n" ] } ], @@ -308,9 +127,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you don't need to keep the original 
DataFrame, you can just delete the rows within the existing DataFrame instead of creating a new one\n", - "\n", - "**inplace=*True*** indicates you want to drop the rows in the specified DataFrame" + "If you don't need to keep the original DataFrame, you can just delete the rows within the existing DataFrame instead of creating a new one. `inplace=True` indicates you want to drop the rows in the specified DataFrame" ] }, { @@ -319,30 +136,10 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "Int64Index: 295832 entries, 0 to 299999\n", - "Data columns (total 16 columns):\n", - "FL_DATE 295832 non-null object\n", - "OP_UNIQUE_CARRIER 295832 non-null object\n", - "TAIL_NUM 295832 non-null object\n", - "OP_CARRIER_FL_NUM 295832 non-null int64\n", - "ORIGIN 295832 non-null object\n", - "DEST 295832 non-null object\n", - "CRS_DEP_TIME 295832 non-null int64\n", - "DEP_TIME 295832 non-null float64\n", - "DEP_DELAY 295832 non-null float64\n", - "CRS_ARR_TIME 295832 non-null int64\n", - "ARR_TIME 295832 non-null float64\n", - "ARR_DELAY 295832 non-null float64\n", - "CRS_ELAPSED_TIME 295832 non-null int64\n", - "ACTUAL_ELAPSED_TIME 295832 non-null float64\n", - "AIR_TIME 295832 non-null float64\n", - "DISTANCE 295832 non-null int64\n", - "dtypes: float64(6), int64(5), object(5)\n", - "memory usage: 32.7+ MB\n" + "\nInt64Index: 295832 entries, 0 to 299999\nData columns (total 16 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 FL_DATE 295832 non-null object \n 1 OP_UNIQUE_CARRIER 295832 non-null object \n 2 TAIL_NUM 295832 non-null object \n 3 OP_CARRIER_FL_NUM 295832 non-null int64 \n 4 ORIGIN 295832 non-null object \n 5 DEST 295832 non-null object \n 6 CRS_DEP_TIME 295832 non-null int64 \n 7 DEP_TIME 295832 non-null float64\n 8 DEP_DELAY 295832 non-null float64\n 9 CRS_ARR_TIME 295832 non-null int64 \n 10 ARR_TIME 295832 non-null float64\n 11 ARR_DELAY 295832 non-null 
float64\n 12 CRS_ELAPSED_TIME 295832 non-null int64 \n 13 ACTUAL_ELAPSED_TIME 295832 non-null float64\n 14 AIR_TIME 295832 non-null float64\n 15 DISTANCE 295832 non-null int64 \ndtypes: float64(6), int64(5), object(5)\nmemory usage: 38.4+ MB\n" ] } ], @@ -355,6 +152,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Duplicate Values\n", + "\n", "When data is loaded from multiple data sources you sometimes end up with duplicate records. " ] }, @@ -364,66 +163,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2DullesWashingtonUSA
3HeathrowLondonUnited Kingdom
4SchipholAmsterdamNetherlands
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -431,15 +172,15 @@ "2 Dulles Washington USA\n", "3 Heathrow London United Kingdom\n", "4 Schiphol Amsterdam Netherlands" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
2DullesWashingtonUSA
3HeathrowLondonUnited Kingdom
4SchipholAmsterdamNetherlands
\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ - "airports_df = pd.read_csv('Data/airportsDuplicateRows.csv')\n", + "airports_df = pd.read_csv('./airportsDuplicateRows.csv')\n", "airports_df.head()" ] }, @@ -447,9 +188,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "use **duplicates** to find the duplicate rows.\n", - "\n", - "If a row is a duplicate of a previous row it returns **True**" + "Use `DataFrame.duplicated()` to find the duplicate rows. If a row is a duplicate of a previous row it returns `True`." ] }, { @@ -458,6 +197,7 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "0 False\n", @@ -471,9 +211,8 @@ "dtype: bool" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], "source": [ @@ -484,7 +223,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**drop_duplicates** will delete the duplicate rows" + "`drop_duplicates` will delete the duplicate rows" ] }, { @@ -493,78 +232,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
3HeathrowLondonUnited Kingdom
4SchipholAmsterdamNetherlands
5ChangiSingaporeSingapore
6PearsonTorontoCanada
7NaritaTokyoJapan
\n", - "
" - ], "text/plain": [ " Name City Country\n", "0 Seattle-Tacoma Seattle USA\n", @@ -574,24 +243,17 @@ "5 Changi Singapore Singapore\n", "6 Pearson Toronto Canada\n", "7 Narita Tokyo Japan" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameCityCountry
0Seattle-TacomaSeattleUSA
1DullesWashingtonUSA
3HeathrowLondonUnited Kingdom
4SchipholAmsterdamNetherlands
5ChangiSingaporeSingapore
6PearsonTorontoCanada
7NaritaTokyoJapan
\n
" }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ "airports_df.drop_duplicates(inplace=True)\n", "airports_df" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -610,9 +272,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From 1db58fb9bfadbb049fb445be2999a957351d0567 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Tue, 22 Dec 2020 08:50:58 +0300 Subject: [PATCH 11/19] Update numpy markdown, optimize code. --- .../14 - Working with numpy and pandas.ipynb | 457 +++--------------- source/week-5/numpy-pandas/README.md | 11 +- 2 files changed, 68 insertions(+), 400 deletions(-) diff --git a/source/week-5/numpy-pandas/14 - Working with numpy and pandas.ipynb b/source/week-5/numpy-pandas/14 - Working with numpy and pandas.ipynb index 03456ae..a48cc53 100644 --- a/source/week-5/numpy-pandas/14 - Working with numpy and pandas.ipynb +++ b/source/week-5/numpy-pandas/14 - Working with numpy and pandas.ipynb @@ -1,25 +1,34 @@ { "cells": [ + { + "source": [ + "# NumPy Arrays\n", + "\n", + "A **numpy array** is a grid of values, all of the same type, and is indexed by a tuple of *nonnegative integers*. The number of dimensions is the **rank** of the array; the **shape** of an array is a tuple of integers giving the **size** of the array along each **dimension**." 
+ ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Moving data from numpy arrays to pandas DataFrames\n", - "In our last notebook we trained a model and compared our actual and predicted results\n", + "## Moving data from Numpy arrays to Pandas DataFrames\n", "\n", - "What may not have been evident was when we did this we were working with two different objects: a **numpy array** and a **pandas DataFrame**" + "In our last notebook we trained a model and compared our actual and predicted results. \n", + "What may not have been evident was when we did this we were working with two different objects: a **numpy array** and a **pandas DataFrame**." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To explore further let's rerun the code from the previous notebook to create a trained model and get predicted values for our test data" + "To explore further let's rerun the code from the previous notebook to create a trained model and get predicted values for our test data." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -30,12 +39,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load our data from the csv file\n", - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv') \n", + "delays_df = pd.read_csv('./Lots_of_flight_data.csv') \n", "\n", "# Remove rows with null values since those will crash our linear regression model training\n", "delays_df.dropna(inplace=True)\n", @@ -61,15 +70,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the last Notebook, you might have noticed the output displays differently when you display the contents of the predicted values in y_pred and the actual values in y_test" + "In the last Notebook, you might have noticed the output displays differently when you display the contents of the predicted values in `y_pred` and the actual values in `y_test`." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "array([[3.47739078],\n", @@ -81,9 +91,8 @@ " [5.66255414]])" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -92,283 +101,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ARR_DELAY
291483-5.0
98997-12.0
23454-9.0
110802-14.0
49449-20.0
9494414.0
160885-17.0
47572-20.0
16480020.0
62578-9.0
1967425.0
911660.0
171564-9.0
607066.0
240773-6.0
32695-13.0
98399-23.0
167341-11.0
126191-4.0
188715131.0
258610-5.0
215751-20.0
41210-15.0
68090-19.0
1407940.0
178840-14.0
24807121.0
127705.0
9594840.0
172913-13.0
......
20079721.0
361990.0
70402-37.0
285308152.0
201508-2.0
154671-5.0
238535-5.0
133567-9.0
3349-8.0
257254-28.0
106572-19.0
73023-25.0
214699-12.0
274435-7.0
67089-10.0
269917-4.0
16496670.0
275120-12.0
139292-8.0
31106-25.0
27779917.0
293749-7.0
23111435.0
11645-15.0
252520-12.0
209898-20.0
22210-9.0
165727-6.0
260838-33.0
1925460.0
\n", - "

88750 rows × 1 columns

\n", - "
" - ], "text/plain": [ " ARR_DELAY\n", "291483 -5.0\n", @@ -384,11 +122,11 @@ "192546 0.0\n", "\n", "[88750 rows x 1 columns]" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ARR_DELAY
291483-5.0
98997-12.0
23454-9.0
110802-14.0
49449-20.0
......
209898-20.0
22210-9.0
165727-6.0
260838-33.0
1925460.0
\n

88750 rows × 1 columns

\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ @@ -399,23 +137,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use **type()** to check the datatype of an object." + "Use `type()` to check the datatype of an object." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "numpy.ndarray" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], "source": [ @@ -424,18 +162,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -446,28 +184,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* **y_pred** is a numpy array\n", - "* **y_test** is a pandas DataFrame\n", + "- `y_pred` is a numpy array\n", + "- `y_test` is a pandas DataFrame\n", "\n", - "Another way you might discover this is if you try to use the **head** method on **y_pred**. \n", - "\n", - "This will return an error, because **head** is a method of the DataFrame class it is not a method of numpy arrays" + "Another way you might discover this is if you try to use the `head` method on `y_pred`. \n", + "This will return an error, because `head` is a method of the DataFrame class it is not a method of numpy arrays." 
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { + "output_type": "error", "ename": "AttributeError", "evalue": "'numpy.ndarray' object has no attribute 'head'", - "output_type": "error", "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0my_pred\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'head'" + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0my_pred\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'head'" ] } ], @@ -484,15 +221,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "['Pearson' 'Changi' 'Narita']\n", - "Narita\n" + "['Pearson' 'Changi' 'Narita']\nNarita\n" ] } ], @@ -505,18 +241,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "0 Pearson\n", - "1 Changi\n", - "2 Narita\n", - "dtype: object\n", - "Narita\n" + "0 Pearson\n1 Changi\n2 Narita\ndtype: object\nNarita\n" ] } ], 
@@ -535,17 +267,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "[['YYZ' 'Pearson']\n", - " ['SIN' 'Changi']\n", - " ['NRT' 'Narita']]\n", - "YYZ\n" + "[['YYZ' 'Pearson']\n ['SIN' 'Changi']\n ['NRT' 'Narita']]\nYYZ\n" ] } ], @@ -560,18 +289,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - " 0 1\n", - "0 YYZ Pearson\n", - "1 SIN Changi\n", - "2 NRT Narita\n", - "YYZ\n" + " 0 1\n0 YYZ Pearson\n1 SIN Changi\n2 NRT Narita\nYYZ\n" ] } ], @@ -585,67 +310,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you need the functionality of a DataFrame, you can move data from numpy objects to pandas objects and vice-versa.\n", - "\n", - "In the example below we use the DataFrame constructor to read the contents of the numpy array *y_pred* into a DataFrame called *predicted_df*\n", - "\n", - "Then we can use the functionality of the DataFrame object" + "If you need the functionality of a DataFrame, you can move data from numpy objects to pandas objects and vice-versa. \n", + "In the example below we use the DataFrame constructor to read the contents of the numpy array `y_pred` into a DataFrame called `predicted_df`. Then we can use the functionality of the DataFrame object." ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
03.477391
15.890559
24.332885
33.447476
45.072394
\n", - "
" - ], "text/plain": [ " 0\n", "0 3.477391\n", @@ -653,24 +329,17 @@ "2 4.332885\n", "3 3.447476\n", "4 5.072394" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0
03.477391
15.890559
24.332885
33.447476
45.072394
\n
" }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" + "execution_count": 12 } ], "source": [ "predicted_df = pd.DataFrame(y_pred)\n", "predicted_df.head()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -689,9 +358,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-5/numpy-pandas/README.md b/source/week-5/numpy-pandas/README.md index 2182364..57acae8 100644 --- a/source/week-5/numpy-pandas/README.md +++ b/source/week-5/numpy-pandas/README.md @@ -1,22 +1,21 @@ -# NumPy vs pandas - -There are numerous libraries available for use for data scientists. NumPy and pandas are two of the most common. +# NumPy vs Pandas +There are numerous libraries available for use for data scientists. NumPy and Pandas are two of the most common. Some operations may return different data types. You can use the Python function [type](https://docs.python.org/3/library/functions.html#type) to determine the type of an object. ## NumPy [NumPy](https://numpy.org/) is a Python package for scientific computing that includes a array and dictionary type objects for data analysis. 
-### Common object +### Common Object - [array](https://numpy.org/doc/1.18/reference/generated/numpy.array.html?highlight=array#numpy.array) creates an N-dimensional array object -## pandas +## Pandas [pandas](https://pandas.pydata.org/) is a Python package for data analysis that includes a 1 dimensional and 2 dimensional array objects -### Common objects +### Common Objects - [Series](https://pandas.pydata.org/docs/reference/api/pandas.Series.html) stores a one dimensional array - [DataFrame](https://pandas.pydata.org/docs/reference/frame.html) stores a two-dimensional array From 8a4a6204b7b53c543696bf95bd098a3aaa8bf866 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 23 Dec 2020 11:19:03 +0300 Subject: [PATCH 12/19] More details on the drop function --- .../08 - Removing columns.ipynb | 477 ++---------------- .../README.md | 3 + 2 files changed, 47 insertions(+), 433 deletions(-) diff --git a/source/week-5/removing-and-splitting-dataFrame-columns/08 - Removing columns.ipynb b/source/week-5/removing-and-splitting-dataFrame-columns/08 - Removing columns.ipynb index 3133f08..f28a88b 100644 --- a/source/week-5/removing-and-splitting-dataFrame-columns/08 - Removing columns.ipynb +++ b/source/week-5/removing-and-splitting-dataFrame-columns/08 - Removing columns.ipynb @@ -4,15 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Removing and splitting pandas DataFrame columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When you are preparing to train machine learning models, you often need to delete specific columns, or split certain columns from your DataFrame into a new DataFrame.\n", + "# Removing and splitting pandas DataFrame columns\n", "\n", + "When you are preparing to train machine learning models, you often need to delete specific columns, or split certain columns from your DataFrame into a new DataFrame. 
\n", "We need the pandas library and a DataFrame to explore" ] }, @@ -38,144 +32,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-214501433-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-220452020-252252021911670
22018-10-01WNN920WN1019ABQDAL18001802220452032-131059080580
32018-10-01WNN480WN1499ABQDAL950947-312351223-121059681580
42018-10-01WNN227WN3635ABQDAL11501151114301423-71009280580
\n", - "
" - ], "text/plain": [ " FL_DATE OP_UNIQUE_CARRIER TAIL_NUM OP_CARRIER_FL_NUM ORIGIN DEST \\\n", "0 2018-10-01 WN N221WN 802 ABQ BWI \n", @@ -197,15 +55,15 @@ "2 105 90 80 580 \n", "3 105 96 81 580 \n", "4 100 92 80 580 " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-214501433-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-220452020-252252021911670
22018-10-01WNN920WN1019ABQDAL18001802220452032-131059080580
32018-10-01WNN480WN1499ABQDAL950947-312351223-121059681580
42018-10-01WNN227WN3635ABQDAL11501151114301423-71009280580
\n
" }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ - "delays_df = pd.read_csv('Data/flight_delays.csv')\n", + "delays_df = pd.read_csv('./flight_delays.csv')\n", "delays_df.head()" ] }, @@ -217,12 +75,11 @@ "\n", "When you are preparing your data for machine learning, you may need to delete specific columns from the DataFrame before training the model.\n", "\n", - "For example:\n", - "Imagine you are training a model to predict how many minutes late a flight will be (ARR_DELAY)\n", - "\n", - "If the model knew the scheduled arrival time (CRS_ARR_TIME) and the actual arrival time (ARR_TIME), the model would quickly figure out ARR_DELAY = ARR_TIME - CRS_ARR_TIME\n", + "For example: \n", + "Imagine you are training a model to predict how many minutes late a flight will be (`ARR_DELAY`)\n", + "If the model knew the scheduled arrival time (`CRS_ARR_TIME`) and the actual arrival time (`ARR_TIME`), the model would quickly figure out `ARR_DELAY = ARR_TIME - CRS_ARR_TIME`\n", "\n", - "When we predict arrival times for future flights, we won't have a value for arrival time (ARR_TIME). So we should remove this column from the DataFrame so it is not used as a feature when training the model to predict ARR_DELAY. " + "When we predict arrival times for future flights, we won't have a value for arrival time (`ARR_TIME`). So we should remove this column from the DataFrame so it is not used as a feature when training the model to predict `ARR_DELAY`. " ] }, { @@ -231,138 +88,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-21450-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-22045-252252021911670
22018-10-01WNN920WN1019ABQDAL1800180222045-131059080580
32018-10-01WNN480WN1499ABQDAL950947-31235-121059681580
42018-10-01WNN227WN3635ABQDAL1150115111430-71009280580
\n", - "
" - ], "text/plain": [ " FL_DATE OP_UNIQUE_CARRIER TAIL_NUM OP_CARRIER_FL_NUM ORIGIN DEST \\\n", "0 2018-10-01 WN N221WN 802 ABQ BWI \n", @@ -384,15 +111,15 @@ "2 105 90 80 580 \n", "3 105 96 81 580 \n", "4 100 92 80 580 " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-21450-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-22045-252252021911670
22018-10-01WNN920WN1019ABQDAL1800180222045-131059080580
32018-10-01WNN480WN1499ABQDAL950947-31235-121059681580
42018-10-01WNN227WN3635ABQDAL1150115111430-71009280580
\n
" }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ - "# Remove the column ARR_TIME from the DataFrane delays_df\n", + "# Remove the column ARR_TIME from the DataFrame delays_df\n", "\n", "#delays_df = delays_df.drop(['ARR_TIME'],axis=1)\n", "new_df = delays_df.drop(columns=['ARR_TIME'])\n", @@ -412,138 +139,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-21450-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-22045-252252021911670
22018-10-01WNN920WN1019ABQDAL1800180222045-131059080580
32018-10-01WNN480WN1499ABQDAL950947-31235-121059681580
42018-10-01WNN227WN3635ABQDAL1150115111430-71009280580
\n", - "
" - ], "text/plain": [ " FL_DATE OP_UNIQUE_CARRIER TAIL_NUM OP_CARRIER_FL_NUM ORIGIN DEST \\\n", "0 2018-10-01 WN N221WN 802 ABQ BWI \n", @@ -565,11 +162,11 @@ "2 105 90 80 580 \n", "3 105 96 81 580 \n", "4 100 92 80 580 " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FL_DATEOP_UNIQUE_CARRIERTAIL_NUMOP_CARRIER_FL_NUMORIGINDESTCRS_DEP_TIMEDEP_TIMEDEP_DELAYCRS_ARR_TIMEARR_DELAYCRS_ELAPSED_TIMEACTUAL_ELAPSED_TIMEAIR_TIMEDISTANCE
02018-10-01WNN221WN802ABQBWI905903-21450-172252101971670
12018-10-01WNN8329B3744ABQBWI15001458-22045-252252021911670
22018-10-01WNN920WN1019ABQDAL1800180222045-131059080580
32018-10-01WNN480WN1499ABQDAL950947-31235-121059681580
42018-10-01WNN227WN3635ABQDAL1150115111430-71009280580
\n
" }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ @@ -584,22 +181,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We use different techniques to predict based on quantititative values which are usually numeric values (e.g. distance, number of minutes, weight) and qualitative (descriptive) values which may not be numeric (e.g. what airport a flight left from, what airline operated the flight)\n", - "\n", - "Quantitative data may be moved into a separate DataFrame before training a model.\n", - "\n", - "You also need to put the value you want to predict, called the label (ARR_DELAY) in a separate DataFrame from the values you think can help you make the prediction, called the features\n", - "\n", + "We use different techniques to predict based on quantitative values which are usually numeric values (e.g. distance, number of minutes, weight) and qualitative (descriptive) values which may not be numeric (e.g. what airport a flight left from, what airline operated the flight). \n", + "Quantitative data may be moved into a separate DataFrame before training a model. \n", + "You also need to put the value you want to predict, called the label (`ARR_DELAY`) in a separate DataFrame from the values you think can help you make the prediction, called the **features**. \n", "We need to be able to create a new dataframe from the columns in an existing dataframe" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " ORIGIN DEST OP_CARRIER_FL_NUM OP_UNIQUE_CARRIER TAIL_NUM\n", + "0 ABQ BWI 802 WN N221WN\n", + "1 ABQ BWI 3744 WN N8329B\n", + "2 ABQ DAL 1019 WN N920WN\n", + "3 ABQ DAL 1499 WN N480WN\n", + "4 ABQ DAL 3635 WN N227WN" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ORIGINDESTOP_CARRIER_FL_NUMOP_UNIQUE_CARRIERTAIL_NUM
0ABQBWI802WNN221WN
1ABQBWI3744WNN8329B
2ABQDAL1019WNN920WN
3ABQDAL1499WNN480WN
4ABQDAL3635WNN227WN
\n
" + }, + "metadata": {}, + "execution_count": 5 + } + ], "source": [ "# Create a new DataFrame called desc_df\n", "# include all rows\n", @@ -626,9 +237,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-5/removing-and-splitting-dataFrame-columns/README.md b/source/week-5/removing-and-splitting-dataFrame-columns/README.md index 539d8f0..adeca5c 100644 --- a/source/week-5/removing-and-splitting-dataFrame-columns/README.md +++ b/source/week-5/removing-and-splitting-dataFrame-columns/README.md @@ -5,6 +5,9 @@ When preparing data for machine learning you may need to remove specific columns ## Common functions - [drop](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html) deletes specified columns from a DataFrame + - `inplace` + - If `False`, returns a modified copy of the dataframe + - If `True`, it modifies the original dataframe ## Microsoft Learn Resources From 79504b80a63f14fa162c85e18025a36be3a39c82 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 23 Dec 2020 12:18:07 +0300 Subject: [PATCH 13/19] Explain train_test_split returns --- .../10 - Train Test split.ipynb | 320 +++--------------- .../README.md | 9 + 2 files changed, 62 insertions(+), 267 deletions(-) diff --git a/source/week-5/splitting-test-and-training-data-with-scikit-learn/10 - Train Test split.ipynb b/source/week-5/splitting-test-and-training-data-with-scikit-learn/10 - Train Test split.ipynb index 7739ac5..3d14a6a 100644 --- a/source/week-5/splitting-test-and-training-data-with-scikit-learn/10 - Train Test split.ipynb +++ b/source/week-5/splitting-test-and-training-data-with-scikit-learn/10 - Train Test split.ipynb @@ -5,11 +5,10 @@ "metadata": {}, "source": [ "# Splitting test and training data\n", - "When you train a data model you may need to split up your data into test and 
training data sets\n", "\n", - "To accomplish this task we will use the [scikit-learn](https://scikit-learn.org/stable/) library\n", - "\n", - "scikit-learn is an open source, BSD licensed library for data science for preprocessing and training models." + "When you train a data model you may need to split up your data into test and training data sets. \n", + "To accomplish this task we will use the **[scikit-learn](https://scikit-learn.org/stable/) library**. \n", + "Scikit-learn is an open source, BSD licensed library for data science for preprocessing and training models." ] }, { @@ -32,9 +31,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's load our csv file with information about flights and flight delays\n", - "\n", - "Use **shape** to find out how many rows and columns are in the original DataFrame" + "Let's load our csv file with information about flights and flight delays. \n", + "Use `shape` to find out how many rows and columns are in the original DataFrame." ] }, { @@ -43,18 +41,18 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(300000, 16)" ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv')\n", + "delays_df = pd.read_csv('./Lots_of_flight_data.csv')\n", "delays_df.shape" ] }, @@ -63,8 +61,8 @@ "metadata": {}, "source": [ "## Split data into features and labels\n", - "Create a DataFrame called X containing only the features we want to use to train our model.\n", "\n", + "Create a DataFrame called X containing only the features we want to use to train our model. \n", "**Note** You can only use numeric values as features, if you have non-numeric values you must apply different techniques such as Hot Encoding to convert these into numeric values before using them as features to train a model. Check out Data Science courses for more information on these techniques!" 
] }, @@ -74,60 +72,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DISTANCECRS_ELAPSED_TIME
01670225
11670225
2580105
3580105
4580100
\n", - "
" - ], "text/plain": [ " DISTANCE CRS_ELAPSED_TIME\n", "0 1670 225\n", @@ -135,14 +81,15 @@ "2 580 105\n", "3 580 105\n", "4 580 100" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
DISTANCECRS_ELAPSED_TIME
01670225
11670225
2580105
3580105
4580100
\n
" }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ + "# Features\n", "X = delays_df.loc[:,['DISTANCE', 'CRS_ELAPSED_TIME']]\n", "X.head()" ] @@ -151,9 +98,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Create a DataFrame called y containing only the value we want to predict with our model. \n", - "\n", - "In our case we want to predict how many minutes late a flight will arrive. This information is in the ARR_DELAY column. " + "Create a DataFrame called `y` containing only the value we want to predict with our model. In our case we want to predict how many minutes late a flight will arrive. This information is in the `ARR_DELAY` column. " ] }, { @@ -162,54 +107,8 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ARR_DELAY
0-17.0
1-25.0
2-13.0
3-12.0
4-7.0
\n", - "
" - ], "text/plain": [ " ARR_DELAY\n", "0 -17.0\n", @@ -217,11 +116,11 @@ "2 -13.0\n", "3 -12.0\n", "4 -7.0" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ARR_DELAY
0-17.0
1-25.0
2-13.0
3-12.0
4-7.0
\n
" }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ @@ -234,11 +133,9 @@ "metadata": {}, "source": [ "## Split into test and training data\n", - "Use **scikitlearn train_test_split** to move 30% of the rows into Test DataFrames\n", - "\n", - "The other 70% of the rows into DataFrames we can use to train our model\n", "\n", - "NOTE: by specifying a value for *random_state* we ensure that if we run the code again the same rows will be moved into the test DataFrame. This makes our results repeatable." + "Use **scikit-learn's `train_test_split`** to move 30% of the rows into test DataFrames and the other 70% of the rows into DataFrames we can use to train our model. \n", + "**NOTE**: by specifying a value for `random_state` we ensure that if we run the code again the same rows will be moved into the test DataFrame. This makes our results repeatable." ] }, { @@ -261,25 +158,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We now have a DataFrame **X_train** which contains 70% of the rows\n", - "\n", - "We will use this DataFrame to train our model" + "We now have a DataFrame `X_train` which contains 70% of the rows; we will use it to train our model." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(210000, 2)" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -290,25 +185,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The DataFrame **X_test** contains the remaining 30% of the rows\n", - "\n", - "We will use this DataFrame to test our trained model, so we can check it's accuracy" + "The DataFrame `X_test` contains the remaining 30% of the rows; we will use it to test our trained model, so we can check its accuracy."
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(90000, 2)" ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], "source": [ @@ -319,71 +212,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**X_train** and **X_test** contain our features\n", - "\n", - "The features are the columns we think can help us predict how late a flight will arrive: **DISTANCE** and **CRS_ELAPSED_TIME**" + "`X_train` and `X_test` contain our features. The features are the columns we think can help us predict how late a flight will arrive: `DISTANCE` and `CRS_ELAPSED_TIME`" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DISTANCECRS_ELAPSED_TIME
18629523760
127847411111
27474034285
749081005164
11630484100
\n", - "
" - ], "text/plain": [ " DISTANCE CRS_ELAPSED_TIME\n", "186295 237 60\n", @@ -391,11 +230,11 @@ "274740 342 85\n", "74908 1005 164\n", "11630 484 100" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
DISTANCECRS_ELAPSED_TIME
18629523760
127847411111
27474034285
749081005164
11630484100
\n
" }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ @@ -408,9 +247,7 @@ "scrolled": true }, "source": [ - "The DataFrame **y_train** contains 70% of the rows\n", - "\n", - "We will use this DataFrame to train our model" + "The DataFrame `y_train` contains 70% of the rows; we will use it to train our model." ] }, { @@ -418,23 +255,23 @@ "metadata": {}, "source": [ "If you don't need to keep the original DataFrame, you can just delete the rows within the existing DataFrame instead of creating a new one\n", - "**inplace=*True*** indicates you want to drop the rows in the specified DataFrame" + "`inplace=True` indicates you want to drop the rows in the specified DataFrame" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 9, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(210000, 1)" ] }, - "execution_count": 27, "metadata": {}, - "output_type": "execute_result" + "execution_count": 9 } ], "source": [ @@ -445,25 +282,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The DataFrame **y_test** contains the remaining 30% of the rows\n", - "\n", - "We will use this DataFrame to test our trained model, so we can check it's accuracy" + "The DataFrame `y_test` contains the remaining 30% of the rows; we will use it to test our trained model, so we can check its accuracy" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 10, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "(90000, 1)" ] }, - "execution_count": 28, "metadata": {}, - "output_type": "execute_result" + "execution_count": 10 } ], "source": [ @@ -474,67 +309,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**y_train** and **y_test** contain our label\n", - "\n", - "The label is the columns we want to predict with our trained model: **ARR_DELAY**\n", - "\n", - "**NOTE:** a negative value for
ARR_DELAY indicates a flight arrived early" + "`y_train` and `y_test` contain our label. The label is the column we want to predict with our trained model: `ARR_DELAY` \n", + "**NOTE:** a negative value for `ARR_DELAY` indicates a flight arrived early" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 11, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ARR_DELAY
186295-7.0
127847-16.0
274740-10.0
74908-19.0
11630-13.0
\n", - "
" - ], "text/plain": [ " ARR_DELAY\n", "186295 -7.0\n", @@ -542,11 +328,11 @@ "274740 -10.0\n", "74908 -19.0\n", "11630 -13.0" - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ARR_DELAY
186295-7.0
127847-16.0
274740-10.0
74908-19.0
11630-13.0
\n
" }, - "execution_count": 29, "metadata": {}, - "output_type": "execute_result" + "execution_count": 11 } ], "source": [ @@ -570,9 +356,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md b/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md index fdc5aa7..2c58b31 100644 --- a/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md +++ b/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md @@ -5,6 +5,15 @@ ## Common functions - [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) splits arrays into random train and test subsets + - Main Parameters + - `X` - Dataframe containing only the features you want to use for **training** + - `y` - Dataframe containing only the label (the value you want to **predict**) + - `test_size` - If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. + - Returns + - `X_train` - Includes your independent variables, which will be used to train the model. If we specify `test_size = 0.4`, for instance, this means 60% of the observations from your complete data will be used to train/fit the model and the remaining 40% will be used to test the model. + - `X_test` - This is the remaining 40% portion of the independent variables from the data, which will not be used in the training phase and will be used to make predictions to test the accuracy of the model. + - `y_train` - This is your dependent variable which needs to be predicted by this model; it includes category labels against your independent variables. We need to specify our dependent variable while training/fitting the model.
+ - `y_test` - This data has category labels for your test data, these labels will be used to test the accuracy between actual and predicted categories. ## Microsoft Learn Resources From 8338ab5f5e75639e9d9dd2cbfc914fec28796604 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 23 Dec 2020 12:48:41 +0300 Subject: [PATCH 14/19] Update links --- .../11 - Train a basic model.ipynb | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/source/week-5/train-a-linear-regression-model-with-scikit-learn/11 - Train a basic model.ipynb b/source/week-5/train-a-linear-regression-model-with-scikit-learn/11 - Train a basic model.ipynb index f510507..fdc91f1 100644 --- a/source/week-5/train-a-linear-regression-model-with-scikit-learn/11 - Train a basic model.ipynb +++ b/source/week-5/train-a-linear-regression-model-with-scikit-learn/11 - Train a basic model.ipynb @@ -5,9 +5,9 @@ "metadata": {}, "source": [ "# Train a linear regression model\n", - "When you have your data prepared you can train a model.\n", "\n", - "There are multiple libraries and methods you can call to train models. In this notebook we will use the **LinearRegression** model in the **scikit-learn** library" + "When you have your data prepared you can train a model. \n", + "There are multiple libraries and methods you can call to train models. 
In this notebook we will use the **[LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression)** model in the **scikit-learn** library" ] }, { @@ -34,7 +34,7 @@ "outputs": [], "source": [ "# Load our data from the csv file\n", - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv') \n", + "delays_df = pd.read_csv('./Lots_of_flight_data.csv') \n", "\n", "# Remove rows with null values since those will crash our linear regression model training\n", "delays_df.dropna(inplace=True)\n", @@ -58,23 +58,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use **Scikitlearn LinearRegression** *fit* method to train a linear regression model based on the training data stored in X_train and y_train" + "Use the **Scikitlearn LinearRegression** `fit` method to train a linear regression model based on the training data stored in `X_train` and `y_train`" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + "LinearRegression()" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -88,7 +88,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The *regressor* object now contains your trained Linear Regression model" + "The `regressor` object now contains your trained Linear Regression model" ] } ], @@ -108,9 +108,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From 56d4f3dded434fab30f53e1b41ae7deddd9cab7a Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 23 Dec 2020 22:28:45 +0300 Subject: [PATCH 15/19] Formula screenshots ---
.../model-testing/images/MeanAbsoluteError.png | Bin 0 -> 10802 bytes .../model-testing/images/MeanSquaredError.png | Bin 0 -> 12351 bytes source/week-5/model-testing/images/R2Score.png | Bin 0 -> 13463 bytes 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 source/week-5/model-testing/images/MeanAbsoluteError.png create mode 100644 source/week-5/model-testing/images/MeanSquaredError.png create mode 100644 source/week-5/model-testing/images/R2Score.png diff --git a/source/week-5/model-testing/images/MeanAbsoluteError.png b/source/week-5/model-testing/images/MeanAbsoluteError.png new file mode 100644 index 0000000000000000000000000000000000000000..fdbac35e60a00a569274759ca984c73098536d5a GIT binary patch literal 10802 zcmbt)Wn5L!x9y=@4k6tj9nwgHGy)RR-Q6WEB@zPCAR%4S(%sVCDbg+7ckzGketRF@ z{arr5-kf#zUTdy7#~gF4`Q^Q$GzKaWDg*+-kd={8fk0q}!Sy#}MDUw#FE#@Ffp-y? zRYL|pKFFqF;GWP`Qqxt{!Q9ou$k_~HVeepP#^Pe)Y-VQfV(H*|4A&+M2C+O3dh2Xv zwN(q2PGU@tWc_FFz*-Kl>Ys_X-q}Vrt4oXt zOwIHA;j)LWs>T!2i4v^Ldr?QV@nUF?HN?p{z2}3Dh27!C9(7is-LkiX(}&9GT4DRC znY|pfrqT!`7WIdsuqx7_MkM;c;Mni0 ziDZY0BU9$8__*)}fU8P8TrL#KU?_(27)>x#oJ&G14*YiH7OfqH`3{Nl zrzB5xkH}<&^j!|EsQw~t(SPMFpl{l!;UU^r=zXJftW>Y|->|0dqJ1jThftbJfoT$V z20`oA1ZSQ^2e_m_tiXYlUFK)({H5czfF#ffr?y zT6!3)3Hv++ipOernwJn3pG3nMq$)GE6JABXpS0GkiOxbVRgl+rk8K0hv5h~{TVfjr z9sJpd=^$C3ZpN2_zX%Y3AhLgJU5(Z(+T&0W6qRFKL&OSxq_tzE#0v_frR1UKyrI&6 zO~-Y5C7!M%BJIyR|IhY5ET=7@n3-CPO2q`mqDDeq1BWLOiYETNjxGu*>p#A)k=N&Y z$u0?k*>gSU{#cm30+p?HPwczN~-lk`nkRCcs5C6842olF$c5@ldg8}pl; z78D$cKfh-zq#puST@nW8dkmz%k*D_Y9QW%6XjT02x{QJB*fXGQ}XZ;O6x1p6&l^g@xU-(k8Y2s89EPXMIK zn}@AjD6qzPQ{|gVM-Z|@n~lRCJRIXU&lWO~!!Dzi+zYE)q<638A^L^fvZ+@ydbLqU z>@brs{Qaf!7IUQ$aHdN3l;26p@N}N8Cn}mF9>iZ`GY2x-x)HuF#kc>AvlwXKf0!OrRNnN8*aICdb|)y`!_Vd_eeGH{S{A? 
z@MG{C6lEgD3(heVD$Ei;yLd$y{lnpR#;PRE3-b8o>fdG~wFN_3#*5F>eM>Om$bwZE zQVZp0EcXy4k3>%tViV(M^}R!KrX2k4OX>7BZbp9~1i6y(dD4{kO-Zr(=xWFRXkBWX zUu@uIAMS6b;QO~9kIKb-i$@g=qa|fg1@jl*Vo$No!3{i5LFtCN!K3D3XqW6$!WY>H^5@9Jz4tF|S@oKGi@(==u@gLZ@Hr z>7Z0fJWl|tirvC2rCI@&@yPt=S@{=&6R*qiP8;PguUtL?SjyV}JWo91%|0SIR|(DS zJW8n1PzA658HH#IPy8Esp{XwwF;(c`OMZ_*{OA4M*T;KWKHCOkV^t1aT=I=bwFD4w1 zf-a;*SK*Az+W}<+>?2sP%_&lmP@O)2GcLS=P-7CzD};8Yo$x)`JWA0MUZo1}@alzs zd83^tM(L9E5z~B~TU~vg3e%eI?mX_tD;N;u7cW8!3JN|FOAOifgujfD%R@#-moPNU zQofP|!D9SjBvT~Xez_3_lAV(i&I)&Ccik;=T< zfr`Hi0cOxPgi_xmy&;&YVz?a@`MS84(j*%U#mRQ6L>?b2S@fmB^z1Cr77={wzq8VI?!N9SC{pyzw*qZvZH z|BT`vl)c8xcef8%iSv^haA|Ka(y75(cpGUm*HTE`SeF_olw^?Fl!EwHEG~wTnTrc| zWMqW)BZ+|f&=Olxa!d^$iU^x0zA$N9dmTQEfM=F}znHG#m!9ch`MEb;G3QC ztCifLiuVYFAzSvp9_g{dVi-#c3y~2J5NPK$tgK${f2Ot;y0$@~^>4+FWvFUfwZA9C zlk|y35z^In7=K~HtKavTdd5((pXxq2`D_2NNT)#?Mo;IHPju14Uh2=o+X)8CnV&lN zN}0kx9n(%X`b-vU?fAKin}dVlYpf;_vA^VY9yYhMIPK4{1RdZ&>{nYcVr6n2Ul(eW zlaUGg6uRlj%F3QxTySeHs?ArLAc?OV;C9LbK0kDmjUadUIydA@^XY+`~%NN6>XBrzm&`%kcTgd{@J+kX8cgP{dmcZJoe z$>u^Ubb-NPHS59Wph*~=S|hsWZ$-7t(>8KGW|;WjzV&N!@7D`c91k{Q#FO77sEh|~k&X055%)E^9}ZYg7w1*k`;sNG>T&Gs(-q5VzF6db zf0-d5-i`O2!y|%`$EXuNZ&WUdh;lCzWbE<{VaO=Dx@dS5AN=`_>K zTEziWt%;7xQkMtb%pY5R)$>Zc;oN08YwGZ6Q!V@&t?s|{Z67bwO2;6F)z#G|khzIG zd5rfuJvG9Uf6s0R1l*agJnZdRt*2EU8)9N&0{;9lRx43gSk$8ri;R?Tb*&k2ySqB> z%MuGZFE&Uo(J24ZA4^?PU9HOB)#83gM8soT_`&yLf41*@ce0@^np7Z^@@*)+EvLhZ z;Q8^YK~bq1V~84~q?Hvzw=p{^8rtcf9yCGkOZ4&aalViLXgmeUe0+RrtZ^zah^7P%P;fa!$HSvu-iU^VmI5$r;dXLz^1`lFyT$9m7&14M!l?!9k9*ljY7kOOT zii^WuHQ#HQ+1i%syE)8Nz@((4Kp@i6(jjgj@K4Uqr%L?X6w~=b3JdA$>+204?{7}d z&Qvx14;Q#wu9Po*;UHwu1(V!LH*ELnKecz4X;s|Z_Gc7+6uh^yLVG1_IvAv zUix{qAL^rWfs-T?JHfGAY*UZPKi!siqj%q%Tw!^eVtY-)d3eixR#$bTua$TrC5Y_^ z%&)HIgoT59IiuI&^w>)I*02n&u0`q}m<5@aZu z9S)2LZEI`m1Zk0FGom>CUo^! 
zlT3|g0#s~pf*AshFK);zszwYI-n$^9*JF~wLw&0d3hevV7M+II%`r`LNjjeFHQhDZ zzzoaF*%s;?$}1Ez*ElRjNN{m++dDd7&bEeC2WGg{WB6Qmpk;bZq1wN!jd~*qAz8Rw z&#UN*Cf{$4&d#P98t1x5$*w(cRNyv^kK#Kf73@^h zfTypI$cI3Q*I~AX`tH&3^$c0dm*mVg(f!-rMf}w~CR+QpFMkVrt~F+53CIs@o6`vc zR>U|N7*GP3Eo*IN6^+sb31xKfS#{e#!688)s`>AfZ2YqY4^p{)vnPxwUiW~% zRd__iuU7p|g086|Rha&xFS=ExUsN<`4ZpMgAQf<@U}3?y*qiRkm5r<4^z-wRBMiNT zSNKpaJxdOwTADCD$O?{loqJ zSgtG%c!RVNv0anp)xmt19-3a0JBzNK9tM%=6azi|T4Ny|t8UodbcxIUUkewr-N}p6 zy^Hr$nCvgTy}b_>>o8$I{drkDHKlgsB}^&79fY88lVSie&db_y+WNnnn?@ah2#|n) z00=w+0>dFv%<`G0FyB6tpdbk*xv<1WZxp-Vy=UIVX1NW~zju?rf1|9dtnklTFv!Wv zcWj(SUh;WgmNpm|Cs1O4sj!}A%-dY`Il~x97qAG$Ba~89{JJn2T^t)bu-NL`q|$&z zEtQ#ATWFz6>}RqY?#kx>%q&z?AoOWu?bWK6TMNgf&bWW!=Ej z+c!=t%m(pJJOyR)RC053>1{1`s)q`cGL5@JF(9T4*P9LwF!4d==H|4{E-r2h1~-`D z(b3UTyvd(~gGK3a8PV;e-UU_a3JxhsEv+hFpY34Qdur4+<2J|$e?%4`kM6x7PVqeN zui|_fNEP~$e$r1KVO7<>seT2V12;W4()l;Ui2%MYVQ(9DRT9WB(DD&U`5O6L8+-O1DNQ zM@Lbxyzy!aCD)xX7~i|g%^9l+fvG}eY7$`|0*96hJYMWM9fuaPnoI3CT7^^!dU|9r zF|n}d=)Rr^{028CC#Rk99O;nFx8mYZa`Nc%w#QT*^QhyWBVyGxHMqPEEAUy#@j0x7 zg+6t5A14cxq@A4Dfm>ICu(WTeEVnQ>Gc($s`3VUy8%$)#+Zap`nS?iO)BS~!+NeJ& z=(q*}5y)~ogDspnnqH~#>O$H;YTeSMu9GiVoRY_OE?`vTN!REGI3l}gKlFTmcJv*~ zq=Bd?1Z2`#P8@uEq+ZQD5q_KLVgz1Z-semkaF9rpO;14~N@>~K+pA8*1w;Qg^yJ1T zuiDEd>&#(2jn~1sE-qdu#@ow_F-lTLtFHeG4&6x~xjzJw$f_6ab+PBPA4$rE&!!&< z!14O_*7*8l-MY?ZsxWq9e`2#Szf6piiV7C894(9P&>4(!y%vNSpZGa6l!}h751oYr z9ZV!--;T|owfk_fj)~>!-==(sJeyhH0xieC4AD0V(IHw|T1#hQL5NOC7o}f-C4IJR zwH9dL65`_>yS#r{j>E>$DXBM+i(c;M-WSp~%E`#IM~gfni@FQ_s@e|^l|fW_$>n{q z_rVG~@?I!6OE%JIwoK2_LxaTct|JI3na?RZMcb4B_<-8APTYQ~va&KDkPy?H*rGi% zj{N?9nH&XN`@$d6ZXXR`)&P@PI(y@?MUcyqpYJjD_taBP>|Gk#$*SKSi;5~hMUE>H z5)!o|=`yWqb4O?AiC^=mTceqnMQY)fhl?9I(Za>9cmE7B0CS{ZV^b>e-AiMU5}Q1U zOgSytGx`FcA%I1vmYzk#^MvB@@y>y>rvJ6;8|sB$Hu#YFZE@lF0tT?~tgNhd`sk|J zvWxK=E7hqoy|lYK@c0DU=$_u*KY&O=0szy>0;r2<6;ZV7qoC?{f{z+fsGJkp+4xV4 zQ&?EvjAIPgt-|NV8PdMi5p?=^cg$3ZAJHQtFR!X;UYLc2gVW`P@;M~LN-f`q18RZ} zoZIub<%(;?;iiLZq1G~5_XUciQ{@ibr=?*EAQo}Ik_i>RF5=1@ 
zowa&rYVwWTlUrHXQr%q&F!DZ9d~tDcHChN!nhZ@R$fr>%9ycbxg!x@}0&wUQA)YSG z2P-X23x}Im9U3L-82hv3yqW@4)zuCAGpVMhdkV9=%FWHq1dz*dX>x|2^78VNmZ=p^ z8@<2tHJn{dt*or%_DxJo686};Yjh*oXBu2A5CfhW&ISiqgWMqsCgyCtkGguI$_-oV zb@qfa5+<1`L^GO9h-(FZT=(YbOC@>Y!S6V_QvDV$U7hWr6q6%b1{4g8X;16dm6}7v zA4+=dj<>ed*DqVVugoEufceh@yhfkfE4Ba(3N>?IA&sYKx%R--9zYkbrNF=pDK4(* zSmTHBB5xiMk<7Fz`RSC*_w;Iw!YYsWD}W(4*v9~W5d)w-ZBj~;WzzR`y)y(2Vrua{ z)xD&MX!S$derjVY*+(fhw_zeAlc8Kp`K`6J_6y`25OJWiHn)%U17zeSYl)!$qplIh zQI+4rg~sp@6uDMy1H^f*ax*(yd2tJMaqvXSZk_$ep|4)|Mva~7se zjx4?%fH9BgN|1b}=|ock^YvD-u&|aI0c$6yiI^jCZOqqF-YV5?azBK=eA)A$cZnec zSQe;MAP@uJi-ZMUT$e6%FfQtHP6Og3~X=w^!NZVr8fDHkm3Rh3wyHcvFhY+(BzJh>33Vy z$pc_)D}R5n`D69M-LK!oZhKRol9KQ_I5;5v_HUW}nTJM4w?m~4!P`UXF0{~E0`qfn zau(QCKfHQ|nXHvPoFq9dzeFea) zpt%J-jk6mmc|%A52u|Xk_BvPBrzY=`U&912_p9Z7 zT4WNS_PW1jPs-(R-FaWqyuP9fR#BhD5Gxb^HN?os$n*My^5Ed09&5EUgB4#K4i1hG zvUPl1?w$sST**5`?M7DyxOOmN4mi_vqW|SVipot)3{)?5P^-;PNMp9aW!ts*ZAQH6 zWhKZha)V%5^fWXO!-w08$s&wkf(3v9;E{j-b^}`vFMmh1ibL)uFi|OrZ)j+6GP9m7 z`&pr%5wimZR8>{gSVsgeTlf#z{|^4OS9x`NU?dx&{j$z$eSO_JI%IcucW8V(7RC@n z^Hk$e&D$=mMK9LUhFMeh^ZeVxQYtK&m0`_s?%I5XG{$sEQXwy3UNR9spAcRQ;#;pR z5Htf(cX>d_%b_qI+%g+Vl%^x`|{-v_rryyl;(W76Q-H3{9!iH`hV1W@k%pI-*6sVl5&v?`-B0kgDvC> zD;nSdXVks!CexfwEFZDFIwhNJd*%}tyOQ?XFX{_8|> zncZS7>a73f@jm$Y^lVfFE{61Ka8*J14IT18WX1Mh`(zk9bg})cBI_3lZXm+rF%^D2 z$ILhl<6`vpGO?}4hr1;R?8DtvU(<1GiK-BU0*V|TsOId<5gyK%fjuxL#z{+y`1|*7 zOOO>4`W{}Bk4hQ|%y)Tnh2 zy9Jr?Wlz7Xr^6Ex6V)3tL4nHsSxI zT#+}e+)KGJ_2?c4xbnVFXmVOw1hw>62&B<< zH(QldLWpdX^g>c^Y_bipTd@6?lP@ylby#eTm56a(@gNG(b(X>v4Dowcf4^2Pi!BeNuJ_gE3P;CAU7=Q7iZ&pJPKfR_A`XY3;XCs#5B4=o zClzODjk^t3QPcn4o#bxvIMxsAsbYvN`SW=~^4e`r?dS0{ zZMa%XIV&rxW}SWd??r4965DfM%V`+41i6M!FkEkT2ID@BWC%ro5^F$51_C=aGcy{L8cS{dnN2Xu zpdbmo<;WiDcA*b}>S_Cg+IO|KV=e|@=WZP2$3d@-(LQLj6HLU!Lh6681UIg#*s9sg zly+g+!xx79MDAtS#hh-~$HxYYzz^UQ@z%vZaIRAb{5awYeZ_R`EFlb;u> zP37FNGVq{!4u~@OTD#7EI24n7H*1+sm{aFpB8Qc6f@Xw}JyV`6OVkE${{rHl%YPpSciaB661lr`t% zP=j?Cg4)NpKrwxy<}MWnBy>Spwt8q9_PO@K{J4I+0V53`@3Hq=>^T`tPCVDxe?t9^Yd%3-&F&5Ygte} 
zIS|yDQdUnA@TxqXGB_&xx8i*)bmZ&1o$jIOgwu7V$%$Y)H`T%OmjoaCO0`6$zikH#DVuf|LGOK3&jz_vf$pDDHMeXA08_c_>)BMqDTC+Fv{ ztp-jE)n6t_Rs4Y1Y_@)0_HSXn+3$UKayz)YQJbKnufF80VUC2!b490GtIlbLX-lX0 z{k7$CNJz-i4qNDp-pySiE=&?Wio5Zy?P?(J?k#^cKE1MS-*>+@W@^guh?*$^)aZ)@ z6=DDenV{zJt(+zmngg4+!M+jwVv@n>W&Vp)Jg?~x=vkF~YnUn7+KK}FO#dfHCR8ECVmr|U1*#UtkJUN-bcA*9Za&ohsnd7k9ium*CPfx^B%K%U} z5q~a1!^6YtKXvs*5f^Y&8|b*#*;CwHU`^#I$!l7!B#i_49StNiC^Gz##XE9zE13pX z@1w{>sMbh5BE0e~JEGp~K9!O{;Qi(OE7rBYcs;?~DBohf!Pbih zZ{cy(`uOjTA5IE&GmViJ|{GWRP=tq%k(SV5d31|dx5P;_vG2_3lpJ*s{FYNS2{De;` zE9lLPhAF!_a%p?BaWTgD)`qokzyYbDkBDb;>+toA|K)~HiZ@T5jD#HQ+5v4~If8HX zvu<&I?h0!4gXN~#wKKx4%}u9Ci1r^XbD-I&m+NCd=IWhH@^WIW7`fT;jQfT06&@H{ zEn14Tn?2b9+lus9`G^E$PfnTio}$^=@5*Cmb#&#&-(B^Mc4{=9o|Fr*Mw zfYM6<>IfQa6py<&P3BNd=3d}HCxFEN-QLcrIUjz2NyOtKG(pC|kOX!HIzmWR7Ddn$ z(^Dj*DkG%#_UckQ9k_ze)w0LhsplL}(mF!Ve3D&ZLSoW8yl)s*S1jzoaVmp!AWg15 zk}ie#pfla87dt_*9qozS;C%ZD^$G6Jk046O`)`gf8K8z)ZD#NAGUDQLULRS^bBdd= zyNv+>gc*3#lzz@Lom0#d#sDB^pG)*Mh61IiyxeLw7fU)nC!JH549j=*OdD>o2XYVyGRsZ+O= zuge9`a74{5dAWo^g*-h%G8Z$P zQ>sT!#Bm-z^YVaG9rU>4$*E_7fDD|sp;Fs}I>GSf-zG9DDy#`Q$p4)u>!GB@eMgC< zOk{a&9+d@!H`w}I2M*u*)XOIL5_l*%H*CV;(#_Ww%p}-L_ zeB#Djs;U^8hk-*(ZbFfw6X}YKj^2K89&j*|1oF8HX9Gd1pvz2IxcSIHlUa{3`wLS3 z+=?V%(KYek@1sCrkS*r?PftMWj~vRSHq2d>wahr>HL-wL zQf6wG8TIeNz6*cvnA6_uYa@Vf5ty2i+iAVsVix@(8ce^o2}A>(#J96f*SSLw`b$g5 zk7zT~5$p}43Eq_I07zcl83m6;P;ppQO7-;mC+lx?sH(wQs9>q_MoAQd-+wOZy8(Ib z#ZEZZ#>%@YeH@nYdJ_Z$nFPbkb6OW3G=m=o5vh*umf>%Zm`{)$BnQk#wy03i8rwp& zxvke9oRWqYE1W@Bhe~hG;_QhiqiC1<^J>n}Qlm!xi8WD*qcvt}ZJs_n2GKCUnK+Q; zy|U62nc!%``56L-XypCn&mAMnKt~ZNOaeX+C72bYza=vgO*wYX?HsjRkWSE>C}gKi z^>oLoXY=}^kK){S-UZ(SD|Lc5Jcco<^-p&`dzcef@@H)f-?6-__ssl=uM;CS4E%#L z^mMY#mRFG5ezppdlM!J5BU}2x7ezpsYn}ZFZ#?MAFOQK6*5+7T>ZG`LHzw!!@)4RpZk#fe_;`ymKR0QVzDM!v zSx`oDIvkf@8kh^bh#LV745+!^TuD}6I=oCJjvfJgU%em3x8mNBK&o{Wi~I#fU-8c^a_-IPWAR z9Qrew#Nf*>x9p;eShQ=x(sGkyfGFdf`MJGaH0L<_*(RZs7!0#!qv_+=RD%Sw0njv2 z(4WH8^+X3Ft1|CmAwqsVR+kKo`%z9zsh-bw!LOo^G|Yb}ktODV%uXr?Pt`pj_^vTP 
zT(oaU3*=3bv=t0u*4+%D{Y4SjMSSkr20_Z1=e{2GV6n)l?mH-)V=&aAtz=L`GxGBn z*?LdeEBC3B+>i2McG;j+>SgR7VHH)=(PfIjF@rn)>y!4R0>V}<+ezd~6juc>FDgkY z71qHF)h%YqeCg+jc6!UCR0k*H_--OsQoTX}Z9-vhp4-L3chW$|&^YF^D@n``&0F(Z zj9`nOi>>nipZmQNQSRS3f^0w0c;#>LwzvPRx1gH}`UEsxWyhK#W#RTqVm~hUqF?G? zjIXhF$)25TsMOyF1?E-gp1OdwhIw z;M-@Pwbz_$%rVCt-{oaLp(5cUK_C#+&k~}F5D3%>cpZxX2YxnhC&PeWu#Uo?l@Y*~ zCxUSZ_#4kjOx;Pz*3`+>z`+D!W@Bq@LhoqgU}9q9Xm0Ct4Aa36E~0YZHi?i7^QaD~YIy5eW+u3lj+wCpY^?ZdO(jc^MKhA!RauL@@}21oBx_NZBp@ z@Sm%?(l!zNiES6spp)OUkg|AZsA7Y2j$&BJ)U*O^`QW8$x_U!{ru|fCiEcw~{S?g+ zO{gw>*))`D%Im+yuj<~S%XbZWJcoS5j1_c2c;Fw6+aBQB=Icn`*ho6hbUnc}LJ2?@ zMqsA(RDu^oAPqqOqX7K@{AU=Q!cG{0vtWR~+a=YBIE$6F+ z#)NZaA{7(p1h-!3iiGlDC3f?Se?Coji$1u!(DPefU42EYws+nXGN#f9K5mEYFe!Y$ zdtDQfK0yQxY3Ea0m!Et~{+trAJPIeMU&<*c!7hxLj^4E@1SzB>tG%=h#UF)gZ0?#8 zzMJoc_praoJiCOAEI+Y0j*|VO>jw{|iS__Z{L0D4%;-~IudIZ#2zYm{&odd(_X>#I zaUjB@;N7CsL(qh!vhfLL?6ZU~tT3CkiCF*@V$l+nFP88d^~Lt@@WXI6M^Cs!lQ1I? z+#ZR=M))es&3%bz-;Fn&h-3&_cuSjYQ*AHZGN5)bfE8$dp%)z zcdoAYO3qLVK`RDp(#Z%#Ihvc}Zq zX7=}JCH2IECq*KVV$RnTi)naUQ#JOp9fmxvirT}F7^(Yg|BRg z!b|Xfe@%8meN`D~sm0l}eJk6|w1&0&sIA?%d*XJ)u7T|25kgYSZu-BKyn|yV>J#vL zC2Bse*-n#=W`=?<5I~S}DBDBya91GLSo+F6LiIm4kNKM;j<6a}@|L;*!h^7SSaj2D za*T+pDCho+NM^5}RV&9e1O3SNvW}|%Bq6RMMiOLFm;k~0jb4MWZ8$D{n_d9hL;EU@ zvCus9qse!y)SG7Na!~|kgqOI!ejZ@Ye+$jd-C^yktGd9rmGdX<96+<+(UCB7*(7oT{0P zWhng;TL=UZGsX$IG8jixQZqZL&mZ!y9l{^*`%>2`3apowqT9968(63pi^Tn>bkEh<8jE(G0&>ks4@yk&w8@4IvFy_jx z!f`kap zTf08^EL7i;z>CwYv6zsU9oq%v!2etj0uSXr%mt^`61mA>cDM<}31d*d+-d0>RcohqO=z)+^eBo;1MW_EtHXO#`%oc#Iw7R(= z@=!3ph(A&}Gcm_02)3im5dHn`jvDUD6pF8->>80B3^mJ|it>xs}f(76hQ5Uou_cR!inz6L=n>3GiidOyF^ma>uqt_D1PB z3rig%a-FPrc{W+V3%8_l(+GzYC6J&h1S63I)poc&&mCT9@@M#B<>qzK3ar%LI9i2& z7Z&t@mxvD3!Rb%srV|eN{(Zfpx1M$!d*7O0C-ghnW1^hdO2Cf9N(Vm1J|$Upn}3lA zX^&N#x6?ah=zo99*ICk=_00F|oK*eT4l>&jKckpOrreln@gQ8ZqP}Flu(Y@zO8UJw z(~>1SL`s&w=W=;_Z5^ACnc)DRB~*qm!v(jev1RN1Fc0$NFZAKh>L?Wcb-Ur1T%`H) zG<0-5VK|J^gyMAxcpZpJ-0XrCpM#^*QD@n${HKMbLNI~}&lmFcTjHwDHOrS8?(Z%w 
zH#RnQ+O$)`e~#g@>~pvw^t93n7>4_9&HVu3FG`fn5!4?=%qk@%1?O|QGv?3dej&lQ*&dSed!EaWy;Guo zT{TB{kSGe_kX4-e0(I|!AIiYnyx?v6!G zO-;4NRL1nnmjR2Z;@-u@#pH?##{G#xv@~WTxP=-sw6LfsGYSfd&BV0m=xo)+=EZuO zf5lTCl&Q>xu>1}i68_(gjT>O#Unk!kR!b!@RKcO*gQuUGN)QthOX1G9$Rqy{WPQ_> z@livv0}VS-n1hTh^aplQjjTP*y@|amWo=A(_xi`0YxrYhdq$_I4-5>EUp+inA;V*1 zF@(Hs@`Y#Pa*+nsR=)x+MP`fG{;ey)M=|rw5XYY#%&|T^oR3O;{wyRZi4+?fdvdX> z6u4&VJ$gi;~57(6tOO z^~tNNhxqwHZ)|R^&efQOL_~-g87{eo zL!%OKuxap4z4FH$xc{&kRGO_yVX|;rvMLcGUF`9Pose zyXf)a1f01d!LO+w&6e6cT-)LWzKoM6JU`Xc)YLI>l9Bl#!a|qR5k>VwLqpHjTQAH; z5OB_R2cv0NSkSb%9O5sueq|+KGe=kBgYlUv(^6ZvCgS&0U*#Ll7iO=&~5z< z@_jTnFK-U)`P`_$^DxM!!9+&A5pd7OWJgEGx+Qo~uPrCI5nQ8RzkVSt*QK&sR^MMA z?`yj_I5_;^f}lx-wMtr4{i%WO^@X)SxwO7T0 zqKht?%2364R8^(#$YK^W)O=-?RC#qN+2PRe;Gm&Q8pme)2V`uzpLQ9Ti31FbmfhN4 zIZC&)-_z*?A6gFmMD+X}O^O1ksjpYPscQ7NE|)dA-kXxT^S|Bd&%K~$Vv;Xmd49UL zRV`7aFMr3MHCnFSlzVkY>~(EOXw)I68^aYHR&mjrlm~1N_e|ktIoot8rJ)8p1o-~04xxXFKBN|Wn7%2Xno;W z-rDjThsO+I!DyTk;*i;Q_z%p_ceV`^i?jiUEjeD@RXaQ8P0I@)BNJLY`_TB3Sv%=S zjS2ZtjEP_0$d_sT4&~A8S5H*HY77pBhw0?MS%Fc_6Gv?#M$*BrPAE@Ro2D|mI+)W; z(5mm#zd2dOFB7nuWrFjuUZ|H@xAt&%AGBWh^G9Z#1=r|c!L~!?o|s|`%~5(KoSDO@ z4<*p3zppQ-x4(a1`6vYAy>x76uOYMLSnjK9-2h=yS~@n*p_M&`^V#6xDdG8*(xFcT zbAM2}txKG`xsV&H5f_|Wq-XO!p&i|?ZEW;p@_Q9K%+;R2Y18{qHrT+F@4YelBEApL zfE{N4{Nx4k0if&Y<+Z6B6%jF2ya+HNASehLlFDftsoUmGO*c_(B2_jB;5KA&UC1Bq zOKp2ZL zS~iHFn;X~BQd>!7ILAO3b5KA4Y|}P4$OQ^`Wtw$<9UTI#ZR`X=A3lD>>+L-=La<2d zJUlq~TdKiYYx7UDc(K?^!<&6zD}O?MBTpO11_uDb`dv^^IpJfLT%)b@Xn5nrf~P=FiZ&LSW*x5L4!nWz#Lo5g^`?7zt$JmN6?=Po{HTz)m>Az5Qc2K| zyQ{-6@5j3VYj_A9J^kwHYL0!YhK2@`8KGypX^Z82?H4YOD>{qm(xNeUlVzCfN4$%8 zY9%`mSvK1x?U(0S+ujcTdcH-?+hH}wqTS)GQ^KCb+&d!7*@Yy#K$MW$X_ohl_ zTU;3-yBY;8Ej(vuXD8cP-v;Rm4^v9=^4=dDY*ZAMYB!BjVG+m6tkl=nn~&uN{%Wu^ z@9pgcwFj<6r_t`$q-~XCWo>QG(Z3c_^xtSPF|o0JA04-nsc0^BnjBuC5%I1NEzHhl zyB>#r8Xp)QR=)ig`#2qMHk3$@w2w;6uU&Yps3M)otFh~?(qLK%j)KF!hL#pB-`E&D zBO@b1pzu+ZI$a+qc_`KNI(U4(Lzu;Zfv+^{EGQa=LAjcw0ogbZ_raeT4hILvW+NSB zW}txN zwPM@8aJ<<%OB_Fc|4j=~B_*Ypj0~OCI|Bm)?L2)56%7r`4)IlW*0;H1WfKF9Q?llv 
z!9iibJ0Z6Ld)wO|II*Ie%~k*dZ3M*@qTd_(wz45fwzhcGvLAwuiRqX3DQb`phkyVX zFbAvI%3mbi!lq_s>lGbOu^zG+*U>2{--*9HV1NvVjq0E9(7x;3K4%6<=by0rQ{laT zf(}Ms^~r%f91D>d$>Ueda^4tzy>L46jHM_Mu}c!G9XK@{waE%wC2xa-p4u-Hb{9Je zO;Vr2nZt?r@EdHGw<22x3l7j)n#)$oN=yINn8{rn%wYkr6%!PMTx)7;YX|OgT1+Cq zzkVm!=6=}-*K)Zxr6h1dQ~EiM5&;QGSY968X1SgIy5-yXmdwS@Sl;4ge>k2$phS*I zl2F}8T7bX5A1jY#>}efPH5tilGO!wW+z81yTyihQm;B zKmgT6lgG)5uWVHk=bQ*AxO3n{JJEEM02^?YrNmTX3ll~f{~khnP9jVC`?6r*ds#uduq(h`wOD%%)^lhs_c*sd`e z;;hAeTwS#^t`9;mXF)KQQq5=+4T>(Va=}HC$4n^=^bI^W4K`93#HT8i0!UQP1ZhunQH}CvHMagF8=LhMvblKej z7wqcphLR{zUIO^mG&6KHbAEa{*2FzGJ&pZAHobGP$x(&D5v=Cs?#{3)0PzN(h|_## zHSE#xv7P}8!<=^>tn39n7Z)zE=ZSCE`FelU%zgORWH25ER4v$&IPK)m6< zi$X(yTr6+CZDbx36H{4J!}`1I%S3_ucZIY^B*@s<*y3YF`<*0X)lbD17wf?7$y*pR zD{+~XrKKeY#HsOd#nprARy4=Ei(M)@x&Q!UGtXL-@864%e%+%@Ilc7+#dP)C^L^GJ zj3QZ#{{D0sAfA1Iy4EcvmYbc$cYY_eT5cyC85waHH8D0`8Di{UQxF}bw$umAF$;yHKzhMa8w`8h8Q9F>;WdhVH|KV5&;@Drg-^QGVu8{atrm=7;oRc61?;3 z_~dJ_@O4hY2b31|CWjwOYx2F?n2o;(svd8*v$Ws)uv<=3wd%ST3;Z&V`3WMn)ch22IntrWx#E&OEBKk)dJZ=GJhEQT&Z=yJyh}AO1&&e1-Zl zM1QrM5{pjCypa$q0#2I^eDqSC7FC1Nt?C78TaPNM|md@aQ3;A1VAo8R80pG9sIK28eu?Uadmv%p@$SOK; zTt$UZx3zOd?;EP1`=Q93m61sUj6}%;X!d_t&ssP-ICS+z5ZIrr2sQb_LB1>>fW0Z! zQB{#!+2Y0|m@w>u*0D)QNl19hi5?skg@(AFtD|W`-VGuG2zBNeSl2!{vCX z?4)pNGX9SNiRIgJRA>BZu;R!PcI^h64B2nE#bMmed%rIwnjN=a>vnk4H%J41G;?1F z&=Qx!DtT-z4z))~)8s)Ll}KTZR5{`F{Cr(p6=sR@N zem?BChf{d4_?|W*Z0w*yxhy~fp#kEsT38LXvnCvl(&(?i>S}9;^SB;CsGg;(4}2J^ zi^K}0Q~Y9MG1)+21^J=c)ZVU{&Dtj~FAt%mr7bHYI#}-L(1uoPnEwHoA2l_#!>Gy? 
zjlIn2?SmvAJF&vy?fEvs>(_83kE>;+ajEfyYS4e_eLVn;92odcP|v#o#O8cH%>DzOMItmo#=ik(42c@1wcCOOPg1^7nYL z$-k>h81gpDH|gqZQ;JTjK3poGM72yYxv7-Ri(UWzuwhx>(lQJnjAd)N^qkQ9N$X)n zcfEn1AlGSHT#sM&_RSM6dTtRS7Uc@y$U$Xg^dJLtzHCpca+rrPJSKhR&0O?R04csA z4tu{ICD7=-?6@^N)(9s7#K#B80Mk01zip!o-+ z90wSkv`pJM9|ekidY*RkGFiqGiF_!a_oqAw)yy8cxw)ZImiujr3j#sF|9GKP2To6g zKcMPg@EO!18D@qDWjTwFUoknXQH5okC5x2O9l!nGk1SusVKD#<8|Ab^ zSNnm;IE}kJU@wZ9eB{N*e|NQ~OxRPkBgoZ^8eC^zhD@D&(3x{Ba5@ef?4m+U?_dA5 zu%NZ+psK2hG$2V9^V91v&*}_xu7Rpkgm)4vCh)nt6NORY3k)(zutZo`{`vXjH@CMc zgO;2x`D20cIZoLE4&rCKVY+Ys3dfo<4nuz-tw(<-W+wdTymYNS#Ap&0T>{povN5{mNqVulW^G6hH(HpV1j!vD4M?9Lq?< zr$U{v!Wr^_rDWKt!l*%N0u!Y)O&x#M4Ju}!d6+jRW!{7pMhMeh_ z6vOx8744wD+8tZ>M-mpi{$zc0bfiffTnvyyV%4>8a4?kO!-sv+wY`!9iS+6Pz{+gK z8`~eRv;s$)v$H$%nW>E`VKdXxhW-@t*e|}-VZq?}@%Y=+{0KnoH~gN~vD1LA zHC>jKmCb>Y$FA9N01rNj481LzTXR2vwLs3yN8610<&b#Yl+dVrr7MBC{CZmDnWxZ{d zq1U9yK7m0&d!+Mw^!)s~1|9PY3u&H5hk&9D^#alJLtI~8=&wC1OiawiVsq!>S}#ve zeQWCofJ-(Ovd%mEr#4eS0znIZ@dC`-?>ZnMBQpb0`{AS;Et%hoyV>=)yc+8nv<&C~ zltFQ@8)dg$dI#DFB@gn(@q5Vv@d*5z%NaeD@@`y_&z}*&`$6G|YHj5;HZ>*VXR&ekY4tgvt)Eg+Nq!n!dP{R{kn>p zy*)FyZy-1n;2A#Ujlt6TXYzR%g1{{r&C#hgo6nRJlD&W5)X%`c@Ip{VHdS58Vh~(h zTpY^Uap=?`K-R|+ry%stuLW!{nJN}(XFscRszhxL(g%p6c;^o4Ctz^|X&)Ua3ApS?&d<&aUy$W_1(3r| z_shLrO1aEo4(s_IAXKRsJOIvRnB{#})Rj_6_XISPs+ARe4$Eme4*NCH>5gyTw$|2i znW{u2BoK!F{F!ps+db!Szt{;C;L5E_L+&gkF?NjHu8#XkCdaBw*EcRk)WA0C3jI+TEefsv!~v6n8vYgz4y ztImfu?J`bwHKh)mT>E>_nfSlO#tj2Q1O2LdFd~{n)oj&BEbCkz6Dp+{;WX2Ss8p`|J#cw`h5W4TP;G0UdG__DL5+Andko0}UOb|%iC zH!$cEm1%Q7rA=FY`hy7lFn67B;}DwjCz%;E%Z0KT<3)7e$>V-J{mp99Y`rb5p;-|5-eI5nlnxYpY{Q1+b}jDRq`*-nIxu75H>1SFp-yH#O2?MC$5+gk|>JG0V;)1#pW?+UO}#n z7o`<61DTmjZIciWZyyX@1PN2Q#%A1_l#OnNd5e8}IQ z{tDe?^3u}bi?b6@P*Cpl&w~lHi>(kBmr!=Bph}lR&2qU$QL?=gE3O6@54S-A&EkO?CG4(o&grx@3w3*JqLCIb1p`exh{jk4(<&RE*SK#zyRU~52 z?P3QbkwFIua&y#j{KCN6dJ?g|!R%rlmV7Podxea>0T#<0pCsC z*^l2>l)gu{0>bm2o*vbcpBS{oZzEHG7Nw`ZOXqQY?Q=TPZjwl+;RgyB zbadoA7l5g{g)yc+ChrF~FsJ*qyi6=4B;+1OOF@clDAB?CM^m+(lpB>Ev?pL3$mhmO 
zRBttx+flRV=SE&Ac|0!!n*-NB*ZTFmQQ;oU3CPIEF0kUDR645e5yoOlQ%5j#&FKWB zIS%-fLrE`X4ZYw_U}L!4&8eGFVu(z!uWd+CeID<1nXDOS8opr@y2V|f-(`Avb$B>z!z0m#Em%%t9TpF8MZBfs2Nx>+EVEFgp0Y=htN4GvB} zq%t`fH*lu9rl#qX9}G@3yxWU0uxjHj{fV&zxl2!@oA%f>c^$V*NEBQk?IY>xz5t8rpRe_ShJ+T`G({>`sjOP?xW0%gnC z*cjJ2t&bljsYtJbpFyuZyt7k~sSoT`I1ms?nVA*JCEI|m6NXLyqF>?~z08v<4Z1G~ zq=k56C-Pm{%eL6zs}+U;19_8qJXwSVBmsp68y!(`aSQe^t*>ltY*miivKRX^XaE~e z&d<9+J2aH>v8FnQ#z~mXIEKnvXz}+aSQFN_tk+ezbJQsINxs0kRLJq{kAZv;4FH3u zRxFM~QkBIQ#0J+Ztw3@GqHk*(Wz-*OusN7;Gy3hRTLy1;ecd0_z#PzM0ts-#G#8)K z=ACM}cF{E#-H{~Y4~R<5t7bZ1xik(`FdXtJDWL_#3!RWK0yLL^vJ!xXG)VsEa-T=4 zcBr@7g4!zx@b7f$l{Gc$uD@ch84S!?%4ZJ zavHi#ufx0G^)F*yKEAOMHO8L4J~9CTVt^Zf$iL{=lrpLL*EQYa##61>Dz&fN0jEEG zxOL?HdIk$*yq{VP3?P}AY!lJaNjj@dhM2tWoYb|n2q=kOY=9wf z*)CPu(gXFW#63Ci;otkrgmh;%E$ENJSg7P;?@Va~za|7>u)a7{y~N&u zG(!1?%fv}x9k-rGF5P9ODN=mIA?e`-XgTnGocV9tAl`&OJQQ?a)Y=F%Q5Ju&F{NN! z{nmtF(2m}&g>J9~FI+RJ@}(5k;0uJpw6>mUWha?VZ6c#Euyf~A;0x~Pd<53C3H{iH zsa79I@3jL7hl!v{$iX-)8nn7*=qX+QBnBQ4@U61xV=w~e-8J7P{a>m78q8um4iFgG zY;0$5BsNV`Zt7F7JWMG3_LA0D?i%#|rnzxeH48};5+xRI0ohZZyDwsf1aIRV#TyB)Ds zo)sE~ch-gOLcq)h`JZ7okO)}841@mbiZcxX>sL{pAMIYQ8h0J$ysG|KRtl9LB|zg2_KA#swN)x^%-3kkOCe06ZIO4?- zNYAv~W8!LfO)R3jU&eL)7`!vvi66)n^jWANG8Fw>44h@dH-sWrxp_tYB>6}$i1dH` z%K1o5OW2qbji#I!~)s7-#+ZJqDFC!eX^%E&0nkN6U)XG@)v)H6G zv1g+Y3K6WG+av|7rWiuM6gF45Xrbz+YEn0gLME$qebl7iK$~IAGfD z#v=~?uwo?ABaSde0E~9|q0~Y&iZ-8*d4ymauB8dkNPCz-5EOfv^2u(|cD;Dqgp@z_ z*^vh9;DJyT<36pskps`%p{9FxTBL(;F*$cFVv!+|(v#p>8mw-=94!7pcy#f1{3;B^ zUS65ja8JP@>>VFi4K9{LuhKi6d(|C&-~_Lk{=EMXdeZP7FVLgDcNP9adUf+4Ql z=8hp@r);DcYTh&ak*P9YyqSejB3`l~1ek(9CoZ}AA#|yxCjAPgeIopGV*3~bo`ba+ z4&(vZiwXGiu@bVAa52YA6iHhPf!{NF&+oOZaB+058*s)-I19aeqUa3v4ln@z=XI_V zvXf<*RyLOK@GHXA0A-9HTG=x`w53u{;x6_&Tq*^5yo|_l&+b0(a;zsC73UJQ5P1}1 zQj8>*cfLQ;kntxS{k8J$?3~z~D+82m{^tb!_jVPxy%s;Dtk*VcU4tiY<|kG9-28}> zuaG8>opCKaf291kjH@Swn@203RX?(x;r}_-bzuHq8`=LpL*-`peXC}lJq6}|$Y(KG K(Q;vZzyAZS`o-1& literal 0 HcmV?d00001 diff --git a/source/week-5/model-testing/images/R2Score.png 
b/source/week-5/model-testing/images/R2Score.png new file mode 100644 index 0000000000000000000000000000000000000000..4c463ff0afc9c6100751db02e67048bf633db9b2 GIT binary patch literal 13463 zcmcJWRa9J2)22y)1lNS%1b252+PD+kgS$JO;7;)1?(Xi5ySux)+cbQ0@y(i9Yp#AS zIL%u7?6bShuG(+aQv}J$01)A@;UFL&5XHrW6(Ar!^ngFl!h8b%{+)K71-?Mp3yLej zfPXw-eg%U6#&Qr*bx^c6a&XqOGlVdL^kC2=d)&6AJHs(PZWtTU)bNX9M7%*VMoT-Cu zDT(uK0S%#7A`Y`{LR|#tu1#4Qf?qc<3K-!ClY4|&K4+%)2kJq62r8gfBZzHDJ~8={ zmoGa_Mu#jPXhHHei2NC)!dBU2>RIH0_mj$8SHvm7z|W(L3{TDOG_KnwK4HQWk|24Q zbryDWZ%4D)%+so{aY);=l3(U-Gj;%^)85I^G>Cg)G{4_Z%wGgK!W^Z~UcBhX6p@39 z^`F%@OPGul0yz=BdOx2#859dH{bw!!(o!ib#^iObH9tcnDAk$$ZTDgCZ4c@@MK}xVyk#Ry#Ppz8B)s*8&tZaeeUm{B{a+$W)6ujuUkmvB!-~ zlzA9zI6S_Nw{H_a_O+*R87_S*LYaAgx}f`%0{WF@^0mis2R*t1!iHM+Y*Q#MXL*`J z>xaAkyhsX?t+Y&(PRbF2@SzH_eA-uNqA>kXkn$QF z0*QACGTeNYcFEZAZ^#6Gh##z-?l;c)_JzDHrYrk6L8|xf0?!tEIPKGLlkCj*@(!Cg zkzKhtXP9SvpGrjRe(ZuwxyWbmbb=gOOa8FF^ozYObC_r{+GNu(D_@_^km0gk|Jov) zXd<>W`3#Y)PJ;c0V>xKJYOel}@KbAT;_)Mz7$44PDZO^?$o_rk0k&AgxI1G#cb}Ak z0odEt9Ts(_7-ZA>8r765li5>K1d2YB+qUC^G~915)i*V*)&&3exRq8<*h3}KM`S8q zk-P`p@BXVl(;Pf?Isf(xft@zFbiIDm^k0`N?KXu5{+CasDKgM@VySdwtl%$-Vr4*< zAQ?})7!~V2Hn_YF@n#>v`p6BA^GF*4FDM5mpe5>?NqudyLYK|wjr_3@asbrM00Tns zh9^Avuygo0->{k7)YaE1g{ITejNvohaAlsqmR>0`laaJIZSk#{hyi{xUiJ?n0|h96e2siOcJ z=65K3mGZxD16kFr?PdP0gHu@FKo$!9W~1)3u@Okhbx!pU$HnmY2ft>+y>fd3mh&K# zmz}HCA9}Yf45fND&5mPo2Sy>c<$1)y+-{L$Eqbe7mf6+(l!aYg9h6S3M_-bYlUFW^ z&6IL$v1_(`@njtk)zD;M)=_$@S$Q-X*ozcsPaW^ZvUOy=C6Ycc{5v(NZ_;zLkRZNy zqbk_VwJ0huyn5o|D7yo<@} z`8f~a9{J973t=rJAMqioTh!_kh z)wTr66uVcU(8osjWjCF?UgXu~n&ia>Zf1DG7w(Z_(-IE{it=W`F1FnwGCLv9GXJka z&@QxgZefg8;>_DwLdxlR>ULB8$o7?!?z@Fvf4Fi4$_`jlirKoe7d=YEQ{)$SoK#*& z<-5-md3&sX+Rk3yK5bGbQkLoG+c_vnA{)OpsO1zq6Vxa=2H(nscr`_~B{ltd_vC+5 zSl1$Li$N6}US?Op)NDG|O1;y`7$dTwJ)4ut@W?TrS`Mz<#z%t3Lk_74@Cremv{V0( zGmp&BlqCwx$yJKjx#LvaKA+!~ zKda+?K=67N_i#Y5H^pABqBEk^mFh}|F3q*MIhxnhnVp;zt77&7MDe{vvK)&8X`4J& zkXqw6SgVJx{7);3xqMxB$OPG!+5Iwy1}p^F0xh3Hg5<|oESR}#Yw10*h(gj4t|94T 
zjt9t9(mzG3Stg2R5vgBOW$M8b!c|q`@#%MzFP3&g^W@jl||*#b!w;3t&~8vmVi&k%H5?Y1JB$y`*Zgr`UxP*Qv{} zt5~dvndBMiG~kR6)6u-0IaLuw?;PTXzd6!RbaL3U;FQ_2Z+#k9Eyrn$e1E5==ef(x zX;>-q5~f5!hz5X4mU!fS8V5HX2@Y~P;-)(~`;S01Z23B^5g|6#z5Q~r^V9pZ1l(Vo z&atZc^v3i>-AUS#Rmb#9Gqz`bCLE8_Trp*Y zPGpzbQPv$WbBvwUi)KF@2Mu&`eEr_+7`JK9Z$aF+R_6f!{#eS{U#OBjwnYt0RC+XJ zZTqcS3(QDQ5>h+1lpX~ZUF5pDRX_JW5F2wmxQ`5X7>)e>t6RW&FMA}82!OW8+xSB` zHLjlMJ1Pv?YP?#U2flzHluWsPr+$Td(X6so_F#$I90Z}*m#p171awxJ6`^eK1AJ0@ ztWWhjAL5!lF=Hh3B=4%4SIw!*SZq9b&H=Y85@_oD)>Ml&(h2?x*nO?+ zQH}NCi0&CokeEDtKzF#rS2fV(X5HW7>8-=YB%V?h{k{lv*Cn$r1%*?-qK101a3p|Z z@`WYV#UGf&kMmcwE^DKT>%2$!!)C*p>jG3bglP4GxV^6*mdRbYBL9Y~F zn4dplr)$3ew+2PTW5Ycrq_H@kOr%ukL=2U$O(UA4(b@i&RSoIF@pv+(b5ae_4UX{6 zABEFSlHC$}mWjC;Idrs%>fvQ5l2UFnX*i1C822IbJ50!0<6iOhSciY2c+w(v89r*m zno?ug0Mc?XtUoe8@{6PbB@*Y3%}0A zX3*C%v|qcvVlCPc0MkcpH_s8|61=4k)-*wjDJ-x=iHziu{ zoAAZS>w*tRP0%=WA4*849VG2Tu4;*!F$_qhS?2fxq!Hf{7N*fvQ@0wf$HHa!JZ?O# zn|A`GkE~n(8%Cubu6T=I&nmsM<&Elrxhv8)W#h)Pwq_jV??_sqO1Sc4^Ia}#5;^X| z_Y(Ucp$np7Z(E9V~!CUj8@5529CJ7>zw&1f3HWd|7%71$nAfr0N>kl z0K?^NifO^w>PG4R&tY&zSCI^GnO<4OKZRjRHV92;QP)6^-9mWXAy8eOdR zz!g>V#l(VN7MqyjuZi9Gx`RiYO-p#Y>td&V;H`s-H z?icd{?nI;fv@1~18b8|``n-?;IP_c7Dzw=*klx*6@yb$EE zVr+CCKDS<%>GSzEsYR)`*NJz-#m~?(dDd(3h$_c{OMDS5wZ*@qWpLZ9JtM>kerTQc zH(MA0^MgDs)Gyfcq#}#e0wVXHde$i!oUK4*HcZ=F074-!e5{#) zw87W=xT$0J43YU%iU0c&qV_rfL)c=9Orad1`TP)L=Z0SJ=1oQ01cP#(G1~-Wi#`ng zo2s#4k=Z}nENG1#hcpGD-1}I%3wIuW;K09|beuFvimMvbWw(E*CF#rKzz*}rICfef zXvV8{L7y6n%C|U1h8|2$(plO65z?45Zo)FvXGkw$PPTF9nY29@f`ae4!kMM5CGehX zNs${f9ObN1TBo+h$X@UVEyZvIcqPIXJ9k2zQ;qsf7Q`6JY(ydhN&0&Zl@4SrI84+u z96Hf!Ngp@`XcuKmv~Q1M(K-xD<>qC{jj*^LNuTFi^ZFM>Q5tvfQ`oIJYm=`t*i zUI!!e9|WY5l`wcw(k@_&EKOvTG3UO%d_;}EXsgXnAkjxwi;|8tDz)@#X}(uBIxdJQ zdyz9gXKefX@eb;K_kMe4j{Gu^!JAxw=TiQZXfYJ-{h`?mXR}8m%?&EDN%CpuZdFh0 z8B_>QH-l*uS1wfKP_R=3uf+5-0n|t9i}L*2oR(c4c5Klrvtt%xOCDvkW*ZEEOxi4# zBItQMyH>3EVQvF9g_T2Evc++NLI#^HIWFw0yHB$Xk5v;hD16kB4p-H<<@s`SE`JH$ zpKi~4BkeB}o+nyh)5%|6MmyZc=d}o>e#K@9Z-|^i7gKO(gtCL!c8b-lL2aL=VM3G` 
z5g@hU(ZkV0n;(B_q&J)+1R`s2dhqUSPqTJ5*pI}xT4eah&s}@VggP8aaRy2|5NH5) zFh+wO##_`Jk+6TJ4JI3gYXPV6-~bWzD4BiC2~Nyav$#mv`vvm!-e(^kPt>(d5wndU zOg|xfg7mLui7Ly{uXjlFy_7?=Zcb4$fTGyvj%3w{-e0m}8~n`I?c>-feuyD6{Cv0< z-{7C0@a0CvHFUP_f%!_~F_GA@;&af9K*Pk#l%t^XuSpYGHukp8=GHp?k?2}PqrGZL z76Lusv9xXSKa;agf>q5MB_+%|11EBSnt%o6&U>3znE2C*{Vx(Qgm8Vu6I1l)EE33Y zpIe?1vR-Tjj%LGY6`t`0Ob2<2%`VCl%Q=N8b7J~kG)q!Y(wol}mdS(*Sh!Equ1qa+ zK7UVltNC@IYm*_>!U})7pqN>xVBkmQ$_Qy4%7Bxy{PT-!U&zz=BsAm*U6Ee~3s;Cn zcgPG)ipn#ee-b@|KA??0jyY{{#WicJp6qnY`2fTKvfH1+2_nsQ7%JTLo~@Y6)r^KM%ZdBk6)Be6&^J!VAY#5g>+i9QT=uMW;GSdl?N+3}VW=H4~ zo41uIcEGaY;n%gw?w%S_?D5?iMw;M<7~*e=9OreJMm~B&+AKl4O)52yLaS0}03-oM zDn;6{b7F;>_{lR9rJ!JdqL@TrmPBk^Gm||WdJkJi-tV*%01`kC0v~3?Rk%?0@T&zb z_irQQuScAIk)o8_pr22jT@DsB(q_Y?Xk)QwS!${{iL>W9#k*}Jq@tD2y?7vP2s}Oq z+}6+tj5>OU$0At5;5dX@hEh%LR;m3y?mVww`8eO?Mv?vgl)1T20K>j@ebdn$KXcDT zf?g70?MX_kehn^bJq$a=>spplsbi=S;?6yl(SPohjWk;6M|P5u!VnBhV)=%QniR#U zoCV`K$%ZOz@x^5gaNX`sC%vRG$;uK#c9wuYA4T-LR7oS(>z<6ZIzcFfKc#QNpfo}r z=6u!GUf9gr>T8*=lm1+F8}0(~A&n6d`Xf7Lpq{{M@BX+m*7yn1fZs_$?$lp(c?6!u zF!=_lGA7jC&xEX)O}&-1Ym?D3l@G_AZF1${Vrp-L@j>6h!07qO0fO0z;YbA%f8i|D zjjCCgac1V9pQUi%f{QSr%_xbebUy_CajY|Ei!yUj0Y$YdxH4;#?_LqNrbeJ$+ zg)Ob&ak}(5$UT)rl-lS970Z;9Zgdmsrqx(I(XX$L8Q3~{hA|D}cB6Ph&aJlzlG?!; zFZOIgbcAeVFtsFNPre}`q#Tk8hAsZvDCmd4z|IvCLy$Yxq_EX zB|B#n!7ZP~)}$JfaqqfSucTJbM>I^#z&v9lYWLl>g&Ol@UiY~Ce2Uh)4fvjLJjv^W z>HV<`$N*%5+WY&s?780=hXZ3?TShZt3N#L(qQvmu=(3H56Ngf`<3mFcoC8RF>p$0* z^k8CQis|aEmE4v!U%*}N3=b+8foD=G)x4j1Je)1FKi?2R!s7znt^#E(ENCDkDA}x6 zhGt4M)EXTqAwMD1A67|YJto{LF_c+fZSKE5hHon#t)^&j^vd!-gXSs>f`WogU~s(o$XhjQMJkce_4S-IxH3o*wY%>l`MEe??kXR8;u-`r7PI zkgC^NNx!~4?~iAF&XJ7I1-`ySVTRO8ybyv*PG2bfh!}TROme-RAJ}wO8|-&!f>3ow zQ+eyq3_dlvR!!^jh5206AUyFLQhKGv-g=%kVT0gT+lDuDERaOzIxN_jZ8wA;ZjO_3 zWYu5@monr?dbpG134uWsZr2C%Bc!CH%UnN6q7zLxTU{J*a3U6*RxeCkm=OPh_LM$+ z_y7e3RnLt821RiF1MJY!q1a($;4c2-0L^7i0jE6w8l+Z(taBxQA!z0CuycMo?c7gtOxo*BA} zli>7l8Rz-V#u(0cq&w2)g6Y%R+KMrSN(EDFp;D&kd1^BTqo`;FT!5XsK18=F5HR8oRMLP3FnHUllIZilqFGARc!?jf&a(k&GpFO#Zy{D2VT 
z<<)z?6B%F>idw_7`%g_x+}SdnQ9C;Rpz&W!6;{iQG`kY9R3d|Mw0LM~U@YbyvS=O( zeXOI|?84X!-iCHobLDz5bkC~^S#x=^1c|KXJ2CEcCWMO*;E>H)N8aNtH4S!&&j!6l zmJ{dK*UzJ@h^=Rx@Fz$D;Nm)#m9<1mRgY@3i|xTwf&Hwx)lz-OPy(~*YAf&l{(jxa z(czhI{emF>ls1R=n~;{<#@ac-|{55|)d5_VADVhC=Hlms#I@}?N{h8uD^ zgB=rhmvo1Wg(arKpm(A~gB=h+E}Y85#8mHeoTzwGGCP<`FIexRfo>org+LbFUuvEw zlQNJl;A5wGEefM@etWxHGplVs&>|T}vmf)gK*g^k=Vw!!gdWq^nCPD$}Ov;rejbeWe5?(}vn1r~({6=O19rXG@2{QL+-DS`BRL zvepN)R1HN~FwnSPpDDI8)~C^GWKk|MTIHzsxWBkMnr9wt@b!gavs~Dkx`PWD8Attp`Gw2e`?h*HULZHUr9-%PQ`g#7r??syiHQD3;$9vmYbBs9Skle#q zd_?9s`;j^uhReyc*dP*u!AV00w#h9U_)OvkWbY_zG*ePg1cGNY=KO|=+Mxq{-Yy5< ze{W0_o+E^_rc*F95tS_|q=YvBw!RbJD>1FcQB31jnoSs& z@TDmAZX&I01mF1dMHvCvR&h;mse?9XJoQDy-1Fh8V{njqutB-#r|tGY?8ja*Ukc3O zc_EOtdEeSMd?94qXA{z<;rYRouoWR#@3TuA*FeD~WpME=H8m9hmxE!jp-82yQolR+ z{nme>LZ3OMwFI>ZMDz4*f@?fpOBf}w{QeLJjLH##(y*<96$6({V&G>MI9<-ZC$U?T zad9Pf1t6p2;3OF?E0!OTob?e$$mPl4na!3;&z5SPE;M7~;Dofb0psksQo_}1%*M<0 zgs|Bx`p(w7s-|f~*6k0bC~K{jB@`6U#KpyvIqi35&I=Ut`uh6nm1nxTd;;*@0taHL zH#aw#Y~ybg@vmsA+|ml(ZYVErlW< z?E5>9s{om@CONn$cVGw>6nm>J=vC+4 z*mcgum`PjFcWK(oab*19DY`R?*wz$uV7Zn548)40f+KvZb_X=rQZQ*q%W1SCdvbL- z63z43)0<+71xjT*{ClIv(eFrr1oRYDLYChhVAYihD#FBlT~}nwOIuq*&7V->`X;je z&eT!hAOT1@yQ=6k)BCQeH{O$p)_3~D`1&t2l3{L4(n4Qk-cxYB4GutNx_Z|UYX=xW zAYdU#W8q~T~(4-j_oteqE)QOJU5 zMn2DYWlmg7`*}v&!FJNSU*>%Iz31aZZFX~(p=KH0!*h5!at@F+{^f2PW|7=ccjKu|tm5)vYdLF=l_NYC z-AlhADVj(OT)+L&4mN^I{JF4ByGp{w2dVoRN82PYN)C`6tJk(}ecjZ1;?cT2dd&=u z&@{TP@2R$WL0XjEK3#a5Yr=}y(a2&w#QCtvk+MGN7e09ZTWKo~=-1l?T@BrCKXFZz zcsgFejbSrjmcXsnd*)Z0X6NA4JMhKcbv1Fgk^15M z*i;lQg7M2C9|Hb>0zSxQsPqOo06Dx1lG?VmdnI;VmlH~a_9iyj6Y1_^kpq+*1+v)` zz&$(US+?&o>s2uh`ae!jk;OuY{^|5_Ota{)h^Hz> zN=sxKc83=~n(b>A}$2BxP&egFg~;N4PiGD*E}%JAaAHjwiQ~d}*Ah*Gr88hUiuO0dYt64x>wXJvN|&ifdeTJ6w@^=n zLWxjVFZa?t8hK}`ZQA-4`ION!Jq%}ri;)t0el%VZjS*tScdS`f~Svd(kgZBleL;xRngO(6hln?$;e-v5M_X#o$#gg$g?RqZAaOOyP%-*UoIXV zmqs}HeKI|;y5u^&{GIgt1NZ}buaM#l{Qex9ry4%oo;=GJ+l5RY1Q zB@ewTIIpL27*CCO(of4gsGo;!ygluAlgEMSm}2eEZ=DC~D|sm5Qf?!Lr$v`gZ#0Oy 
z7M>{H;}DIMsyfGUCv&l1_`+FuWV<>?eWI}fT>bv;Ex|>fd+;LQVvJehncQ! z^4=2_Q`#xUcVEuRmp~9qo-i8ofxjU~6dL7mH&SCbIfvl9-`?;m9;?Y*pPfe#rvA%7 zPWnEAxVW76_l344kihr?eYqO-)$d4V+~;TgaKoWwu*$xGZU^einsG4Vzd=_fmEc%tn1MS5(m8FGm31_YfFLad(nVmL#s!M!> zJVQ)~Lg&Whk(eC!0G7X=t7MM0csMtLhMyQ*n(c47SyWv94u}Rqm7?^X1Al*#@yzae z^Ae6^3<5XWr(nc3zOG$z+GnX%BwWd&8-mHr53}8RI|#<3D_%Mm9~Lo0JinKJjjaI$ zGUJHdF=OP%Z|{~dYAW#++^1G5WxYdx;-|Bd zH)NQ3zw2vmAV(4%NC{wDLTk_=n!w(+3&o*JHVRrod+&0^vxO ztzVyPbbHvIpi)W7OY3MlK4s_OH)r9Q1*Sw&4-jSyi9uzYb+slLz06eyhFb`;;QZb} zkLVpF8FG7ou1e!}`u}R%yEarD8ZcUv{?0dIGA`KXz?Pvr9uUX_2* zPAbAveXM7bz}}Th3|)JXFj5^TqJ4;eHRNb?g1wNT+;{v%GESnnZ8D1tpRn3w-((FATt#b&F*%|q!Q2QBhTYTgsU z>tC_pd>yf$#=|HSnzFJGpZcWFanpM&{3|?dBwGY!jXQx|b857x1Krbq+Wk=@TubIn zjZsPoGnbumyoi~0*tf~lT0-M~o?HuBF$*&b3)9dr9Ye>|#NmxYjeR`=ccK|RwZE?1 zrA+MTg{JPmAUd8ZHQG|VsG0?m6VYUagE6Dg5&~Es>{XeS59Y>FOjKHM|13T$`I<`o z-n_uUbt+DFJTZB&9pTboXv;3L9<);E9g}MG_`rh9V(ORHQ>ic2xe^V5o5=i^UPfi3 z0$S09Cn7kW(iVx=dFP|=UzNgIis)0x28l)C6-IJ*`m+r4-?S-QxVG_kdc3p6b7Bt41PGCKhH z%C-r!YHv@YZeKRPbcUDBp~QEr2CbeQuHFcz0o4!C{x$r2oUX^r*J)&(EBeAmiYMsrZVyIdh!iFqIxfP(B*9 zR+?7Ta*#U?jXnj{IuHN)g3*y_N>KGu$Zxel(h?T9z>L@|@gOqgKqq%9G&Wy4pi-pC2@`Fp+! 
z2l4Y|YmbLMsu1^VZ&~%HlAdTidrfZOs2juxv6SwUr0Z<}m(V*%_2@V|#Q3?_CZ`$= zK0qiKm?@ulTJhi`?#J{xGCgjMDG@9@rqenU!?}3aQhJ%MJO}JN{=g93E_#(zq#8oV zkUlV*wOU>>)J%(3x;)1txMT91?`9a49#??Z(hCV}T$m#`F!?Wj+GNf6J$phgm+SUSo9xKiv1r_3jM3GIdNQQ z=C2ig&%e{A1=c^BIBn5^@ZI&JgXcl0?Zj8KJ#`gRQm^WFLDJl_Nr5|A?6Y#g@fR(% z`6qiiAd11%0I!kzSi5Y7a_T2&B04|DzOd(vUcLl?FDei0MnZ(!w*n)jHz7a)E~*w| zT_c#wJdAsG<2E^u)R!L`>koIIjd2_2rXcbA)2GU)igRS#$B3M6H|u27T|JO4U*wD#a!Uw3eiH5PnR4nz4u1P;1NElj z%NOtI9~e)l$1rX;?vRQ1>cp38nmc~|NI=Ne38==Q#H7Pw(3B+Hclh|}Du(K?nR1V- zjs5e!dBDlAQhpnu{-hnXavj8E~xPBdaMMCZ~`>LBiwFa>}lste)a{j9}Sup-Pt zfSdrj4~2bN{dh}Ls7n@fo#kG2fD82wM@w}GW(i|Z1m`+jyfO|{KR}1Onw~3^728-1 zf%E7adL(5&uBk3Q^r@aXXXW?-`zijJw)llSqdF1g>^61xfmUOW+v@FGEKdF|y^7VC zjGKZw{DZ=uDNUXu*-_J}{oBe|Pt$YOWgJflwG2fpHO4>xUQqcLYLRLl-_O$Oarm<2 zz!4tV84jY=YS}D3JH%wUi1#kjlVe{&A+2_vochb{Jh(dm^(p0zUq~(zw|93gHU7<0 z{u9^m#;o*0T_zZMWtI{~cZIsTlP@XQ#MvI&X%S8hCgqx7LIi>PK9`GV_wQ;W%VBkx zy27!o{unTIpD+K@8P)qndG? zJGKKNxjk{w>&>p;gqqGL$#QhAQOWzNsF2_Wb^pZx!4I;buk^P<8G!L-Ks!9}DS2#E zg@!A5GcoIFu7qlJx=8pT2$CfAHsiR@k;b*IRU?tkSth?@>{f%}m(`a^odRW+B-xTT z0Wv4DiTC3&?j*d^ixmH$J5a@_iu4J^n6B5BW;|jIX+6}{fJe*ccok9DnI@QnW-$Gp zJL6NbR}GZmp;G!${}-93lKgpH%&KqF8}}#5%W;1_)XQ`?=RUBw_ep=?_$Nx~c)MZ; z&K%j8Jsv4mc%9Z)J@8Q78!_$|CR@ZFz}ax;AN?$6JqRRxEXl>*2j;^?>JudKt?2he z8HW13ADgiFZKW{(uglipHRyV1Pc1B$w$=HeYUIc@z)QM$@6~A~#0!3%;w|B4V_Xe1 zI$pHQ#~JezH%D(+2N`SeSNY9D7NLxp5$U`noPfw-BU8Y-Ksa@SO)6c|hM$jbCXVpA zkn5P!DC*8|Cc4^;H5!u7VE@}jH2^m9?OX_KQotOGJM75=;%?=AwH_-=9XBm%pqR*) z+ZA%cTO0^Z@PVc5gqIJ=l$-$ztEJdaywgcdMs;drAoNP^7L4v=?-}=Rc3RXWW z2;~W6^*`oJyId?47o8xf1=75g0BnkX*QwSxGMKVq0MGPhPYEbM%kQEwCf5Nmv95Ie zgu%Bw>1`|fqWpZnfy1ZcH(6J0r9>IP`n9zkEHB^eh?L_+AofFC_R8?W%QfzN$ItbR zYtX&fmg8qv-5r@e$tN81&=pq0voH$H_jZ4YtTF*zW-JflV7fP9_ Date: Wed, 23 Dec 2020 22:32:28 +0300 Subject: [PATCH 16/19] Add regression metrics --- .../model-testing/12 - Test a model.ipynb | 366 ++---------------- source/week-5/model-testing/README.md | 32 ++ 2 files changed, 72 insertions(+), 326 deletions(-) diff --git 
a/source/week-5/model-testing/12 - Test a model.ipynb b/source/week-5/model-testing/12 - Test a model.ipynb index dd66085..0b2364a 100644 --- a/source/week-5/model-testing/12 - Test a model.ipynb +++ b/source/week-5/model-testing/12 - Test a model.ipynb @@ -5,13 +5,8 @@ "metadata": {}, "source": [ "# Test a trained model\n", - "Once you have trained a model, you can test it with the test data you put aside" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "\n", + "Once you have trained a model, you can test it with the test data you put aside.\n", "We will start by rerunning the code from the previous notebook to create a trained model" ] }, @@ -32,19 +27,19 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + "LinearRegression()" ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], "source": [ "# Load our data from the csv file\n", - "delays_df = pd.read_csv('Data/Lots_of_flight_data.csv') \n", + "delays_df = pd.read_csv('./Lots_of_flight_data.csv') \n", "\n", "# Remove rows with null values since those will crash our linear regression model training\n", "delays_df.dropna(inplace=True)\n", @@ -70,12 +65,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Test the model\n", - "Use **Scikitlearn LinearRegression predict** to have our trained model predict values for our test data\n", + "## Prediction\n", "\n", - "We stored our test data in X_Test\n", - "\n", - "We will store the predicted results in y_pred" + "Use **Scikitlearn LinearRegression `predict`** to have our trained model predict values for our test data. 
\n", + "We stored our test data in `X_Test` \n", + "We will store the predicted results in `y_pred`" ] }, { @@ -93,6 +87,7 @@ "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "array([[3.47739078],\n", @@ -104,9 +99,8 @@ " [5.66255414]])" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ @@ -114,13 +108,18 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, "source": [ - "When we split our data into training and test data we stored the actual values for each row of test data in the DataFrame y_test\n", + "## Model Testing/Evaluation\n", "\n", - "We can compare the values in y_pred to the value in y_test to get a sense of how accurately our mdoel predicted arrival delays" - ] + "In this step, we will evaluate the model by using the standard metrics available in `sklearn.metrics`. The quality of our model shows how well its predictions match up against actual values. We will assess how well the model performs against the test data using the following standard metrics:\n", + "- Mean Absolute Error\n", + "- Mean Squared Error\n", + "- R^2 score (the coefficient of determination)\n", + "\n", + "The metrics have been discussed further in the README of this section." + ], + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", @@ -128,311 +127,26 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ARR_DELAY
291483-5.0
98997-12.0
23454-9.0
110802-14.0
49449-20.0
9494414.0
160885-17.0
47572-20.0
16480020.0
62578-9.0
1967425.0
911660.0
171564-9.0
607066.0
240773-6.0
32695-13.0
98399-23.0
167341-11.0
126191-4.0
188715131.0
258610-5.0
215751-20.0
41210-15.0
68090-19.0
1407940.0
178840-14.0
24807121.0
127705.0
9594840.0
172913-13.0
......
20079721.0
361990.0
70402-37.0
285308152.0
201508-2.0
154671-5.0
238535-5.0
133567-9.0
3349-8.0
257254-28.0
106572-19.0
73023-25.0
214699-12.0
274435-7.0
67089-10.0
269917-4.0
16496670.0
275120-12.0
139292-8.0
31106-25.0
27779917.0
293749-7.0
23111435.0
11645-15.0
252520-12.0
209898-20.0
22210-9.0
165727-6.0
260838-33.0
1925460.0
\n", - "

88750 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " ARR_DELAY\n", - "291483 -5.0\n", - "98997 -12.0\n", - "23454 -9.0\n", - "110802 -14.0\n", - "49449 -20.0\n", - "... ...\n", - "209898 -20.0\n", - "22210 -9.0\n", - "165727 -6.0\n", - "260838 -33.0\n", - "192546 0.0\n", - "\n", - "[88750 rows x 1 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "output_type": "stream", + "name": "stdout", + "text": [ + "The Model Performance for the Testing Set\n-----------------------------------------\nMAE is 23.09\nMSE is 2250.44\nR2 score is 0.000096\n" + ] } ], "source": [ - "y_test" + "from sklearn import metrics\n", + "\n", + "mae = metrics.mean_absolute_error(y_test, y_pred)\n", + "mse = metrics.mean_squared_error(y_test, y_pred)\n", + "r2 = metrics.r2_score(y_test, y_pred)\n", + "\n", + "print(\"The Model Performance for the Testing Set\")\n", + "print(\"-----------------------------------------\")\n", + "print(\"MAE is {:.2f}\".format(mae))\n", + "print(\"MSE is {:.2f}\".format(mse))\n", + "print(\"R2 score is {:f}\".format(r2))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -451,9 +165,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/source/week-5/model-testing/README.md b/source/week-5/model-testing/README.md index 9fd8956..4ed7631 100644 --- a/source/week-5/model-testing/README.md +++ b/source/week-5/model-testing/README.md @@ -7,6 +7,38 @@ Once a model is built it can be used to predict values. 
You can provide new valu - [LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html) fits a linear model - [LinearRegression.predict](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html?highlight=linearregression#sklearn.linear_model.LinearRegression.predict) is used to predict outcomes for new data based on the trained linear model + +## Regression Metrics + +Regression metrics are different from classification metrics because we are predicting a continuous quantity. Furthermore, regression typically has simpler evaluation needs than classification. + +### Mean Absolute Error + +Mean absolute error (MAE) is one of the most common metrics that is used to calculate the prediction error of the model. +Prediction error of a single row of data is: +`PredictionError = ActualValue - PredictedValue` +We need to calculate prediction errors for each row of data, get their absolute value and then find the mean of all absolute prediction errors. + +MAE is given by the following formula: +![MeanAbsoluteError](../model-testing/images/MeanAbsoluteError.png) + +A large MAE suggests that your model may have trouble at generalizing well. An MAE of 0 means that our model outputs perfect predictions, but this is unlikely to happen in real scenarios. + +### Mean Squared Error + +Mean squared error (MSE) takes the mean squared difference between the target and predicted values. This value is widely used for many regression problems and larger errors have correspondingly larger squared contributions to the mean error. + +MSE is given by the following formula: +![MeanSquaredError](../model-testing/images/MeanSquaredError.png) +MSE will almost always be bigger than MAE because in MAE residuals contribute linearly to the total error, while in MSE the error grows quadratically with each residual. 
This is why MSE is used to determine the extent to which the model fits the data because it strongly penalizes the heavy outliers. + +### The Coefficient of Determination (R^2 score) + +`R^2` score determines how well the regression predictions approximate the real data points. +The value of R2 is calculated with the following formula: +![R2Score](../model-testing/images/R2Score.png) +R2 can take values from **0** to **1**. A value of 1 indicates that the regression **predictions perfectly fit** the data. + ## Microsoft Learn Resources Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). From 0a87dd3cc1649ab8ee1236c67cc08315947c24e5 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Thu, 24 Dec 2020 01:02:11 +0300 Subject: [PATCH 17/19] Model performance conclusion --- source/week-5/model-testing/12 - Test a model.ipynb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/week-5/model-testing/12 - Test a model.ipynb b/source/week-5/model-testing/12 - Test a model.ipynb index 0b2364a..f9f846a 100644 --- a/source/week-5/model-testing/12 - Test a model.ipynb +++ b/source/week-5/model-testing/12 - Test a model.ipynb @@ -147,6 +147,13 @@ "print(\"MSE is {:.2f}\".format(mse))\n", "print(\"R2 score is {:f}\".format(r2))" ] + }, + { + "source": [ + "[Read more](https://www.dataquest.io/blog/understanding-regression-error-metrics/) about the metrics and try to understand the performance and efficiency of our model." 
+ ], + "cell_type": "markdown", + "metadata": {} } ], "metadata": { From 2b96d28d01ca750a2a59ea61223af3caf5159f5d Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 6 Jan 2021 12:48:55 +0300 Subject: [PATCH 18/19] Add escape sequences for python print --- source/week-1/print/escape-sequences.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 source/week-1/print/escape-sequences.py diff --git a/source/week-1/print/escape-sequences.py b/source/week-1/print/escape-sequences.py new file mode 100644 index 0000000..6a0935f --- /dev/null +++ b/source/week-1/print/escape-sequences.py @@ -0,0 +1,11 @@ +# \\: backslash +print("A backslash looks like this \\ ") + +# \b: backspace +print("Hide the s in this\b ") + +# \t: tab +print("Name:\tMark") + +# \n: newline +print("Line 1\nLine 2") From 813e41fea233f1f1ad41971478a583e05af49c96 Mon Sep 17 00:00:00 2001 From: Timothy Wangwe Date: Wed, 6 Jan 2021 17:11:32 +0300 Subject: [PATCH 19/19] Deleted readme file to solve conflict --- source/week-4/csv-files-jupyter/README.md | 30 ------------ source/week-4/intro-to-pandas/README.md | 14 ------ source/week-4/jupyter-notebooks/README.md | 20 -------- .../week-4/panda-dataframe-content/README.md | 11 ----- .../week-4/panda-dataframe-querry/README.md | 14 ------ source/week-5/model-testing/README.md | 46 ------------------- source/week-5/numpy-pandas/README.md | 27 ----------- .../README.md | 16 ------- .../README.md | 22 --------- 9 files changed, 200 deletions(-) delete mode 100644 source/week-4/csv-files-jupyter/README.md delete mode 100644 source/week-4/intro-to-pandas/README.md delete mode 100644 source/week-4/jupyter-notebooks/README.md delete mode 100644 source/week-4/panda-dataframe-content/README.md delete mode 100644 source/week-4/panda-dataframe-querry/README.md delete mode 100644 source/week-5/model-testing/README.md delete mode 100644 source/week-5/numpy-pandas/README.md delete mode 100644 
source/week-5/removing-and-splitting-dataFrame-columns/README.md delete mode 100644 source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md diff --git a/source/week-4/csv-files-jupyter/README.md b/source/week-4/csv-files-jupyter/README.md deleted file mode 100644 index 56c22d1..0000000 --- a/source/week-4/csv-files-jupyter/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# CSV Files and Jupyter Notebooks - -The so-called **CSV** (Comma Separated Values) format is the most common import and export format for spreadsheets and databases. The CSV format was used for many years prior to attempts to describe the format in a standardized way. - -Python has an in-built csv module which implements classes to read and write tabular data in CSV format. - -```python -# format example ->>> import csv ->>> with open('./airports.csv') as file: -... data = csv.reader(file) -... for row in data: -... print(*row) # * is used to unpack lists -Name City Country -Seattle-Tacoma Seattle USA -Dulles Washington USA -Heathrow London United Kingdom -Schiphol Amsterdam Netherlands -Changi Singapore Singapore -Pearson Toronto Canada -Narita Tokyo Japan -``` - -A this module has a lot more features, checkout [more details](https://docs.python.org/3/library/csv.html). - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-4/intro-to-pandas/README.md b/source/week-4/intro-to-pandas/README.md deleted file mode 100644 index 2184315..0000000 --- a/source/week-4/intro-to-pandas/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Pandas - -[Pandas](https://pandas/pydata.org​) is an open source Python library contains a number of high performance data structures and tools for data analysis. 
- -## Documentation - -- [Series](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html) stores one dimensional arrays -- [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html) stores two dimensional arrays and can contain different datatypes - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-4/jupyter-notebooks/README.md b/source/week-4/jupyter-notebooks/README.md deleted file mode 100644 index 3581879..0000000 --- a/source/week-4/jupyter-notebooks/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Jupyter Notebooks - -Jupyter Notebooks are an open source web application that allows you to create and share Python code. They are frequently used for data science. The code samples in this course are completed using Jupyter Notebooks which have a .ipynb file extension. 
- -![Jupyter Notebook](https://miro.medium.com/max/2544/1*ezJx8ZEu1Va14iscq_h5Gg.png) - -## Documentation - -- [Jupyter](https://jupyter.org/) to install Jupyter so you can run Jupyter Notebooks locally on your computer -- [Jupyter Notebook viewer](https://nbviewer.jupyter.org/) to view Jupyter Notebooks in this GitHub repository without installing Jupyter -- [Azure Notebooks](https://notebooks.azure.com/) to create a free Azure Notebooks account to run Notebooks in the cloud -- [Create and run a notebook](https://docs.microsoft.com/azure/notebooks/tutorial-create-run-jupyter-notebook?WT.mc_id=python-c9-niner) is a tutorial that walks you through the process of using Azure Notebooks to create a complete Jupyter Notebook that demonstrates linear regression -- [How to create and clone projects](https://docs.microsoft.com/azure/notebooks/create-clone-jupyter-notebooks?WT.mc_id=python-c9-niner) to create a project -- [Manage and configure projects in Azure Notebooks](https://docs.microsoft.com/azure/notebooks/configure-manage-azure-notebooks-projects?WT.mc_id=python-c9-niner) to upload Notebooks to your project - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-4/panda-dataframe-content/README.md b/source/week-4/panda-dataframe-content/README.md deleted file mode 100644 index 2662b3c..0000000 --- a/source/week-4/panda-dataframe-content/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Examining pandas DataFrame contents - -The pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) is a structure for storing two-dimensional tabular data. 
- -## Common functions - -- [head](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html) returns the first *n* rows from the DataFrame -- [info](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html) provides a summary of the DataFrame content including column names, their datatypes, and number of rows containing non-null values -- [describe](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html) Generate descriptive statistics include those that summarize the central tendency, dispersion and shape of a dataset’s distribution, excluding *NaN* values -- [tail](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tail.html) returns the last *n* rows from the DataFrame -- [shape](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shape.html) returns the dimensions of the DataFrame (e.g. number of rows and columns) diff --git a/source/week-4/panda-dataframe-querry/README.md b/source/week-4/panda-dataframe-querry/README.md deleted file mode 100644 index 2f12810..0000000 --- a/source/week-4/panda-dataframe-querry/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Query a pandas DataFrame - -The Pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) is a structure for storing two-dimensional tabular data. - -## Common properties - -- [loc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html) returns specific rows and columns by specifying column names -- [iloc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html) returns specific rows and columns by specifying column positions(index) - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). 
- -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-5/model-testing/README.md b/source/week-5/model-testing/README.md deleted file mode 100644 index 4ed7631..0000000 --- a/source/week-5/model-testing/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Testing a model - -Once a model is built it can be used to predict values. You can provide new values to see where it would fall on the spectrum, and test the generated model. - -## Common classes and functions - -- [LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html) fits a linear model -- [LinearRegression.predict](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html?highlight=linearregression#sklearn.linear_model.LinearRegression.predict) is used to predict outcomes for new data based on the trained linear model - - -## Regression Metrics - -Regression metrics are different from classification metrics because we are predicting a continuous quantity. Furthermore, regression typically has simpler evaluation needs than classification. - -### Mean Absolute Error - -Mean absolute error (MAE) is one of the most common metrics that is used to calculate the prediction error of the model. -Prediction error of a single row of data is: -`PredictionError = ActualValue - PredictedValue` -We need to calculate prediction errors for each row of data, get their absolute value and then find the mean of all absolute prediction errors. - -MAE is given by the following formula: -![MeanAbsoluteError](../model-testing/images/MeanAbsoluteError.png) - -A large MAE suggests that your model may have trouble at generalizing well. An MAE of 0 means that our model outputs perfect predictions, but this is unlikely to happen in real scenarios. 
- -### Mean Squared Error - -Mean squared error (MSE) takes the mean squared difference between the target and predicted values. This value is widely used for many regression problems and larger errors have correspondingly larger squared contributions to the mean error. - -MSE is given by the following formula: -![MeanSquaredError](../model-testing/images/MeanSquaredError.png) -MSE will almost always be bigger than MAE because in MAE residuals contribute linearly to the total error, while in MSE the error grows quadratically with each residual. This is why MSE is used to determine the extent to which the model fits the data because it strongly penalizes the heavy outliers. - -### The Coefficient of Determination (R^2 score) - -`R^2` score determines how well the regression predictions approximate the real data points. -The value of R2 is calculated with the following formula: -![R2Score](../model-testing/images/R2Score.png) -R2 can take values from **0** to **1**. A value of 1 indicates that the regression **predictions perfectly fit** the data. - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-5/numpy-pandas/README.md b/source/week-5/numpy-pandas/README.md deleted file mode 100644 index 57acae8..0000000 --- a/source/week-5/numpy-pandas/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# NumPy vs Pandas - -There are numerous libraries available for use for data scientists. NumPy and Pandas are two of the most common. -Some operations may return different data types. You can use the Python function [type](https://docs.python.org/3/library/functions.html#type) to determine the type of an object. 
- -## NumPy - -[NumPy](https://numpy.org/) is a Python package for scientific computing that includes a array and dictionary type objects for data analysis. - -### Common Object - -- [array](https://numpy.org/doc/1.18/reference/generated/numpy.array.html?highlight=array#numpy.array) creates an N-dimensional array object - -## Pandas - -[pandas](https://pandas.pydata.org/) is a Python package for data analysis that includes a 1 dimensional and 2 dimensional array objects - -### Common Objects - -- [Series](https://pandas.pydata.org/docs/reference/api/pandas.Series.html) stores a one dimensional array -- [DataFrame](https://pandas.pydata.org/docs/reference/frame.html) stores a two-dimensional array - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-5/removing-and-splitting-dataFrame-columns/README.md b/source/week-5/removing-and-splitting-dataFrame-columns/README.md deleted file mode 100644 index adeca5c..0000000 --- a/source/week-5/removing-and-splitting-dataFrame-columns/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Removing and splitting DataFrame columns - -When preparing data for machine learning you may need to remove specific columns from the DataFrame. - -## Common functions - -- [drop](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html) deletes specified columns from a DataFrame - - `inplace` - - If `False`, returns a modified copy of the dataframe - - If `True`, it modifies the original dataframe - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). 
- -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner) diff --git a/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md b/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md deleted file mode 100644 index 2c58b31..0000000 --- a/source/week-5/splitting-test-and-training-data-with-scikit-learn/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Splitting test and training data with scikit-learn - -[scikit-learn](https://scikit-learn.org/) is a library of tools for predictive data analysis, which will allow you to prepare your data for machine learning and create models. - -## Common functions - -- [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) splits arrays into random train and test subsets - - Main Parameters - - `X` - Dataframe containing only the features you want to use for **training** - - `y` - Dataframe containing only the features you want to **predict** - - `test_size` - If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. - - Returns - - `X_train` - Includes your the independent variables,these will be used to train the model. If we have specify the `test_size = 0.4` for instance, this means 60% of observations from your complete data will be used to train/fit the model and rest 40% will be used to test the model. - - `X_test` - This is remaining 40% portion of the independent variables from the data which will not be used in the training phase and will be used to make predictions to test the accuracy of the model. - - `y_train` - This is your dependent variable which needs to be predicted by this model, this includes category labels against your independent variables, we need to specify our dependent variable while training/fitting the model. 
- - `y_test` - This data has category labels for your test data, these labels will be used to test the accuracy between actual and predicted categories. - -## Microsoft Learn Resources - -Explore related tutorials on [Microsoft Learn](https://learn.microsoft.com/?WT.mc_id=python-c9-niner). - -- [Intro to machine learning with Python and Azure Notebooks](https://docs.microsoft.com/learn/paths/intro-to-ml-with-python/?WT.mc_id=python-c9-niner)