diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 45cfc9cd7..909db0898 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -26,19 +26,35 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Write your code below.\n", - "\n" + "\n", + "%load_ext dotenv\n", + "%dotenv " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\NEWPC\\miniconda3\\envs\\dsi_participant\\lib\\site-packages\\dask\\dataframe\\__init__.py:42: FutureWarning: \n", + "Dask dataframe query planning is disabled because dask-expr is not installed.\n", + "\n", + "You can install it with `pip install dask[dataframe]` or `conda install dask`.\n", + "This will raise in a future version.\n", + "\n", + " warnings.warn(msg, FutureWarning)\n" + ] + } + ], "source": [ "import dask.dataframe as dd" ] @@ -58,12 +74,39 @@ "execution_count": 3, "metadata": {}, "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "import sys\n", + "from glob import glob\n", + "\n", + "sys.path.append(os.getenv('PRICE_DATA'))\n", + "from utils.logger import get_logger \n", + "_logs = get_logger(__name__) " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-09-28 20:53:44,751, 366802870.py, 6, INFO, Reading price data from: ../../05_src/data/prices/\n" + ] + } + ], "source": [ "import os\n", "from glob import glob\n", "\n", "# Write your code below.\n", - "\n" + "PRICE_DATA = os.getenv(\"PRICE_DATA\")\n", + "_logs.info(f\"Reading price data from: {PRICE_DATA}\")\n", + "parquet_files = glob(os.path.join(PRICE_DATA, \"**/*.parquet\"), recursive=True)\n", + "dd_px = 
dd.read_parquet(parquet_files).set_index(\"ticker\")\n" ] }, { @@ -88,12 +131,338 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-09-28 20:55:00,064, 3115447876.py, 3, INFO, Starting feature engineering for each ticker\n", + "C:\\Users\\NEWPC\\AppData\\Local\\Temp\\ipykernel_19848\\3115447876.py:5: UserWarning: `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected.\n", + " Before: .apply(func)\n", + " After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n", + " or: .apply(func, meta=('x', 'f8')) for series result\n", + " dd_shift = dd_px.groupby('ticker', group_keys=False).apply(\n" + ] + } + ], "source": [ "# Write your code below.\n", - "\n" + "\n", + "_logs.info(\" Starting feature engineering for each ticker\")\n", + "\n", + "dd_shift = dd_px.groupby('ticker', group_keys=False).apply(\n", + " lambda x: x.assign(Close_lag_1 = x['Close'].shift(1),\n", + " Adj_Close_lag_1 = x['Adj Close'].shift(1))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dd_rets = dd_shift.assign(\n", + " \n", + " hi_lo_range = lambda x: x['High'] - x['Low'],\n", + " Returns = lambda x: x['Close']/x['Close_lag_1'] - 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolumesourceYearClose_lag_1Adj_Close_lag_1hi_lo_rangeReturns
ticker
AADR2010-07-2125.10000025.10000024.70000124.70000123.34371442000.0AADR.csv2010NaNNaN0.400000NaN
AADR2010-07-2225.42000025.42000025.12999925.26000023.87296717500.0AADR.csv201024.70000123.3437140.2900010.022672
AADR2010-07-2325.54000125.54000125.08000025.28000123.8918658600.0AADR.csv201025.26000023.8729670.4600010.000792
AADR2010-07-2625.40000025.40000025.21999925.37000123.97692118900.0AADR.csv201025.28000123.8918650.1800000.003560
AADR2010-07-2725.25000025.29000125.20000125.29000123.9013188200.0AADR.csv201025.37000123.9769210.090000-0.003153
..........................................
ZIXI2020-03-264.0600004.5300003.8800004.5100004.5100001668500.0ZIXI.csv20204.0000004.0000000.6500000.127500
ZIXI2020-03-274.4900004.7100004.1000004.6000004.6000001146800.0ZIXI.csv20204.5100004.5100000.6100000.019956
ZIXI2020-03-304.8300004.8700004.4400004.6400004.6400001212000.0ZIXI.csv20204.6000004.6000000.4300000.008696
ZIXI2020-03-314.6000004.6900004.1000004.3100004.3100001057200.0ZIXI.csv20204.6400004.6400000.590000-0.071121
ZIXI2020-04-014.1100004.1600003.8000003.8200003.820000539500.0ZIXI.csv20204.3100004.3100000.360000-0.113689
\n", + "

334040 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "ticker \n", + "AADR 2010-07-21 25.100000 25.100000 24.700001 24.700001 23.343714 \n", + "AADR 2010-07-22 25.420000 25.420000 25.129999 25.260000 23.872967 \n", + "AADR 2010-07-23 25.540001 25.540001 25.080000 25.280001 23.891865 \n", + "AADR 2010-07-26 25.400000 25.400000 25.219999 25.370001 23.976921 \n", + "AADR 2010-07-27 25.250000 25.290001 25.200001 25.290001 23.901318 \n", + "... ... ... ... ... ... ... \n", + "ZIXI 2020-03-26 4.060000 4.530000 3.880000 4.510000 4.510000 \n", + "ZIXI 2020-03-27 4.490000 4.710000 4.100000 4.600000 4.600000 \n", + "ZIXI 2020-03-30 4.830000 4.870000 4.440000 4.640000 4.640000 \n", + "ZIXI 2020-03-31 4.600000 4.690000 4.100000 4.310000 4.310000 \n", + "ZIXI 2020-04-01 4.110000 4.160000 3.800000 3.820000 3.820000 \n", + "\n", + " Volume source Year Close_lag_1 Adj_Close_lag_1 hi_lo_range \\\n", + "ticker \n", + "AADR 42000.0 AADR.csv 2010 NaN NaN 0.400000 \n", + "AADR 17500.0 AADR.csv 2010 24.700001 23.343714 0.290001 \n", + "AADR 8600.0 AADR.csv 2010 25.260000 23.872967 0.460001 \n", + "AADR 18900.0 AADR.csv 2010 25.280001 23.891865 0.180000 \n", + "AADR 8200.0 AADR.csv 2010 25.370001 23.976921 0.090000 \n", + "... ... ... ... ... ... ... \n", + "ZIXI 1668500.0 ZIXI.csv 2020 4.000000 4.000000 0.650000 \n", + "ZIXI 1146800.0 ZIXI.csv 2020 4.510000 4.510000 0.610000 \n", + "ZIXI 1212000.0 ZIXI.csv 2020 4.600000 4.600000 0.430000 \n", + "ZIXI 1057200.0 ZIXI.csv 2020 4.640000 4.640000 0.590000 \n", + "ZIXI 539500.0 ZIXI.csv 2020 4.310000 4.310000 0.360000 \n", + "\n", + " Returns \n", + "ticker \n", + "AADR NaN \n", + "AADR 0.022672 \n", + "AADR 0.000792 \n", + "AADR 0.003560 \n", + "AADR -0.003153 \n", + "... ... 
\n", + "ZIXI 0.127500 \n", + "ZIXI 0.019956 \n", + "ZIXI 0.008696 \n", + "ZIXI -0.071121 \n", + "ZIXI -0.113689 \n", + "\n", + "[334040 rows x 13 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dd_feat = dd_rets.compute()\n", + "dd_feat" ] }, { @@ -108,12 +477,510 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-09-28 20:56:00,077, 242924680.py, 3, INFO, Converting Dask DataFrame to pandas and calculating 10-day moving average of returns\n" + ] + } + ], "source": [ "# Write your code below.\n", - "\n" + "\n", + "_logs.info(\"Converting Dask DataFrame to pandas and calculating 10-day moving average of returns\")\n", + "\n", + "dd_feat = dd_rets.compute()\n", + "dd_feat = dd_feat.assign(\n", + " returns_ma_10 = dd_feat['Returns'].rolling(10).mean()\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolumesourceYearClose_lag_1Adj_Close_lag_1hi_lo_rangeReturnsreturns_ma_10
ticker
AADR2010-07-2125.10000025.10000024.70000124.70000123.34371442000.0AADR.csv2010NaNNaN0.400000NaNNaN
AADR2010-07-2225.42000025.42000025.12999925.26000023.87296717500.0AADR.csv201024.70000123.3437140.2900010.022672NaN
AADR2010-07-2325.54000125.54000125.08000025.28000123.8918658600.0AADR.csv201025.26000023.8729670.4600010.000792NaN
AADR2010-07-2625.40000025.40000025.21999925.37000123.97692118900.0AADR.csv201025.28000123.8918650.1800000.003560NaN
AADR2010-07-2725.25000025.29000125.20000125.29000123.9013188200.0AADR.csv201025.37000123.9769210.090000-0.003153NaN
AADR2010-07-2825.25000025.29000125.12000125.20000123.8162564900.0AADR.csv201025.29000123.9013180.170000-0.003559NaN
AADR2010-07-2925.29999925.29999925.02000025.02000023.6461391200.0AADR.csv201025.20000123.8162560.279999-0.007143NaN
AADR2010-07-3024.99000025.10000024.99000025.10000023.721750600.0AADR.csv201025.02000023.6461390.1100010.003197NaN
AADR2010-08-0225.70000125.70999925.44000125.62000124.2131967000.0AADR.csv201025.10000023.7217500.2699990.020717NaN
AADR2010-08-0325.75000025.86000125.75000025.83000024.41166111200.0AADR.csv201025.62000124.2131960.1100010.008197NaN
AADR2010-08-0425.86000125.86000125.86000125.86000124.440016500.0AADR.csv201025.83000024.4116610.0000000.0011610.004644
AADR2010-08-0525.91000025.91000025.91000025.91000024.487267500.0AADR.csv201025.86000124.4400160.0000000.0019330.002570
AADR2010-08-0625.30999925.70999925.30999925.70999924.298254700.0AADR.csv201025.91000024.4872670.400000-0.0077190.001719
AADR2010-08-0926.00000026.23000026.00000026.05999924.6290365500.0AADR.csv201025.70999924.2982540.2300000.0136130.002725
AADR2010-08-1026.08000026.08000026.08000026.08000024.647932500.0AADR.csv201026.05999924.6290360.0000000.0007670.003117
AADR2010-08-1125.17000025.17000025.17000025.17000023.787901100.0AADR.csv201026.08000024.6479320.000000-0.034893-0.000017
AADR2010-08-1225.04000125.04000125.04000125.04000123.6650431000.0AADR.csv201025.17000023.7879010.000000-0.0051650.000181
AADR2010-08-1325.17000025.17000025.17000025.17000023.787901100.0AADR.csv201025.04000123.6650430.0000000.0051920.000380
AADR2010-08-1625.28000125.28000125.24000025.24000023.8540633000.0AADR.csv201025.17000023.7879010.0400010.002781-0.001413
AADR2010-08-1725.57000025.75000025.57000025.75000024.3360566500.0AADR.csv201025.24000023.8540630.1800000.020206-0.000212
\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "ticker \n", + "AADR 2010-07-21 25.100000 25.100000 24.700001 24.700001 23.343714 \n", + "AADR 2010-07-22 25.420000 25.420000 25.129999 25.260000 23.872967 \n", + "AADR 2010-07-23 25.540001 25.540001 25.080000 25.280001 23.891865 \n", + "AADR 2010-07-26 25.400000 25.400000 25.219999 25.370001 23.976921 \n", + "AADR 2010-07-27 25.250000 25.290001 25.200001 25.290001 23.901318 \n", + "AADR 2010-07-28 25.250000 25.290001 25.120001 25.200001 23.816256 \n", + "AADR 2010-07-29 25.299999 25.299999 25.020000 25.020000 23.646139 \n", + "AADR 2010-07-30 24.990000 25.100000 24.990000 25.100000 23.721750 \n", + "AADR 2010-08-02 25.700001 25.709999 25.440001 25.620001 24.213196 \n", + "AADR 2010-08-03 25.750000 25.860001 25.750000 25.830000 24.411661 \n", + "AADR 2010-08-04 25.860001 25.860001 25.860001 25.860001 24.440016 \n", + "AADR 2010-08-05 25.910000 25.910000 25.910000 25.910000 24.487267 \n", + "AADR 2010-08-06 25.309999 25.709999 25.309999 25.709999 24.298254 \n", + "AADR 2010-08-09 26.000000 26.230000 26.000000 26.059999 24.629036 \n", + "AADR 2010-08-10 26.080000 26.080000 26.080000 26.080000 24.647932 \n", + "AADR 2010-08-11 25.170000 25.170000 25.170000 25.170000 23.787901 \n", + "AADR 2010-08-12 25.040001 25.040001 25.040001 25.040001 23.665043 \n", + "AADR 2010-08-13 25.170000 25.170000 25.170000 25.170000 23.787901 \n", + "AADR 2010-08-16 25.280001 25.280001 25.240000 25.240000 23.854063 \n", + "AADR 2010-08-17 25.570000 25.750000 25.570000 25.750000 24.336056 \n", + "\n", + " Volume source Year Close_lag_1 Adj_Close_lag_1 hi_lo_range \\\n", + "ticker \n", + "AADR 42000.0 AADR.csv 2010 NaN NaN 0.400000 \n", + "AADR 17500.0 AADR.csv 2010 24.700001 23.343714 0.290001 \n", + "AADR 8600.0 AADR.csv 2010 25.260000 23.872967 0.460001 \n", + "AADR 18900.0 AADR.csv 2010 25.280001 23.891865 0.180000 \n", + "AADR 8200.0 AADR.csv 2010 25.370001 23.976921 0.090000 \n", + "AADR 4900.0 AADR.csv 2010 
25.290001 23.901318 0.170000 \n", + "AADR 1200.0 AADR.csv 2010 25.200001 23.816256 0.279999 \n", + "AADR 600.0 AADR.csv 2010 25.020000 23.646139 0.110001 \n", + "AADR 7000.0 AADR.csv 2010 25.100000 23.721750 0.269999 \n", + "AADR 11200.0 AADR.csv 2010 25.620001 24.213196 0.110001 \n", + "AADR 500.0 AADR.csv 2010 25.830000 24.411661 0.000000 \n", + "AADR 500.0 AADR.csv 2010 25.860001 24.440016 0.000000 \n", + "AADR 700.0 AADR.csv 2010 25.910000 24.487267 0.400000 \n", + "AADR 5500.0 AADR.csv 2010 25.709999 24.298254 0.230000 \n", + "AADR 500.0 AADR.csv 2010 26.059999 24.629036 0.000000 \n", + "AADR 100.0 AADR.csv 2010 26.080000 24.647932 0.000000 \n", + "AADR 1000.0 AADR.csv 2010 25.170000 23.787901 0.000000 \n", + "AADR 100.0 AADR.csv 2010 25.040001 23.665043 0.000000 \n", + "AADR 3000.0 AADR.csv 2010 25.170000 23.787901 0.040001 \n", + "AADR 6500.0 AADR.csv 2010 25.240000 23.854063 0.180000 \n", + "\n", + " Returns returns_ma_10 \n", + "ticker \n", + "AADR NaN NaN \n", + "AADR 0.022672 NaN \n", + "AADR 0.000792 NaN \n", + "AADR 0.003560 NaN \n", + "AADR -0.003153 NaN \n", + "AADR -0.003559 NaN \n", + "AADR -0.007143 NaN \n", + "AADR 0.003197 NaN \n", + "AADR 0.020717 NaN \n", + "AADR 0.008197 NaN \n", + "AADR 0.001161 0.004644 \n", + "AADR 0.001933 0.002570 \n", + "AADR -0.007719 0.001719 \n", + "AADR 0.013613 0.002725 \n", + "AADR 0.000767 0.003117 \n", + "AADR -0.034893 -0.000017 \n", + "AADR -0.005165 0.000181 \n", + "AADR 0.005192 0.000380 \n", + "AADR 0.002781 -0.001413 \n", + "AADR 0.020206 -0.000212 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dd_feat.head(20)\n" ] }, { @@ -128,6 +995,15 @@ "(1 pt)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The choice depends largely on your goals and the size of your dataset. If you are working with small to medium-sized data, converting to pandas is generally fine. 
However, for large or extremely large datasets, switching to pandas can be memory-intensive and slow.\n", + "\n", + "In many cases, it is better to stick with Dask, especially when you are dealing with large-scale data where computations are likely to be memory-expensive." + ] + }, + { "cell_type": "markdown", "metadata": {}, @@ -165,7 +1041,7 @@ ], "metadata": { "kernelspec": { - "display_name": "env", + "display_name": "dsi_participant", "language": "python", "name": "python3" }, @@ -179,7 +1055,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.9.19" } }, "nbformat": 4,