diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py index 9c6210ac23..6148e20ab3 100644 --- a/activitysim/activitysim.py +++ b/activitysim/activitysim.py @@ -110,6 +110,7 @@ def simple_simulate(choosers, alternatives, spec, print "Failed with DataFrame eval:\n%s" % expr raise e vars[expr] = s + vars[expr] = vars[expr].astype('float') # explicit cast model_design = pd.DataFrame(vars, index=df.index) df = random_rows(model_design, min(100000, len(model_design)))\ diff --git a/example/configs/workplace_location.csv b/example/configs/workplace_location.csv index 81e4f4de00..af7b3ce918 100644 --- a/example/configs/workplace_location.csv +++ b/example/configs/workplace_location.csv @@ -1 +1 @@ -Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt,1 "Size variable full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt,1 "Size variable full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt,1 "Size variable full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt,1 "No attractions full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt==0,-999 "No attractions full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt==0,-999 "No attractions full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt==0,-999 "No attractions full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt==0,-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file +Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 "Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 "Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 "Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 "No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 "No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 "No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 "No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file diff --git a/example/configs/workplace_location_size_terms.csv b/example/configs/workplace_location_size_terms.csv new file mode 100644 index 0000000000..f9b7da433a --- /dev/null +++ b/example/configs/workplace_location_size_terms.csv @@ -0,0 +1 @@ +purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 school,grade,0,0,0,0,0,0,0,1,0,0,0 school,high,0,0,0,0,0,0,0,0,1,0,0 escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 \ No newline at end of file diff --git a/example/models.py b/example/models.py index 792f485c61..941d9973c9 100644 --- a/example/models.py +++ b/example/models.py @@ -37,7 +37,37 @@ def auto_ownership_spec(): @sim.injectable() def workplace_location_spec(): f = os.path.join('configs', "workplace_location.csv") - return asim.read_model_spec(f).head(7) + return asim.read_model_spec(f).head(15) + + +@sim.table() +def workplace_size_spec(): + f = os.path.join('configs', 'workplace_location_size_terms.csv') + return pd.read_csv(f) + + +@sim.table() +def workplace_size_terms(land_use, workplace_size_spec): + """ + This method takes the land use data and multiplies various columns of the + land use data by coefficients from the workplace_size_spec table in order + to yield a size term (a linear combination of land use variables) with + specified coefficients for different segments (like low, med, and high + income) + """ + land_use = land_use.to_frame() + df = workplace_size_spec.to_frame().query("purpose == 'work'") + df = df.drop("purpose", axis=1).set_index("segment") + new_df = {} + for index, row in df.iterrows(): + missing = row[~row.index.isin(land_use.columns)] + if len(missing) > 0: + print "WARNING: missing columns in land use\n", missing.index + row = row[row.index.isin(land_use.columns)] + sparse = land_use[list(row.index)] + new_df["size_"+index] = np.dot(sparse.as_matrix(), row.values) + new_df = pd.DataFrame(new_df, index=land_use.index) + return new_df @sim.model() @@ -67,10 +97,11 @@ def workplace_location_simulate(persons, households, zones, workplace_location_spec, - distance_matrix): + distance_matrix, + workplace_size_terms): choosers = sim.merge_tables(persons.name, tables=[persons, households]) - alternatives = zones.to_frame() + alternatives = zones.to_frame().join(workplace_size_terms.to_frame()) skims = { "distance": distance_matrix @@ -88,4 +119,24 @@ def workplace_location_simulate(persons, print "Describe of hoices:\n", choices.describe() sim.add_column("persons", "workplace_taz", choices) - return model_design \ No newline at end of file + return model_design + + +@sim.column("land_use") +def total_households(land_use): + return land_use.local.TOTHH + + +@sim.column("land_use") +def total_employment(land_use): + return land_use.local.TOTEMP + + +@sim.column("land_use") +def total_acres(land_use): + return land_use.local.TOTACRE + + +@sim.column("land_use") +def county_id(land_use): + return land_use.local.COUNTY \ No newline at end of file diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb index de39900c43..1c065abf11 100644 --- a/notebooks/data_mover.ipynb +++ b/notebooks/data_mover.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d62442075d195d4893cdd65305dac0932aeb18464bb30a1d8aac6a89ad987ef1" + "signature": "sha256:afbc3e7040dd9e4a5b21433063f13a6a8abfcc04bcfc6574e7e43376c257cd33" }, "nbformat": 3, "nbformat_minor": 0, @@ -43,10 +43,6 @@ "col_map = {\n", " \"HHID\": \"household_id\",\n", " \"AGE\": \"age\",\n", - " \"TOTHH\": \"total_households\",\n", - " \"TOTEMP\": \"total_employment\",\n", - " \"TOTACRE\": \"total_acres\",\n", - " \"COUNTY\": \"county_id\",\n", " \"hworkers\": \"workers\",\n", " \"HINC\": \"income\"\n", "}" diff --git a/notebooks/simulation.ipynb b/notebooks/simulation.ipynb index cedc75a91c..1643e621dd 100644 --- a/notebooks/simulation.ipynb +++ b/notebooks/simulation.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:648926b05228f8f33eb77a9ff1df6debcba506d63e4e15cd07e69d3071084fca" + "signature": "sha256:68dd59fb87c331bcb79d97a97557ee2ee411309bd3a85e58d6bee4b169b221df" }, "nbformat": 3, "nbformat_minor": 0, @@ -46,16 +46,16 @@ "output_type": "stream", "stream": "stdout", "text": [ - "count 258248.000000\n", - "mean 733.971489\n", - "std 416.825609\n", + "count 258078.000000\n", + "mean 718.510997\n", + "std 422.975764\n", "min 1.000000\n", - "25% 393.000000\n", - "50% 743.000000\n", - "75% 1105.000000\n", + "25% 352.000000\n", + "50% 719.000000\n", + "75% 1083.000000\n", "max 1454.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'workplace_location_simulate': 24.48s" + "Time to execute model 'workplace_location_simulate': 35.33s" ] }, { @@ -63,7 +63,7 @@ "stream": "stdout", "text": [ "\n", - "Total time to execute: 24.48s\n" + "Total time to execute: 35.34s\n" ] } ], @@ -90,13 +90,13 @@ "output_type": "stream", "stream": "stdout", "text": [ - "cars2 48234\n", - "cars3 38044\n", - "cars1 7516\n", - "cars4 5021\n", - "cars0 1185\n", + "cars2 47959\n", + "cars3 38186\n", + "cars1 7528\n", + "cars4 5123\n", + "cars0 1204\n", "dtype: int64\n", - "Time to execute model 'auto_ownership_simulate': 3.87s" + "Time to execute model 'auto_ownership_simulate': 4.62s" ] }, { @@ -104,7 +104,7 @@ "stream": "stdout", "text": [ "\n", - "Total time to execute: 3.87s\n" + "Total time to execute: 4.62s\n" ] } ], @@ -128,8 +128,8 @@ " \n", " DISTRICT\n", " SD\n", - " county_id\n", - " total_households\n", + " COUNTY\n", + " TOTHH\n", " HHPOP\n", " TOTPOP\n", " EMPRES\n", @@ -137,16 +137,16 @@ " MFDU\n", " HHINCQ1\n", " ...\n", - " COLLPTE\n", - " TOPOLOGY\n", - " TERMINAL\n", - " ZERO\n", " hhlds\n", " sftaz\n", " gqpop\n", " employment_density\n", - " household_density\n", + " total_acres\n", + " county_id\n", " density_index\n", + " household_density\n", + " total_households\n", + " total_employment\n", " \n", " \n", " \n", @@ -166,13 +166,13 @@ " 1454.000000\n", " 1454.000000\n", " 1454.000000\n", - " 1454\n", - " 1454.000000\n", - " 1454.000000\n", - " 1454.000000\n", " 1454.000000\n", + " 1454.000000\n", " 1454.000000\n", " 1453.000000\n", + " 1454.000000\n", + " 1454.000000\n", + " 1454.000000\n", " \n", " \n", " mean\n", @@ -187,16 +187,16 @@ " 670.889959\n", " 508.134801\n", " ...\n", - " 166.744054\n", - " 2.063274\n", - " 1.630505\n", - " 0\n", " 1793.688446\n", " 727.500000\n", " 101.570151\n", " 9.596395\n", - " 6.008186\n", + " 3146.071457\n", + " 3.835626\n", " 2.279554\n", + " 6.008186\n", + " 1793.688446\n", + " 2247.736589\n", " \n", " \n", " std\n", @@ -211,16 +211,16 @@ " 717.261660\n", " 378.753528\n", " ...\n", - " 1234.717238\n", - " 0.926842\n", - " 0.879441\n", - " 0\n", " 961.021405\n", " 419.877958\n", " 393.886676\n", " 45.067313\n", - " 8.565908\n", + " 16945.908840\n", + " 2.040153\n", " 3.945717\n", + " 8.565908\n", + " 961.021405\n", + " 3538.356220\n", " \n", " \n", " min\n", @@ -237,14 +237,14 @@ " ...\n", " 0.000000\n", " 1.000000\n", - " 0.904320\n", - " 0\n", - " 0.000000\n", - " 1.000000\n", " -1.000000\n", " 0.000000\n", + " 13.000000\n", + " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", + " 0.000000\n", " \n", " \n", " 25%\n", @@ -259,16 +259,16 @@ " 144.500000\n", " 257.000000\n", " ...\n", - " 0.000000\n", - " 1.000000\n", - " 1.167372\n", - " 0\n", " 1200.250000\n", " 364.250000\n", " 5.000000\n", " 0.877829\n", - " 1.910701\n", + " 230.000000\n", + " 3.000000\n", " 0.550232\n", + " 1.910701\n", + " 1200.250000\n", + " 482.000000\n", " \n", " \n", " 50%\n", @@ -283,16 +283,16 @@ " 460.000000\n", " 434.000000\n", " ...\n", - " 0.000000\n", - " 2.000000\n", - " 1.323075\n", - " 0\n", " 1681.500000\n", " 727.500000\n", " 18.000000\n", " 2.158701\n", - " 3.939122\n", + " 397.000000\n", + " 4.000000\n", " 1.289224\n", + " 3.939122\n", + " 1681.500000\n", + " 1005.500000\n", " \n", " \n", " 75%\n", @@ -307,16 +307,16 @@ " 907.750000\n", " 674.750000\n", " ...\n", - " 0.000000\n", - " 3.000000\n", - " 1.632443\n", - " 0\n", " 2259.750000\n", " 1090.750000\n", " 71.000000\n", " 5.492696\n", - " 6.693238\n", + " 883.500000\n", + " 5.000000\n", " 2.337577\n", + " 6.693238\n", + " 2259.750000\n", + " 2215.750000\n", " \n", " \n", " max\n", @@ -331,35 +331,35 @@ " 4920.000000\n", " 3754.000000\n", " ...\n", - " 19570.523440\n", - " 3.000000\n", - " 7.310200\n", - " 0\n", " 12542.000000\n", " 1454.000000\n", " 7810.000000\n", " 877.564767\n", - " 90.891304\n", + " 372520.000000\n", + " 9.000000\n", " 46.360371\n", + " 90.891304\n", + " 12542.000000\n", + " 37950.000000\n", " \n", " \n", "\n", - "

8 rows \u00d7 44 columns

\n", + "

8 rows \u00d7 48 columns

\n", "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ - " DISTRICT SD county_id total_households HHPOP \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", - "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", - "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", - "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", - "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", - "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", - "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", + " DISTRICT SD COUNTY TOTHH HHPOP \\\n", + "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", + "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", + "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", + "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", + "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", + "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", + "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", + "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", "\n", " TOTPOP EMPRES SFDU MFDU HHINCQ1 \\\n", "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", @@ -371,37 +371,37 @@ "75% 6098.500000 2735.500000 1496.000000 907.750000 674.750000 \n", "max 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000 \n", "\n", - " ... COLLPTE TOPOLOGY TERMINAL ZERO \\\n", - "count ... 1454.000000 1454.000000 1454.000000 1454 \n", - "mean ... 166.744054 2.063274 1.630505 0 \n", - "std ... 1234.717238 0.926842 0.879441 0 \n", - "min ... 0.000000 1.000000 0.904320 0 \n", - "25% ... 0.000000 1.000000 1.167372 0 \n", - "50% ... 0.000000 2.000000 1.323075 0 \n", - "75% ... 0.000000 3.000000 1.632443 0 \n", - "max ... 19570.523440 3.000000 7.310200 0 \n", + " ... hhlds sftaz gqpop \\\n", + "count ... 1454.000000 1454.000000 1454.000000 \n", + "mean ... 1793.688446 727.500000 101.570151 \n", + "std ... 961.021405 419.877958 393.886676 \n", + "min ... 0.000000 1.000000 -1.000000 \n", + "25% ... 1200.250000 364.250000 5.000000 \n", + "50% ... 1681.500000 727.500000 18.000000 \n", + "75% ... 2259.750000 1090.750000 71.000000 \n", + "max ... 12542.000000 1454.000000 7810.000000 \n", "\n", - " hhlds sftaz gqpop employment_density \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 1793.688446 727.500000 101.570151 9.596395 \n", - "std 961.021405 419.877958 393.886676 45.067313 \n", - "min 0.000000 1.000000 -1.000000 0.000000 \n", - "25% 1200.250000 364.250000 5.000000 0.877829 \n", - "50% 1681.500000 727.500000 18.000000 2.158701 \n", - "75% 2259.750000 1090.750000 71.000000 5.492696 \n", - "max 12542.000000 1454.000000 7810.000000 877.564767 \n", + " employment_density total_acres county_id density_index \\\n", + "count 1454.000000 1454.000000 1454.000000 1453.000000 \n", + "mean 9.596395 3146.071457 3.835626 2.279554 \n", + "std 45.067313 16945.908840 2.040153 3.945717 \n", + "min 0.000000 13.000000 1.000000 0.000000 \n", + "25% 0.877829 230.000000 3.000000 0.550232 \n", + "50% 2.158701 397.000000 4.000000 1.289224 \n", + "75% 5.492696 883.500000 5.000000 2.337577 \n", + "max 877.564767 372520.000000 9.000000 46.360371 \n", "\n", - " household_density density_index \n", - "count 1454.000000 1453.000000 \n", - "mean 6.008186 2.279554 \n", - "std 8.565908 3.945717 \n", - "min 0.000000 0.000000 \n", - "25% 1.910701 0.550232 \n", - "50% 3.939122 1.289224 \n", - "75% 6.693238 2.337577 \n", - "max 90.891304 46.360371 \n", + " household_density total_households total_employment \n", + "count 1454.000000 1454.000000 1454.000000 \n", + "mean 6.008186 1793.688446 2247.736589 \n", + "std 8.565908 961.021405 3538.356220 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 1.910701 1200.250000 482.000000 \n", + "50% 3.939122 1681.500000 1005.500000 \n", + "75% 6.693238 2259.750000 2215.750000 \n", + "max 90.891304 12542.000000 37950.000000 \n", "\n", - "[8 rows x 44 columns]" + "[8 rows x 48 columns]" ] } ], @@ -450,8 +450,8 @@ " \n", " count\n", " 100000.000000\n", - " 100000.00000\n", - " 100000.00000\n", + " 100000.000000\n", + " 100000.000000\n", " 100000.000000\n", " 100000.000000\n", " 100000.000000\n", @@ -461,7 +461,7 @@ " 100000.000000\n", " ...\n", " 100000.000000\n", - " 100000.00000\n", + " 100000.000000\n", " 100000.000000\n", " 100000.000000\n", " 100000.000000\n", @@ -473,57 +473,57 @@ " \n", " \n", " mean\n", - " 751.621990\n", - " 4924086.59391\n", - " 2168.63101\n", - " 77754.054140\n", - " 2.582480\n", - " 2.644130\n", - " 0.075830\n", - " 0.466650\n", - " 3.535230\n", - " 1.895440\n", + " 752.439040\n", + " 4924260.422350\n", + " 2168.287950\n", + " 77684.096720\n", + " 2.580780\n", + " 2.642540\n", + " 0.077640\n", + " 0.468280\n", + " 3.529460\n", + " 1.892780\n", " ...\n", - " 4.480010\n", - " 2168.63101\n", - " 0.402690\n", - " 0.397080\n", - " 2.064820\n", - " 0.356840\n", - " 0.058290\n", - " 77.754054\n", - " 0.160820\n", - " 0.224300\n", + " 4.484880\n", + " 2168.287950\n", + " 0.401800\n", + " 0.394180\n", + " 2.063810\n", + " 0.357290\n", + " 0.060680\n", + " 77.684097\n", + " 0.159680\n", + " 0.226650\n", " \n", " \n", " std\n", - " 430.350017\n", - " 2855184.47779\n", - " 516.52055\n", - " 81679.548159\n", - " 1.611497\n", - " 2.064396\n", - " 0.363072\n", - " 0.913212\n", - " 2.518309\n", - " 1.009583\n", + " 430.258155\n", + " 2863687.886756\n", + " 516.271349\n", + " 81341.474187\n", + " 1.606362\n", + " 2.066835\n", + " 0.367387\n", + " 0.913982\n", + " 2.517375\n", + " 1.010353\n", " ...\n", - " 2.868725\n", - " 516.52055\n", - " 0.490442\n", - " 0.728679\n", - " 1.126238\n", - " 0.761952\n", - " 0.250665\n", - " 81.679548\n", - " 0.465488\n", - " 0.580597\n", + " 2.871932\n", + " 516.271349\n", + " 0.490264\n", + " 0.728922\n", + " 1.122662\n", + " 0.760368\n", + " 0.260074\n", + " 81.341474\n", + " 0.462498\n", + " 0.589748\n", " \n", " \n", " min\n", " 1.000000\n", - " 348.00000\n", - " 1000.00000\n", + " 496.000000\n", + " 1000.000000\n", " -20000.000000\n", " 1.000000\n", " 0.000000\n", @@ -533,7 +533,7 @@ " 0.000000\n", " ...\n", " 0.000000\n", - " 1000.00000\n", + " 1000.000000\n", " 0.000000\n", " 0.000000\n", " 0.000000\n", @@ -546,8 +546,8 @@ " \n", " 25%\n", " 374.000000\n", - " 2474954.00000\n", - " 2104.00000\n", + " 2456274.500000\n", + " 2104.000000\n", " 26500.000000\n", " 1.000000\n", " 1.000000\n", @@ -557,7 +557,7 @@ " 1.000000\n", " ...\n", " 2.000000\n", - " 2104.00000\n", + " 2104.000000\n", " 0.000000\n", " 0.000000\n", " 1.000000\n", @@ -569,9 +569,9 @@ " \n", " \n", " 50%\n", - " 763.000000\n", - " 4902336.00000\n", - " 2303.00000\n", + " 764.000000\n", + " 4895910.500000\n", + " 2303.000000\n", " 58000.000000\n", " 2.000000\n", " 1.000000\n", @@ -581,7 +581,7 @@ " 2.000000\n", " ...\n", " 4.000000\n", - " 2303.00000\n", + " 2303.000000\n", " 0.000000\n", " 0.000000\n", " 2.000000\n", @@ -594,8 +594,8 @@ " \n", " 75%\n", " 1144.000000\n", - " 7358428.00000\n", - " 2410.00000\n", + " 7357629.750000\n", + " 2410.000000\n", " 100000.000000\n", " 4.000000\n", " 4.000000\n", @@ -605,7 +605,7 @@ " 3.000000\n", " ...\n", " 7.000000\n", - " 2410.00000\n", + " 2410.000000\n", " 1.000000\n", " 1.000000\n", " 2.000000\n", @@ -618,10 +618,10 @@ " \n", " max\n", " 1454.000000\n", - " 9999811.00000\n", - " 2714.00000\n", - " 1968504.000000\n", - " 24.000000\n", + " 9999811.000000\n", + " 2714.000000\n", + " 1237000.000000\n", + " 25.000000\n", " 7.000000\n", " 2.000000\n", " 10.000000\n", @@ -629,15 +629,15 @@ " 4.000000\n", " ...\n", " 9.000000\n", - " 2714.00000\n", + " 2714.000000\n", " 1.000000\n", " 9.000000\n", - " 20.000000\n", - " 9.000000\n", + " 25.000000\n", + " 8.000000\n", " 5.000000\n", - " 1968.504000\n", + " 1237.000000\n", " 8.000000\n", - " 12.000000\n", + " 24.000000\n", " \n", " \n", "\n", @@ -648,65 +648,65 @@ "output_type": "pyout", "prompt_number": 5, "text": [ - " TAZ SERIALNO PUMA5 income \\\n", - "count 100000.000000 100000.00000 100000.00000 100000.000000 \n", - "mean 751.621990 4924086.59391 2168.63101 77754.054140 \n", - "std 430.350017 2855184.47779 516.52055 81679.548159 \n", - "min 1.000000 348.00000 1000.00000 -20000.000000 \n", - "25% 374.000000 2474954.00000 2104.00000 26500.000000 \n", - "50% 763.000000 4902336.00000 2303.00000 58000.000000 \n", - "75% 1144.000000 7358428.00000 2410.00000 100000.000000 \n", - "max 1454.000000 9999811.00000 2714.00000 1968504.000000 \n", + " TAZ SERIALNO PUMA5 income \\\n", + "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", + "mean 752.439040 4924260.422350 2168.287950 77684.096720 \n", + "std 430.258155 2863687.886756 516.271349 81341.474187 \n", + "min 1.000000 496.000000 1000.000000 -20000.000000 \n", + "25% 374.000000 2456274.500000 2104.000000 26500.000000 \n", + "50% 764.000000 4895910.500000 2303.000000 58000.000000 \n", + "75% 1144.000000 7357629.750000 2410.000000 100000.000000 \n", + "max 1454.000000 9999811.000000 2714.000000 1237000.000000 \n", "\n", " PERSONS HHT UNITTYPE NOC \\\n", "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", - "mean 2.582480 2.644130 0.075830 0.466650 \n", - "std 1.611497 2.064396 0.363072 0.913212 \n", + "mean 2.580780 2.642540 0.077640 0.468280 \n", + "std 1.606362 2.066835 0.367387 0.913982 \n", "min 1.000000 0.000000 0.000000 0.000000 \n", "25% 1.000000 1.000000 0.000000 0.000000 \n", "50% 2.000000 1.000000 0.000000 0.000000 \n", "75% 4.000000 4.000000 0.000000 1.000000 \n", - "max 24.000000 7.000000 2.000000 10.000000 \n", + "max 25.000000 7.000000 2.000000 10.000000 \n", "\n", " BLDGSZ TENURE ... bucketBin \\\n", "count 100000.000000 100000.000000 ... 100000.000000 \n", - "mean 3.535230 1.895440 ... 4.480010 \n", - "std 2.518309 1.009583 ... 2.868725 \n", + "mean 3.529460 1.892780 ... 4.484880 \n", + "std 2.517375 1.010353 ... 2.871932 \n", "min 0.000000 0.000000 ... 0.000000 \n", "25% 2.000000 1.000000 ... 2.000000 \n", "50% 2.000000 2.000000 ... 4.000000 \n", "75% 5.000000 3.000000 ... 7.000000 \n", "max 10.000000 4.000000 ... 9.000000 \n", "\n", - " originalPUMA hmultiunit num_young_adults drivers \\\n", - "count 100000.00000 100000.000000 100000.000000 100000.000000 \n", - "mean 2168.63101 0.402690 0.397080 2.064820 \n", - "std 516.52055 0.490442 0.728679 1.126238 \n", - "min 1000.00000 0.000000 0.000000 0.000000 \n", - "25% 2104.00000 0.000000 0.000000 1.000000 \n", - "50% 2303.00000 0.000000 0.000000 2.000000 \n", - "75% 2410.00000 1.000000 1.000000 2.000000 \n", - "max 2714.00000 1.000000 9.000000 20.000000 \n", + " originalPUMA hmultiunit num_young_adults drivers \\\n", + "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", + "mean 2168.287950 0.401800 0.394180 2.063810 \n", + "std 516.271349 0.490264 0.728922 1.122662 \n", + "min 1000.000000 0.000000 0.000000 0.000000 \n", + "25% 2104.000000 0.000000 0.000000 1.000000 \n", + "50% 2303.000000 0.000000 0.000000 2.000000 \n", + "75% 2410.000000 1.000000 1.000000 2.000000 \n", + "max 2714.000000 1.000000 9.000000 25.000000 \n", "\n", " num_children num_adolescents income_in_thousands \\\n", "count 100000.000000 100000.000000 100000.000000 \n", - "mean 0.356840 0.058290 77.754054 \n", - "std 0.761952 0.250665 81.679548 \n", + "mean 0.357290 0.060680 77.684097 \n", + "std 0.760368 0.260074 81.341474 \n", "min 0.000000 0.000000 -20.000000 \n", "25% 0.000000 0.000000 26.500000 \n", "50% 0.000000 0.000000 58.000000 \n", "75% 0.000000 0.000000 100.000000 \n", - "max 9.000000 5.000000 1968.504000 \n", + "max 8.000000 5.000000 1237.000000 \n", "\n", " num_young_children num_college_age \n", "count 100000.000000 100000.000000 \n", - "mean 0.160820 0.224300 \n", - "std 0.465488 0.580597 \n", + "mean 0.159680 0.226650 \n", + "std 0.462498 0.589748 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", - "max 8.000000 12.000000 \n", + "max 8.000000 24.000000 \n", "\n", "[8 rows x 53 columns]" ] @@ -720,7 +720,8 @@ "input": [], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [], + "prompt_number": 5 } ], "metadata": {}