diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py index 9c6210ac23..6148e20ab3 100644 --- a/activitysim/activitysim.py +++ b/activitysim/activitysim.py @@ -110,6 +110,7 @@ def simple_simulate(choosers, alternatives, spec, print "Failed with DataFrame eval:\n%s" % expr raise e vars[expr] = s + vars[expr] = vars[expr].astype('float') # explicit cast model_design = pd.DataFrame(vars, index=df.index) df = random_rows(model_design, min(100000, len(model_design)))\ diff --git a/example/configs/workplace_location.csv b/example/configs/workplace_location.csv index 81e4f4de00..af7b3ce918 100644 --- a/example/configs/workplace_location.csv +++ b/example/configs/workplace_location.csv @@ -1 +1 @@ -Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt,1 "Size variable full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt,1 "Size variable full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt,1 "Size variable full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt,1 "No attractions full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt==0,-999 "No attractions full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt==0,-999 "No attractions full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt==0,-999 "No attractions full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt==0,-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file +Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 "Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 "Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 "Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 "No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 "No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 "No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 "No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file diff --git a/example/configs/workplace_location_size_terms.csv b/example/configs/workplace_location_size_terms.csv new file mode 100644 index 0000000000..f9b7da433a --- /dev/null +++ b/example/configs/workplace_location_size_terms.csv @@ -0,0 +1 @@ +purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 school,grade,0,0,0,0,0,0,0,1,0,0,0 school,high,0,0,0,0,0,0,0,0,1,0,0 escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 \ No newline at end of file diff --git a/example/models.py b/example/models.py index 792f485c61..941d9973c9 100644 --- a/example/models.py +++ b/example/models.py @@ -37,7 +37,37 @@ def auto_ownership_spec(): @sim.injectable() def workplace_location_spec(): f = os.path.join('configs', "workplace_location.csv") - return asim.read_model_spec(f).head(7) + return asim.read_model_spec(f).head(15) + + +@sim.table() +def workplace_size_spec(): + f = os.path.join('configs', 'workplace_location_size_terms.csv') + return pd.read_csv(f) + + +@sim.table() +def workplace_size_terms(land_use, workplace_size_spec): + """ + This method takes the land use data and multiplies various columns of the + land use data by coefficients from the workplace_size_spec table in order + to yield a size term (a linear combination of land use variables) with + specified coefficients for different segments (like low, med, and high + income) + """ + land_use = land_use.to_frame() + df = workplace_size_spec.to_frame().query("purpose == 'work'") + df = df.drop("purpose", axis=1).set_index("segment") + new_df = {} + for index, row in df.iterrows(): + missing = row[~row.index.isin(land_use.columns)] + if len(missing) > 0: + print "WARNING: missing columns in land use\n", missing.index + row = row[row.index.isin(land_use.columns)] + sparse = land_use[list(row.index)] + new_df["size_"+index] = np.dot(sparse.as_matrix(), row.values) + new_df = pd.DataFrame(new_df, index=land_use.index) + return new_df @sim.model() @@ -67,10 +97,11 @@ def workplace_location_simulate(persons, households, zones, workplace_location_spec, - distance_matrix): + distance_matrix, + workplace_size_terms): choosers = sim.merge_tables(persons.name, tables=[persons, households]) - alternatives = zones.to_frame() + alternatives = zones.to_frame().join(workplace_size_terms.to_frame()) skims = { "distance": distance_matrix @@ -88,4 +119,24 @@ def workplace_location_simulate(persons, print "Describe of hoices:\n", choices.describe() sim.add_column("persons", "workplace_taz", choices) - return model_design \ No newline at end of file + return model_design + + +@sim.column("land_use") +def total_households(land_use): + return land_use.local.TOTHH + + +@sim.column("land_use") +def total_employment(land_use): + return land_use.local.TOTEMP + + +@sim.column("land_use") +def total_acres(land_use): + return land_use.local.TOTACRE + + +@sim.column("land_use") +def county_id(land_use): + return land_use.local.COUNTY \ No newline at end of file diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb index de39900c43..1c065abf11 100644 --- a/notebooks/data_mover.ipynb +++ b/notebooks/data_mover.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d62442075d195d4893cdd65305dac0932aeb18464bb30a1d8aac6a89ad987ef1" + "signature": "sha256:afbc3e7040dd9e4a5b21433063f13a6a8abfcc04bcfc6574e7e43376c257cd33" }, "nbformat": 3, "nbformat_minor": 0, @@ -43,10 +43,6 @@ "col_map = {\n", " \"HHID\": \"household_id\",\n", " \"AGE\": \"age\",\n", - " \"TOTHH\": \"total_households\",\n", - " \"TOTEMP\": \"total_employment\",\n", - " \"TOTACRE\": \"total_acres\",\n", - " \"COUNTY\": \"county_id\",\n", " \"hworkers\": \"workers\",\n", " \"HINC\": \"income\"\n", "}" diff --git a/notebooks/simulation.ipynb b/notebooks/simulation.ipynb index cedc75a91c..1643e621dd 100644 --- a/notebooks/simulation.ipynb +++ b/notebooks/simulation.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:648926b05228f8f33eb77a9ff1df6debcba506d63e4e15cd07e69d3071084fca" + "signature": "sha256:68dd59fb87c331bcb79d97a97557ee2ee411309bd3a85e58d6bee4b169b221df" }, "nbformat": 3, "nbformat_minor": 0, @@ -46,16 +46,16 @@ "output_type": "stream", "stream": "stdout", "text": [ - "count 258248.000000\n", - "mean 733.971489\n", - "std 416.825609\n", + "count 258078.000000\n", + "mean 718.510997\n", + "std 422.975764\n", "min 1.000000\n", - "25% 393.000000\n", - "50% 743.000000\n", - "75% 1105.000000\n", + "25% 352.000000\n", + "50% 719.000000\n", + "75% 1083.000000\n", "max 1454.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'workplace_location_simulate': 24.48s" + "Time to execute model 'workplace_location_simulate': 35.33s" ] }, { @@ -63,7 +63,7 @@ "stream": "stdout", "text": [ "\n", - "Total time to execute: 24.48s\n" + "Total time to execute: 35.34s\n" ] } ], @@ -90,13 +90,13 @@ "output_type": "stream", "stream": "stdout", "text": [ - "cars2 48234\n", - "cars3 38044\n", - "cars1 7516\n", - "cars4 5021\n", - "cars0 1185\n", + "cars2 47959\n", + "cars3 38186\n", + "cars1 7528\n", + "cars4 5123\n", + "cars0 1204\n", "dtype: int64\n", - "Time to execute model 'auto_ownership_simulate': 3.87s" + "Time to execute model 'auto_ownership_simulate': 4.62s" ] }, { @@ -104,7 +104,7 @@ "stream": "stdout", "text": [ "\n", - "Total time to execute: 3.87s\n" + "Total time to execute: 4.62s\n" ] } ], @@ -128,8 +128,8 @@ "
8 rows \u00d7 44 columns
\n", + "8 rows \u00d7 48 columns
\n", "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ - " DISTRICT SD county_id total_households HHPOP \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", - "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", - "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", - "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", - "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", - "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", - "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", + " DISTRICT SD COUNTY TOTHH HHPOP \\\n", + "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", + "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", + "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", + "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", + "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", + "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", + "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", + "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", "\n", " TOTPOP EMPRES SFDU MFDU HHINCQ1 \\\n", "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", @@ -371,37 +371,37 @@ "75% 6098.500000 2735.500000 1496.000000 907.750000 674.750000 \n", "max 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000 \n", "\n", - " ... COLLPTE TOPOLOGY TERMINAL ZERO \\\n", - "count ... 1454.000000 1454.000000 1454.000000 1454 \n", - "mean ... 166.744054 2.063274 1.630505 0 \n", - "std ... 1234.717238 0.926842 0.879441 0 \n", - "min ... 0.000000 1.000000 0.904320 0 \n", - "25% ... 0.000000 1.000000 1.167372 0 \n", - "50% ... 0.000000 2.000000 1.323075 0 \n", - "75% ... 0.000000 3.000000 1.632443 0 \n", - "max ... 19570.523440 3.000000 7.310200 0 \n", + " ... hhlds sftaz gqpop \\\n", + "count ... 1454.000000 1454.000000 1454.000000 \n", + "mean ... 1793.688446 727.500000 101.570151 \n", + "std ... 961.021405 419.877958 393.886676 \n", + "min ... 0.000000 1.000000 -1.000000 \n", + "25% ... 1200.250000 364.250000 5.000000 \n", + "50% ... 1681.500000 727.500000 18.000000 \n", + "75% ... 2259.750000 1090.750000 71.000000 \n", + "max ... 12542.000000 1454.000000 7810.000000 \n", "\n", - " hhlds sftaz gqpop employment_density \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 1793.688446 727.500000 101.570151 9.596395 \n", - "std 961.021405 419.877958 393.886676 45.067313 \n", - "min 0.000000 1.000000 -1.000000 0.000000 \n", - "25% 1200.250000 364.250000 5.000000 0.877829 \n", - "50% 1681.500000 727.500000 18.000000 2.158701 \n", - "75% 2259.750000 1090.750000 71.000000 5.492696 \n", - "max 12542.000000 1454.000000 7810.000000 877.564767 \n", + " employment_density total_acres county_id density_index \\\n", + "count 1454.000000 1454.000000 1454.000000 1453.000000 \n", + "mean 9.596395 3146.071457 3.835626 2.279554 \n", + "std 45.067313 16945.908840 2.040153 3.945717 \n", + "min 0.000000 13.000000 1.000000 0.000000 \n", + "25% 0.877829 230.000000 3.000000 0.550232 \n", + "50% 2.158701 397.000000 4.000000 1.289224 \n", + "75% 5.492696 883.500000 5.000000 2.337577 \n", + "max 877.564767 372520.000000 9.000000 46.360371 \n", "\n", - " household_density density_index \n", - "count 1454.000000 1453.000000 \n", - "mean 6.008186 2.279554 \n", - "std 8.565908 3.945717 \n", - "min 0.000000 0.000000 \n", - "25% 1.910701 0.550232 \n", - "50% 3.939122 1.289224 \n", - "75% 6.693238 2.337577 \n", - "max 90.891304 46.360371 \n", + " household_density total_households total_employment \n", + "count 1454.000000 1454.000000 1454.000000 \n", + "mean 6.008186 1793.688446 2247.736589 \n", + "std 8.565908 961.021405 3538.356220 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 1.910701 1200.250000 482.000000 \n", + "50% 3.939122 1681.500000 1005.500000 \n", + "75% 6.693238 2259.750000 2215.750000 \n", + "max 90.891304 12542.000000 37950.000000 \n", "\n", - "[8 rows x 44 columns]" + "[8 rows x 48 columns]" ] } ], @@ -450,8 +450,8 @@ "