diff --git a/.gitignore b/.gitignore index 414b218cbd..5547e6bed0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +example/data/* + +.ipynb_checkpoints + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py new file mode 100644 index 0000000000..c2c8268c9c --- /dev/null +++ b/activitysim/activitysim.py @@ -0,0 +1,71 @@ +import urbansim.sim.simulation as sim +from urbansim.urbanchoice import interaction, mnl +import pandas as pd +import numpy as np +import os + + +def random_rows(df, n): + return df.take(np.random.choice(len(df), size=n, replace=False)) + + +def read_model_spec(fname, + description_name="Description", + expression_name="Expression"): + """ + Read in the excel file and reformat for machines + """ + cfg = pd.read_csv(fname) + # don't need description and set the expression to the index + cfg = cfg.drop(description_name, axis=1).set_index(expression_name).stack() + return cfg + + +def identity_matrix(alt_names): + return pd.DataFrame(np.identity(len(alt_names)), + columns=alt_names, + index=alt_names) + + +def simple_simulate(choosers, alternatives, spec): + exprs = spec.index + coeffs = spec.values + + # merge choosers and alternatives + _, df, _ = interaction.mnl_interaction_dataset( + choosers, alternatives, len(alternatives)) + + # evaluate the expressions to build the final matrix + vars, names = [], [] + for expr in exprs: + if expr[0][0] == "@": + expr = "({}) * df.{}".format(expr[0][1:], expr[1]) + try: + s = eval(expr) + except Exception as e: + print "Failed with Python eval:\n%s" % expr + raise e + else: + expr = "({}) * {}".format(*expr) + try: + s = df.eval(expr) + except Exception as e: + print "Failed with DataFrame eval:\n%s" % expr + raise e + names.append(expr) + vars.append(s) + model_design = pd.concat(vars, axis=1) + model_design.columns = names + + df = random_rows(model_design, 100000).describe().transpose() + df = df[df["std"] == 0] + if 
len(df): + print "WARNING: Describe of columns with no variability:\n", df + + choices = mnl.mnl_simulate( + model_design.as_matrix(), + coeffs, + numalts=len(alternatives), + returnprobs=False) + + return pd.Series(choices, index=choosers.index), model_design diff --git a/activitysim/defaults/__init__.py b/activitysim/defaults/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/activitysim/defaults/datasources.py b/activitysim/defaults/datasources.py new file mode 100644 index 0000000000..59c12f1f14 --- /dev/null +++ b/activitysim/defaults/datasources.py @@ -0,0 +1,74 @@ +import numpy as np +import pandas as pd +import os +import uuid +import yaml +from urbansim.utils import misc +import urbansim.sim.simulation as sim +from .. import activitysim as asim + +import warnings + +warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning) +pd.options.mode.chained_assignment = None + + +@sim.injectable('settings', cache=True) +def settings(): + with open(os.path.join(misc.configs_dir(), "settings.yaml")) as f: + settings = yaml.load(f) + # monkey patch on the settings object since it's pretty global + # but will also be available as injectable + sim.settings = settings + return settings + + +@sim.injectable('run_number') +def run_number(): + return misc.get_run_number() + + +@sim.injectable('uuid', cache=True) +def uuid_hex(): + return uuid.uuid4().hex + + +@sim.injectable('store', cache=True) +def hdfstore(settings): + return pd.HDFStore( + os.path.join(misc.data_dir(), settings["store"]), + mode='r') + + +@sim.injectable("scenario") +def scenario(settings): + return settings["scenario"] + + +@sim.table(cache=True) +def land_use(store): + return store["land_use/taz_data"] + + +@sim.table(cache=True) +def accessibility(store): + df = store["skims/accessibility"] + df.columns = [c.upper() for c in df.columns] + return df + + +@sim.table(cache=True) +def households(store, settings): + if "households_sample_size" in settings: + 
return asim.random_rows(store["households"], + settings["households_sample_size"]) + return store["households"] + + +@sim.table(cache=True) +def persons(store): + return store["persons"] + + +sim.broadcast('land_use', 'households', cast_index=True, onto_on='TAZ') +sim.broadcast('accessibility', 'households', cast_index=True, onto_on='TAZ') diff --git a/activitysim/defaults/variables.py b/activitysim/defaults/variables.py new file mode 100644 index 0000000000..db85f06885 --- /dev/null +++ b/activitysim/defaults/variables.py @@ -0,0 +1,73 @@ +import urbansim.sim.simulation as sim +from activitysim.defaults import datasources + + +@sim.column("households") +def income_in_thousands(households): + return households.income / 1000 + + +@sim.column("households") +def drivers(households, persons): + # we assume that everyone 16 and older is a potential driver + return persons.local.query("16 <= age").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("households") +def num_young_children(households, persons): + return persons.local.query("age <= 4").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("households") +def num_children(households, persons): + return persons.local.query("5 <= age <= 15").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("households") +def num_adolescents(households, persons): + return persons.local.query("16 <= age <= 17").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("households") +def num_college_age(households, persons): + return persons.local.query("18 <= age <= 24").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("households") +def num_young_adults(households, persons): + return persons.local.query("25 <= age <= 34").\ + groupby("household_id").size().\ + reindex(households.index).fillna(0) + + +@sim.column("land_use") +def 
household_density(land_use): + return land_use.total_households / land_use.total_acres + + +@sim.column("land_use") +def employment_density(land_use): + return land_use.total_employment / land_use.total_acres + + +@sim.column("land_use") +def density_index(land_use): + return (land_use.household_density * land_use.employment_density) / \ + (land_use.household_density + land_use.employment_density) + + +@sim.column("land_use") +def county_name(land_use, settings): + assert "county_map" in settings + inv_map = {v: k for k, v in settings["county_map"].items()} + return land_use.county_id.map(inv_map) diff --git a/example/configs/auto_ownership_coeffs.csv b/example/configs/auto_ownership_coeffs.csv new file mode 100644 index 0000000000..0fe176c110 --- /dev/null +++ b/example/configs/auto_ownership_coeffs.csv @@ -0,0 +1 @@ +Description,Expression,cars0,cars1,cars2,cars3,cars4 2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616 3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208 4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807 Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313 Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107 Persons age 25-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596 Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627 Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769 "Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797 "Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619 "Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147 "Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147 "Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 0 
workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 "Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 Constants,@1,,1.1865,-1.0846,-3.2502,-5.313 San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458 Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372 Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372 Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372 Marin county,county_name == 'Marin',,-0.2434,0,0,0 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117 "Retail accessibility by non-motorized, if 0 workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693 \ No newline at end of file diff --git a/example/configs/settings.yaml b/example/configs/settings.yaml new file mode 100644 index 0000000000..05cf2cdb08 --- /dev/null +++ b/example/configs/settings.yaml @@ -0,0 +1,14 @@ +store: mtc_asim.h5 + +households_sample_size: 100000 + +county_map: + San 
Francisco: 1 + San Mateo: 2 + Santa Clara: 3 + Alameda: 4 + Contra Costa: 5 + Solano: 6 + Napa: 7 + Sonoma: 8 + Marin: 9 diff --git a/example/data/README.md b/example/data/README.md new file mode 100644 index 0000000000..b48eb98b28 --- /dev/null +++ b/example/data/README.md @@ -0,0 +1 @@ +Keep data here diff --git a/example/models.py b/example/models.py new file mode 100644 index 0000000000..ab4bfcffd4 --- /dev/null +++ b/example/models.py @@ -0,0 +1,35 @@ +import urbansim.sim.simulation as sim +import os +from activitysim import activitysim as asim + + +@sim.table() +def auto_alts(): + return asim.identity_matrix(["cars%d" % i for i in range(5)]) + + +@sim.injectable() +def auto_ownership_spec(): + f = os.path.join('configs', "auto_ownership_coeffs.csv") + return asim.read_model_spec(f).head(4*26) + + +@sim.model() +def auto_ownership_simulate(households, + auto_alts, + auto_ownership_spec, + land_use, + accessibility): + + choosers = sim.merge_tables(households.name, tables=[households, + land_use, + accessibility]) + alternatives = auto_alts.to_frame() + + choices, model_design = \ + asim.simple_simulate(choosers, alternatives, auto_ownership_spec) + + print "Choices:\n", choices.value_counts() + sim.add_column("households", "auto_ownership", choices) + + return model_design diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb new file mode 100644 index 0000000000..b9c9f37eff --- /dev/null +++ b/notebooks/data_mover.ipynb @@ -0,0 +1,134 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:b9b5094000883a97d7687aa911926f2d0f45c0546957b83f82fc449cce6edc6c" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import pandas as pd\n", + "import os\n", + "# this is where I unzipped the MTC data\n", + "SRCDIR = \"/Users/ffoti/data/activitysim\"\n", + "# and where it's going to\n", + "TGTFILE = \"../example/data/mtc_asim.h5\"" + ], + "language": "python", + 
"metadata": {}, + "outputs": [], + "prompt_number": 17 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "store = pd.HDFStore(TGTFILE, \"w\")" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 18 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "col_map = {\n", + " \"HHID\": \"household_id\",\n", + " \"AGE\": \"age\",\n", + " \"TOTHH\": \"total_households\",\n", + " \"TOTEMP\": \"total_employment\",\n", + " \"TOTACRE\": \"total_acres\",\n", + " \"COUNTY\": \"county_id\",\n", + " \"hworkers\": \"workers\",\n", + " \"HINC\": \"income\"\n", + "}" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 19 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pd.read_csv(os.path.join(SRCDIR, \"landuse\", \"tazData.csv\"), index_col=\"ZONE\")\n", + "df.columns = [col_map.get(s, s) for s in df.columns]\n", + "store[\"land_use/taz_data\"] = df" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 20 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pd.read_csv(os.path.join(SRCDIR, \"skims\", \"accessibility.csv\"), index_col=\"taz\")\n", + "df.columns = [col_map.get(s, s) for s in df.columns]\n", + "store[\"skims/accessibility\"] = df" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 21 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pd.read_csv(os.path.join(SRCDIR, \"popsyn\", \"hhFile.p2011s3a1.2010.csv\"), index_col=\"HHID\")\n", + "df.columns = [col_map.get(s, s) for s in df.columns]\n", + "store[\"households\"] = df" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 22 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pd.read_csv(os.path.join(SRCDIR, \"popsyn\", \"personFile.p2011s3a1.2010.csv\"), index_col=\"PERID\")\n", + "df.columns = [col_map.get(s, s) for s in 
df.columns]\n", + "store[\"persons\"] = df" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 23 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "store.close()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 24 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 24 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/notebooks/simulation.ipynb b/notebooks/simulation.ipynb new file mode 100644 index 0000000000..31776e6a61 --- /dev/null +++ b/notebooks/simulation.ipynb @@ -0,0 +1,685 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:e753195d2dede5dd16ee412a8640e8afb7012fd102f13ac2ca2f0086ebdcdb09" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "if 'sim' not in globals():\n", + " import os; os.chdir('../example')\n", + "import urbansim.sim.simulation as sim\n", + "from activitysim.defaults import variables\n", + "import models" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.run([\"auto_ownership_simulate\"])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Running model 'auto_ownership_simulate'\n", + "Choices:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "2 39441\n", + "1 32330\n", + "3 14788\n", + "0 8397\n", + "4 5044\n", + "dtype: int64\n", + "Time to execute model 'auto_ownership_simulate': 10.16s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "Total time to execute: 10.16s\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + 
"collapsed": false, + "input": [ + "sim.get_table(\"land_use\").to_frame().describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DISTRICTSDcounty_idtotal_householdsHHPOPTOTPOPEMPRESSFDUMFDUHHINCQ1...COLLPTETOPOLOGYTERMINALZEROhhldssftazgqpopemployment_densityhousehold_densitydensity_index
count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000... 1454.000000 1454.000000 1454.000000 1454 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1453.000000
mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 4917.978680 2168.684319 1122.798487 670.889959 508.134801... 166.744054 2.063274 1.630505 0 1793.688446 727.500000 101.570151 9.596395 6.008186 2.279554
std 8.701078 8.701078 2.040153 961.021405 2686.029808 2690.352928 1211.109335 854.895353 717.261660 378.753528... 1234.717238 0.926842 0.879441 0 961.021405 419.877958 393.886676 45.067313 8.565908 3.945717
min 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000... 0.000000 1.000000 0.904320 0 0.000000 1.000000 -1.000000 0.000000 0.000000 0.000000
25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 3384.500000 1460.500000 602.000000 144.500000 257.000000... 0.000000 1.000000 1.167372 0 1200.250000 364.250000 5.000000 0.877829 1.910701 0.550232
50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 4577.000000 2016.000000 1034.000000 460.000000 434.000000... 0.000000 2.000000 1.323075 0 1681.500000 727.500000 18.000000 2.158701 3.939122 1.289224
75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 6098.500000 2735.500000 1496.000000 907.750000 674.750000... 0.000000 3.000000 1.632443 0 2259.750000 1090.750000 71.000000 5.492696 6.693238 2.337577
max 34.000000 34.000000 9.000000 12542.000000 39671.000000 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000... 19570.523440 3.000000 7.310200 0 12542.000000 1454.000000 7810.000000 877.564767 90.891304 46.360371
\n", + "

8 rows \u00d7 44 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 16, + "text": [ + " DISTRICT SD county_id total_households HHPOP \\\n", + "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", + "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", + "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", + "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", + "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", + "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", + "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", + "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", + "\n", + " TOTPOP EMPRES SFDU MFDU HHINCQ1 \\\n", + "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", + "mean 4917.978680 2168.684319 1122.798487 670.889959 508.134801 \n", + "std 2690.352928 1211.109335 854.895353 717.261660 378.753528 \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 3384.500000 1460.500000 602.000000 144.500000 257.000000 \n", + "50% 4577.000000 2016.000000 1034.000000 460.000000 434.000000 \n", + "75% 6098.500000 2735.500000 1496.000000 907.750000 674.750000 \n", + "max 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000 \n", + "\n", + " ... COLLPTE TOPOLOGY TERMINAL ZERO \\\n", + "count ... 1454.000000 1454.000000 1454.000000 1454 \n", + "mean ... 166.744054 2.063274 1.630505 0 \n", + "std ... 1234.717238 0.926842 0.879441 0 \n", + "min ... 0.000000 1.000000 0.904320 0 \n", + "25% ... 0.000000 1.000000 1.167372 0 \n", + "50% ... 0.000000 2.000000 1.323075 0 \n", + "75% ... 0.000000 3.000000 1.632443 0 \n", + "max ... 
19570.523440 3.000000 7.310200 0 \n", + "\n", + " hhlds sftaz gqpop employment_density \\\n", + "count 1454.000000 1454.000000 1454.000000 1454.000000 \n", + "mean 1793.688446 727.500000 101.570151 9.596395 \n", + "std 961.021405 419.877958 393.886676 45.067313 \n", + "min 0.000000 1.000000 -1.000000 0.000000 \n", + "25% 1200.250000 364.250000 5.000000 0.877829 \n", + "50% 1681.500000 727.500000 18.000000 2.158701 \n", + "75% 2259.750000 1090.750000 71.000000 5.492696 \n", + "max 12542.000000 1454.000000 7810.000000 877.564767 \n", + "\n", + " household_density density_index \n", + "count 1454.000000 1453.000000 \n", + "mean 6.008186 2.279554 \n", + "std 8.565908 3.945717 \n", + "min 0.000000 0.000000 \n", + "25% 1.910701 0.550232 \n", + "50% 3.939122 1.289224 \n", + "75% 6.693238 2.337577 \n", + "max 90.891304 46.360371 \n", + "\n", + "[8 rows x 44 columns]" + ] + } + ], + "prompt_number": 16 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.get_table(\"households\").to_frame().describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TAZSERIALNOPUMA5incomePERSONSHHTUNITTYPENOCBLDGSZTENURE...bucketBinoriginalPUMAhmultiunitnum_young_adultsdriversnum_childrennum_adolescentsincome_in_thousandsnum_young_childrennum_college_age
count 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000... 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000 2732722.000000
mean 751.134174 4923397.817272 2168.537679 77670.162133 2.581065 2.640939 0.076833 0.468948 3.528608 1.893538... 4.492757 2168.537679 0.401827 0.396906 2.062626 0.359349 0.059565 77.670162 0.159090 0.223251
std 430.938788 2857497.525312 516.128215 81405.085003 1.605801 2.065958 0.365144 0.917211 2.516513 1.010827... 2.871945 516.128215 0.490267 0.728451 1.117948 0.764597 0.254209 81.405085 0.462614 0.581043
min 1.000000 20.000000 1000.000000 -20000.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000... 0.000000 1000.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -20.000000 0.000000 0.000000
25% 372.000000 2463830.000000 2104.000000 26500.000000 1.000000 1.000000 0.000000 0.000000 2.000000 1.000000... 2.000000 2104.000000 0.000000 0.000000 1.000000 0.000000 0.000000 26.500000 0.000000 0.000000
50% 762.000000 4901786.000000 2303.000000 58000.000000 2.000000 1.000000 0.000000 0.000000 2.000000 2.000000... 4.000000 2303.000000 0.000000 0.000000 2.000000 0.000000 0.000000 58.000000 0.000000 0.000000
75% 1144.000000 7361646.000000 2410.000000 100000.000000 4.000000 4.000000 0.000000 1.000000 5.000000 3.000000... 7.000000 2410.000000 1.000000 1.000000 2.000000 0.000000 0.000000 100.000000 0.000000 0.000000
max 1454.000000 9999899.000000 2714.000000 1968504.000000 25.000000 7.000000 2.000000 12.000000 10.000000 4.000000... 9.000000 2714.000000 1.000000 9.000000 25.000000 9.000000 5.000000 1968.504000 8.000000 24.000000
\n", + "

8 rows \u00d7 53 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 3, + "text": [ + " TAZ SERIALNO PUMA5 income \\\n", + "count 2732722.000000 2732722.000000 2732722.000000 2732722.000000 \n", + "mean 751.134174 4923397.817272 2168.537679 77670.162133 \n", + "std 430.938788 2857497.525312 516.128215 81405.085003 \n", + "min 1.000000 20.000000 1000.000000 -20000.000000 \n", + "25% 372.000000 2463830.000000 2104.000000 26500.000000 \n", + "50% 762.000000 4901786.000000 2303.000000 58000.000000 \n", + "75% 1144.000000 7361646.000000 2410.000000 100000.000000 \n", + "max 1454.000000 9999899.000000 2714.000000 1968504.000000 \n", + "\n", + " PERSONS HHT UNITTYPE NOC \\\n", + "count 2732722.000000 2732722.000000 2732722.000000 2732722.000000 \n", + "mean 2.581065 2.640939 0.076833 0.468948 \n", + "std 1.605801 2.065958 0.365144 0.917211 \n", + "min 1.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 1.000000 0.000000 0.000000 \n", + "50% 2.000000 1.000000 0.000000 0.000000 \n", + "75% 4.000000 4.000000 0.000000 1.000000 \n", + "max 25.000000 7.000000 2.000000 12.000000 \n", + "\n", + " BLDGSZ TENURE ... bucketBin \\\n", + "count 2732722.000000 2732722.000000 ... 2732722.000000 \n", + "mean 3.528608 1.893538 ... 4.492757 \n", + "std 2.516513 1.010827 ... 2.871945 \n", + "min 0.000000 0.000000 ... 0.000000 \n", + "25% 2.000000 1.000000 ... 2.000000 \n", + "50% 2.000000 2.000000 ... 4.000000 \n", + "75% 5.000000 3.000000 ... 7.000000 \n", + "max 10.000000 4.000000 ... 
9.000000 \n", + "\n", + " originalPUMA hmultiunit num_young_adults drivers \\\n", + "count 2732722.000000 2732722.000000 2732722.000000 2732722.000000 \n", + "mean 2168.537679 0.401827 0.396906 2.062626 \n", + "std 516.128215 0.490267 0.728451 1.117948 \n", + "min 1000.000000 0.000000 0.000000 0.000000 \n", + "25% 2104.000000 0.000000 0.000000 1.000000 \n", + "50% 2303.000000 0.000000 0.000000 2.000000 \n", + "75% 2410.000000 1.000000 1.000000 2.000000 \n", + "max 2714.000000 1.000000 9.000000 25.000000 \n", + "\n", + " num_children num_adolescents income_in_thousands \\\n", + "count 2732722.000000 2732722.000000 2732722.000000 \n", + "mean 0.359349 0.059565 77.670162 \n", + "std 0.764597 0.254209 81.405085 \n", + "min 0.000000 0.000000 -20.000000 \n", + "25% 0.000000 0.000000 26.500000 \n", + "50% 0.000000 0.000000 58.000000 \n", + "75% 0.000000 0.000000 100.000000 \n", + "max 9.000000 5.000000 1968.504000 \n", + "\n", + " num_young_children num_college_age \n", + "count 2732722.000000 2732722.000000 \n", + "mean 0.159090 0.223251 \n", + "std 0.462614 0.581043 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 0.000000 0.000000 \n", + "75% 0.000000 0.000000 \n", + "max 8.000000 24.000000 \n", + "\n", + "[8 rows x 53 columns]" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file