diff --git a/activitysim/defaults/variables.py b/activitysim/defaults/variables.py index c50a58bba7..03b8af8b2f 100644 --- a/activitysim/defaults/variables.py +++ b/activitysim/defaults/variables.py @@ -20,6 +20,11 @@ def income_segment(households): labels=[1, 2, 3, 4]) +@sim.column("households") +def non_workers(households, persons): + return persons.household_id.value_counts() - households.workers + + @sim.column("households") def drivers(households, persons): # we assume that everyone 16 and older is a potential driver diff --git a/example/README.md b/example/README.md new file mode 100644 index 0000000000..5789e14b3d --- /dev/null +++ b/example/README.md @@ -0,0 +1,64 @@ +This is a list of items to double check before using in practice: + +* Make sure the units in things like distance_to_work match the walk thresholds + in the mandatory tour frequency spec. The original divided by 100. This is + true also of round trip auto to work and round trip auto to school. + +* There might be a few variables left off of some of the models. Look for +`head` in reading of the spec files as this is meant to eliminate some of the + rows. Also can look for `#` to comment out variables in the spec. + +* Go back to the 3 school location choices, and run the models for the +appropriate persons. + +* Probably needs code review of the variable definitions. How much of the +variable definitions are shared between regions and how much unique? Age +categories are shared? Income categories are unique? + + + + +A few overarching principles + +* A little discussion of "NOT so object oriented" - this is more like a +database - data is in standard tables, NOT in objects + +* The implications of this are that most of the core code is pandas and thus +the quality is controlled by the larger community. We are thankful that its +quality is very high. 
Specifically, there's not so much code in activitysim +"proper" + +* What it takes to add a new model + * define a new model + * define any new data sources necessary + * add any new assumptions in settings.yaml + * co-create the spec and any variables that are too complicated (or + reusable) for the spec + * run in notebook + +* Literally everything is really Python functions that compute something. +Case study of `num_under16_not_at_school` to show the inter-dependencies. + + + + +A few questions about "best practices" + +* What to put into the default data sources and variable specs and what to +put in the example / client-specific stuff? + +* Want to split up injectables from variables from tables or all one big file + so it's easier to search? + +* How much variable computation to put in Excel versus Python + +* There were some hard coded limits in the original csv - (area_type < 4 and +distance_to_work < 3) - these are now just left in the csv spec. Why would +this be different than (income_in_thousands > 50)? I've made an effort to +not have such "magic numbers" in Python code. (Elizabeth: MAX_NUM_CARS +exists now) + +* Want to name or number the person types in the spec files? + +* Testing for client-specific code? It's harder because outputs are "data +dependent." It's easier to take a small dataset and make sure it always runs. 
\ No newline at end of file diff --git a/example/configs/auto_ownership.csv b/example/configs/auto_ownership.csv new file mode 100644 index 0000000000..cc1dae874a --- /dev/null +++ b/example/configs/auto_ownership.csv @@ -0,0 +1,30 @@ +Description,Expression,cars0,cars1,cars2,cars3,cars4 +2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616 +3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208 +4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807 +Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313 +Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107 +Persons age 25-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596 +Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627 +Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769 +"Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797 +"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619 +"Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147 +"Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147 +"Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 +"Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 +"Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 +"Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 +Constants,@1,,1.1865,-1.0846,-3.2502,-5.313 +San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458 +Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372 +Napa county,county_name == 
'Napa',,-0.566,-0.4429,-0.2372,-0.2372 +Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372 +Marin county,county_name == 'Marin',,-0.2434,0,0,0 +"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626 +"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646 +"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053 +"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117 +"Retail accessibility by non-motorized, if 0 workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 +"Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 +"Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693 diff --git a/example/configs/auto_ownership_coeffs.csv b/example/configs/auto_ownership_coeffs.csv deleted file mode 100644 index 0fe176c110..0000000000 --- a/example/configs/auto_ownership_coeffs.csv +++ /dev/null @@ -1 +0,0 @@ -Description,Expression,cars0,cars1,cars2,cars3,cars4 2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616 3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208 4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807 Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313 Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107 Persons age 35-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596 Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627 Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769 "Number of workers, capped at 
3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797 "Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619 "Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147 "Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147 "Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 "Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 Constants,@1,,1.1865,-1.0846,-3.2502,-5.313 San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458 Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372 Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372 Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372 Marin county,county_name == 'Marin',,-0.2434,0,0,0 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117 "Retail accessibility by non-motorized, if 0 
workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693 \ No newline at end of file diff --git a/example/configs/mandatory_tour_frequency.csv b/example/configs/mandatory_tour_frequency.csv new file mode 100644 index 0000000000..9c0679fcf5 --- /dev/null +++ b/example/configs/mandatory_tour_frequency.csv @@ -0,0 +1,100 @@ +Description,Expression,work1,work2,school1,school2,work_and_school +Full-time worker alternative-specific constants,ptype == 1,0,-3.3781,,, +Part-time worker alternative-specific constants,ptype == 2,0,-3.0476,,, +University student alternative-specific constants,ptype == 3,2.166,-1.3965,0,-3.7429,0.1073 +Non-working adult alternative-specific constants,ptype == 4,,,,, +Retired alternative-specific constants,ptype == 5,,,,, +Driving-age child alternative-specific constants,ptype == 6,,,0,-3.136,-4.4362 +Pre-driving age child who is in school alternative-specific constants,ptype == 7,,,0,-3.9703, +Female - Full-time worker interaction,(ptype == 1) & (sex == 2),0,-0.2255,0.1592,,-0.3442 +Female - Part-time worker interaction,(ptype == 2) & (sex == 2),0,-0.2255,0.1592,,-0.3442 +Female - University student interaction,(ptype == 3) & (sex == 2),0.1737,-0.2255,0.1592,0.114,-0.3442 +Female - Non-working adult interaction,(ptype == 4) & (sex == 2),0,-0.2255,0.1592,, +Female - Retired interaction,(ptype == 5) & (sex == 2),0,-0.2255,0.1592,, +Female - Driving-age child interaction,(ptype == 6) & (sex == 2),0.1737,,0,0.114,-0.3442 +Female - Pre-driving age child who is in school interaction,(ptype == 7) & (sex == 2),0.1737,,0,0.114, +Under 35 - Full-time worker interaction,(ptype == 1) & (age <= 35),0,-0.1375,0.7218,,0.9761 +Under 35 - Part-time worker interaction,(ptype == 2) & (age <= 
35),0,-0.1375,0.7218,,0.9761 +Under 35 - University student interaction,(ptype == 3) & (age <= 35),-0.4629,-0.1375,0,1.275,0.9761 +Under 35 - Non-working adult interaction,(ptype == 4) & (age <= 35),0,-0.1375,0.7218,, +Can walk to work - Full-time worker interaction,(ptype == 1) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Part-time worker interaction,(ptype == 2) & (distance_to_work < 3),,0.5268,,, +Can walk to work - University student interaction,(ptype == 3) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Non-working adult interaction,(ptype == 4) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Retired interaction,(ptype == 5) & (distance_to_work < 3),,0.5268,,, +Can walk to school - University student interaction,(ptype == 3) & (distance_to_school < 3),,,,0.7114, +Can walk to school - Driving-age child interaction,(ptype == 6) & (distance_to_school < 3),,,,0.7114, +Can walk to school - Pre-driving age child who is in school interaction,(ptype == 7) & (distance_to_school < 3),,,,0.7114, +Can walk to work or school - Full-time worker interaction,(ptype == 1) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - Part-time worker interaction,(ptype == 2) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - University student interaction,(ptype == 3) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - Driving-age child interaction,(ptype == 6) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Round trip auto time to work - Full-time worker interaction,(ptype == 1) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - Part-time worker interaction,(ptype == 2) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - University student interaction,(ptype == 3) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - Non-working adult 
interaction,(ptype == 4) * roundtrip_auto_time_to_work,,-0.0035,,, +Round trip auto time to work - Retired,(ptype == 5) * roundtrip_auto_time_to_work,,-0.0035,,, +Round trip auto time to school - University student interaction,(ptype == 3) * roundtrip_auto_time_to_school,,,,-0.0034,-0.0031 +Round trip auto time to school - Driving-age child interaction,(ptype == 6) * roundtrip_auto_time_to_school,,,,-0.0034,-0.0031 +Round trip auto time to school - Pre-driving age child who is in school interaction,(ptype == 7) * roundtrip_auto_time_to_school,,,,-0.0034, +Student is employed - University student interaction,(ptype == 3) & student_is_employed,3.014,3.014,,,3.014 +Student is employed - Driving-age child interaction,(ptype == 6) & student_is_employed,3.014,3.014,,,3.014 +Non-student goes to school - Full-time worker interaction,(ptype == 1) & nonstudent_to_school,,,3.883,,3.883 +Non-student goes to school - Part-time worker interaction,(ptype == 2) & nonstudent_to_school,,,3.883,,3.883 +Non-student goes to school - Non-working adult interaction,(ptype == 4) & nonstudent_to_school,,,3.883,, +Non-student goes to school - Retired interaction,(ptype == 5) & nonstudent_to_school,,,3.883,, +No cars in household - Full-time worker interaction,(ptype == 1) & (auto_ownership == 0),,-1.306,,,-1.302 +No cars in household - Part-time worker interaction,(ptype == 2) & (auto_ownership == 0),,-1.306,,,-1.302 +No cars in household - University student interaction,(ptype == 3) & (auto_ownership == 0),,-1.306,,-1.413,-1.302 +No cars in household - Non-working adult interaction,(ptype == 4) & (auto_ownership == 0),,-1.306,,, +No cars in household - Retired interaction,(ptype == 5) & (auto_ownership == 0),,-1.306,,, +No cars in household - Driving-age student interaction,(ptype == 6) & (auto_ownership == 0),,,,-1.413,-1.302 +No cars in household - Pre-driving age child who is in school interaction,(ptype == 7) & (auto_ownership == 0),,,,-1.413, +Fewer cars than drivers in household - 
University student interaction,(ptype == 3) & (auto_ownership < drivers),,,,-0.5759, +Fewer cars than drivers in household - Driving-age student interaction,(ptype == 6) & (auto_ownership < drivers),,,,-0.5759, +Fewer cars than drivers in household - Pre-driving age child who is in school interaction,(ptype == 7) & (auto_ownership < drivers),,,,-0.5759, +Number of preschool children in household - Full-time worker interaction,(ptype == 1) * (num_young_children),0,-0.1478,-0.1335,,-0.1251 +Number of preschool children in household - Part-time worker interaction,(ptype == 2) * (num_young_children),0,-0.1478,-0.1335,,-0.1251 +Number of preschool children in household - University student interaction,(ptype == 3) * (num_young_children),0.2191,-0.1478,0,-0.5577,-0.1251 +Number of preschool children in household - Non-working adult interaction,(ptype == 4) * (num_young_children),0,-0.1478,-0.1335,, +Number of preschool children in household - Retired interaction,(ptype == 5) * (num_young_children),0,-0.1478,-0.1335,, +Number of preschool children in household - Driving-age student interaction,(ptype == 6) * (num_young_children),0.2191,,0,-0.5577,-0.1251 +Number of preschool children in household - Pre-driving age child who is in school interaction,(ptype == 7) * (num_young_children),0.2191,,0,-0.5577, +Number of non-workers in the household - Full-time worker interaction,(ptype == 1) * non_workers,,,0.2574,, +Number of non-workers in the household - Part-time worker interaction,(ptype == 2) * non_workers,,,0.2574,, +Household income higher than $50k - Full-time worker interaction,(ptype == 1) & (income_in_thousands > 50),0,,0.0347,,0.0347 +Household income higher than $50k - Part-time worker interaction,(ptype == 2) & (income_in_thousands > 50),0,,0.0347,,0.0347 +Household income higher than $50k - University student interaction,(ptype == 3) & (income_in_thousands > 50),-0.0528,-0.0528,0,,-0.0528 +Household income higher than $50k - Non-working adult interaction,(ptype 
== 4) & (income_in_thousands > 50),0,,0.0347,, +Household income higher than $50k - Retired interaction,(ptype == 5) & (income_in_thousands > 50),0,,0.0347,, +Household income higher than $50k - Driving-age student interaction,(ptype == 6) & (income_in_thousands > 50),-0.0528,,0,,-0.0528 +Household income higher than $50k - Pre-driving age child who is in school interaction,(ptype == 7) & (income_in_thousands > 50),-0.0528,,0,, +Non-family household - Full-time worker interaction,(ptype == 1) & non_family,0,,-0.25,,-0.25 +Non-family household - Part-time worker interaction,(ptype == 2) & non_family,0,,-0.25,,-0.25 +Non-family household - University student interaction,(ptype == 3) & non_family,-0.1792,-0.1792,0,,-0.1792 +Non-family household - Non-working adult interaction,(ptype == 4) & non_family,0,,-0.25,, +Non-family household - Retired interaction,(ptype == 5) & non_family,0,,-0.25,, +Non-family household - Driving-age student interaction,(ptype == 6) & non_family,-0.1792,,0,,-0.1792 +Non-family household - Pre-driving age child who is in school interaction,(ptype == 7) & non_family,-0.1792,,0,, +Number of children under 16 not at school - Full-time worker interaction,(ptype == 1) * num_under16_not_at_school,,0.1804,,,-0.1955 +Number of children under 16 not at school - Part-time worker interaction,(ptype == 2) * num_under16_not_at_school,,0.1804,,,-0.1955 +Number of children under 16 not at school - University student interaction,(ptype == 3) * num_under16_not_at_school,,0.1804,,0.0866,-0.1955 +Number of children under 16 not at school - Non-working adult interaction,(ptype == 4) * num_under16_not_at_school,,0.1804,,, +Number of children under 16 not at school - Retired,(ptype == 5) * num_under16_not_at_school,,0.1804,,, +Number of children under 16 not at school - Driving-age student interaction,(ptype == 6) * num_under16_not_at_school,,,,0.0866,-0.1955 +Number of children under 16 not at school - Pre-driving age child who is in school interaction,(ptype == 
7) * num_under16_not_at_school,,,,0.0866, +Home is in urban area - Full-time worker interaction,(ptype == 1) & (area_type < 4),0,0.2308,-0.1361,,-0.3509 +Home is in urban area - Part-time worker interaction,(ptype == 2) & (area_type < 4),0,0.2308,-0.1361,,-0.3509 +Home is in urban area - University student interaction,(ptype == 3) & (area_type < 4),-0.2831,0.2308,0,0.317,-0.3509 +Home is in urban area - Non-working adult interaction,(ptype == 4) & (area_type < 4),0,0.238,-0.1361,, +Home is in urban area - Retired interaction,(ptype == 5) & (area_type < 4),0,0.2308,-0.1361,, +Home is in urban area - Driving-age student interaction,(ptype == 6) & (area_type < 4),-0.2831,,0,0.317,-0.3509 +Home is in urban area - Pre-driving age child who is in school interaction,(ptype == 7) & (area_type < 4),-0.2831,,0,0.317, +Unavailable: Full-time worker,ptype == 1,,,,-999, +Unavailable: Part-time worker,ptype == 2,,,,-999, +Unavailable: Non-working adult,ptype == 4,,,,-999,-999 +Unavailable: Retired,ptype == 5,,,,-999,-999 +Unavailable: Driving-age child,ptype == 6,-999,-999,,, +Unavailable: Pre-driving age child who is in school,ptype == 7,,-999,,,-999 +Unavailable: Work tours for those with no usual work location,~(workplace_taz > -1),-999,-999,,,-999 +Unavailable: School tours for those with no usual school location,~(school_taz > -1),,,-999,-999,-999 diff --git a/example/configs/settings.yaml b/example/configs/settings.yaml index 05cf2cdb08..3f1140ab14 100644 --- a/example/configs/settings.yaml +++ b/example/configs/settings.yaml @@ -1,6 +1,6 @@ store: mtc_asim.h5 -households_sample_size: 100000 +households_sample_size: 10000 county_map: San Francisco: 1 @@ -12,3 +12,34 @@ county_map: Napa: 7 Sonoma: 8 Marin: 9 + +employment_map: + 1: "full" + 2: "part" + 3: "not" + 4: "child" + +student_map: + 1: "high" + 2: "college" + 3: "not" + +person_type_map: + 1: "full" + 2: "part" + 3: "university" + 4: "nonwork" + 5: "retired" + 6: "driving" + 7: "school" + 8: "preschool" + 
+household_type_map: + 0: "null" + 1: "family_married" + 2: "family_male" + 3: "family_female" + 4: "nonfamily_male_alone" + 5: "nonfamily_male_notalone" + 6: "nonfamily_female_alone" + 7: "nonfamily_female_notalone" \ No newline at end of file diff --git a/example/configs/workplace_location.csv b/example/configs/workplace_location.csv index af7b3ce918..0adf1b611d 100644 --- a/example/configs/workplace_location.csv +++ b/example/configs/workplace_location.csv @@ -1 +1,17 @@ -Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 "Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 "Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 "Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 "No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 "No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 "No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 "No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file +Description,Expression,Alt +"Distance, piecewise linear from 0 to 1 
miles",@df.distance.clip(1),-0.8428 +"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 +"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 +"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 +"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 +"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 +"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 +"Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 +"Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 +"Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 +"Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 +"No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 +"No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 +"No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 +"No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 +Mode choice logsum,#mcLogsum,0.3 diff --git a/example/configs/workplace_location_size_terms.csv b/example/configs/workplace_location_size_terms.csv index f9b7da433a..6ef694785a 100644 --- a/example/configs/workplace_location_size_terms.csv +++ b/example/configs/workplace_location_size_terms.csv @@ -1 +1,16 @@ -purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 
university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 school,grade,0,0,0,0,0,0,0,1,0,0,0 school,high,0,0,0,0,0,0,0,0,1,0,0 escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 \ No newline at end of file +purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE +work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 +work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 +work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 +work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 +university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 +school,grade,0,0,0,0,0,0,0,1,0,0,0 +school,high,0,0,0,0,0,0,0,0,1,0,0 +escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 +escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 +shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 +eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 +othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 +social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 +othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 +atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 diff --git a/example/models.py b/example/models.py index 941d9973c9..a5e5730943 100644 --- a/example/models.py +++ b/example/models.py @@ -1,4 +1,5 @@ import urbansim.sim.simulation as sim +import urbansim.utils.misc as usim_misc import os from activitysim import activitysim as asim import openmatrix as omx @@ -7,45 +8,102 @@ import pandas as pd +# this is the max number of cars allowable in the auto ownership model +MAX_NUM_CARS = 5 + + +""" +This part of this file is currently creating small tables to serve as +alternatives in the various models +""" + + @sim.table() def auto_alts(): - return 
asim.identity_matrix(["cars%d" % i for i in range(5)]) + return asim.identity_matrix(["cars%d" % i for i in range(MAX_NUM_CARS)]) + +@sim.table() +def mandatory_tour_frequency_alts(): + return asim.identity_matrix(["work1", "work2", "school1", "school2", + "work_and_school"]) +# these are the alternatives for the workplace choice @sim.table() def zones(): # I grant this is a weird idiom but it helps to name the index return pd.DataFrame({"TAZ": np.arange(1454)+1}).set_index("TAZ") +""" +Read in the omx files and create the skim objects +""" + + @sim.injectable() def nonmotskm_omx(): return omx.openFile('data/nonmotskm.omx') @sim.injectable() -def distance_matrix(nonmotskm_omx): +def distance_skim(nonmotskm_omx): + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +@sim.injectable() +def sovam_skim(nonmotskm_omx): + # FIXME use the right omx file + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +@sim.injectable() +def sovmd_skim(nonmotskm_omx): + # FIXME use the right omx file return skim.Skim(nonmotskm_omx['DIST'], offset=-1) +@sim.injectable() +def sovpm_skim(nonmotskm_omx): + # FIXME use the right omx file + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +""" +Read in the spec files and reformat as necessary +""" + + @sim.injectable() def auto_ownership_spec(): - f = os.path.join('configs', "auto_ownership_coeffs.csv") + f = os.path.join('configs', "auto_ownership.csv") + # FIXME should read in all variables and comment out ones not used return asim.read_model_spec(f).head(4*26) @sim.injectable() def workplace_location_spec(): f = os.path.join('configs', "workplace_location.csv") + # FIXME should read in all variables and comment out ones not used return asim.read_model_spec(f).head(15) +@sim.injectable() +def mandatory_tour_frequency_spec(): + f = os.path.join('configs', "mandatory_tour_frequency.csv") + return asim.read_model_spec(f) + + @sim.table() def workplace_size_spec(): f = os.path.join('configs', 'workplace_location_size_terms.csv') 
return pd.read_csv(f) +""" +This is a special submodel for the workplace location choice +""" + + @sim.table() def workplace_size_terms(land_use, workplace_size_spec): """ @@ -70,6 +128,12 @@ def workplace_size_terms(land_use, workplace_size_spec): return new_df +""" +Auto ownership is a standard model which predicts how many cars a household +with given characteristics owns +""" + + @sim.model() def auto_ownership_simulate(households, auto_alts, @@ -86,25 +150,35 @@ def auto_ownership_simulate(households, asim.simple_simulate(choosers, alternatives, auto_ownership_spec, mult_by_alt_col=True) + # map these back to integers + choices = choices.map(dict([("cars%d"%i, i) for i in range(MAX_NUM_CARS)])) + print "Choices:\n", choices.value_counts() sim.add_column("households", "auto_ownership", choices) return model_design +""" +The workplace location model predicts the zones in which various people will +work. Interestingly there's not really any supply side to this model - we +assume there are workplaces for the people to work. +""" + + @sim.model() def workplace_location_simulate(persons, households, zones, workplace_location_spec, - distance_matrix, + distance_skim, workplace_size_terms): choosers = sim.merge_tables(persons.name, tables=[persons, households]) alternatives = zones.to_frame().join(workplace_size_terms.to_frame()) skims = { - "distance": distance_matrix + "distance": distance_skim } choices, model_design = \ @@ -116,12 +190,50 @@ def workplace_location_simulate(persons, mult_by_alt_col=False, sample_size=50) - print "Describe of hoices:\n", choices.describe() + print "Describe of choices:\n", choices.describe() sim.add_column("persons", "workplace_taz", choices) return model_design +""" +This model predicts the frequency of making mandatory trips (see the +alternatives above) - these trips include work and school in some combination. 
+""" + + +@sim.model() +def mandatory_tour_frequency(persons, + households, + land_use, + mandatory_tour_frequency_alts, + mandatory_tour_frequency_spec): + + choosers = sim.merge_tables(persons.name, tables=[persons, + households, + land_use]) + + choices, model_design = \ + asim.simple_simulate(choosers, + mandatory_tour_frequency_alts.to_frame(), + mandatory_tour_frequency_spec, + mult_by_alt_col=True) + + print "Choices:\n", choices.value_counts() + sim.add_column("persons", "mandatory_tour_frequency", choices) + + return model_design + + +""" +This section contains computed columns on each table. +""" + +""" +for the land use table +""" + + @sim.column("land_use") def total_households(land_use): return land_use.local.TOTHH @@ -139,4 +251,148 @@ def total_acres(land_use): @sim.column("land_use") def county_id(land_use): - return land_use.local.COUNTY \ No newline at end of file + return land_use.local.COUNTY + + +""" +for households +""" + +# just a rename / alias +@sim.column("households") +def home_taz(households): + return households.TAZ + + +# map household type ids to strings +@sim.column("households") +def household_type(households, settings): + return households.HHT.map(settings["household_type_map"]) + + +@sim.column("households") +def non_family(households): + return households.household_type.isin(["nonfamily_male_alone", + "nonfamily_male_notalone", + "nonfamily_female_alone", + "nonfamily_female_notalone"]) + + +# can't just invert these unfortunately because there's a null household type +@sim.column("households") +def family(households): + return households.household_type.isin(["family_married", + "family_male", + "family_female"]) + + +@sim.column("households") +def num_under16_not_at_school(persons, households): + # sum (not size) the boolean flag so only flagged persons are counted; size() would return the total household size + return persons.under16_not_at_school.groupby(persons.household_id).sum().\ + reindex(households.index).fillna(0) + + +""" +for the persons table +""" +# FIXME - this is my "placeholder" for the CDAP model ;) +@sim.column("persons") 
+def cdap_activity(persons): # random draw of 'M', 'N' or 'H' per person + return pd.Series(np.random.randint(3, size=len(persons)), + index=persons.index).map({0: 'M', 1: 'N', 2: 'H'}) + + +# convert employment categories to string descriptors +@sim.column("persons") +def employed_cat(persons, settings): + return persons.pemploy.map(settings["employment_map"]) + + +# convert student categories to string descriptors +@sim.column("persons") +def student_cat(persons, settings): + return persons.pstudent.map(settings["student_map"]) + + +# convert person type categories to string descriptors +@sim.column("persons") +def ptype_cat(persons, settings): + return persons.ptype.map(settings["person_type_map"]) + + +# borrowing these definitions from the original code +@sim.column("persons") +def student_is_employed(persons): + return (persons.ptype_cat.isin(['university', 'driving']) & + persons.employed_cat.isin(['full', 'part'])) + + +@sim.column("persons") +def nonstudent_to_school(persons): + return (persons.ptype_cat.isin(['full', 'part', 'nonwork', 'retired']) & + persons.student_cat.isin(['high', 'college'])) + + +@sim.column("persons") +def under16_not_at_school(persons): + return (persons.ptype_cat.isin(["school", "preschool"]) & + persons.cdap_activity.isin(["N", "H"])) + + +@sim.column("persons") +def workplace_taz(persons): + # FIXME this is really because we ask for ALL columns in the persons data + # FIXME frame - urbansim actually only asks for the columns that are used by + # FIXME the model specs in play at that time + return pd.Series(1, persons.index) + + +@sim.column("persons") +def home_taz(households, persons): + return usim_misc.reindex(households.home_taz, + persons.household_id) + + +@sim.column("persons") +def school_taz(persons): + # FIXME school_taz just mirrors workplace_taz until the school lcm is working + return persons.workplace_taz + + +# this uses the distance skim to compute the raw distance from home to work +@sim.column("persons") +def distance_to_work(persons, distance_skim): + return
pd.Series(distance_skim.get(persons.home_taz, + persons.workplace_taz), + index=persons.index) + + +# same as distance_to_work, but looked up between home_taz and school_taz +@sim.column("persons") +def distance_to_school(persons, distance_skim): + return pd.Series(distance_skim.get(persons.home_taz, + persons.school_taz), + index=persons.index) + + +# similar but this adds the am peak travel time to the pm peak travel time in +# the opposite direction (by car) +@sim.column("persons") +def roundtrip_auto_time_to_work(persons, sovam_skim, sovpm_skim): + return pd.Series(sovam_skim.get(persons.home_taz, + persons.workplace_taz) + + sovpm_skim.get(persons.workplace_taz, + persons.home_taz), + index=persons.index) + + +# this adds the am peak travel time to the md peak travel time in +# the opposite direction (by car), assuming students leave school earlier +@sim.column("persons") +def roundtrip_auto_time_to_school(persons, sovam_skim, sovmd_skim): + return pd.Series(sovam_skim.get(persons.home_taz, + persons.school_taz) + + sovmd_skim.get(persons.school_taz, + persons.home_taz), + index=persons.index) \ No newline at end of file diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb index 1c065abf11..e6329cec65 100644 --- a/notebooks/data_mover.ipynb +++ b/notebooks/data_mover.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:afbc3e7040dd9e4a5b21433063f13a6a8abfcc04bcfc6574e7e43376c257cd33" + "signature": "sha256:07f23263339f1751ee4eb702d126692b9ed2785fe8640a8906eb0fb110d9e67a" }, "nbformat": 3, "nbformat_minor": 0, @@ -43,8 +43,10 @@ "col_map = {\n", " \"HHID\": \"household_id\",\n", " \"AGE\": \"age\",\n", + " \"SEX\": \"sex\",\n", " \"hworkers\": \"workers\",\n", - " \"HINC\": \"income\"\n", + " \"HINC\": \"income\",\n", + " \"AREATYPE\": \"area_type\"\n", "}" ], "language": "python", diff --git a/notebooks/simulation.ipynb b/notebooks/simulation.ipynb index 1643e621dd..406a524541 100644 --- a/notebooks/simulation.ipynb +++ b/notebooks/simulation.ipynb @@ -1,7 +1,7 @@ {
"metadata": { "name": "", - "signature": "sha256:68dd59fb87c331bcb79d97a97557ee2ee411309bd3a85e58d6bee4b169b221df" + "signature": "sha256:a5d923d6fa9383fade7f8566bb3508f52ae45b517d7ec84fd71c1983d3d8598a" }, "nbformat": 3, "nbformat_minor": 0, @@ -39,35 +39,55 @@ "stream": "stdout", "text": [ "Running model 'workplace_location_simulate'\n", - "Describe of hoices:\n" + "Describe of choices:\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "count 258078.000000\n", - "mean 718.510997\n", - "std 422.975764\n", - "min 1.000000\n", - "25% 352.000000\n", - "50% 719.000000\n", - "75% 1083.000000\n", - "max 1454.000000\n", + "count 25495.000000\n", + "mean 718.590704\n", + "std 422.297505\n", + "min 1.000000\n", + "25% 352.000000\n", + "50% 716.000000\n", + "75% 1086.000000\n", + "max 1454.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'workplace_location_simulate': 35.33s" + "Time to execute model 'workplace_location_simulate': 8.91s\n", + "Total time to execute: 8.91s\n" ] - }, + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print sim.get_table(\"persons\").distance_to_work.describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "Total time to execute: 35.34s\n" + "count 25495.000000\n", + "mean 39.004909\n", + "std 24.414926\n", + "min 0.150000\n", + "25% 21.000000\n", + "50% 36.100000\n", + "75% 52.360000\n", + "max 157.170000\n", + "dtype: float64\n" ] } ], - "prompt_number": 2 + "prompt_number": 3 }, { "cell_type": "code", @@ -90,629 +110,307 @@ "output_type": "stream", "stream": "stdout", "text": [ - "cars2 47959\n", - "cars3 38186\n", - "cars1 7528\n", - "cars4 5123\n", - "cars0 1204\n", + "2 4823\n", + "3 3813\n", + "1 732\n", + "4 515\n", + "0 117\n", "dtype: int64\n", - "Time to execute model 'auto_ownership_simulate': 4.62s" + "Time to execute model 'auto_ownership_simulate': 1.16s\n", + 
"Total time to execute: 1.16s\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.run(['mandatory_tour_frequency'])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Running model 'mandatory_tour_frequency'\n", + "WARNING: Describe of columns with no variability:\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ + " count mean std \\\n", + "((ptype == 2) & nonstudent_to_school) * school1 100000 0 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 100000 0 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 100000 0 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 100000 0 0 \n", + "(~(school_taz > -1)) * school1 100000 0 0 \n", + "(~(school_taz > -1)) * school2 100000 0 0 \n", + "(~(school_taz > -1)) * work_and_school 100000 0 0 \n", + "(~(workplace_taz > -1)) * work1 100000 0 0 \n", + "(~(workplace_taz > -1)) * work2 100000 0 0 \n", + "(~(workplace_taz > -1)) * work_and_school 100000 0 0 \n", "\n", - "Total time to execute: 4.62s\n" + " min 25% 50% 75% \\\n", + "((ptype == 2) & nonstudent_to_school) * school1 0 0 0 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 0 0 0 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 0 0 0 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 0 0 0 0 \n", + "(~(school_taz > -1)) * school1 0 0 0 0 \n", + "(~(school_taz > -1)) * school2 0 0 0 0 \n", + "(~(school_taz > -1)) * work_and_school 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work1 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work2 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work_and_school 0 0 0 0 \n", + "\n", + " max \n", + "((ptype == 2) & nonstudent_to_school) * school1 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 0 \n", + 
"(~(school_taz > -1)) * school1 0 \n", + "(~(school_taz > -1)) * school2 0 \n", + "(~(school_taz > -1)) * work_and_school 0 \n", + "(~(workplace_taz > -1)) * work1 0 \n", + "(~(workplace_taz > -1)) * work2 0 \n", + "(~(workplace_taz > -1)) * work_and_school 0 \n", + "Choices:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "work_and_school 9513\n", + "work2 7568\n", + "work1 4421\n", + "school1 2836\n", + "school2 1157\n", + "dtype: int64\n", + "Time to execute model 'mandatory_tour_frequency': 5.56s\n", + "Total time to execute: 5.56s\n" ] } ], - "prompt_number": 3 + "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ - "sim.get_table(\"land_use\").to_frame().describe()" + "sim.get_table(\"land_use\").to_frame().info()" ], "language": "python", "metadata": {}, "outputs": [ { - "html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DISTRICTSDCOUNTYTOTHHHHPOPTOTPOPEMPRESSFDUMFDUHHINCQ1...hhldssftazgqpopemployment_densitytotal_acrescounty_iddensity_indexhousehold_densitytotal_householdstotal_employment
count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000... 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 1453.000000 1454.000000 1454.000000 1454.000000
mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 4917.978680 2168.684319 1122.798487 670.889959 508.134801... 1793.688446 727.500000 101.570151 9.596395 3146.071457 3.835626 2.279554 6.008186 1793.688446 2247.736589
std 8.701078 8.701078 2.040153 961.021405 2686.029808 2690.352928 1211.109335 854.895353 717.261660 378.753528... 961.021405 419.877958 393.886676 45.067313 16945.908840 2.040153 3.945717 8.565908 961.021405 3538.356220
min 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000... 0.000000 1.000000 -1.000000 0.000000 13.000000 1.000000 0.000000 0.000000 0.000000 0.000000
25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 3384.500000 1460.500000 602.000000 144.500000 257.000000... 1200.250000 364.250000 5.000000 0.877829 230.000000 3.000000 0.550232 1.910701 1200.250000 482.000000
50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 4577.000000 2016.000000 1034.000000 460.000000 434.000000... 1681.500000 727.500000 18.000000 2.158701 397.000000 4.000000 1.289224 3.939122 1681.500000 1005.500000
75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 6098.500000 2735.500000 1496.000000 907.750000 674.750000... 2259.750000 1090.750000 71.000000 5.492696 883.500000 5.000000 2.337577 6.693238 2259.750000 2215.750000
max 34.000000 34.000000 9.000000 12542.000000 39671.000000 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000... 12542.000000 1454.000000 7810.000000 877.564767 372520.000000 9.000000 46.360371 90.891304 12542.000000 37950.000000
\n", - "

8 rows \u00d7 48 columns

\n", - "
" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 4, + "output_type": "stream", + "stream": "stdout", "text": [ - " DISTRICT SD COUNTY TOTHH HHPOP \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 14.908528 14.908528 3.835626 1793.688446 4816.408528 \n", - "std 8.701078 8.701078 2.040153 961.021405 2686.029808 \n", - "min 1.000000 1.000000 1.000000 0.000000 0.000000 \n", - "25% 8.000000 8.000000 3.000000 1200.250000 3288.250000 \n", - "50% 15.000000 15.000000 4.000000 1681.500000 4504.500000 \n", - "75% 20.750000 20.750000 5.000000 2259.750000 6033.750000 \n", - "max 34.000000 34.000000 9.000000 12542.000000 39671.000000 \n", - "\n", - " TOTPOP EMPRES SFDU MFDU HHINCQ1 \\\n", - "count 1454.000000 1454.000000 1454.000000 1454.000000 1454.000000 \n", - "mean 4917.978680 2168.684319 1122.798487 670.889959 508.134801 \n", - "std 2690.352928 1211.109335 854.895353 717.261660 378.753528 \n", - "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "25% 3384.500000 1460.500000 602.000000 144.500000 257.000000 \n", - "50% 4577.000000 2016.000000 1034.000000 460.000000 434.000000 \n", - "75% 6098.500000 2735.500000 1496.000000 907.750000 674.750000 \n", - "max 40020.000000 16799.000000 12413.000000 4920.000000 3754.000000 \n", - "\n", - " ... hhlds sftaz gqpop \\\n", - "count ... 1454.000000 1454.000000 1454.000000 \n", - "mean ... 1793.688446 727.500000 101.570151 \n", - "std ... 961.021405 419.877958 393.886676 \n", - "min ... 0.000000 1.000000 -1.000000 \n", - "25% ... 1200.250000 364.250000 5.000000 \n", - "50% ... 1681.500000 727.500000 18.000000 \n", - "75% ... 2259.750000 1090.750000 71.000000 \n", - "max ... 
12542.000000 1454.000000 7810.000000 \n", - "\n", - " employment_density total_acres county_id density_index \\\n", - "count 1454.000000 1454.000000 1454.000000 1453.000000 \n", - "mean 9.596395 3146.071457 3.835626 2.279554 \n", - "std 45.067313 16945.908840 2.040153 3.945717 \n", - "min 0.000000 13.000000 1.000000 0.000000 \n", - "25% 0.877829 230.000000 3.000000 0.550232 \n", - "50% 2.158701 397.000000 4.000000 1.289224 \n", - "75% 5.492696 883.500000 5.000000 2.337577 \n", - "max 877.564767 372520.000000 9.000000 46.360371 \n", - "\n", - " household_density total_households total_employment \n", - "count 1454.000000 1454.000000 1454.000000 \n", - "mean 6.008186 1793.688446 2247.736589 \n", - "std 8.565908 961.021405 3538.356220 \n", - "min 0.000000 0.000000 0.000000 \n", - "25% 1.910701 1200.250000 482.000000 \n", - "50% 3.939122 1681.500000 1005.500000 \n", - "75% 6.693238 2259.750000 2215.750000 \n", - "max 90.891304 12542.000000 37950.000000 \n", - "\n", - "[8 rows x 48 columns]" + "\n", + "Int64Index: 1454 entries, 1 to 1454\n", + "Data columns (total 49 columns):\n", + "DISTRICT 1454 non-null int64\n", + "SD 1454 non-null int64\n", + "COUNTY 1454 non-null int64\n", + "TOTHH 1454 non-null int64\n", + "HHPOP 1454 non-null int64\n", + "TOTPOP 1454 non-null int64\n", + "EMPRES 1454 non-null int64\n", + "SFDU 1454 non-null int64\n", + "MFDU 1454 non-null int64\n", + "HHINCQ1 1454 non-null int64\n", + "HHINCQ2 1454 non-null int64\n", + "HHINCQ3 1454 non-null int64\n", + "HHINCQ4 1454 non-null int64\n", + "TOTACRE 1454 non-null float64\n", + "RESACRE 1454 non-null int64\n", + "CIACRE 1454 non-null int64\n", + "SHPOP62P 1454 non-null float64\n", + "TOTEMP 1454 non-null int64\n", + "AGE0004 1454 non-null int64\n", + "AGE0519 1454 non-null int64\n", + "AGE2044 1454 non-null int64\n", + "AGE4564 1454 non-null int64\n", + "AGE65P 1454 non-null int64\n", + "RETEMPN 1454 non-null int64\n", + "FPSEMPN 1454 non-null int64\n", + "HEREMPN 1454 non-null int64\n", + "OTHEMPN 
1454 non-null int64\n", + "AGREMPN 1454 non-null int64\n", + "MWTEMPN 1454 non-null int64\n", + "PRKCST 1454 non-null float64\n", + "OPRKCST 1454 non-null float64\n", + "area_type 1454 non-null int64\n", + "HSENROLL 1454 non-null float64\n", + "COLLFTE 1454 non-null float64\n", + "COLLPTE 1454 non-null float64\n", + "TOPOLOGY 1454 non-null int64\n", + "TERMINAL 1454 non-null float64\n", + "ZERO 1454 non-null int64\n", + "hhlds 1454 non-null int64\n", + "sftaz 1454 non-null int64\n", + "gqpop 1454 non-null int64\n", + "employment_density 1454 non-null float64\n", + "total_acres 1454 non-null float64\n", + "county_id 1454 non-null int64\n", + "density_index 1453 non-null float64\n", + "county_name 1454 non-null object\n", + "household_density 1454 non-null float64\n", + "total_households 1454 non-null int64\n", + "total_employment 1454 non-null int64\n", + "dtypes: float64(12), int64(36), object(1)\n", + "memory usage: 568.0 KB\n" ] } ], - "prompt_number": 4 + "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ - "sim.get_table(\"households\").to_frame().describe()" + "sim.get_table(\"households\").to_frame().info()" ], "language": "python", "metadata": {}, "outputs": [ { - "html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TAZSERIALNOPUMA5incomePERSONSHHTUNITTYPENOCBLDGSZTENURE...bucketBinoriginalPUMAhmultiunitnum_young_adultsdriversnum_childrennum_adolescentsincome_in_thousandsnum_young_childrennum_college_age
count 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000... 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000 100000.000000
mean 752.439040 4924260.422350 2168.287950 77684.096720 2.580780 2.642540 0.077640 0.468280 3.529460 1.892780... 4.484880 2168.287950 0.401800 0.394180 2.063810 0.357290 0.060680 77.684097 0.159680 0.226650
std 430.258155 2863687.886756 516.271349 81341.474187 1.606362 2.066835 0.367387 0.913982 2.517375 1.010353... 2.871932 516.271349 0.490264 0.728922 1.122662 0.760368 0.260074 81.341474 0.462498 0.589748
min 1.000000 496.000000 1000.000000 -20000.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000... 0.000000 1000.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -20.000000 0.000000 0.000000
25% 374.000000 2456274.500000 2104.000000 26500.000000 1.000000 1.000000 0.000000 0.000000 2.000000 1.000000... 2.000000 2104.000000 0.000000 0.000000 1.000000 0.000000 0.000000 26.500000 0.000000 0.000000
50% 764.000000 4895910.500000 2303.000000 58000.000000 2.000000 1.000000 0.000000 0.000000 2.000000 2.000000... 4.000000 2303.000000 0.000000 0.000000 2.000000 0.000000 0.000000 58.000000 0.000000 0.000000
75% 1144.000000 7357629.750000 2410.000000 100000.000000 4.000000 4.000000 0.000000 1.000000 5.000000 3.000000... 7.000000 2410.000000 1.000000 1.000000 2.000000 0.000000 0.000000 100.000000 0.000000 0.000000
max 1454.000000 9999811.000000 2714.000000 1237000.000000 25.000000 7.000000 2.000000 10.000000 10.000000 4.000000... 9.000000 2714.000000 1.000000 9.000000 25.000000 8.000000 5.000000 1237.000000 8.000000 24.000000
\n", - "

8 rows \u00d7 53 columns

\n", - "
" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 5, + "output_type": "stream", + "stream": "stdout", "text": [ - " TAZ SERIALNO PUMA5 income \\\n", - "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", - "mean 752.439040 4924260.422350 2168.287950 77684.096720 \n", - "std 430.258155 2863687.886756 516.271349 81341.474187 \n", - "min 1.000000 496.000000 1000.000000 -20000.000000 \n", - "25% 374.000000 2456274.500000 2104.000000 26500.000000 \n", - "50% 764.000000 4895910.500000 2303.000000 58000.000000 \n", - "75% 1144.000000 7357629.750000 2410.000000 100000.000000 \n", - "max 1454.000000 9999811.000000 2714.000000 1237000.000000 \n", - "\n", - " PERSONS HHT UNITTYPE NOC \\\n", - "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", - "mean 2.580780 2.642540 0.077640 0.468280 \n", - "std 1.606362 2.066835 0.367387 0.913982 \n", - "min 1.000000 0.000000 0.000000 0.000000 \n", - "25% 1.000000 1.000000 0.000000 0.000000 \n", - "50% 2.000000 1.000000 0.000000 0.000000 \n", - "75% 4.000000 4.000000 0.000000 1.000000 \n", - "max 25.000000 7.000000 2.000000 10.000000 \n", - "\n", - " BLDGSZ TENURE ... bucketBin \\\n", - "count 100000.000000 100000.000000 ... 100000.000000 \n", - "mean 3.529460 1.892780 ... 4.484880 \n", - "std 2.517375 1.010353 ... 2.871932 \n", - "min 0.000000 0.000000 ... 0.000000 \n", - "25% 2.000000 1.000000 ... 2.000000 \n", - "50% 2.000000 2.000000 ... 4.000000 \n", - "75% 5.000000 3.000000 ... 7.000000 \n", - "max 10.000000 4.000000 ... 
9.000000 \n", - "\n", - " originalPUMA hmultiunit num_young_adults drivers \\\n", - "count 100000.000000 100000.000000 100000.000000 100000.000000 \n", - "mean 2168.287950 0.401800 0.394180 2.063810 \n", - "std 516.271349 0.490264 0.728922 1.122662 \n", - "min 1000.000000 0.000000 0.000000 0.000000 \n", - "25% 2104.000000 0.000000 0.000000 1.000000 \n", - "50% 2303.000000 0.000000 0.000000 2.000000 \n", - "75% 2410.000000 1.000000 1.000000 2.000000 \n", - "max 2714.000000 1.000000 9.000000 25.000000 \n", - "\n", - " num_children num_adolescents income_in_thousands \\\n", - "count 100000.000000 100000.000000 100000.000000 \n", - "mean 0.357290 0.060680 77.684097 \n", - "std 0.760368 0.260074 81.341474 \n", - "min 0.000000 0.000000 -20.000000 \n", - "25% 0.000000 0.000000 26.500000 \n", - "50% 0.000000 0.000000 58.000000 \n", - "75% 0.000000 0.000000 100.000000 \n", - "max 8.000000 5.000000 1237.000000 \n", - "\n", - " num_young_children num_college_age \n", - "count 100000.000000 100000.000000 \n", - "mean 0.159680 0.226650 \n", - "std 0.462498 0.589748 \n", - "min 0.000000 0.000000 \n", - "25% 0.000000 0.000000 \n", - "50% 0.000000 0.000000 \n", - "75% 0.000000 0.000000 \n", - "max 8.000000 24.000000 \n", - "\n", - "[8 rows x 53 columns]" + "\n", + "Int64Index: 10000 entries, 688607 to 2424588\n", + "Data columns (total 61 columns):\n", + "TAZ 10000 non-null int64\n", + "SERIALNO 10000 non-null int64\n", + "PUMA5 10000 non-null int64\n", + "income 10000 non-null int64\n", + "PERSONS 10000 non-null int64\n", + "HHT 10000 non-null int64\n", + "UNITTYPE 10000 non-null int64\n", + "NOC 10000 non-null int64\n", + "BLDGSZ 10000 non-null int64\n", + "TENURE 10000 non-null int64\n", + "VEHICL 10000 non-null int64\n", + "hinccat1 10000 non-null int64\n", + "hinccat2 10000 non-null int64\n", + "hhagecat 10000 non-null int64\n", + "hsizecat 10000 non-null int64\n", + "hfamily 10000 non-null int64\n", + "hunittype 10000 non-null int64\n", + "hNOCcat 10000 non-null int64\n", + 
"hwrkrcat 10000 non-null int64\n", + "h0004 10000 non-null int64\n", + "h0511 10000 non-null int64\n", + "h1215 10000 non-null int64\n", + "h1617 10000 non-null int64\n", + "h1824 10000 non-null int64\n", + "h2534 10000 non-null int64\n", + "h3549 10000 non-null int64\n", + "h5064 10000 non-null int64\n", + "h6579 10000 non-null int64\n", + "h80up 10000 non-null int64\n", + "workers 10000 non-null int64\n", + "hwork_f 10000 non-null int64\n", + "hwork_p 10000 non-null int64\n", + "huniv 10000 non-null int64\n", + "hnwork 10000 non-null int64\n", + "hretire 10000 non-null int64\n", + "hpresch 10000 non-null int64\n", + "hschpred 10000 non-null int64\n", + "hschdriv 10000 non-null int64\n", + "htypdwel 10000 non-null int64\n", + "hownrent 10000 non-null int64\n", + "hadnwst 10000 non-null int64\n", + "hadwpst 10000 non-null int64\n", + "hadkids 10000 non-null int64\n", + "bucketBin 10000 non-null int64\n", + "originalPUMA 10000 non-null int64\n", + "hmultiunit 10000 non-null int64\n", + "income_segment 10000 non-null category\n", + "non_workers 10000 non-null int64\n", + "family 10000 non-null bool\n", + "num_young_adults 10000 non-null float64\n", + "household_type 10000 non-null object\n", + "auto_ownership 10000 non-null int64\n", + "drivers 10000 non-null float64\n", + "num_under16_not_at_school 10000 non-null int64\n", + "home_taz 10000 non-null int64\n", + "num_children 10000 non-null float64\n", + "non_family 10000 non-null bool\n", + "num_adolescents 10000 non-null float64\n", + "income_in_thousands 10000 non-null float64\n", + "num_young_children 10000 non-null float64\n", + "num_college_age 10000 non-null float64\n", + "dtypes: bool(2), category(1), float64(7), int64(50), object(1)\n", + "memory usage: 4.5 MB\n" ] } ], - "prompt_number": 5 + "prompt_number": 7 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.get_table(\"persons\").to_frame().info()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": 
"stream", + "stream": "stdout", + "text": [ + "\n", + "Int64Index: 25495 entries, 29 to 7053105\n", + "Data columns (total 34 columns):\n", + "household_id 25495 non-null int64\n", + "age 25495 non-null int64\n", + "RELATE 25495 non-null int64\n", + "ESR 25495 non-null int64\n", + "GRADE 25495 non-null int64\n", + "PNUM 25495 non-null int64\n", + "PAUG 25495 non-null int64\n", + "DDP 25495 non-null int64\n", + "sex 25495 non-null int64\n", + "WEEKS 25495 non-null int64\n", + "HOURS 25495 non-null int64\n", + "MSP 25495 non-null int64\n", + "POVERTY 25495 non-null int64\n", + "EARNS 25495 non-null int64\n", + "pagecat 25495 non-null int64\n", + "pemploy 25495 non-null int64\n", + "pstudent 25495 non-null int64\n", + "ptype 25495 non-null int64\n", + "padkid 25495 non-null int64\n", + "school_taz 25495 non-null int64\n", + "mandatory_tour_frequency 25495 non-null object\n", + "employed_cat 25495 non-null object\n", + "student_is_employed 25495 non-null bool\n", + "roundtrip_auto_time_to_school 25495 non-null float64\n", + "cdap_activity 25495 non-null object\n", + "distance_to_school 25495 non-null float64\n", + "workplace_taz 25495 non-null int64\n", + "roundtrip_auto_time_to_work 25495 non-null float64\n", + "distance_to_work 25495 non-null float64\n", + "student_cat 25495 non-null object\n", + "ptype_cat 25495 non-null object\n", + "under16_not_at_school 25495 non-null bool\n", + "home_taz 25495 non-null int64\n", + "nonstudent_to_school 25495 non-null bool\n", + "dtypes: bool(3), float64(4), int64(22), object(5)\n", + "memory usage: 6.3 MB\n" + ] + } + ], + "prompt_number": 8 }, { "cell_type": "code", @@ -720,8 +418,7 @@ "input": [], "language": "python", "metadata": {}, - "outputs": [], - "prompt_number": 5 + "outputs": [] } ], "metadata": {}