diff --git a/activitysim/defaults/variables.py b/activitysim/defaults/variables.py index c50a58bba7..03b8af8b2f 100644 --- a/activitysim/defaults/variables.py +++ b/activitysim/defaults/variables.py @@ -20,6 +20,11 @@ def income_segment(households): labels=[1, 2, 3, 4]) +@sim.column("households") +def non_workers(households, persons): + return persons.household_id.value_counts() - households.workers + + @sim.column("households") def drivers(households, persons): # we assume that everyone 16 and older is a potential driver diff --git a/example/README.md b/example/README.md new file mode 100644 index 0000000000..5789e14b3d --- /dev/null +++ b/example/README.md @@ -0,0 +1,64 @@ +This is a list of items to double check before using in practice: + +* Make sure the units in things like distance_to_work match the walk thresholds + in the mandatory tour frequency spec. The original divided by 100. This is + true also of round trip auto to work and round trip auto to school. + +* There might be a few variables left off of some of the models. Look for +`head` in reading of the spec files as this is meant to eliminate some of the + rows. Also can look for `#` to comment out variables in the spec. + +* Go back to the 3 school location choices, and run the models for the +appropriate persons. + +* Probably needs code review of the variable definitions. How much of the +variable definitions are shared between regions and how much unique? Age +categories are shared? Income categories are unique? + + + + +A few overarching principles + +* A little discussion of "NOT so object oriented" - this is more like a +database - data is in standard tables, NOT in objects + +* The implications of this are that most of the core code is pandas and thus +the quality is controlled by the larger community. We are thankful that its +quality is very high. 
Specifically, there's not so much code in activitysim +"proper" + +* What it takes to add a new model + * define a new model + * define any new data sources necessary + * add any new assumptions in settings.yaml + * co-create the spec and any variables that are too complicated (or + reusable) for the spec + * run in notebook + +* Literally everything is really Python functions that compute something. +Case study of `num_under16_not_at_school` to show the inter-dependencies. + + + + +A few questions about "best practices" + +* What to put into the default data sources and variable specs and what to +put in the example / client-specific stuff? + +* Want to split up injectables from variables from tables or all one big file + so it's easier to search? + +* How much variable computation to put in excel versus Python + +* There were some hard coded limits in the original csv - (area_type < 4 and +distance_to_work < 3) - these are now just left in the csv spec. Why would +this be different than (income_in_thousands > 50)? I've made an effort to +not have such "magic numbers" in Python code. (Elizabeth: MAX_NUM_AUTOS +exists now) + +* Want to name or number the person types in the spec files? + +* Testing for client-specific code? It's harder because outputs are "data +dependent." It's easier to take a small dataset and make sure it always runs. 
\ No newline at end of file diff --git a/example/configs/auto_ownership.csv b/example/configs/auto_ownership.csv new file mode 100644 index 0000000000..cc1dae874a --- /dev/null +++ b/example/configs/auto_ownership.csv @@ -0,0 +1,30 @@ +Description,Expression,cars0,cars1,cars2,cars3,cars4 +2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616 +3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208 +4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807 +Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313 +Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107 +Persons age 25-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596 +Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627 +Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769 +"Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797 +"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619 +"Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147 +"Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147 +"Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 +"Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 +"Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 +"Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 +Constants,@1,,1.1865,-1.0846,-3.2502,-5.313 +San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458 +Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372 +Napa county,county_name == 
'Napa',,-0.566,-0.4429,-0.2372,-0.2372 +Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372 +Marin county,county_name == 'Marin',,-0.2434,0,0,0 +"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626 +"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646 +"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053 +"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117 +"Retail accessibility by non-motorized, if 0 workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 +"Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 +"Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693 diff --git a/example/configs/auto_ownership_coeffs.csv b/example/configs/auto_ownership_coeffs.csv deleted file mode 100644 index 0fe176c110..0000000000 --- a/example/configs/auto_ownership_coeffs.csv +++ /dev/null @@ -1 +0,0 @@ -Description,Expression,cars0,cars1,cars2,cars3,cars4 2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616 3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208 4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807 Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313 Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107 Persons age 35-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596 Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627 Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769 "Number of workers, capped at 
3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797 "Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619 "Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147 "Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147 "Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 "Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654 "Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766 Constants,@1,,1.1865,-1.0846,-3.2502,-5.313 San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458 Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372 Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372 Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372 Marin county,county_name == 'Marin',,-0.2434,0,0,0 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626 "Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053 "Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117 "Retail accessibility by non-motorized, if 0 
workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03 "Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693 \ No newline at end of file diff --git a/example/configs/mandatory_tour_frequency.csv b/example/configs/mandatory_tour_frequency.csv new file mode 100644 index 0000000000..9c0679fcf5 --- /dev/null +++ b/example/configs/mandatory_tour_frequency.csv @@ -0,0 +1,100 @@ +Description,Expression,work1,work2,school1,school2,work_and_school +Full-time worker alternative-specific constants,ptype == 1,0,-3.3781,,, +Part-time worker alternative-specific constants,ptype == 2,0,-3.0476,,, +University student alternative-specific constants,ptype == 3,2.166,-1.3965,0,-3.7429,0.1073 +Non-working adult alternative-specific constants,ptype == 4,,,,, +Retired alternative-specific constants,ptype == 5,,,,, +Driving-age child alternative-specific constants,ptype == 6,,,0,-3.136,-4.4362 +Pre-driving age child who is in school alternative-specific constants,ptype == 7,,,0,-3.9703, +Female - Full-time worker interaction,(ptype == 1) & (sex == 2),0,-0.2255,0.1592,,-0.3442 +Female - Part-time worker interaction,(ptype == 2) & (sex == 2),0,-0.2255,0.1592,,-0.3442 +Female - University student interaction,(ptype == 3) & (sex == 2),0.1737,-0.2255,0.1592,0.114,-0.3442 +Female - Non-working adult interaction,(ptype == 4) & (sex == 2),0,-0.2255,0.1592,, +Female - Retired interaction,(ptype == 5) & (sex == 2),0,-0.2255,0.1592,, +Female - Driving-age child interaction,(ptype == 6) & (sex == 2),0.1737,,0,0.114,-0.3442 +Female - Pre-driving age child who is in school interaction,(ptype == 7) & (sex == 2),0.1737,,0,0.114, +Under 35 - Full-time worker interaction,(ptype == 1) & (age <= 35),0,-0.1375,0.7218,,0.9761 +Under 35 - Part-time worker interaction,(ptype == 2) & (age <= 
35),0,-0.1375,0.7218,,0.9761 +Under 35 - University student interaction,(ptype == 3) & (age <= 35),-0.4629,-0.1375,0,1.275,0.9761 +Under 35 - Non-working adult interaction,(ptype == 4) & (age <= 35),0,-0.1375,0.7218,, +Can walk to work - Full-time worker interaction,(ptype == 1) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Part-time worker interaction,(ptype == 2) & (distance_to_work < 3),,0.5268,,, +Can walk to work - University student interaction,(ptype == 3) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Non-working adult interaction,(ptype == 4) & (distance_to_work < 3),,0.5268,,, +Can walk to work - Retired interaction,(ptype == 5) & (distance_to_work < 3),,0.5268,,, +Can walk to school - University student interaction,(ptype == 3) & (distance_to_school < 3),,,,0.7114, +Can walk to school - Driving-age child interaction,(ptype == 6) & (distance_to_school < 3),,,,0.7114, +Can walk to school - Pre-driving age child who is in school interaction,(ptype == 7) & (distance_to_school < 3),,,,0.7114, +Can walk to work or school - Full-time worker interaction,(ptype == 1) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - Part-time worker interaction,(ptype == 2) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - University student interaction,(ptype == 3) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Can walk to work or school - Driving-age child interaction,(ptype == 6) & (distance_to_work < 3 | distance_to_school < 3),,,,,0.1391 +Round trip auto time to work - Full-time worker interaction,(ptype == 1) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - Part-time worker interaction,(ptype == 2) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - University student interaction,(ptype == 3) * roundtrip_auto_time_to_work,,-0.0035,,,-0.0031 +Round trip auto time to work - Non-working adult 
interaction,(ptype == 4) * roundtrip_auto_time_to_work,,-0.0035,,, +Round trip auto time to work - Retired,(ptype == 5) * roundtrip_auto_time_to_work,,-0.0035,,, +Round trip auto time to school - University student interaction,(ptype == 3) * roundtrip_auto_time_to_school,,,,-0.0034,-0.0031 +Round trip auto time to school - Driving-age child interaction,(ptype == 6) * roundtrip_auto_time_to_school,,,,-0.0034,-0.0031 +Round trip auto time to school - Pre-driving age child who is in school interaction,(ptype == 7) * roundtrip_auto_time_to_school,,,,-0.0034, +Student is employed - University student interaction,(ptype == 3) & student_is_employed,3.014,3.014,,,3.014 +Student is employed - Driving-age child interaction,(ptype == 6) & student_is_employed,3.014,3.014,,,3.014 +Non-student goes to school - Full-time worker interaction,(ptype == 1) & nonstudent_to_school,,,3.883,,3.883 +Non-student goes to school - Part-time worker interaction,(ptype == 2) & nonstudent_to_school,,,3.883,,3.883 +Non-student goes to school - Non-working adult interaction,(ptype == 4) & nonstudent_to_school,,,3.883,, +Non-student goes to school - Retired interaction,(ptype == 5) & nonstudent_to_school,,,3.883,, +No cars in household - Full-time worker interaction,(ptype == 1) & (auto_ownership == 0),,-1.306,,,-1.302 +No cars in household - Part-time worker interaction,(ptype == 2) & (auto_ownership == 0),,-1.306,,,-1.302 +No cars in household - University student interaction,(ptype == 3) & (auto_ownership == 0),,-1.306,,-1.413,-1.302 +No cars in household - Non-working adult interaction,(ptype == 4) & (auto_ownership == 0),,-1.306,,, +No cars in household - Retired interaction,(ptype == 5) & (auto_ownership == 0),,-1.306,,, +No cars in household - Driving-age student interaction,(ptype == 6) & (auto_ownership == 0),,,,-1.413,-1.302 +No cars in household - Pre-driving age child who is in school interaction,(ptype == 7) & (auto_ownership == 0),,,,-1.413, +Fewer cars than drivers in household - 
University student interaction,(ptype == 3) & (auto_ownership < drivers),,,,-0.5759, +Fewer cars than drivers in household - Driving-age student interaction,(ptype == 6) & (auto_ownership < drivers),,,,-0.5759, +Fewer cars than drivers in household - Pre-driving age child who is in school interaction,(ptype == 7) & (auto_ownership < drivers),,,,-0.5759, +Number of preschool children in household - Full-time worker interaction,(ptype == 1) * (num_young_children),0,-0.1478,-0.1335,,-0.1251 +Number of preschool children in household - Part-time worker interaction,(ptype == 2) * (num_young_children),0,-0.1478,-0.1335,,-0.1251 +Number of preschool children in household - University student interaction,(ptype == 3) * (num_young_children),0.2191,-0.1478,0,-0.5577,-0.1251 +Number of preschool children in household - Non-working adult interaction,(ptype == 4) * (num_young_children),0,-0.1478,-0.1335,, +Number of preschool children in household - Retired interaction,(ptype == 5) * (num_young_children),0,-0.1478,-0.1335,, +Number of preschool children in household - Driving-age student interaction,(ptype == 6) * (num_young_children),0.2191,,0,-0.5577,-0.1251 +Number of preschool children in household - Pre-driving age child who is in school interaction,(ptype == 7) * (num_young_children),0.2191,,0,-0.5577, +Number of non-workers in the household - Full-time worker interaction,(ptype == 1) * non_workers,,,0.2574,, +Number of non-workers in the household - Part-time worker interaction,(ptype == 2) * non_workers,,,0.2574,, +Household income higher than $50k - Full-time worker interaction,(ptype == 1) & (income_in_thousands > 50),0,,0.0347,,0.0347 +Household income higher than $50k - Part-time worker interaction,(ptype == 2) & (income_in_thousands > 50),0,,0.0347,,0.0347 +Household income higher than $50k - University student interaction,(ptype == 3) & (income_in_thousands > 50),-0.0528,-0.0528,0,,-0.0528 +Household income higher than $50k - Non-working adult interaction,(ptype 
== 4) & (income_in_thousands > 50),0,,0.0347,, +Household income higher than $50k - Retired interaction,(ptype == 5) & (income_in_thousands > 50),0,,0.0347,, +Household income higher than $50k - Driving-age student interaction,(ptype == 6) & (income_in_thousands > 50),-0.0528,,0,,-0.0528 +Household income higher than $50k - Pre-driving age child who is in school interaction,(ptype == 7) & (income_in_thousands > 50),-0.0528,,0,, +Non-family household - Full-time worker interaction,(ptype == 1) & non_family,0,,-0.25,,-0.25 +Non-family household - Part-time worker interaction,(ptype == 2) & non_family,0,,-0.25,,-0.25 +Non-family household - University student interaction,(ptype == 3) & non_family,-0.1792,-0.1792,0,,-0.1792 +Non-family household - Non-working adult interaction,(ptype == 4) & non_family,0,,-0.25,, +Non-family household - Retired interaction,(ptype == 5) & non_family,0,,-0.25,, +Non-family household - Driving-age student interaction,(ptype == 6) & non_family,-0.1792,,0,,-0.1792 +Non-family household - Pre-driving age child who is in school interaction,(ptype == 7) & non_family,-0.1792,,0,, +Number of children under 16 not at school - Full-time worker interaction,(ptype == 1) * num_under16_not_at_school,,0.1804,,,-0.1955 +Number of children under 16 not at school - Part-time worker interaction,(ptype == 2) * num_under16_not_at_school,,0.1804,,,-0.1955 +Number of children under 16 not at school - University student interaction,(ptype == 3) * num_under16_not_at_school,,0.1804,,0.0866,-0.1955 +Number of children under 16 not at school - Non-working adult interaction,(ptype == 4) * num_under16_not_at_school,,0.1804,,, +Number of children under 16 not at school - Retired,(ptype == 5) * num_under16_not_at_school,,0.1804,,, +Number of children under 16 not at school - Driving-age student interaction,(ptype == 6) * num_under16_not_at_school,,,,0.0866,-0.1955 +Number of children under 16 not at school - Pre-driving age child who is in school interaction,(ptype == 
7) * num_under16_not_at_school,,,,0.0866, +Home is in urban area - Full-time worker interaction,(ptype == 1) & (area_type < 4),0,0.2308,-0.1361,,-0.3509 +Home is in urban area - Part-time worker interaction,(ptype == 2) & (area_type < 4),0,0.2308,-0.1361,,-0.3509 +Home is in urban area - University student interaction,(ptype == 3) & (area_type < 4),-0.2831,0.2308,0,0.317,-0.3509 +Home is in urban area - Non-working adult interaction,(ptype == 4) & (area_type < 4),0,0.238,-0.1361,, +Home is in urban area - Retired interaction,(ptype == 5) & (area_type < 4),0,0.2308,-0.1361,, +Home is in urban area - Driving-age student interaction,(ptype == 6) & (area_type < 4),-0.2831,,0,0.317,-0.3509 +Home is in urban area - Pre-driving age child who is in school interaction,(ptype == 7) & (area_type < 4),-0.2831,,0,0.317, +Unavailable: Full-time worker,ptype == 1,,,,-999, +Unavailable: Part-time worker,ptype == 2,,,,-999, +Unavailable: Non-working adult,ptype == 4,,,,-999,-999 +Unavailable: Retired,ptype == 5,,,,-999,-999 +Unavailable: Driving-age child,ptype == 6,-999,-999,,, +Unavailable: Pre-driving age child who is in school,ptype == 7,,-999,,,-999 +Unavailable: Work tours for those with no usual work location,~(workplace_taz > -1),-999,-999,,,-999 +Unavailable: School tours for those with no usual school location,~(school_taz > -1),,,-999,-999,-999 diff --git a/example/configs/settings.yaml b/example/configs/settings.yaml index 05cf2cdb08..3f1140ab14 100644 --- a/example/configs/settings.yaml +++ b/example/configs/settings.yaml @@ -1,6 +1,6 @@ store: mtc_asim.h5 -households_sample_size: 100000 +households_sample_size: 10000 county_map: San Francisco: 1 @@ -12,3 +12,34 @@ county_map: Napa: 7 Sonoma: 8 Marin: 9 + +employment_map: + 1: "full" + 2: "part" + 3: "not" + 4: "child" + +student_map: + 1: "high" + 2: "college" + 3: "not" + +person_type_map: + 1: "full" + 2: "part" + 3: "university" + 4: "nonwork" + 5: "retired" + 6: "driving" + 7: "school" + 8: "preschool" + 
+household_type_map: + 0: "null" + 1: "family_married" + 2: "family_male" + 3: "family_female" + 4: "nonfamily_male_alone" + 5: "nonfamily_male_notalone" + 6: "nonfamily_female_alone" + 7: "nonfamily_female_notalone" \ No newline at end of file diff --git a/example/configs/workplace_location.csv b/example/configs/workplace_location.csv index af7b3ce918..0adf1b611d 100644 --- a/example/configs/workplace_location.csv +++ b/example/configs/workplace_location.csv @@ -1 +1,17 @@ -Description,Expression,Alt "Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428 "Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 "Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 "Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 "Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 "Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 "Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 "Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 "Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 "Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 "Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 "No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 "No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 "No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 "No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 Mode choice logsum,mcLogsum,0.3 \ No newline at end of file +Description,Expression,Alt +"Distance, piecewise linear from 0 to 1 
miles",@df.distance.clip(1),-0.8428 +"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104 +"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783 +"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285 +"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917 +"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15 +"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02 +"Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1 +"Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1 +"Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1 +"Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1 +"No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999 +"No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999 +"No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999 +"No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999 +Mode choice logsum,#mcLogsum,0.3 diff --git a/example/configs/workplace_location_size_terms.csv b/example/configs/workplace_location_size_terms.csv index f9b7da433a..6ef694785a 100644 --- a/example/configs/workplace_location_size_terms.csv +++ b/example/configs/workplace_location_size_terms.csv @@ -1 +1,16 @@ -purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 
university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 school,grade,0,0,0,0,0,0,0,1,0,0,0 school,high,0,0,0,0,0,0,0,0,1,0,0 escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 \ No newline at end of file +purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE +work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0 +work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0 +work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0 +work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0 +university,university,0,0,0,0,0,0,0,0,0,0.592,0.408 +school,grade,0,0,0,0,0,0,0,1,0,0,0 +school,high,0,0,0,0,0,0,0,0,1,0,0 +escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 +escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0 +shopping,shopping,0,1,0,0,0,0,0,0,0,0,0 +eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0 +othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0 +social,social,0,0.522,0,0.478,0,0,0,0,0,0,0 +othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0 +atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0 diff --git a/example/models.py b/example/models.py index 941d9973c9..a5e5730943 100644 --- a/example/models.py +++ b/example/models.py @@ -1,4 +1,5 @@ import urbansim.sim.simulation as sim +import urbansim.utils.misc as usim_misc import os from activitysim import activitysim as asim import openmatrix as omx @@ -7,45 +8,102 @@ import pandas as pd +# number of alternatives in the auto ownership model: 0 to MAX_NUM_CARS - 1 cars +MAX_NUM_CARS = 5 + + +""" +This part of this file is currently creating small tables to serve as +alternatives in the various models +""" + + @sim.table() def auto_alts(): - return 
asim.identity_matrix(["cars%d" % i for i in range(5)]) + return asim.identity_matrix(["cars%d" % i for i in range(MAX_NUM_CARS)]) + +@sim.table() +def mandatory_tour_frequency_alts(): + return asim.identity_matrix(["work1", "work2", "school1", "school2", + "work_and_school"]) +# these are the alternatives for the workplace choice @sim.table() def zones(): # I grant this is a weird idiom but it helps to name the index return pd.DataFrame({"TAZ": np.arange(1454)+1}).set_index("TAZ") +""" +Read in the omx files and create the skim objects +""" + + @sim.injectable() def nonmotskm_omx(): return omx.openFile('data/nonmotskm.omx') @sim.injectable() -def distance_matrix(nonmotskm_omx): +def distance_skim(nonmotskm_omx): + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +@sim.injectable() +def sovam_skim(nonmotskm_omx): + # FIXME use the right omx file + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +@sim.injectable() +def sovmd_skim(nonmotskm_omx): + # FIXME use the right omx file return skim.Skim(nonmotskm_omx['DIST'], offset=-1) +@sim.injectable() +def sovpm_skim(nonmotskm_omx): + # FIXME use the right omx file + return skim.Skim(nonmotskm_omx['DIST'], offset=-1) + + +""" +Read in the spec files and reformat as necessary +""" + + @sim.injectable() def auto_ownership_spec(): - f = os.path.join('configs', "auto_ownership_coeffs.csv") + f = os.path.join('configs', "auto_ownership.csv") + # FIXME should read in all variables and comment out ones not used return asim.read_model_spec(f).head(4*26) @sim.injectable() def workplace_location_spec(): f = os.path.join('configs', "workplace_location.csv") + # FIXME should read in all variables and comment out ones not used return asim.read_model_spec(f).head(15) +@sim.injectable() +def mandatory_tour_frequency_spec(): + f = os.path.join('configs', "mandatory_tour_frequency.csv") + return asim.read_model_spec(f) + + @sim.table() def workplace_size_spec(): f = os.path.join('configs', 'workplace_location_size_terms.csv') 
return pd.read_csv(f) +""" +This is a special submodel for the workplace location choice +""" + + @sim.table() def workplace_size_terms(land_use, workplace_size_spec): """ @@ -70,6 +128,12 @@ def workplace_size_terms(land_use, workplace_size_spec): return new_df +""" +Auto ownership is a standard model which predicts how many cars a household +with given characteristics owns +""" + + @sim.model() def auto_ownership_simulate(households, auto_alts, @@ -86,25 +150,35 @@ def auto_ownership_simulate(households, asim.simple_simulate(choosers, alternatives, auto_ownership_spec, mult_by_alt_col=True) + # map these back to integers + choices = choices.map(dict([("cars%d"%i, i) for i in range(MAX_NUM_CARS)])) + print "Choices:\n", choices.value_counts() sim.add_column("households", "auto_ownership", choices) return model_design +""" +The workplace location model predicts the zones in which various people will +work. Interestingly there's not really any supply side to this model - we +assume there are workplaces for the people to work. +""" + + @sim.model() def workplace_location_simulate(persons, households, zones, workplace_location_spec, - distance_matrix, + distance_skim, workplace_size_terms): choosers = sim.merge_tables(persons.name, tables=[persons, households]) alternatives = zones.to_frame().join(workplace_size_terms.to_frame()) skims = { - "distance": distance_matrix + "distance": distance_skim } choices, model_design = \ @@ -116,12 +190,50 @@ def workplace_location_simulate(persons, mult_by_alt_col=False, sample_size=50) - print "Describe of hoices:\n", choices.describe() + print "Describe of choices:\n", choices.describe() sim.add_column("persons", "workplace_taz", choices) return model_design +""" +This model predicts the frequency of making mandatory trips (see the +alternatives above) - these trips include work and school in some combination. 
+""" + + +@sim.model() +def mandatory_tour_frequency(persons, + households, + land_use, + mandatory_tour_frequency_alts, + mandatory_tour_frequency_spec): + + choosers = sim.merge_tables(persons.name, tables=[persons, + households, + land_use]) + + choices, model_design = \ + asim.simple_simulate(choosers, + mandatory_tour_frequency_alts.to_frame(), + mandatory_tour_frequency_spec, + mult_by_alt_col=True) + + print "Choices:\n", choices.value_counts() + sim.add_column("persons", "mandatory_tour_frequency", choices) + + return model_design + + +""" +This section contains computed columns on each table. +""" + +""" +for the land use table +""" + + @sim.column("land_use") def total_households(land_use): return land_use.local.TOTHH @@ -139,4 +251,147 @@ def total_acres(land_use): @sim.column("land_use") def county_id(land_use): - return land_use.local.COUNTY \ No newline at end of file + return land_use.local.COUNTY + + +""" +for households +""" + +# just a rename / alias +@sim.column("households") +def home_taz(households): + return households.TAZ + + +# map household type ids to strings +@sim.column("households") +def household_type(households, settings): + return households.HHT.map(settings["household_type_map"]) + + +@sim.column("households") +def non_family(households): + return households.household_type.isin(["nonfamily_male_alone", + "nonfamily_male_notalone", + "nonfamily_female_alone", + "nonfamily_female_notalone"]) + + +# can't just invert these unfortunately because there's a null household type +@sim.column("households") +def family(households): + return households.household_type.isin(["family_married", + "family_male", + "family_female"]) + + +@sim.column("households") +def num_under16_not_at_school(persons, households): + return persons.under16_not_at_school.groupby(persons.household_id).size().\ + reindex(households.index).fillna(0) + + +""" +for the persons table +""" +# FIXME - this is my "placeholder" for the CDAP model ;) +@sim.column("persons") 
+def cdap_activity(persons): + return pd.Series(np.random.randint(3, size=len(persons)), + index=persons.index).map({0: 'M', 1: 'N', 2: 'H'}) + + +# convert employment categories to string descriptors +@sim.column("persons") +def employed_cat(persons, settings): + return persons.pemploy.map(settings["employment_map"]) + + +# convert student categories to string descriptors +@sim.column("persons") +def student_cat(persons, settings): + return persons.pstudent.map(settings["student_map"]) + + +# convert person type categories to string descriptors +@sim.column("persons") +def ptype_cat(persons, settings): + return persons.ptype.map(settings["person_type_map"]) + + +# borrowing these definitions from the original code +@sim.column("persons") +def student_is_employed(persons): + return (persons.ptype_cat.isin(['university', 'driving']) & + persons.employed_cat.isin(['full', 'part'])) + + +@sim.column("persons") +def nonstudent_to_school(persons): + return (persons.ptype_cat.isin(['full', 'part', 'nonwork', 'retired']) & + persons.student_cat.isin(['high', 'college'])) + + +@sim.column("persons") +def under16_not_at_school(persons): + return (persons.ptype_cat.isin(["school", "preschool"]) & + persons.cdap_activity.isin(["N", "H"])) + + +@sim.column("persons") +def workplace_taz(persons): + # FIXME this is really because we ask for ALL columns in the persons data + # FIXME frame - urbansim actually only asks for the columns that are used by + # FIXME the model specs in play at that time + return pd.Series(1, persons.index) + + +@sim.column("persons") +def home_taz(households, persons): + return usim_misc.reindex(households.home_taz, + persons.household_id) + + +@sim.column("persons") +def school_taz(persons): + # FIXME need to fix this after getting school lcm working + return persons.workplace_taz + + +# this use the distance skims to compute the raw distance to work from home +@sim.column("persons") +def distance_to_work(persons, distance_skim): + return 
pd.Series(distance_skim.get(persons.home_taz, + persons.workplace_taz), + index=persons.index) + + +# same deal but to school +@sim.column("persons") +def distance_to_school(persons, distance_skim): + return pd.Series(distance_skim.get(persons.home_taz, + persons.school_taz), + index=persons.index) + + +# similar but this adds the am peak travel time to the pm peak travel time in +# the opposite direction (by car) +@sim.column("persons") +def roundtrip_auto_time_to_work(persons, sovam_skim, sovpm_skim): + return pd.Series(sovam_skim.get(persons.home_taz, + persons.workplace_taz) + + sovpm_skim.get(persons.workplace_taz, + persons.home_taz), + index=persons.index) + + +# this adds the am peak travel time to the md peak travel time in +# the opposite direction (by car), assuming students leave school earlier +@sim.column("persons") +def roundtrip_auto_time_to_school(persons, sovam_skim, sovmd_skim): + return pd.Series(sovam_skim.get(persons.home_taz, + persons.school_taz) + + sovmd_skim.get(persons.school_taz, + persons.home_taz), + index=persons.index) \ No newline at end of file diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb index 1c065abf11..e6329cec65 100644 --- a/notebooks/data_mover.ipynb +++ b/notebooks/data_mover.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:afbc3e7040dd9e4a5b21433063f13a6a8abfcc04bcfc6574e7e43376c257cd33" + "signature": "sha256:07f23263339f1751ee4eb702d126692b9ed2785fe8640a8906eb0fb110d9e67a" }, "nbformat": 3, "nbformat_minor": 0, @@ -43,8 +43,10 @@ "col_map = {\n", " \"HHID\": \"household_id\",\n", " \"AGE\": \"age\",\n", + " \"SEX\": \"sex\",\n", " \"hworkers\": \"workers\",\n", - " \"HINC\": \"income\"\n", + " \"HINC\": \"income\",\n", + " \"AREATYPE\": \"area_type\"\n", "}" ], "language": "python", diff --git a/notebooks/simulation.ipynb b/notebooks/simulation.ipynb index 1643e621dd..406a524541 100644 --- a/notebooks/simulation.ipynb +++ b/notebooks/simulation.ipynb @@ -1,7 +1,7 @@ { 
"metadata": { "name": "", - "signature": "sha256:68dd59fb87c331bcb79d97a97557ee2ee411309bd3a85e58d6bee4b169b221df" + "signature": "sha256:a5d923d6fa9383fade7f8566bb3508f52ae45b517d7ec84fd71c1983d3d8598a" }, "nbformat": 3, "nbformat_minor": 0, @@ -39,35 +39,55 @@ "stream": "stdout", "text": [ "Running model 'workplace_location_simulate'\n", - "Describe of hoices:\n" + "Describe of choices:\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "count 258078.000000\n", - "mean 718.510997\n", - "std 422.975764\n", - "min 1.000000\n", - "25% 352.000000\n", - "50% 719.000000\n", - "75% 1083.000000\n", - "max 1454.000000\n", + "count 25495.000000\n", + "mean 718.590704\n", + "std 422.297505\n", + "min 1.000000\n", + "25% 352.000000\n", + "50% 716.000000\n", + "75% 1086.000000\n", + "max 1454.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'workplace_location_simulate': 35.33s" + "Time to execute model 'workplace_location_simulate': 8.91s\n", + "Total time to execute: 8.91s\n" ] - }, + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print sim.get_table(\"persons\").distance_to_work.describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "Total time to execute: 35.34s\n" + "count 25495.000000\n", + "mean 39.004909\n", + "std 24.414926\n", + "min 0.150000\n", + "25% 21.000000\n", + "50% 36.100000\n", + "75% 52.360000\n", + "max 157.170000\n", + "dtype: float64\n" ] } ], - "prompt_number": 2 + "prompt_number": 3 }, { "cell_type": "code", @@ -90,629 +110,307 @@ "output_type": "stream", "stream": "stdout", "text": [ - "cars2 47959\n", - "cars3 38186\n", - "cars1 7528\n", - "cars4 5123\n", - "cars0 1204\n", + "2 4823\n", + "3 3813\n", + "1 732\n", + "4 515\n", + "0 117\n", "dtype: int64\n", - "Time to execute model 'auto_ownership_simulate': 4.62s" + "Time to execute model 'auto_ownership_simulate': 1.16s\n", + 
"Total time to execute: 1.16s\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.run(['mandatory_tour_frequency'])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Running model 'mandatory_tour_frequency'\n", + "WARNING: Describe of columns with no variability:\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ + " count mean std \\\n", + "((ptype == 2) & nonstudent_to_school) * school1 100000 0 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 100000 0 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 100000 0 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 100000 0 0 \n", + "(~(school_taz > -1)) * school1 100000 0 0 \n", + "(~(school_taz > -1)) * school2 100000 0 0 \n", + "(~(school_taz > -1)) * work_and_school 100000 0 0 \n", + "(~(workplace_taz > -1)) * work1 100000 0 0 \n", + "(~(workplace_taz > -1)) * work2 100000 0 0 \n", + "(~(workplace_taz > -1)) * work_and_school 100000 0 0 \n", "\n", - "Total time to execute: 4.62s\n" + " min 25% 50% 75% \\\n", + "((ptype == 2) & nonstudent_to_school) * school1 0 0 0 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 0 0 0 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 0 0 0 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 0 0 0 0 \n", + "(~(school_taz > -1)) * school1 0 0 0 0 \n", + "(~(school_taz > -1)) * school2 0 0 0 0 \n", + "(~(school_taz > -1)) * work_and_school 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work1 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work2 0 0 0 0 \n", + "(~(workplace_taz > -1)) * work_and_school 0 0 0 0 \n", + "\n", + " max \n", + "((ptype == 2) & nonstudent_to_school) * school1 0 \n", + "((ptype == 2) & nonstudent_to_school) * work_and_school 0 \n", + "((ptype == 4) & nonstudent_to_school) * school1 0 \n", + "((ptype == 5) & nonstudent_to_school) * school1 0 \n", + 
"(~(school_taz > -1)) * school1 0 \n", + "(~(school_taz > -1)) * school2 0 \n", + "(~(school_taz > -1)) * work_and_school 0 \n", + "(~(workplace_taz > -1)) * work1 0 \n", + "(~(workplace_taz > -1)) * work2 0 \n", + "(~(workplace_taz > -1)) * work_and_school 0 \n", + "Choices:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "work_and_school 9513\n", + "work2 7568\n", + "work1 4421\n", + "school1 2836\n", + "school2 1157\n", + "dtype: int64\n", + "Time to execute model 'mandatory_tour_frequency': 5.56s\n", + "Total time to execute: 5.56s\n" ] } ], - "prompt_number": 3 + "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ - "sim.get_table(\"land_use\").to_frame().describe()" + "sim.get_table(\"land_use\").to_frame().info()" ], "language": "python", "metadata": {}, "outputs": [ { - "html": [ - "
| \n", - " | DISTRICT | \n", - "SD | \n", - "COUNTY | \n", - "TOTHH | \n", - "HHPOP | \n", - "TOTPOP | \n", - "EMPRES | \n", - "SFDU | \n", - "MFDU | \n", - "HHINCQ1 | \n", - "... | \n", - "hhlds | \n", - "sftaz | \n", - "gqpop | \n", - "employment_density | \n", - "total_acres | \n", - "county_id | \n", - "density_index | \n", - "household_density | \n", - "total_households | \n", - "total_employment | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "... | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1453.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "1454.000000 | \n", - "
| mean | \n", - "14.908528 | \n", - "14.908528 | \n", - "3.835626 | \n", - "1793.688446 | \n", - "4816.408528 | \n", - "4917.978680 | \n", - "2168.684319 | \n", - "1122.798487 | \n", - "670.889959 | \n", - "508.134801 | \n", - "... | \n", - "1793.688446 | \n", - "727.500000 | \n", - "101.570151 | \n", - "9.596395 | \n", - "3146.071457 | \n", - "3.835626 | \n", - "2.279554 | \n", - "6.008186 | \n", - "1793.688446 | \n", - "2247.736589 | \n", - "
| std | \n", - "8.701078 | \n", - "8.701078 | \n", - "2.040153 | \n", - "961.021405 | \n", - "2686.029808 | \n", - "2690.352928 | \n", - "1211.109335 | \n", - "854.895353 | \n", - "717.261660 | \n", - "378.753528 | \n", - "... | \n", - "961.021405 | \n", - "419.877958 | \n", - "393.886676 | \n", - "45.067313 | \n", - "16945.908840 | \n", - "2.040153 | \n", - "3.945717 | \n", - "8.565908 | \n", - "961.021405 | \n", - "3538.356220 | \n", - "
| min | \n", - "1.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "... | \n", - "0.000000 | \n", - "1.000000 | \n", - "-1.000000 | \n", - "0.000000 | \n", - "13.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "
| 25% | \n", - "8.000000 | \n", - "8.000000 | \n", - "3.000000 | \n", - "1200.250000 | \n", - "3288.250000 | \n", - "3384.500000 | \n", - "1460.500000 | \n", - "602.000000 | \n", - "144.500000 | \n", - "257.000000 | \n", - "... | \n", - "1200.250000 | \n", - "364.250000 | \n", - "5.000000 | \n", - "0.877829 | \n", - "230.000000 | \n", - "3.000000 | \n", - "0.550232 | \n", - "1.910701 | \n", - "1200.250000 | \n", - "482.000000 | \n", - "
| 50% | \n", - "15.000000 | \n", - "15.000000 | \n", - "4.000000 | \n", - "1681.500000 | \n", - "4504.500000 | \n", - "4577.000000 | \n", - "2016.000000 | \n", - "1034.000000 | \n", - "460.000000 | \n", - "434.000000 | \n", - "... | \n", - "1681.500000 | \n", - "727.500000 | \n", - "18.000000 | \n", - "2.158701 | \n", - "397.000000 | \n", - "4.000000 | \n", - "1.289224 | \n", - "3.939122 | \n", - "1681.500000 | \n", - "1005.500000 | \n", - "
| 75% | \n", - "20.750000 | \n", - "20.750000 | \n", - "5.000000 | \n", - "2259.750000 | \n", - "6033.750000 | \n", - "6098.500000 | \n", - "2735.500000 | \n", - "1496.000000 | \n", - "907.750000 | \n", - "674.750000 | \n", - "... | \n", - "2259.750000 | \n", - "1090.750000 | \n", - "71.000000 | \n", - "5.492696 | \n", - "883.500000 | \n", - "5.000000 | \n", - "2.337577 | \n", - "6.693238 | \n", - "2259.750000 | \n", - "2215.750000 | \n", - "
| max | \n", - "34.000000 | \n", - "34.000000 | \n", - "9.000000 | \n", - "12542.000000 | \n", - "39671.000000 | \n", - "40020.000000 | \n", - "16799.000000 | \n", - "12413.000000 | \n", - "4920.000000 | \n", - "3754.000000 | \n", - "... | \n", - "12542.000000 | \n", - "1454.000000 | \n", - "7810.000000 | \n", - "877.564767 | \n", - "372520.000000 | \n", - "9.000000 | \n", - "46.360371 | \n", - "90.891304 | \n", - "12542.000000 | \n", - "37950.000000 | \n", - "
8 rows \u00d7 48 columns
\n", - "| \n", - " | TAZ | \n", - "SERIALNO | \n", - "PUMA5 | \n", - "income | \n", - "PERSONS | \n", - "HHT | \n", - "UNITTYPE | \n", - "NOC | \n", - "BLDGSZ | \n", - "TENURE | \n", - "... | \n", - "bucketBin | \n", - "originalPUMA | \n", - "hmultiunit | \n", - "num_young_adults | \n", - "drivers | \n", - "num_children | \n", - "num_adolescents | \n", - "income_in_thousands | \n", - "num_young_children | \n", - "num_college_age | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "... | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "100000.000000 | \n", - "
| mean | \n", - "752.439040 | \n", - "4924260.422350 | \n", - "2168.287950 | \n", - "77684.096720 | \n", - "2.580780 | \n", - "2.642540 | \n", - "0.077640 | \n", - "0.468280 | \n", - "3.529460 | \n", - "1.892780 | \n", - "... | \n", - "4.484880 | \n", - "2168.287950 | \n", - "0.401800 | \n", - "0.394180 | \n", - "2.063810 | \n", - "0.357290 | \n", - "0.060680 | \n", - "77.684097 | \n", - "0.159680 | \n", - "0.226650 | \n", - "
| std | \n", - "430.258155 | \n", - "2863687.886756 | \n", - "516.271349 | \n", - "81341.474187 | \n", - "1.606362 | \n", - "2.066835 | \n", - "0.367387 | \n", - "0.913982 | \n", - "2.517375 | \n", - "1.010353 | \n", - "... | \n", - "2.871932 | \n", - "516.271349 | \n", - "0.490264 | \n", - "0.728922 | \n", - "1.122662 | \n", - "0.760368 | \n", - "0.260074 | \n", - "81.341474 | \n", - "0.462498 | \n", - "0.589748 | \n", - "
| min | \n", - "1.000000 | \n", - "496.000000 | \n", - "1000.000000 | \n", - "-20000.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "... | \n", - "0.000000 | \n", - "1000.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "-20.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "
| 25% | \n", - "374.000000 | \n", - "2456274.500000 | \n", - "2104.000000 | \n", - "26500.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "2.000000 | \n", - "1.000000 | \n", - "... | \n", - "2.000000 | \n", - "2104.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "26.500000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "
| 50% | \n", - "764.000000 | \n", - "4895910.500000 | \n", - "2303.000000 | \n", - "58000.000000 | \n", - "2.000000 | \n", - "1.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "2.000000 | \n", - "2.000000 | \n", - "... | \n", - "4.000000 | \n", - "2303.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "2.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "58.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "
| 75% | \n", - "1144.000000 | \n", - "7357629.750000 | \n", - "2410.000000 | \n", - "100000.000000 | \n", - "4.000000 | \n", - "4.000000 | \n", - "0.000000 | \n", - "1.000000 | \n", - "5.000000 | \n", - "3.000000 | \n", - "... | \n", - "7.000000 | \n", - "2410.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "2.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "100.000000 | \n", - "0.000000 | \n", - "0.000000 | \n", - "
| max | \n", - "1454.000000 | \n", - "9999811.000000 | \n", - "2714.000000 | \n", - "1237000.000000 | \n", - "25.000000 | \n", - "7.000000 | \n", - "2.000000 | \n", - "10.000000 | \n", - "10.000000 | \n", - "4.000000 | \n", - "... | \n", - "9.000000 | \n", - "2714.000000 | \n", - "1.000000 | \n", - "9.000000 | \n", - "25.000000 | \n", - "8.000000 | \n", - "5.000000 | \n", - "1237.000000 | \n", - "8.000000 | \n", - "24.000000 | \n", - "
8 rows \u00d7 53 columns
\n", - "