diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py index ef2e60b95c..a9484e5f91 100644 --- a/activitysim/activitysim.py +++ b/activitysim/activitysim.py @@ -45,6 +45,9 @@ def read_model_spec(fname, expression values are set as the table index. """ cfg = pd.read_csv(fname, comment='#') + + cfg = cfg.dropna(subset=[expression_name]) + # don't need description and set the expression to the index cfg = cfg.drop(description_name, axis=1).set_index(expression_name) return cfg diff --git a/activitysim/cdap/__init__.py b/activitysim/cdap/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/activitysim/cdap/cdap.py b/activitysim/cdap/cdap.py new file mode 100644 index 0000000000..813980fd81 --- /dev/null +++ b/activitysim/cdap/cdap.py @@ -0,0 +1,385 @@ +import itertools + +import numpy as np +import pandas as pd +from zbox import toolz as tz, gen + +from ..activitysim import eval_variables +from .. import mnl + + +def make_interactions(people, hh_id_col, p_type_col): + """ + Make two Pandas DataFrames associating people IDs with two + and three person interactions they have within their households. + + Interactions are strings of numbers representing the makeup + of the interaction, e.g. '12' or '341'. + + Note that for two-person interactions the interaction string is ordered + with the person from the index in the first position of the string + and some other person in the second position. In contrast, + the interaction strings for three-person interactions are not ordered. + The person from the index may be in any position of the string. + + Parameters + ---------- + people : pandas.DataFrame + Table of people data. Must contain at least a household ID + column and a categorization of person type. + hh_id_col : str + Name of the column in `people` that has their household ID. + p_type_col : str + Name of the column in `people` that contains the person type number. 
+ + Returns + ------- + two_interaction : pandas.DataFrame + Interactions between two people. Index will be person IDs taken + from the index of `people`. + The table will have one column called `interaction`. + three_interaction : pandas.DataFrame + Interactions between three people. Index will be person IDs taken + from the index of `people`. + The table will have one column called `interaction`. + + """ + two_fmt = '{}{}'.format + three_fmt = '{}{}{}'.format + two = [] + three = [] + + for hh, df in people.groupby(hh_id_col, sort=False): + # skip households with only one person + if len(df) == 1: + continue + + ptypes = df[p_type_col] + + for pA, pB in itertools.permutations(df.index, 2): + two.append((pA, two_fmt(*ptypes[[pA, pB]]))) + + # now skip households with two people + if len(df) == 2: + continue + + for idx in itertools.combinations(df.index, 3): + combo = three_fmt(*ptypes[list(idx)]) + three.extend((p, combo) for p in idx) + + if two: + two_idx, two_val = zip(*two) + else: + two_idx, two_val = [], [] + + if three: + three_idx, three_val = zip(*three) + else: + three_idx, three_val = [], [] + + return ( + pd.DataFrame({'interaction': two_val}, index=two_idx), + pd.DataFrame({'interaction': three_val}, index=three_idx)) + + +def individual_utilities( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec): + """ + Calculate CDAP utilities for all individuals. + + Parameters + ---------- + people : pandas.DataFrame + DataFrame of individual people data. + hh_id_col : str + Name of the column in `people` that has their household ID. + p_type_col : str + Name of the column in `people` that contains the person type number. + one_spec : pandas.DataFrame + CDAP spec applied to individuals. + two_spec : pandas.DataFrame + CDAP spec applied to interactions between two people. + three_spec : pandas.DataFrame + CDAP spec applied to interactions between three people. 
+ + Returns + ------- + utilities : pandas.DataFrame + Will have index of `people` and columns for each of the alternatives. + + """ + # calculate single person utilities + # evaluate variables from one_spec expressions + # multiply by one_spec alternative values + one_vars = eval_variables(one_spec.index, people) + one_utils = one_vars.dot(one_spec) + + # make two- and three-person interactions + two_int, three_int = make_interactions(people, hh_id_col, p_type_col) + + # calculate two-interaction utilities + # evaluate variables from two_spec expressions + # multiply by two_spec alternative values + # groupby person and sum + two_vars = eval_variables(two_spec.index, two_int) + two_utils = two_vars.dot(two_spec).groupby(level=0).sum() + + # calculate three-interaction utilities + # evaluate variables from three_spec expressions + # multiply by three_spec alternative values + # groupby person and sum + three_vars = eval_variables(three_spec.index, three_int) + three_utils = three_vars.dot(three_spec).groupby(level=0).sum() + + # add one-, two-, and three-person utilities + utils = one_utils.add( + two_utils, fill_value=0).add(three_utils, fill_value=0) + + return utils + + +def initial_household_utilities(utilities, people, hh_id_col): + """ + Create initial household utilities by grouping and summing utilities + from individual household members. + + Parameters + ---------- + utilities : pandas.DataFrame + Should have the index of `people` and columns for each alternative. + people : pandas.DataFrame + DataFrame of individual people data. + hh_id_col : str + Name of the column in `people` that has their household ID. + + Returns + ------- + hh_util : dict of pandas.Series + Keys will be household IDs and values will be Series + mapping alternative choices to their utility. 
+ + """ + hh_util = {} + + alts = utilities.columns + + for hh_id, df in people.groupby(hh_id_col, sort=False): + utils = utilities.loc[df.index] + hh = [] + + for combo in itertools.product(alts, repeat=len(df)): + hh.append( + (combo, utils.lookup(df.index, combo).sum())) + + idx, u = zip(*hh) + hh_util[hh_id] = pd.Series(u, index=idx) + + return hh_util + + +def apply_final_rules(hh_util, people, hh_id_col, final_rules): + """ + Final rules can be used to set the utility values for certain + household alternatives. Often they are set to zero to reflect + the unavailability of certain alternatives to certain types of people. + + This modifies the `hh_util` data inplace. + + Parameters + ---------- + hh_util : dict of pandas.Series + Keys will be household IDs and values will be Series + mapping alternative choices to their utility. + people : pandas.DataFrame + DataFrame of individual people data. + hh_id_col : str + Name of the column in `people` that has their household ID. + final_rules : pandas.DataFrame + This table must have an index of expressions that can be used + to filter the `people` table. It must have two columns: + the first must have the name of the alternative to which the rule + applies, and the second must have the value of the utility for that + alternative. The names of the columns is not important, but + the order is. + + """ + rule_mask = eval_variables(final_rules.index, people) + + for hh_id, df in people.groupby(hh_id_col, sort=False): + mask = rule_mask.loc[df.index] + utils = hh_util[hh_id] + + for exp, row in final_rules.iterrows(): + m = mask[exp].as_matrix() + + # this crazy business combines three things to figure out + # which household alternatives need to be modified by this rule. 
+ # the three things are: + # - the mask of people for whom the rule expression is true (m) + # - the individual alternative to which the rule applies + # (row.iloc[0]) + # - the alternative combinations for the household (combo) + app = [ + ((np.array([row.iloc[0]] * len(utils.index[0])) == combo) & m + ).any() + for combo in utils.index] + + utils[app] = row.iloc[1] + + +def apply_all_people(hh_util, all_people): + """ + Apply utility adjustments to household alternatives. + + This modifies the `hh_util` data inplace. + + Parameters + ---------- + hh_util : dict of pandas.Series + Keys will be household IDs and values will be Series + mapping alternative choices to their utility. + all_people : pandas.DataFrame + Adjustments to household alternatives, with alternatives in the + index and the adjustment values in the first column. + Index should be household alternatives in the form of tuples + containing individual alternatives, e.g. + ('Mandatory', 'Mandatory', 'Mandatory'), where 'Mandatory' is + one of the alternatives available to individual household members. + Note that these may also be expressed as Python code to save space, + so the previous could also be written as ('Mandatory',) * 3. + + """ + # evaluate all the expressions in the all_people index + all_people.index = [eval(x) for x in all_people.index] + all_people = all_people.icol(0) + + matching_idx = {} + + for hh in hh_util.values(): + l = len(hh) + if l in matching_idx: + matching = matching_idx[l] + else: + matching = hh.index.intersection(all_people.index) + matching_idx[l] = matching + + hh.loc[matching] += all_people.loc[matching] + + +def make_household_choices(hh_util): + """ + Decide on the activity pattern for each household. + + Parameters + ---------- + hh_util : dict of pandas.Series + Keys will be household IDs and values will be Series + mapping alternative choices to their utility. 
+ + Returns + ------- + choices : pandas.Series + Maps household ID to chosen alternative, where the alternative + is a tuple of individual utilities. + + """ + # convert hh_util dict to a few DFs with alternatives in the columns + # and household IDs in the index + df_func = tz.compose( + pd.DataFrame.transpose, + pd.DataFrame.from_dict) + grouped_by_size = ( + tz.valfilter(lambda x: len(x) == l, hh_util) + for l in tz.unique(tz.map(len, hh_util.values()))) + dfs = tz.map(df_func, grouped_by_size) + + # go over all the DFs and do utils_to_probs and make_choices + choices = ( + pd.Series( + df.columns[mnl.make_choices(mnl.utils_to_probs(df))].values, + index=df.index) + for df in dfs) + + # concat all the resulting Series + return pd.concat(choices) + + +def household_choices_to_people(hh_choices, people): + """ + Map household choices to people so that we know the activity pattern + for individuals. + + Parameters + ---------- + hh_choices : pandas.Series + Maps household ID to chosen alternative, where the alternative + is a tuple of individual utilities. + people : pandas.DataFrame + DataFrame of individual people data. + + Returns + ------- + choices : pandas.Series + Maps index of `people` to their activity pattern choice. + + """ + return pd.Series( + gen(tz.concat(hh_choices.values)), index=people.index) + + +def run_cdap( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec, + final_rules, all_people): + """ + Choose individual activity patterns for people. + + Parameters + ---------- + people : pandas.DataFrame + Table of people data. Must contain at least a household ID + column and a categorization of person type. + hh_id_col : str + Name of the column in `people` that has their household ID. + p_type_col : str + Name of the column in `people` that contains the person type number. + one_spec : pandas.DataFrame + CDAP spec applied to individuals. + two_spec : pandas.DataFrame + CDAP spec applied to interactions between two people. 
+ three_spec : pandas.DataFrame + CDAP spec applied to interactions between three people. + final_rules : pandas.DataFrame + This table must have an index of expressions that can be used + to filter the `people` table. It must have two columns: + the first must have the name of the alternative to which the rule + applies, and the second must have the value of the utility for that + alternative. The names of the columns are not important, but + the order is. + all_people : pandas.DataFrame + Adjustments to household alternatives, with alternatives in the + index and the adjustment values in the first column. + Index should be household alternatives in the form of tuples + containing individual alternatives, e.g. + ('Mandatory', 'Mandatory', 'Mandatory'), where 'Mandatory' is + one of the alternatives available to individual household members. + Note that these may also be expressed as Python code to save space, + so the previous could also be written as ('Mandatory',) * 3. + + Returns + ------- + choices : pandas.Series + Maps index of `people` to their activity pattern choice, + where that choice is taken from the columns of specs + (so it's important that the specs all refer to alternatives + in the same way). 
+ + """ + ind_utils = individual_utilities( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec) + hh_utils = initial_household_utilities(ind_utils, people, hh_id_col) + if final_rules is not None: + apply_final_rules(hh_utils, people, hh_id_col, final_rules) + if all_people is not None: + apply_all_people(hh_utils, all_people) + hh_choices = make_household_choices(hh_utils) + return household_choices_to_people(hh_choices, people) diff --git a/activitysim/cdap/tests/__init__.py b/activitysim/cdap/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/activitysim/cdap/tests/data/cdap_1_person.csv b/activitysim/cdap/tests/data/cdap_1_person.csv new file mode 100644 index 0000000000..befb8ffa4a --- /dev/null +++ b/activitysim/cdap/tests/data/cdap_1_person.csv @@ -0,0 +1,24 @@ +Description,Expression,Mandatory,NonMandatory,Home +Age filter 1,age < 30,1,0,0 +Age filter 2,age >= 30,0,1,0 +Type 1 and low age,(ptype == 1) & (age < 25),1,0,0 +Type 2 and high age,(ptype == 2) & (age >= 35),0,0,1 +# The rest of these are corrections so results come out the way I want +Person 2 -> H,age == 21,-1,0,1 +Person 5 -> N,age == 24,-1,0,0 +Person 6 -> M,age == 25,0,-1,0 +Person 7 -> M,age == 26,0,-2,-1 +Person 8 -> N,age == 27,-1,0,-1 +Person 9 -> H,age == 28,-1,-2,0 +Person 10 -> M,age == 29,0,0,-2 +Person 11 -> H,age == 30,0,-1,0 +Person 12 -> H,age == 31,-1,-1,0 +Person 13 -> M,age == 32,0,-1,-3 +Person 14 -> M,age == 33,1,-3,-4 +Person 15 -> N,age == 34,-1,0,-2 +Person 16 -> N,age == 35,-1,0,-3 +Person 17 -> H,age == 36,-2,-1,0 +Person 18 -> H,age == 37,0,-2,0 +# This 50 will get zeroed out later by the forbidden alternatives rules +Person 19 -> H,age == 38,50,-4,0 +Person 20 -> M,age == 39,0,-1,-4 diff --git a/activitysim/cdap/tests/data/cdap_2_person.csv b/activitysim/cdap/tests/data/cdap_2_person.csv new file mode 100644 index 0000000000..9fdda047f9 --- /dev/null +++ b/activitysim/cdap/tests/data/cdap_2_person.csv @@ -0,0 +1,8 @@ 
+Description,Expression,Mandatory,NonMandatory,Home +Two type 1,interaction == '11',1,0,0 +Two type 2,interaction == '22',0,1,0 +Two type 3,interaction == '33',0,0,1 +One and two,interaction == '12',1,0,0 +Two and one,interaction == '21',0,1,0 +One + three,"interaction in ['13', '31']",0,0,1 +Two + three,"interaction in ['23', '32']",0,1,0 diff --git a/activitysim/cdap/tests/data/cdap_3_person.csv b/activitysim/cdap/tests/data/cdap_3_person.csv new file mode 100644 index 0000000000..eb7e1794d4 --- /dev/null +++ b/activitysim/cdap/tests/data/cdap_3_person.csv @@ -0,0 +1,11 @@ +Description,Expression,Mandatory,NonMandatory,Home +All one,interaction == '111',1,0,0 +All two,interaction == '222',0,1,0 +All three,interaction == '333',0,0,1 +One + one + two,"interaction in ['112', '121', '211']",0,0,1 +One + one + three,"interaction in ['113', '131', '311']",0,0,1 +One + two + two,"interaction in ['122', '212', '221']",1,0,0 +One + two + three,"interaction in ['123', '132', '213', '231', '312', '321']",0,0,1 +One + three + three,"interaction in ['133', '313', '331']",0,1,0 +Two + two + three,"interaction in ['223', '232', '322']",0,0,1 +Two + three + three,"interaction in ['233', '323', '332']",1,0,0 diff --git a/activitysim/cdap/tests/data/cdap_all_people.csv b/activitysim/cdap/tests/data/cdap_all_people.csv new file mode 100644 index 0000000000..7d838aea5e --- /dev/null +++ b/activitysim/cdap/tests/data/cdap_all_people.csv @@ -0,0 +1,7 @@ +Description,Alternative,Value +Three Person All M,"('Mandatory',) * 3",0 +Three Person All N,"('NonMandatory',) * 3",0 +Three Person All H,"('Home',) * 3",0 +Four Person All M,"('Mandatory',) * 4",0 +Four Person All N,"('NonMandatory',) * 4",0 +Four Person All H,"('Home',) * 4",0 diff --git a/activitysim/cdap/tests/data/cdap_final_rules.csv b/activitysim/cdap/tests/data/cdap_final_rules.csv new file mode 100644 index 0000000000..f33694a182 --- /dev/null +++ b/activitysim/cdap/tests/data/cdap_final_rules.csv @@ -0,0 +1,2 @@ 
+Description,Expression,Alternative,Value +M not allowed for Age 38 and Income 1900,(age == 38) & (income == 1900),Mandatory,0 diff --git a/activitysim/cdap/tests/data/people.csv b/activitysim/cdap/tests/data/people.csv new file mode 100644 index 0000000000..1bb990eb4a --- /dev/null +++ b/activitysim/cdap/tests/data/people.csv @@ -0,0 +1,21 @@ +id,household,ptype,age,income +1,1,1,20,100 +2,2,3,21,200 +3,3,1,22,300 +4,3,1,23,400 +5,4,3,24,500 +6,4,2,25,600 +7,5,3,26,700 +8,5,2,27,800 +9,5,2,28,900 +10,6,1,29,1000 +11,6,3,30,1100 +12,6,1,31,1200 +13,7,1,32,1300 +14,7,3,33,1400 +15,7,2,34,1500 +16,7,2,35,1600 +17,8,1,36,1700 +18,8,3,37,1800 +19,8,2,38,1900 +20,8,1,39,2000 diff --git a/activitysim/cdap/tests/test_cdap.py b/activitysim/cdap/tests/test_cdap.py new file mode 100644 index 0000000000..ffc8cf470d --- /dev/null +++ b/activitysim/cdap/tests/test_cdap.py @@ -0,0 +1,353 @@ +import os.path +from itertools import product + +import pandas as pd +import pandas.util.testing as pdt +import pytest + +from .. 
import cdap +from ...activitysim import read_model_spec + + +@pytest.fixture(scope='module') +def people(): + return pd.read_csv( + os.path.join(os.path.dirname(__file__), 'data', 'people.csv'), + index_col='id') + + +@pytest.fixture(scope='module') +def one_spec(): + return read_model_spec( + os.path.join( + os.path.dirname(__file__), 'data', 'cdap_1_person.csv')) + + +@pytest.fixture(scope='module') +def two_spec(): + return read_model_spec( + os.path.join( + os.path.dirname(__file__), 'data', 'cdap_2_person.csv')) + + +@pytest.fixture(scope='module') +def three_spec(): + return read_model_spec( + os.path.join( + os.path.dirname(__file__), 'data', 'cdap_3_person.csv')) + + +@pytest.fixture(scope='module') +def final_rules(): + return read_model_spec( + os.path.join( + os.path.dirname(__file__), 'data', 'cdap_final_rules.csv')) + + +@pytest.fixture +def all_people(): + return read_model_spec( + os.path.join( + os.path.dirname(__file__), 'data', 'cdap_all_people.csv'), + expression_name='Alternative') + + +@pytest.fixture(scope='module') +def hh_id_col(): + return 'household' + + +@pytest.fixture(scope='module') +def p_type_col(): + return 'ptype' + + +@pytest.fixture(scope='module') +def individual_utils( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec): + return cdap.individual_utilities( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec) + + +@pytest.fixture +def hh_utils(individual_utils, people, hh_id_col): + hh_utils = cdap.initial_household_utilities( + individual_utils, people, hh_id_col) + return hh_utils + + +@pytest.fixture +def hh_choices(random_seed, hh_utils): + return cdap.make_household_choices(hh_utils) + + +def test_make_interactions(people, hh_id_col, p_type_col): + expected_two = pd.DataFrame( + {'interaction': [ + '11', # household 3; person 3 + '11', # household 3; person 4 + '32', # household 4; person 5 + '23', # household 4; person 6 + '32', '32', # household 5; person 7 + '23', '22', # household 5; person 8 + 
'23', '22', # household 5; person 9 + '13', '11', # household 6; person 10 + '31', '31', # household 6; person 11 + '11', '13', # household 6; person 12 + '13', '12', '12', # household 7; person 13 + '31', '32', '32', # household 7; person 14 + '21', '23', '22', # household 7; person 15 + '21', '23', '22', # household 7; person 16 + '13', '12', '11', # household 8; person 17 + '31', '32', '31', # household 8; person 18 + '21', '23', '21', # household 8; person 19 + '11', '13', '12' # household 8; person 20 + ]}, + index=[ + 3, 4, 5, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16, + 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20 + ]) + + expected_three = pd.DataFrame( + {'interaction': [ + '322', '322', '322', # household 5; people 7, 8, 9 + '131', '131', '131', # household 6; people 10, 11, 12 + '132', '132', '132', # household 7; people 13, 14, 15 + '132', '132', '132', # household 7; people 13, 14, 16 + '122', '122', '122', # household 7; people 13, 15, 16 + '322', '322', '322', # household 7; people 14, 15, 16 + '132', '132', '132', # household 8; people 17, 18, 19 + '131', '131', '131', # household 8; people 17, 18, 20 + '121', '121', '121', # household 8; people 17, 19, 20 + '321', '321', '321' # household 8; people 18, 19, 20 + ]}, + index=[ + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 13, 14, 16, 13, 15, 16, 14, 15, 16, + 17, 18, 19, 17, 18, 20, 17, 19, 20, 18, 19, 20 + ]) + + two, three = cdap.make_interactions(people, hh_id_col, p_type_col) + + pdt.assert_frame_equal(two, expected_two) + pdt.assert_frame_equal(three, expected_three) + + +def test_make_interactions_no_interactions(people, hh_id_col, p_type_col): + people = people.loc[[1, 2, 3]] + + two, three = cdap.make_interactions(people, hh_id_col, p_type_col) + + pdt.assert_frame_equal(two, pd.DataFrame(columns=['interaction'])) + pdt.assert_frame_equal(three, pd.DataFrame(columns=['interaction'])) + + +def test_make_interactions_only_twos(people, hh_id_col, 
p_type_col): + people = people.loc[[1, 2, 3, 4, 5, 6]] + + expected_two = pd.DataFrame( + {'interaction': [ + '11', # household 3; person 3 + '11', # household 3; person 4 + '32', # household 4; person 5 + '23', # household 4; person 6 + ]}, + index=[3, 4, 5, 6] + ) + + two, three = cdap.make_interactions(people, hh_id_col, p_type_col) + + pdt.assert_frame_equal(two, expected_two) + pdt.assert_frame_equal(three, pd.DataFrame(columns=['interaction'])) + + +def test_individual_utilities(people, one_spec, individual_utils): + expected = pd.DataFrame([ + [2, 0, 0], # person 1 + [0, 0, 1], # person 2 + [3, 0, 0], # person 3 + [3, 0, 0], # person 4 + [0, 1, 0], # person 5 + [1, 0, 0], # person 6 + [1, 0, 0], # person 7 + [0, 2, 0], # person 8 + [0, 0, 1], # person 9 + [2, 0, 0], # person 10 + [0, 0, 3], # person 11 + [0, 0, 2], # person 12 + [3, 0, 0], # person 13 + [1, 0, 0], # person 14 + [0, 4, 0], # person 15 + [0, 4, 0], # person 16 + [0, 0, 4], # person 17 + [0, 0, 5], # person 18 + [50, 0, 4], # person 19 + [2, 0, 0] # person 20 + ], index=people.index, columns=one_spec.columns) + + pdt.assert_frame_equal( + individual_utils, expected, check_dtype=False, check_names=False) + + +def test_initial_household_utilities(hh_utils): + alts = ['Mandatory', 'NonMandatory', 'Home'] + one_alts = list(product(alts, repeat=1)) + two_alts = list(product(alts, repeat=2)) + three_alts = list(product(alts, repeat=3)) + four_alts = list(product(alts, repeat=4)) + + expected = { + 1: pd.Series([2, 0, 0], index=one_alts), + 2: pd.Series([0, 0, 1], index=one_alts), + 3: pd.Series([6, 3, 3, 3, 0, 0, 3, 0, 0], index=two_alts), + 4: pd.Series([1, 0, 0, 2, 1, 1, 1, 0, 0], index=two_alts), + 5: pd.Series([ + 1, 1, 2, 3, 3, 4, 1, 1, 2, + 0, 0, 1, 2, 2, 3, 0, 0, 1, + 0, 0, 1, 2, 2, 3, 0, 0, 1, + ], index=three_alts), + 6: pd.Series([ + 2, 2, 4, 2, 2, 4, 5, 5, 7, + 0, 0, 2, 0, 0, 2, 3, 3, 5, + 0, 0, 2, 0, 0, 2, 3, 3, 5, + ], index=three_alts), + 7: pd.Series([ + 4, 8, 4, 8, 12, 8, 4, 8, 4, + 
3, 7, 3, 7, 11, 7, 3, 7, 3, + 3, 7, 3, 7, 11, 7, 3, 7, 3, + 1, 5, 1, 5, 9, 5, 1, 5, 1, + 0, 4, 0, 4, 8, 4, 0, 4, 0, + 0, 4, 0, 4, 8, 4, 0, 4, 0, + 1, 5, 1, 5, 9, 5, 1, 5, 1, + 0, 4, 0, 4, 8, 4, 0, 4, 0, + 0, 4, 0, 4, 8, 4, 0, 4, 0 + ], index=four_alts), + 8: pd.Series([ + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 57, 55, 55, 7, 5, 5, 11, 9, 9, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 57, 55, 55, 7, 5, 5, 11, 9, 9, + 56, 54, 54, 6, 4, 4, 10, 8, 8, + 56, 54, 54, 6, 4, 4, 10, 8, 8, + 61, 59, 59, 11, 9, 9, 15, 13, 13 + ], index=four_alts) + } + + assert list(hh_utils.keys()) == list(expected.keys()) + for k in expected: + pdt.assert_series_equal(hh_utils[k], expected[k], check_dtype=False) + + +def test_apply_final_rules(hh_utils, final_rules, people, hh_id_col): + expected = hh_utils.copy() + expected[8] = pd.Series([ + 0, 0, 0, 2, 0, 0, 6, 4, 4, + 0, 0, 0, 2, 0, 0, 6, 4, 4, + 0, 0, 0, 7, 5, 5, 11, 9, 9, + 0, 0, 0, 2, 0, 0, 6, 4, 4, + 0, 0, 0, 2, 0, 0, 6, 4, 4, + 0, 0, 0, 7, 5, 5, 11, 9, 9, + 0, 0, 0, 6, 4, 4, 10, 8, 8, + 0, 0, 0, 6, 4, 4, 10, 8, 8, + 0, 0, 0, 11, 9, 9, 15, 13, 13 + ], index=expected[7].index) + + cdap.apply_final_rules(hh_utils, people, hh_id_col, final_rules) + + for k in expected: + pdt.assert_series_equal(hh_utils[k], expected[k], check_dtype=False) + + +def test_apply_all_people(hh_utils, all_people): + all_people.at["('Mandatory',) * 3", 'Value'] = 300 + all_people.at["('Home',) * 4", 'Value'] = 500 + + expected = hh_utils.copy() + expected[5] = pd.Series([ + 301, 1, 2, 3, 3, 4, 1, 1, 2, + 0, 0, 1, 2, 2, 3, 0, 0, 1, + 0, 0, 1, 2, 2, 3, 0, 0, 1, + ], index=hh_utils[5].index) + expected[6] = pd.Series([ + 302, 2, 4, 2, 2, 4, 5, 5, 7, + 0, 0, 2, 0, 0, 2, 3, 3, 5, + 0, 0, 2, 0, 0, 2, 3, 3, 5, + ], index=hh_utils[6].index) + expected[7] = pd.Series([ + 4, 8, 4, 8, 12, 8, 4, 8, 4, + 3, 7, 3, 7, 11, 7, 3, 7, 3, + 3, 7, 3, 7, 11, 7, 3, 7, 3, + 1, 5, 1, 5, 9, 5, 1, 5, 1, + 0, 4, 0, 4, 8, 4, 0, 4, 0, + 0, 4, 
0, 4, 8, 4, 0, 4, 0, + 1, 5, 1, 5, 9, 5, 1, 5, 1, + 0, 4, 0, 4, 8, 4, 0, 4, 0, + 0, 4, 0, 4, 8, 4, 0, 4, 500 + ], index=hh_utils[7].index) + expected[8] = pd.Series([ + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 57, 55, 55, 7, 5, 5, 11, 9, 9, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 52, 50, 50, 2, 0, 0, 6, 4, 4, + 57, 55, 55, 7, 5, 5, 11, 9, 9, + 56, 54, 54, 6, 4, 4, 10, 8, 8, + 56, 54, 54, 6, 4, 4, 10, 8, 8, + 61, 59, 59, 11, 9, 9, 15, 13, 513 + ], index=hh_utils[8].index) + + cdap.apply_all_people(hh_utils, all_people) + + for k in expected: + pdt.assert_series_equal(hh_utils[k], expected[k], check_dtype=False) + + +def test_make_household_choices(hh_choices): + expected = pd.Series([ + ('Mandatory',), + ('Home',), + ('Mandatory', 'Mandatory'), + ('NonMandatory', 'NonMandatory'), + ('Mandatory', 'NonMandatory', 'Home'), + ('Mandatory', 'Home', 'Home'), + ('Mandatory', 'Mandatory', 'NonMandatory', 'NonMandatory'), + ('Home', 'Home', 'Mandatory', 'NonMandatory')], + index=range(1, 9)) + pdt.assert_series_equal(hh_choices, expected) + + +def test_household_choices_to_people(hh_choices, people): + people_choices = cdap.household_choices_to_people(hh_choices, people) + expected = pd.Series([ + 'Mandatory', + 'Home', + 'Mandatory', 'Mandatory', + 'NonMandatory', 'NonMandatory', + 'Mandatory', 'NonMandatory', 'Home', + 'Mandatory', 'Home', 'Home', + 'Mandatory', 'Mandatory', 'NonMandatory', 'NonMandatory', + 'Home', 'Home', 'Mandatory', 'NonMandatory'], + index=people.index) + pdt.assert_series_equal(people_choices, expected) + + +def test_run_cdap( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec, + final_rules, all_people, random_seed): + people_choices = cdap.run_cdap( + people, hh_id_col, p_type_col, one_spec, two_spec, three_spec, + final_rules, all_people) + expected = pd.Series([ + 'Mandatory', + 'Home', + 'Mandatory', 'Mandatory', + 'NonMandatory', 'NonMandatory', + 'Mandatory', 'NonMandatory', 'Home', + 'Mandatory', 'Home', 'Home', 
+ 'Mandatory', 'Mandatory', 'NonMandatory', 'NonMandatory', + 'Home', 'Home', 'Home', 'NonMandatory'], + index=people.index) + pdt.assert_series_equal(people_choices, expected) diff --git a/activitysim/defaults/models/__init__.py b/activitysim/defaults/models/__init__.py index 1339f6c1a9..9fc4e04846 100644 --- a/activitysim/defaults/models/__init__.py +++ b/activitysim/defaults/models/__init__.py @@ -10,3 +10,4 @@ import non_mandatory_scheduling import workplace_location import mode +import cdap diff --git a/activitysim/defaults/models/auto_ownership.py b/activitysim/defaults/models/auto_ownership.py index e0bd2abf68..d35b2666a6 100644 --- a/activitysim/defaults/models/auto_ownership.py +++ b/activitysim/defaults/models/auto_ownership.py @@ -20,6 +20,7 @@ def auto_ownership_spec(configs_dir): @sim.model() def auto_ownership_simulate(set_random_seed, households_merged, auto_ownership_spec): + choices, _ = asim.simple_simulate( households_merged.to_frame(), auto_ownership_spec) diff --git a/activitysim/defaults/models/cdap.py b/activitysim/defaults/models/cdap.py new file mode 100644 index 0000000000..d50f4ae544 --- /dev/null +++ b/activitysim/defaults/models/cdap.py @@ -0,0 +1,63 @@ +import os +import pandas as pd +import urbansim.sim.simulation as sim +from activitysim import activitysim as asim +from activitysim.cdap import cdap + +""" +CDAP stands for Coordinated Daily Activity Pattern, which is a choice of +high-level activity pattern for each person, in a coordinated way with other +members of a person's household. + +Because Python requires vectorization of computation, there are some specialized +routines in the cdap directory of activitysim for this purpose. This module +simply applies those utilities using the simulation framework. 
+""" + + +@sim.injectable() +def cdap_1_person_spec(configs_dir): + f = os.path.join(configs_dir, 'configs', "cdap_1_person.csv") + return asim.read_model_spec(f).fillna(0) + + +@sim.injectable() +def cdap_2_person_spec(configs_dir): + f = os.path.join(configs_dir, 'configs', "cdap_2_person.csv") + return asim.read_model_spec(f).fillna(0) + + +@sim.injectable() +def cdap_3_person_spec(configs_dir): + f = os.path.join(configs_dir, 'configs', "cdap_3_person.csv") + return asim.read_model_spec(f).fillna(0) + + +@sim.injectable() +def cdap_final_rules(configs_dir): + f = os.path.join(configs_dir, 'configs', "cdap_final_rules.csv") + return asim.read_model_spec(f).fillna(0) + + +@sim.injectable() +def cdap_all_people(configs_dir): + f = os.path.join(configs_dir, 'configs', "cdap_all_people.csv") + return asim.read_model_spec(f).fillna(0) + + +@sim.model() +def cdap_simulate(set_random_seed, persons_merged, + cdap_1_person_spec, cdap_2_person_spec, cdap_3_person_spec, + cdap_final_rules, cdap_all_people): + + choices = cdap.run_cdap(persons_merged.to_frame(), + "household_id", + "ptype", + cdap_1_person_spec, + cdap_2_person_spec, + cdap_3_person_spec, + cdap_final_rules, + cdap_all_people) + + print "Choices:\n", choices.value_counts() + sim.add_column("persons", "cdap_activity", choices) diff --git a/activitysim/defaults/tables/persons.py b/activitysim/defaults/tables/persons.py index 8dbdaaeb73..a0191166ca 100644 --- a/activitysim/defaults/tables/persons.py +++ b/activitysim/defaults/tables/persons.py @@ -34,11 +34,10 @@ def age_16_p(persons): return persons.to_frame(["age"]).eval("16 <= age") -# FIXME - this is my "placeholder" for the CDAP model ;) @sim.column("persons") -def cdap_activity(set_random_seed, persons): - return pd.Series(np.random.randint(3, size=len(persons)), - index=persons.index).map({0: 'M', 1: 'N', 2: 'H'}) +def cdap_activity(persons): + # return a default until it gets filled in by the model + return pd.Series('M', persons.index) # FIXME - these 
are my "placeholder" for joint trip generation diff --git a/activitysim/skim.py b/activitysim/skim.py index ccb04799f6..a4cd046bf6 100644 --- a/activitysim/skim.py +++ b/activitysim/skim.py @@ -72,6 +72,7 @@ class Skims(object): It is assumed that left_key and right_key identify columns in df. The parameter df is usually set by the simulation itself as it's a result of interacting choosers and alternatives. + When the user calls skims[key], key is an identifier for which skim to use, and the object automatically looks up impedances of that skim using the specified left_key column in df as the origin and @@ -80,6 +81,7 @@ class Skims(object): for this lookup. This is the only purpose of this object: to abstract away the O-D lookup and use skims by specifiying which skim to use in the expressions. + Note that keys are any hashable object, not just strings. So calling skim[('AM', 'SOV')] is valid and useful. """ @@ -228,6 +230,7 @@ class Skims3D(object): ('SOV', 'AM"), ('SOV', 'PM') etc. The time of day is then taken to be different for every row in the tours table, and the 'SOV' portion of the key can be used in __getitem__. + To be more explicit, the input is a dictionary of Skims objects, each of which contains a 2D matrix. These are stacked into a 3D matrix with a mapping of keys to indexes which is applied using pandas .map to a third @@ -238,7 +241,7 @@ class Skims3D(object): __getitem__ below (i.e. the one used in the specs). By convention, every key in the Skims object that is passed in MUST be a tuple with 2 items. The second item in the tuple maps to the items in the dataframe - referred to be the skim_key column and the first item in the tuple is + referred to by the skim_key column and the first item in the tuple is then available to pass directly to __getitem__. This is now made explicit by adding the set_3d and get_3d methods in the Skims object which take the two keys independently and convert to the tuple internally. 
diff --git a/example/configs/cdap_1_person.csv b/example/configs/cdap_1_person.csv new file mode 100644 index 0000000000..408be97c44 --- /dev/null +++ b/example/configs/cdap_1_person.csv @@ -0,0 +1,55 @@ +Description,Expression,Mandatory,NonMandatory,Home +Full-time worker alternative-specific constants,ptype == 1,1.378734579,0.622662391, +Part-time worker alternative-specific constants,ptype == 2,-0.718823738,0.636032467, +University student alternative-specific constants,ptype == 3,2.353595176,0.609709846, +Non-working adult alternative-specific constants,ptype == 4,-999,0.594645386, +Retired alternative-specific constants,ptype == 5,-999,0.408202071, +Driving-age child who is in school alternative-specific constants,ptype == 6,2.330918685,-0.599119112, +Pre-driving-age child who is in school alternative-specific constants,ptype == 7,3.295863529,0.57142434, +Pre-driving-age child who is too young for school alternative-specific constants,ptype == 8,1.052531189,-0.837567776, +Pre-driving-age child who is too young for school interaction with age 0 to 1,(ptype == 8) & (age >= 0) & (age <= 1),-0.4515,, +Pre-driving-age child who is too young for school interaction with age 4 to 5,(ptype == 8) & (age >= 4) & (age <= 5),0.6107,, +Pre-driving-age child who is in school interaction with age 6 to 9,(ptype == 7) & (age >= 6) & (age <= 9),-0.2943,, +Pre-driving-age child who is in school interaction with age 13 to 15,(ptype == 7) & (age >= 13) & (age <= 15),-0.7141,-0.672, +Full-time worker interaction with age less than 40,(ptype == 1) & (age < 40),0.2091,, +Retired interaction with age more than 80,(ptype == 5) & (age > 80),,,0.7666 +Full-time worker interaction with female gender,(ptype == 1) & (sex == 2),-0.1259,, +Non-working adult interaction with female gender,(ptype == 4) & (sex == 2),-0.743,, +Retired interaction with female,(ptype == 5) & (sex == 2),0.4769,, +Non-working adult interaction with more cars than workers,(ptype == 4) & (auto_ownership > 
workers),0.6515,0.8168, +Retired interaction with more cars than workers,(ptype == 5) & (auto_ownership > workers),2.992,1.056, +Pre-driving-age child who is too young for school interaction with more cars than workers,(ptype == 8) & (auto_ownership > workers),,0.2991, +Full-time worker interaction with fewer cars than workers,(ptype == 1) & (auto_ownership < workers),,,0.5039 +Non-working adult interaction with fewer cars than workers,(ptype == 4) & (auto_ownership < workers),,,0.8965 +Retired interaction with fewer cars than workers,(ptype == 5) & (auto_ownership < workers),,,0.5496 +Driving-age child who is in school interaction with fewer cars than workers,(ptype == 6) & (auto_ownership < workers),,,0.6475 +Pre-driving-age child who is in school interaction with fewer cars than workers,(ptype == 7) & (auto_ownership < workers),,,0.5862 +Pre-driving-age child who is too young for school interaction with fewer cars than workers,(ptype == 8) & (auto_ownership < workers),,,0.5061 +Full-time worker interaction with income less than $20k,(ptype == 1) & (income_in_thousands < 20),,,0.5313 +Retired interaction with income less than $20k,(ptype == 5) & (income_in_thousands < 20),,,0.533 +Part-time worker interaction with income less than $20k,(ptype == 2) & (income_in_thousands < 20),,,0.3232 +Part-time worker interaction with income between $50k and $100k,(ptype == 2) & (income_in_thousands >= 50) & (income_in_thousands <= 100),,,-0.4032 +Part-time worker interaction with income more than $100k,(ptype == 2) & (income_in_thousands > 100),,0.4207,-0.3534 +Non-working adult interaction with income between $50k and $100k,(ptype == 4) & (income_in_thousands >= 50) & (income_in_thousands <= 100),,,-0.5602 +Non-working adult interaction with income more than $100k,(ptype == 4) & (income_in_thousands > 100),,,-0.7188 +Driving-age child who is in school interaction with income less than $20k,(ptype == 6) & (income_in_thousands < 20),,,1.307 +Driving-age child who is in school
interaction with income between $50k and $100k,(ptype == 6) & (income_in_thousands >= 50) & (income_in_thousands <= 100),,,-0.5031 +Driving-age child who is in school interaction with income more than $100k,(ptype == 6) & (income_in_thousands > 100),,,-2.046 +Pre-driving-age child who is too young for school interaction with income between $50k and $100k,(ptype == 8) & (income_in_thousands >= 50) & (income_in_thousands <= 100),,,-0.5708 +Pre-driving-age child who is too young for school interaction with income more than $100k,(ptype == 8) & (income_in_thousands > 100),,,-0.6186 +Full-time worker interaction with peak accessibility to all employment,(ptype == 1) * AUTOPEAKTOTAL,0.1212,, +Part-time worker interaction with peak accessibility to all employment,(ptype == 2) * AUTOPEAKTOTAL,0.2004,, +Non-working adult interaction with peak accessibility to all employment,(ptype == 4) * AUTOPEAKTOTAL,0.2314,, +Retired interaction with peak accessibility to all employment,(ptype == 5) * AUTOPEAKTOTAL,0.2792,, +Non-working adult interaction with off-peak accessibility to retail,(ptype == 4) * AUTOOFFPEAKRETAIL,,0.07207, +Retired interaction with off-peak accessibility to retail,(ptype == 5) * AUTOOFFPEAKRETAIL,,0.07207, +University student interaction with off-peak accessibility to retail,(ptype == 3) * AUTOOFFPEAKRETAIL,,0.07207, +Driving-age child who is in school interaction with off-peak accessibility to retail,(ptype == 6) * AUTOOFFPEAKRETAIL,,0.08233, +Pre-driving-age child who is in school interaction with off-peak accessibility to retail,(ptype == 7) * AUTOOFFPEAKRETAIL,,0.08233, +Pre-driving-age child who is too young for school interaction with off-peak accessibility to retail,(ptype == 8) * AUTOOFFPEAKRETAIL,,0.08233, +# Full-time worker interaction with usual work location is home,(ptype == 1) * usualWorkLocationIsHome,-1.758,,0.1813 +# Part-time worker interaction with usual work location is home,(ptype == 2) * usualWorkLocationIsHome,-1.758,,0.1813 +# Full-time
worker interaction with no usual work location,(ptype == 1) * noUsualWorkLocation,-0.5935,, +# Part-time worker interaction with no usual work location,(ptype == 2) * noUsualWorkLocation,-0.5935,, +# Driving-age child who is in school interaction with no usual school location,(ptype == 6) * noUsualWorkLocation,-0.866,, +# Pre-driving age child who is in school interaction with no usual school location,(ptype == 7) * noUsualWorkLocation,-0.866,, diff --git a/example/configs/cdap_2_person.csv b/example/configs/cdap_2_person.csv new file mode 100644 index 0000000000..0fe6e4bc4b --- /dev/null +++ b/example/configs/cdap_2_person.csv @@ -0,0 +1,37 @@ +Description,Expression,Mandatory,NonMandatory,Home +Full-time worker interaction with full-time worker,interaction == '11',0.141,1.123,1.626 +Full-time worker interaction with part-time worker,"interaction in ['12', '21']",0.08845,0.4947,0.7407 +Full-time worker interaction with university student,"interaction in ['13', '31']",0.4273,0.5523,1.183 +Full-time worker interaction with non-working adult,"interaction in ['14', '41']",,0.02186,0.9436 +Full-time worker interaction with retired,"interaction in ['15', '51']",,0.3115,1.298 +Full-time worker interaction with driving-age child who is in school,"interaction in ['16', '61']",0.3842,0.4095,2.064 +Full-time worker interaction with pre-driving-age child who is in school,"interaction in ['17', '71']",0.2623,0.6008,1.501 +Full-time worker interaction with pre-driving-age child who is too young for school,"interaction in ['18', '81']",0.5118,0.751,0.9912 +Part-time worker interaction with part-time worker,interaction == '22',1.135,1.032,0.8911 +Part-time worker interaction with university student,"interaction in ['23', '32']",0.173,0.3355,1.642 +Part-time worker interaction with non-working adult,"interaction in ['24', '42']",,0.7477,0.7057 +Part-time worker interaction with retired,"interaction in ['25', '52']",,0.09831,0.463 +Part-time worker interaction with driving-age 
child who is in school,"interaction in ['26', '62']",1.103,0.495,3.057 +Part-time worker interaction with pre-driving-age child who is in school,"interaction in ['27', '72']",0.3079,0.8984,0.7685 +Part-time worker interaction with pre-driving-age child who is too young for school,"interaction in ['28', '82']",0.5074,1.452,1.07 +University student interaction with university student,interaction == '33',0.8726,1.054,1.018 +University student interaction with non-working adult,"interaction in ['34', '43']",,0.193,1.781 +University student interaction with retired,"interaction in ['35', '53']",,0.4065,0.4835 +University student interaction with driving-age child who is in school,"interaction in ['36', '63']",-0.0021,1.62,1.546 +University student interaction with pre-driving-age child who is in school,"interaction in ['37', '73']",0.2975,0.5165,1.552 +University student interaction with pre-driving-age child who is too young for school,"interaction in ['38', '83']",0.2254,0.8973,1.34 +Non-working adult interaction with non-working adult,interaction == '44',,0.6984,1.352 +Non-working adult interaction with retired,"interaction in ['45', '54']",,0.1864,1.209 +Non-working adult interaction with driving-age child who is in school,"interaction in ['46', '64']",,0.6801,0.5243 +Non-working adult interaction with pre-driving-age child who is in school,"interaction in ['47', '74']",,0.5646,0.8112 +Non-working adult interaction with pre-driving-age child who is too young for school,"interaction in ['48', '84']",,1.164,1.167 +Retired interaction with retired,interaction == '55',,0.7291,1.407 +Retired interaction with driving-age child who is in school,"interaction in ['56', '65']",,0.2919,0.8632 +Retired interaction with pre-driving-age child who is in school,"interaction in ['57', '75']",,0.2919,0.8632 +Retired interaction with pre-driving-age child who is too young for school,"interaction in ['58', '85']",,0.2919,0.8632 +Driving-age child who is in school interaction with
driving-age child who is in school,interaction == '66',0.4794,1.512,2.198 +Driving-age child who is in school interaction with pre-driving-age child who is in school,"interaction in ['67', '76']",0.5151,1.422,0.977 +Driving-age child who is in school interaction with pre-driving-age child who is too young for school,"interaction in ['68', '86']",0.5516,1.273,1.467 +Pre-driving-age child who is in school interaction with pre-driving-age child who is in school,interaction == '77',0.9731,1.553,2.8 +Pre-driving-age child who is in school interaction with pre-driving-age child who is too young for school,"interaction in ['78', '87']",0.5961,0.6184,1.434 +Pre-driving-age child who is too young for school interaction with pre-driving-age child who is too young for school,interaction == '88',1.651,0.8771,1.378 diff --git a/example/configs/cdap_3_person.csv b/example/configs/cdap_3_person.csv new file mode 100644 index 0000000000..7e5840d9a3 --- /dev/null +++ b/example/configs/cdap_3_person.csv @@ -0,0 +1,10 @@ +Description,Expression,Mandatory,NonMandatory,Home +Full-time worker / full-time worker / full-time worker,interaction == '111',0.3133,, +Full-time worker / full-time worker / part-time worker or non-working adult,"interaction in ['112', '121', '211', '114', '141', '411']",0.3495,0.4637, +Full-time worker / part-time worker or non-working adult / part-time worker or non-working adult,"interaction in ['124', '142', '214', '412', '241', '421', '122', '144', '212', '414', '221', '441']",,0.3491,0.9573 +Full-time worker / part-time worker or non-working adult / pre-driving-age child,"interaction in ['162', '126', '216', '612', '261', '621']",,,0.2939 +Full-time worker / pre-driving-age child / pre-driving-age child,"interaction in ['166', '616', '661']",,0.3553, +Part-time worker or non-working adult / part-time worker or non-working adult / part-time worker or non-working adult,"interaction in ['222', '224', '242', '422', '424', '442', '244', '444']",,-1.386,0.9881
+Part-time worker or non-working adult / part-time worker or non-working adult / pre-driving-age child,"interaction in ['624', '642', '264', '462', '246', '426', '622', '644', '262', '464', '226', '446']",,-0.8571,0.4374 +Part-time worker or non-working adult / pre-driving-age child / pre-driving-age child,"interaction in ['662', '626', '266', '664', '646', '466']",,,0.4747 +Pre-driving-age child / pre-driving-age child / pre-driving-age child,interaction == '666',-0.3906,, diff --git a/example/configs/cdap_all_people.csv b/example/configs/cdap_all_people.csv new file mode 100644 index 0000000000..13b22cac72 --- /dev/null +++ b/example/configs/cdap_all_people.csv @@ -0,0 +1,10 @@ +Description,Expression,Mandatory,NonMandatory,Home +Three Person All M,"('Mandatory',) * 3",-0.0671,-0.3653,-1.1810 +Three Person All N,"('NonMandatory',) * 3",-0.0671,-0.3653,-1.1810 +Three Person All H,"('Home',) * 3",-0.0671,-0.3653,-1.1810 +Four Person All M,"('Mandatory',) * 4",-0.6104,-1.3460,-3.7330 +Four Person All N,"('NonMandatory',) * 4",-0.6104,-1.3460,-3.7330 +Four Person All H,"('Home',) * 4",-0.6104,-1.3460,-3.7330 +Five Person All M,"('Mandatory',) * 5",-1.5280,-3.4530,-8.6210 +Five Person All N,"('NonMandatory',) * 5",-1.5280,-3.4530,-8.6210 +Five Person All H,"('Home',) * 5",-1.5280,-3.4530,-8.6210 diff --git a/example/configs/cdap_final_rules.csv b/example/configs/cdap_final_rules.csv new file mode 100644 index 0000000000..98aced7c12 --- /dev/null +++ b/example/configs/cdap_final_rules.csv @@ -0,0 +1,3 @@ +Description,Expression,Alternative,Value +M not allowed for retired,ptype == 5,Mandatory,-999 +M not allowed for non-working,ptype == 4,Mandatory,-999 diff --git a/example/simulation.ipynb b/example/simulation.ipynb index ce8a4ba4e0..3e8ddc568b 100644 --- a/example/simulation.ipynb +++ b/example/simulation.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:939241517691379f8bdce8274305d3f99517bc098d843aa83af88483299fc4b6" + "signature": 
"sha256:5843fef02535f277e14c945b692abec5411cdf0980fe2342d146c176ccd4db60" }, "nbformat": 3, "nbformat_minor": 0, @@ -22,6 +22,99 @@ "outputs": [], "prompt_number": 1 }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.run([\"cdap_simulate\"])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Running model 'cdap_simulate'\n", + "Choices:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Home 1225\n", + "Mandatory 704\n", + "NonMandatory 660\n", + "dtype: int64\n", + "Time to execute model 'cdap_simulate': 110.06s\n", + "Total time to execute: 110.06s\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sim.run([\"school_location_simulate\"])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Running model 'school_location_simulate'\n", + "WARNING: Some columns have no variability:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "['mode_choice_logsums']\n", + "WARNING: Some columns have no variability:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "['mode_choice_logsums']\n", + "WARNING: Some columns have no variability:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "['mode_choice_logsums']\n", + "Describe of choices:\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "count 2538.000000\n", + "mean 190.435776\n", + "std 389.915126\n", + "min -1.000000\n", + "25% -1.000000\n", + "50% -1.000000\n", + "75% 69.000000\n", + "max 1450.000000\n", + "Name: TAZ, dtype: float64\n", + "Time to execute model 'school_location_simulate': 18.74s\n", + "Total time to execute: 18.74s\n" + ] + } + ], + "prompt_number": 2 + }, { "cell_type": "code", "collapsed": false, @@ -51,21 +144,21 @@ 
"output_type": "stream", "stream": "stdout", "text": [ - "count 2598.000000\n", - "mean 700.162433\n", - "std 449.674185\n", - "min 1.000000\n", - "25% 340.000000\n", - "50% 661.000000\n", - "75% 1123.000000\n", - "max 1448.000000\n", + "count 2538.000000\n", + "mean 332.508668\n", + "std 473.007494\n", + "min -1.000000\n", + "25% -1.000000\n", + "50% -1.000000\n", + "75% 651.000000\n", + "max 1452.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'workplace_location_simulate': 18.29s\n", - "Total time to execute: 18.29s\n" + "Time to execute model 'workplace_location_simulate': 10.24s\n", + "Total time to execute: 10.24s\n" ] } ], - "prompt_number": 2 + "prompt_number": 3 }, { "cell_type": "code", @@ -80,14 +173,14 @@ "output_type": "stream", "stream": "stdout", "text": [ - "count 2598.000000\n", - "mean 38.641813\n", - "std 23.368224\n", - "min 0.420000\n", - "25% 21.250000\n", - "50% 35.270000\n", - "75% 51.875000\n", - "max 134.970000\n", + "count 2662.000000\n", + "mean 5.954801\n", + "std 5.547754\n", + "min 0.160000\n", + "25% 2.510000\n", + "50% 4.135000\n", + "75% 7.170000\n", + "max 45.990000\n", "dtype: float64\n" ] } @@ -116,21 +209,22 @@ "stream": "stdout", "text": [ "['@1' 'work_tour_auto_time_savings / (workers+1)']\n", - "Choices:\n" + "Choices:\n", + "2 402\n", + "1 319\n", + "3 151\n", + "0 88\n", + "4 40\n", + "dtype: int64" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "2 387\n", - "1 301\n", - "3 162\n", - "0 87\n", - "4 63\n", - "dtype: int64\n", - "Time to execute model 'auto_ownership_simulate': 0.39s\n", - "Total time to execute: 0.39s\n" + "\n", + "Time to execute model 'auto_ownership_simulate': 0.25s\n", + "Total time to execute: 0.25s\n" ] } ], @@ -150,7 +244,7 @@ "stream": "stdout", "text": [ "Running model 'mandatory_tour_frequency'\n", - "856 persons run for mandatory tour model" + "884 persons run for mandatory tour model" ] }, { @@ -165,11 +259,7 @@ "output_type": "stream", "stream": "stdout", 
"text": [ - "['(ptype == 3) & (distance_to_work < 3)'\n", - " '(ptype == 3) & (distance_to_school < 3)'\n", - " '(ptype == 7) & (distance_to_school < 3)'\n", - " '(ptype == 3) & (distance_to_work < 3 | distance_to_school < 3)'\n", - " '(ptype == 2) & nonstudent_to_school'\n", + "['(ptype == 2) & nonstudent_to_school'\n", " '(ptype == 4) & nonstudent_to_school'\n", " '(ptype == 5) & nonstudent_to_school' '(ptype == 6) & non_family'\n", " '~(workplace_taz > -1)' '~(school_taz > -1)']\n", @@ -180,14 +270,14 @@ "output_type": "stream", "stream": "stdout", "text": [ - "school1 378\n", - "work1 284\n", - "work2 109\n", - "work_and_school 70\n", - "school2 15\n", + "school1 394\n", + "work1 252\n", + "work2 146\n", + "work_and_school 73\n", + "school2 19\n", "dtype: int64\n", - "Time to execute model 'mandatory_tour_frequency': 3.55s\n", - "Total time to execute: 3.55s\n" + "Time to execute model 'mandatory_tour_frequency': 2.60s\n", + "Total time to execute: 2.60s\n" ] } ], @@ -207,8 +297,8 @@ "output_type": "pyout", "prompt_number": 6, "text": [ - "work 572\n", - "school 478\n", + "work 617\n", + "school 505\n", "dtype: int64" ] } @@ -229,7 +319,7 @@ "stream": "stdout", "text": [ "Running model 'non_mandatory_tour_frequency'\n", - "1744 persons run for non-mandatory tour model" + "1753 persons run for non-mandatory tour model" ] }, { @@ -237,7 +327,7 @@ "stream": "stdout", "text": [ "\n", - "Running segment 'driving' of size 46\n", + "Running segment 'driving' of size 52\n", "WARNING: Some columns have no variability:\n" ] }, @@ -252,12 +342,12 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)'\n", - " '(car_sufficiency > 0) & (tot_tours == 1)'\n", - " '(car_sufficiency > 0) & (tot_tours == 2)'\n", - " '(car_sufficiency > 0) & (tot_tours == 3)'\n", - " '(car_sufficiency > 0) & (tot_tours == 4)'\n", - " '(car_sufficiency > 0) & 
(tot_tours > 4)']\n", - "Running segment 'full' of size 606" + " 'has_preschool_kid_at_home * escort'\n", + " 'has_preschool_kid_at_home * shopping'\n", + " 'has_preschool_kid_at_home * othmaint'\n", + " 'has_preschool_kid_at_home * eatout'\n", + " 'has_preschool_kid_at_home * othdiscr']\n", + "Running segment 'full' of size 600" ] }, { @@ -279,7 +369,7 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'nonwork' of size 322" + "Running segment 'nonwork' of size 305" ] }, { @@ -301,7 +391,7 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'part' of size 136" + "Running segment 'part' of size 109" ] }, { @@ -323,7 +413,7 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'preschool' of size 139" + "Running segment 'preschool' of size 150" ] }, { @@ -345,7 +435,7 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'retired' of size 203" + "Running segment 'retired' of size 192" ] }, { @@ -367,7 +457,7 @@ " 'max_window*(tot_tours == 0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'school' of size 195" + "Running segment 'school' of size 228" ] }, { @@ -388,8 +478,13 @@ " 'num_shop_j' 'num_main_j' 'num_eat_j' 'num_visi_j' 'num_disc_j'\n", " 'max_window*(tot_tours == 
0)' 'max_window*(tot_tours == 1)'\n", " 'max_window*(tot_tours == 2)' 'max_window*(tot_tours == 3)'\n", - " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)']\n", - "Running segment 'university' of size 97" + " 'max_window*(tot_tours == 4)' 'max_window*(tot_tours > 4)'\n", + " '(car_sufficiency > 0) & (tot_tours == 1)'\n", + " '(car_sufficiency > 0) & (tot_tours == 2)'\n", + " '(car_sufficiency > 0) & (tot_tours == 3)'\n", + " '(car_sufficiency > 0) & (tot_tours == 4)'\n", + " '(car_sufficiency > 0) & (tot_tours > 4)']\n", + "Running segment 'university' of size 117" ] }, { @@ -418,55 +513,56 @@ "output_type": "stream", "stream": "stdout", "text": [ - "0 845\n", - "16 232\n", - "1 144\n", - "8 108\n", - "32 92\n", - "4 70\n", - "2 55\n", - "64 26\n", - "17 20\n", - "20 18\n", - "5 15\n", - "24 14\n", - "9 11\n", - "48 11\n", - "18 10\n", - "33 9\n", - "49 7\n", - "12 6\n", - "40 5\n", - "56 4\n", - "34 4\n", - "80 4\n", - "37 3\n", - "81 3\n", + "0 565\n", + "16 306\n", + "1 208\n", + "8 154\n", + "32 104\n", + "2 89\n", + "4 86\n", + "64 37\n", + "17 23\n", + "24 20\n", + "48 15\n", + "9 15\n", + "5 11\n", + "36 9\n", + "20 9\n", + "80 9\n", + "40 9\n", + "33 8\n", + "65 7\n", + "25 5\n", + "18 5\n", + "34 5\n", + "10 4\n", + "68 4\n", + "6 4\n", + "21 4\n", + "72 4\n", + "12 3\n", + "52 3\n", "19 3\n", - "21 2\n", - "25 2\n", - "36 2\n", - "41 2\n", - "76 1\n", + "11 2\n", + "3 2\n", + "88 2\n", + "28 2\n", + "49 2\n", + "50 2\n", + "53 2\n", + "56 2\n", + "22 1\n", + "26 1\n", + "81 1\n", + "37 1\n", + "38 1\n", "7 1\n", - "6 1\n", - "3 1\n", - "10 1\n", - "68 1\n", - "13 1\n", - "72 1\n", - "66 1\n", - "28 1\n", - "35 1\n", - "44 1\n", - "50 1\n", - "51 1\n", - "52 1\n", - "65 1\n", - "84 1\n", + "42 1\n", + "76 1\n", + "27 1\n", "dtype: int64\n", - "Time to execute model 'non_mandatory_tour_frequency': 21.97s\n", - "Total time to execute: 21.97s\n" + "Time to execute model 'non_mandatory_tour_frequency': 17.48s\n", + "Total time to execute: 17.48s\n" 
] } ], @@ -486,12 +582,12 @@ "output_type": "pyout", "prompt_number": 8, "text": [ - "shopping 335\n", - "othdiscr 227\n", - "escort 222\n", - "othmaint 158\n", - "eatout 125\n", - "social 80\n", + "shopping 418\n", + "othdiscr 296\n", + "escort 294\n", + "othmaint 226\n", + "eatout 142\n", + "social 122\n", "dtype: int64" ] } @@ -512,7 +608,7 @@ "stream": "stdout", "text": [ "Running model 'destination_choice'\n", - "Running segment 'eatout' of size 125" + "Running segment 'eatout' of size 142" ] }, { @@ -520,7 +616,7 @@ "stream": "stdout", "text": [ "\n", - "Running segment 'othdiscr' of size 227" + "Running segment 'othdiscr' of size 296" ] }, { @@ -528,7 +624,7 @@ "stream": "stdout", "text": [ "\n", - "Running segment 'othmaint' of size 158" + "Running segment 'othmaint' of size 226" ] }, { @@ -536,7 +632,7 @@ "stream": "stdout", "text": [ "\n", - "Running segment 'shopping' of size 335" + "Running segment 'shopping' of size 418" ] }, { @@ -544,7 +640,7 @@ "stream": "stdout", "text": [ "\n", - "Running segment 'social' of size 80" + "Running segment 'social' of size 122" ] }, { @@ -559,21 +655,21 @@ "output_type": "stream", "stream": "stdout", "text": [ - "count 925.000000\n", - "mean 747.828108\n", - "std 413.552200\n", + "count 1204.000000\n", + "mean 749.697674\n", + "std 424.261384\n", "min 1.000000\n", - "25% 433.000000\n", - "50% 796.000000\n", - "75% 1094.000000\n", - "max 1446.000000\n", + "25% 412.250000\n", + "50% 764.500000\n", + "75% 1137.000000\n", + "max 1452.000000\n", "Name: TAZ, dtype: float64\n", - "Time to execute model 'destination_choice': 2.45s\n", - "Total time to execute: 2.45s\n" + "Time to execute model 'destination_choice': 7.85s\n", + "Total time to execute: 7.85s\n" ] } ], - "prompt_number": 9 + "prompt_number": 20 }, { "cell_type": "code",