Cdap #11 (Closed)

Changes from all commits (56 commits)
411ee42
Merge pull request #18 from synthicity/workplace-size-coeffs
fscottfoti Feb 20, 2015
f9c164d
got through the first 40 variables or so in the tour freq spec
fscottfoti Feb 24, 2015
ddf4b16
finished up the rest of the rows of the mandatory tour generation model
fscottfoti Feb 24, 2015
8a311d9
comments, a few extra items on the todo list, and a few cosmetic chan…
fscottfoti Feb 24, 2015
12f4eeb
pep8 fixes
fscottfoti Feb 24, 2015
59d8484
adding csv formatted non mandatory tour config
fscottfoti Feb 25, 2015
a8fd8ca
add the alternatives too
fscottfoti Feb 25, 2015
d2fac8f
making the basic model for non mandatory tours
fscottfoti Feb 25, 2015
19e4fa2
the first 60 rows or so of the full time worker non mandatory tour ge…
fscottfoti Feb 25, 2015
fb365d7
got to row 114
fscottfoti Feb 25, 2015
a6c32a9
all but about 10 rows in non mandatory tours...
fscottfoti Feb 26, 2015
324c88f
spec is now complete
fscottfoti Feb 26, 2015
7d01e2c
now running non mandatory model for all segments
fscottfoti Feb 26, 2015
0cabf8f
adding a few notes to the readme
fscottfoti Feb 26, 2015
524deba
basic destination model working
fscottfoti Feb 26, 2015
767a9a2
a couple of small bugs and a couple of comments
fscottfoti Feb 26, 2015
df7fa25
adding small comment to TODO list
fscottfoti Feb 27, 2015
53bf27c
basic pipeline of tour departure and duration model
fscottfoti Mar 2, 2015
bb43936
have about half of the mandatory scheduling model implemented
fscottfoti Mar 2, 2015
522f35d
filling in basic skeleton of non-mandatory tour scheduling
fscottfoti Mar 3, 2015
d83e62e
experimental reorg splitting up large file into several smaller files
fscottfoti Mar 4, 2015
cc465da
major reorg of files
fscottfoti Mar 4, 2015
88e6f82
cleanup!
fscottfoti Mar 4, 2015
78af0a8
pep8
fscottfoti Mar 4, 2015
6ea98b0
added tests to the reorg
fscottfoti Mar 4, 2015
26135d3
install urbansim from master instead of through conda
fscottfoti Mar 4, 2015
687aed9
Update README.md
fscottfoti Mar 5, 2015
41caf35
some more cleanup
fscottfoti Mar 5, 2015
d0b3ee5
Merge branch 'file-reorg' of https://github.com/synthicity/activitysi…
fscottfoti Mar 5, 2015
af70d77
Merge pull request #19 from synthicity/mandatory-tour
fscottfoti Mar 9, 2015
e4dbac5
Merge pull request #24 from synthicity/non-mandatory-tour
fscottfoti Mar 9, 2015
03df26a
Merge pull request #26 from synthicity/destination-choice
fscottfoti Mar 9, 2015
72fa077
Merge pull request #27 from synthicity/mandatory-tour-schedule
fscottfoti Mar 9, 2015
cfb2c4e
Merge pull request #28 from synthicity/nonmandatory-tour-schedule
fscottfoti Mar 9, 2015
63fff26
Merge pull request #29 from synthicity/file-reorg
fscottfoti Mar 9, 2015
7cdccc2
Test data for CDAP routines
jiffyclub Feb 19, 2015
732df84
Test for make_interactions function
jiffyclub Feb 20, 2015
e9e59ea
function for two- and three-person household interactions
jiffyclub Feb 21, 2015
25950dc
extra make_interactions tests
jiffyclub Feb 21, 2015
b6f0f04
Update read_model_spec docstring
jiffyclub Feb 23, 2015
fcee534
read specs as dataframes
jiffyclub Feb 25, 2015
871e227
function for evaluating variables from specs
jiffyclub Feb 26, 2015
8cc6ecc
thinking about how to calculate individual cdap utilities.
jiffyclub Feb 27, 2015
45a477c
make interactions as dataframes, not series
jiffyclub Feb 28, 2015
99b2aaf
function for applying "final rules" to individual cdap utilities
jiffyclub Mar 3, 2015
6514739
update "final rules" alternative name to match specs
jiffyclub Mar 3, 2015
6487ea8
allow commented lines with # in spec CSV files
jiffyclub Mar 4, 2015
f21ffcc
function and tests for calculating individual cdap utilities
jiffyclub Mar 4, 2015
5bdff62
rethinking some of the test data
jiffyclub Mar 4, 2015
fd00422
Remove final rules from individual utilities calc
jiffyclub Mar 4, 2015
aebfb96
update individual utilities result
jiffyclub Mar 5, 2015
faf650c
function for combining individual utilities into household utilities
jiffyclub Mar 5, 2015
afd3c5b
move household utils to a test fixture
jiffyclub Mar 5, 2015
bade4fe
function that applies "final rules" to household alternatives
jiffyclub Mar 5, 2015
85c9881
interim commit with all_people test
jiffyclub Mar 7, 2015
b3faf56
adding example cdap configs back
jiffyclub Mar 9, 2015
3 changes: 2 additions & 1 deletion .travis.yml
@@ -18,8 +18,9 @@ install:
   - >
     conda create -q -c synthicity -n test-environment
     python=$TRAVIS_PYTHON_VERSION
-    numpy pandas pip pytables pytest urbansim
+    numpy pandas pip pytables pytest
   - source activate test-environment
+  - pip install https://github.com/synthicity/urbansim/archive/master.zip
   - pip install openmatrix
   - pip install pytest-cov coveralls pep8
   - pip install .
87 changes: 76 additions & 11 deletions activitysim/activitysim.py
@@ -14,13 +14,38 @@ def random_rows(df, n):
 
 def read_model_spec(fname,
                     description_name="Description",
-                    expression_name="Expression"):
+                    expression_name="Expression",
+                    stack=True):
     """
-    Read in the excel file and reformat for machines
+    Read a CSV model specification into a Pandas DataFrame.
+
+    The CSV is expected to have columns for component descriptions
+    and expressions, plus one or more alternatives.
+
+    The CSV is required to have a header with column names. For example:
+
+        Description,Expression,alt0,alt1,alt2
+
+    Parameters
+    ----------
+    fname : str
+        Name of a CSV spec file.
+    description_name : str, optional
+        Name of the column in `fname` that contains the component description.
+    expression_name : str, optional
+        Name of the column in `fname` that contains the component expression.
+
+    Returns
+    -------
+    spec : pandas.DataFrame
+        The description column is dropped from the returned data and the
+        expression values are set as the table index.
     """
-    cfg = pd.read_csv(fname)
+    cfg = pd.read_csv(fname, comment='#')
     # don't need description and set the expression to the index
-    cfg = cfg.drop(description_name, axis=1).set_index(expression_name).stack()
+    cfg = cfg.drop(description_name, axis=1).set_index(expression_name)
+    if stack:
+        cfg = cfg.stack()
     return cfg
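To make the new behavior concrete, here is a minimal sketch of what the patched `read_model_spec` does, using an inline CSV instead of a file on disk (the column names and coefficient values below are invented for illustration; shown in Python 3 syntax):

```python
import io

import pandas as pd

# inline stand-in for a spec CSV file (contents invented for illustration)
SPEC_CSV = """Description,Expression,alt0,alt1
# lines introduced with '#' are now skipped
age term,age,1.5,-0.5
income term,income > 50000,0.2,0.8
"""


def read_model_spec(buf, description_name="Description",
                    expression_name="Expression", stack=True):
    # same steps as the patched function: skip comment lines, drop the
    # description column, index by expression, and optionally stack the
    # alternatives into a Series with a (expression, alternative) index
    cfg = pd.read_csv(buf, comment='#')
    cfg = cfg.drop(description_name, axis=1).set_index(expression_name)
    if stack:
        cfg = cfg.stack()
    return cfg


spec = read_model_spec(io.StringIO(SPEC_CSV), stack=False)
stacked = read_model_spec(io.StringIO(SPEC_CSV), stack=True)
```

With `stack=False` the result stays a DataFrame of expressions by alternatives, which is what the CDAP code consumes; `stack=True` preserves the original Series behavior.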


@@ -30,6 +55,38 @@ def identity_matrix(alt_names):
                          index=alt_names)
 
 
+def eval_variables(exprs, df):
+    """
+    Evaluate a set of variable expressions from a spec in the context
+    of a given data table.
+
+    There are two kinds of supported expressions: "simple" expressions are
+    evaluated in the context of the DataFrame using DataFrame.eval.
+    This is the default type of expression.
+
+    Python expressions are evaluated in the context of this function using
+    Python's eval function. Because we use Python's eval this type of
+    expression supports more complex operations than a simple expression.
+    Python expressions are denoted by beginning with the @ character.
+    Users should take care that these expressions result in
+    a Pandas Series.
+
+    Parameters
+    ----------
+    exprs : sequence of str
+    df : pandas.DataFrame
+
+    Returns
+    -------
+    variables : pandas.DataFrame
+        Will have the index of `df` and columns of `exprs`.
+
+    """
+    return pd.DataFrame.from_items(
+        [(e, eval(e[1:]) if e.startswith('@') else df.eval(e))
+         for e in exprs])
+
+
 def simple_simulate(choosers, alternatives, spec,
                     skims=None, skim_join_name='zone_id',
                     mult_by_alt_col=False, sample_size=None):
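The two expression types that `eval_variables` supports can be sketched as below. This is an equivalent, not the committed code: the patch builds the frame with `pd.DataFrame.from_items`, which later pandas versions removed, so the sketch uses a plain dict, and it passes `df` to `eval` explicitly (data values invented for illustration):

```python
import pandas as pd


def eval_variables_sketch(exprs, df):
    # "simple" expressions go through DataFrame.eval in the context of df;
    # "@"-prefixed expressions go through Python's eval with df in scope
    results = {}
    for e in exprs:
        if e.startswith('@'):
            results[e] = eval(e[1:], {'df': df, 'pd': pd})
        else:
            results[e] = df.eval(e)
    return pd.DataFrame(results, index=df.index)


people = pd.DataFrame({'age': [30, 70], 'income': [40000, 60000]})
variables = eval_variables_sketch(['age > 65', '@df.income / 1000'], people)
```

The `@` form is needed for anything `DataFrame.eval` cannot express, at the cost of full `eval` power (and risk).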
@@ -87,13 +144,17 @@ def simple_simulate(choosers, alternatives, spec,
                              df[skim_join_name+"_r"])
 
     # evaluate the expressions to build the final matrix
-    vars = {}
+    vars = []
     for expr in exprs:
         if expr[0][0] == "@":
             if mult_by_alt_col:
                 expr = "({}) * df.{}".format(expr[0][1:], expr[1])
             else:
-                expr = expr[0][1:]
+                if isinstance(expr, tuple):
+                    expr = expr[0][1:]
+                else:
+                    # it's already a string, but need to remove the "@"
+                    expr = expr[1:]
             try:
                 s = eval(expr)
             except Exception as e:
@@ -103,21 +164,25 @@ def simple_simulate(choosers, alternatives, spec,
             if mult_by_alt_col:
                 expr = "({}) * {}".format(*expr)
             else:
-                expr = expr[0]
+                if isinstance(expr, tuple):
+                    expr = expr[0]
+                else:
+                    # it's already a string, which is fine
+                    pass
             try:
                 s = df.eval(expr)
             except Exception as e:
                 print "Failed with DataFrame eval:\n%s" % expr
                 raise e
-        vars[expr] = s
-        vars[expr] = vars[expr].astype('float')  # explicit cast
-    model_design = pd.DataFrame(vars, index=df.index)
+        vars.append((expr, s.astype('float')))
+    model_design = pd.DataFrame.from_items(vars)
+    model_design.index = df.index
 
     df = random_rows(model_design, min(100000, len(model_design)))\
         .describe().transpose()
     df = df[df["std"] == 0]
     if len(df):
-        print "WARNING: Describe of columns with no variability:\n", df
+        print "WARNING: Some columns have no variability:\n", df.index.values
 
     positions = mnl.mnl_simulate(
         model_design.as_matrix(),
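The zero-variability warning in the hunk above boils down to the following check (illustrative data; the patched code samples up to 100,000 rows first and prints the offending column names via `df.index.values`):

```python
import pandas as pd

# a design matrix where one column never varies (data invented)
model_design = pd.DataFrame({'x': [1.0, 2.0, 3.0],
                             'const': [1.0, 1.0, 1.0]})

# describe().transpose() yields one row per column with a 'std' column;
# rows where std == 0 are columns that carry no information for the model
desc = model_design.describe().transpose()
flat = desc[desc['std'] == 0]
no_variability = list(flat.index)
```

A constant column contributes nothing to a choice model's fit, so flagging it early is a cheap sanity check on the spec.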
Empty file added activitysim/cdap/__init__.py
Empty file.
248 changes: 248 additions & 0 deletions activitysim/cdap/cdap.py
@@ -0,0 +1,248 @@
import itertools

import numpy as np
import pandas as pd

from ..activitysim import eval_variables


def make_interactions(people, hh_id_col, p_type_col):
    """
    Make two Pandas DataFrames associating people IDs with two
    and three person interactions they have within their households.

    Interactions are strings of numbers representing the makeup
    of the interaction, e.g. '12' or '341'.

    Note that for two-person interactions the interaction string is ordered
    with the person from the index in the first position of the string
    and some other person in the second position. In contrast,
    the interaction strings for three-person interactions are not ordered.
    The person from the index may be in any position of the string.

    Parameters
    ----------
    people : pandas.DataFrame
        Table of people data. Must contain at least a household ID
        column and a categorization of person type.
    hh_id_col : str
        Name of the column in `people` that has their household ID.
    p_type_col : str
        Name of the column in `people` that contains the person type number.

    Returns
    -------
    two_interaction : pandas.DataFrame
        Interactions between two people. Index will be person IDs taken
        from the index of `people`.
        The table will have one column called `interaction`.
    three_interaction : pandas.DataFrame
        Interactions between three people. Index will be person IDs taken
        from the index of `people`.
        The table will have one column called `interaction`.

    """
    two_fmt = '{}{}'.format
    three_fmt = '{}{}{}'.format
    two = []
    three = []

    for hh, df in people.groupby(hh_id_col, sort=False):
        # skip households with only one person
        if len(df) == 1:
            continue

        ptypes = df[p_type_col]

        for pA, pB in itertools.permutations(df.index, 2):
            two.append((pA, two_fmt(*ptypes[[pA, pB]])))

        # now skip households with two people
        if len(df) == 2:
            continue

        for idx in itertools.combinations(df.index, 3):
            combo = three_fmt(*ptypes[list(idx)])
            three.extend((p, combo) for p in idx)

    if two:
        two_idx, two_val = zip(*two)
    else:
        two_idx, two_val = [], []

    if three:
        three_idx, three_val = zip(*three)
    else:
        three_idx, three_val = [], []

    return (
        pd.DataFrame({'interaction': two_val}, index=two_idx),
        pd.DataFrame({'interaction': three_val}, index=three_idx))
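A toy run of the interaction-building loop above, for a single three-person household, shows the ordered/unordered distinction the docstring describes (household ID, person IDs, and person types invented for illustration):

```python
import itertools

import pandas as pd

# three people in one household, with person types 1, 2 and 3
people = pd.DataFrame({'household_id': [1, 1, 1], 'ptype': [1, 2, 3]},
                      index=[10, 11, 12])

two, three = [], []
for hh, df in people.groupby('household_id', sort=False):
    ptypes = df['ptype']
    # two-person strings are ordered: the indexed person's type comes first
    for pA, pB in itertools.permutations(df.index, 2):
        two.append((pA, '{}{}'.format(ptypes[pA], ptypes[pB])))
    # three-person strings are unordered: every member shares one combo string
    for idx in itertools.combinations(df.index, 3):
        combo = '{}{}{}'.format(*ptypes[list(idx)])
        three.extend((p, combo) for p in idx)
```

Person 10 (type 1) gets ordered pairs '12' and '13', while all three people share the single unordered triple '123'.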


def individual_utilities(
        people, hh_id_col, p_type_col, one_spec, two_spec, three_spec):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    people : pandas.DataFrame
        DataFrame of individual people data.
    hh_id_col : str
        Name of the column in `people` that has their household ID.
    p_type_col : str
        Name of the column in `people` that contains the person type number.
    one_spec : pandas.DataFrame
    two_spec : pandas.DataFrame
    three_spec : pandas.DataFrame

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `people` and columns for each of the alternatives.

    """
    # calculate single person utilities
    # evaluate variables from one_spec expressions
    # multiply by one_spec alternative values
    one_vars = eval_variables(one_spec.index, people)
    one_utils = one_vars.dot(one_spec)

    # make two- and three-person interactions
    two_int, three_int = make_interactions(people, hh_id_col, p_type_col)

    # calculate two-interaction utilities
    # evaluate variables from two_spec expressions
    # multiply by two_spec alternative values
    # groupby person and sum
    two_vars = eval_variables(two_spec.index, two_int)
    two_utils = two_vars.dot(two_spec).groupby(level=0).sum()

    # calculate three-interaction utilities
    # evaluate variables from three_spec expressions
    # multiply by three_spec alternative values
    # groupby person and sum
    three_vars = eval_variables(three_spec.index, three_int)
    three_utils = three_vars.dot(three_spec).groupby(level=0).sum()

    # add one-, two-, and three-person utilities
    utils = one_utils.add(
        two_utils, fill_value=0).add(three_utils, fill_value=0)

    return utils
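The core of each step above is a matrix product between the evaluated variables (people by expressions) and the spec coefficients (expressions by alternatives). A minimal sketch with one expression, two people, and two alternatives (all names and numbers invented):

```python
import pandas as pd

# rows: people, columns: spec expressions (already evaluated to 0/1 here)
variables = pd.DataFrame({'age > 65': [1.0, 0.0]}, index=['p1', 'p2'])

# rows: spec expressions, columns: alternatives (coefficients invented)
spec = pd.DataFrame({'Mandatory': [-2.0], 'Home': [1.5]},
                    index=['age > 65'])

# person-by-alternative utilities: each person's evaluated variables
# weighted by each alternative's coefficients
utils = variables.dot(spec)
```

The interaction utilities work the same way, except the rows are interaction records rather than people, so the result is then summed per person with `groupby(level=0).sum()`.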


def initial_household_utilities(utilities, people, hh_id_col):
    """
    Create initial household utilities by grouping and summing utilities
    from individual household members.

    Parameters
    ----------
    utilities : pandas.DataFrame
        Should have the index of `people` and columns for each alternative.
    people : pandas.DataFrame
        DataFrame of individual people data.
    hh_id_col : str
        Name of the column in `people` that has their household ID.

    Returns
    -------
    hh_util : dict of pandas.Series
        Keys will be household IDs and values will be Series
        mapping alternative choices to their utility.

    """
    hh_util = {}

    alts = utilities.columns

    for hh_id, df in people.groupby(hh_id_col, sort=False):
        utils = utilities.loc[df.index]
        hh = []

        for combo in itertools.product(alts, repeat=len(df)):
            hh.append(
                (combo, utils.lookup(df.index, combo).sum()))

        idx, u = zip(*hh)
        hh_util[hh_id] = pd.Series(u, index=idx)

    return hh_util
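The enumeration above can be sketched for a two-person household with two alternatives. The patch uses `DataFrame.lookup` to pull each person's utility for their assigned alternative; the sketch below replaces that with an explicit loop (utilities and alternative names invented):

```python
import itertools

import pandas as pd

# per-person utilities for two alternatives, 'M' and 'H' (values invented)
utilities = pd.DataFrame({'M': [1.0, 2.0], 'H': [0.5, 0.0]},
                         index=['p1', 'p2'])
alts = utilities.columns

hh_util = {}
for combo in itertools.product(alts, repeat=len(utilities)):
    # sum each person's utility for the alternative assigned in this combo
    hh_util[combo] = sum(
        utilities.loc[p, a] for p, a in zip(utilities.index, combo))
```

With two people and two alternatives this yields 2**2 = 4 household alternatives; the combination count grows as alternatives**household_size, which is why the real code enumerates per household rather than globally.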


def apply_final_rules(hh_util, people, hh_id_col, final_rules):
    """
    Final rules can be used to set the utility values for certain
    household alternatives. Often they are set to zero to reflect
    the unavailability of certain alternatives to certain types of people.

    This modifies the `hh_util` data inplace.

    Parameters
    ----------
    hh_util : dict of pandas.Series
        Keys will be household IDs and values will be Series
        mapping alternative choices to their utility.
    people : pandas.DataFrame
        DataFrame of individual people data.
    hh_id_col : str
        Name of the column in `people` that has their household ID.
    final_rules : pandas.DataFrame
        This table must have an index of expressions that can be used
        to filter the `people` table. It must have two columns:
        the first must have the name of the alternative to which the rule
        applies, and the second must have the value of the utility for that
        alternative. The names of the columns are not important, but
        the order is.

    """
    rule_mask = eval_variables(final_rules.index, people)

    for hh_id, df in people.groupby(hh_id_col, sort=False):
        mask = rule_mask.loc[df.index]
        utils = hh_util[hh_id]

        for exp, row in final_rules.iterrows():
            m = mask[exp].as_matrix()

            # this crazy business combines three things to figure out
            # which household alternatives need to be modified by this rule.
            # the three things are:
            # - the mask of people for whom the rule expression is true (m)
            # - the individual alternative to which the rule applies
            #   (row.iloc[0])
            # - the alternative combinations for the household (combo)
            app = [
                ((np.array([row.iloc[0]] * len(utils.index[0])) == combo) & m
                 ).any()
                for combo in utils.index]

            utils[app] = row.iloc[1]
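The "crazy business" comment above combines three boolean pieces; isolated from the surrounding loops it looks like this (the person mask, alternative names, and combos are invented for illustration):

```python
import numpy as np

# the rule expression is true only for the second of three household members
m = np.array([False, True, False])
rule_alt = 'H'  # the individual alternative the rule targets

combos = [('M', 'M', 'M'), ('M', 'H', 'M'), ('H', 'M', 'H')]

# a household combo is affected if any person matched by the rule (m)
# is assigned the rule's alternative (rule_alt) in that combo
applies = [
    ((np.array([rule_alt] * len(combo)) == np.array(combo)) & m).any()
    for combo in combos]
```

Only the middle combo assigns 'H' to the masked person, so only it gets its utility overwritten by the rule's value.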


def apply_all_people(hh_util, all_people):
    """
    Apply utility adjustments to household alternatives.

    This modifies the `hh_util` data inplace.

    Parameters
    ----------
    hh_util : dict of pandas.Series
        Keys will be household IDs and values will be Series
        mapping alternative choices to their utility.
    all_people : pandas.DataFrame
        Adjustments to household alternatives, with alternatives in the
        index and the adjustment values in the first column.
        Index should be household alternatives in the form of tuples
        containing individual alternatives, e.g.
        ('Mandatory', 'Mandatory', 'Mandatory'), where 'Mandatory' is
        one of the alternatives available to individual household members.
        Note that these may also be expressed as Python code to save space,
        so the previous could also be written as ('Mandatory',) * 3.

    """
    # evaluate all the expressions in the all_people index
    all_people.index = [eval(x) for x in all_people.index]
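Evaluating the index expressions turns strings into the tuples the rest of the code keys on; a standalone sketch of the two equivalent spellings mentioned in the docstring:

```python
# each index entry is Python source for a tuple of individual alternatives;
# the two spellings below are equivalent once evaluated
raw_index = ["('Mandatory', 'Mandatory', 'Mandatory')", "('Mandatory',) * 3"]
evaluated = [eval(x) for x in raw_index]
```

As with the `@` expressions in `eval_variables`, this relies on `eval`, so the spec files are trusted code, not plain data.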