diff --git a/activitysim/abm/models/atwork_subtour_destination.py b/activitysim/abm/models/atwork_subtour_destination.py index d0d7fd9916..1c313a0896 100644 --- a/activitysim/abm/models/atwork_subtour_destination.py +++ b/activitysim/abm/models/atwork_subtour_destination.py @@ -7,7 +7,7 @@ import pandas as pd from activitysim.abm.models.util import tour_destination -from activitysim.core import config, estimation, los, tracing, workflow +from activitysim.core import config, estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place @@ -120,3 +120,11 @@ def atwork_subtour_destination( state.tracing.trace_df( tours, label="atwork_subtour_destination", columns=["destination"] ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py index 3483554432..ca7ae6d259 100644 --- a/activitysim/abm/models/atwork_subtour_frequency.py +++ b/activitysim/abm/models/atwork_subtour_frequency.py @@ -37,8 +37,8 @@ class AtworkSubtourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `atwork_subtour_frequency` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + # no additional fields are required for this component + pass @workflow.step @@ -92,15 +92,15 @@ def atwork_subtour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=work_tours, - model_settings=preprocessor_settings, - trace_label=trace_label, - ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=work_tours, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if estimator: estimator.write_spec(model_settings) @@ -164,3 +164,11 @@ def atwork_subtour_frequency( if trace_hh_id: state.tracing.trace_df(tours, label="atwork_subtour_frequency.tours") + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_mode_choice.py b/activitysim/abm/models/atwork_subtour_mode_choice.py index 4e1949dc06..b9889aee72 100644 --- a/activitysim/abm/models/atwork_subtour_mode_choice.py +++ b/activitysim/abm/models/atwork_subtour_mode_choice.py @@ -195,17 +195,6 @@ def atwork_subtour_mode_choice( ) state.add_table("tours", tours) - # - annotate tours table - if model_settings.annotate_tours: - tours = state.get_dataframe("tours") - expressions.assign_columns( - state, - df=tours, - model_settings=model_settings.annotate_tours, - trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), - ) - state.add_table("tours", tours) - if trace_hh_id: state.tracing.trace_df( tours[tours.tour_category == "atwork"], @@ -213,3 +202,11 @@ def atwork_subtour_mode_choice( slicer="tour_id", index_label="tour_id", ) + + expressions.annotate_tables( + state, + 
locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index fff94ef30f..2ad67ff22a 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -141,3 +141,11 @@ def atwork_subtour_scheduling( trace_label, "tour_map", ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index a66ce763a7..fe6a472abb 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -18,8 +18,6 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate - logger = logging.getLogger(__name__) @@ -28,8 +26,8 @@ class AutoOwnershipSettings(LogitComponentSettings, extra="forbid"): Settings for the `auto_ownership` component. 
""" - preprocessor: PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None + # no additional fields are required for this component + pass @workflow.step @@ -69,20 +67,14 @@ def auto_ownership_simulate( logger.info("Running %s with %d households", trace_label, len(choosers)) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) @@ -120,8 +112,13 @@ def auto_ownership_simulate( "auto_ownership", households.auto_ownership, value_counts=True ) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - if trace_hh_id: state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 6776c06c7f..f8632da09f 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -36,12 +36,17 @@ class CdapSettings(PydanticReadable, extra="forbid"): JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" JOINT_TOUR_USEFUL_COLUMNS: list[str] | None = None """Columns to include from the persons table that will be need to calculate household joint tour utility.""" - annotate_persons: PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path CONSTANTS: dict[str, 
Any] = {} compute_settings: ComputeSettings | None = None + preprocessor: PreprocessorSettings | None = None + """Preprocess choosers tables before running the model.""" + annotate_persons: PreprocessorSettings | None = None + """Postprocess persons table after model completion.""" + annotate_households: PreprocessorSettings | None = None + """Postprocess households table after model completion.""" + @workflow.step def cdap_simulate( @@ -171,6 +176,16 @@ def cdap_simulate( index=True, ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=persons_merged, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, "cdap.yaml") estimator.write_spec(model_settings, tag="INDIV_AND_HHSIZE1_SPEC") @@ -241,14 +256,6 @@ def cdap_simulate( cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False) choices = choices.astype(cap_cat_type) persons["cdap_activity"] = choices - - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) # - annotate households table @@ -256,12 +263,6 @@ def cdap_simulate( hh_joint = hh_joint.reindex(households.index) households["has_joint_tour"] = hh_joint - expressions.assign_columns( - state, - df=households, - model_settings=model_settings.annotate_households, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) state.add_table("households", households) tracing.print_summary("cdap_activity", persons.cdap_activity, value_counts=True) @@ -269,3 +270,11 @@ def cdap_simulate( "cdap crosstabs:\n%s" % pd.crosstab(persons.ptype, persons.cdap_activity, margins=True) ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff 
--git a/activitysim/abm/models/free_parking.py b/activitysim/abm/models/free_parking.py index 9aa2800a67..ebcecd248a 100644 --- a/activitysim/abm/models/free_parking.py +++ b/activitysim/abm/models/free_parking.py @@ -26,9 +26,6 @@ class FreeParkingSettings(LogitComponentSettings, extra="forbid"): Settings for the `free_parking` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - FREE_PARKING_ALT: int """The code for free parking.""" @@ -78,21 +75,6 @@ def free_parking( constants = model_settings.CONSTANTS or {} - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( @@ -101,6 +83,15 @@ def free_parking( nest_spec = config.get_logit_model_settings(model_settings) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(file_name=model_settings.SPEC) @@ -144,3 +135,11 @@ def free_parking( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index ee4ac3a69c..1c620e0709 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ 
b/activitysim/abm/models/joint_tour_composition.py @@ -18,6 +18,7 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings + logger = logging.getLogger(__name__) @@ -36,8 +37,7 @@ class JointTourCompositionSettings(LogitComponentSettings, extra="forbid"): Settings for the `joint_tour_composition` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + pass @workflow.step @@ -78,26 +78,6 @@ def joint_tour_composition( "Running joint_tour_composition with %d joint tours" % joint_tours.shape[0] ) - # - run preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=households, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - - joint_tours_merged = pd.merge( - joint_tours, households, left_on="household_id", right_index=True, how="left" - ) - # - simple_simulate model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -108,6 +88,25 @@ def joint_tour_composition( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + locals_dict = { + "persons": persons, + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=households, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + joint_tours_merged = pd.merge( + joint_tours, households, left_on="household_id", right_index=True, how="left" + ) + if estimator: estimator.write_spec(model_settings) 
estimator.write_model_settings(model_settings, model_settings_file_name) @@ -156,3 +155,11 @@ def joint_tour_composition( label="joint_tour_composition.joint_tours", slicer="household_id", ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index cd6c2fed01..29b117821b 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -7,10 +7,11 @@ import pandas as pd from activitysim.abm.models.util import tour_destination -from activitysim.core import estimation, los, tracing, workflow +from activitysim.core import estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place + logger = logging.getLogger(__name__) @@ -113,3 +114,11 @@ def joint_tour_destination( if trace_hh_id: state.tracing.trace_df(joint_tours, label="joint_tour_destination.joint_tours") + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py index 1700c143b0..1b9fde94ba 100644 --- a/activitysim/abm/models/joint_tour_frequency.py +++ b/activitysim/abm/models/joint_tour_frequency.py @@ -25,11 +25,11 @@ class JointTourFrequencySettings(LogitComponentSettings, extra="forbid"): """ - Settings for the `free_parking` component. + Settings for the `joint_tour_frequency` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + # no additional settings are required for this model + pass @workflow.step @@ -72,22 +72,6 @@ def joint_tour_frequency( % multi_person_households.shape[0] ) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=multi_person_households, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( @@ -97,6 +81,22 @@ def joint_tour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # - preprocess choosers table + locals_dict = { + "persons": persons, + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=multi_person_households, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) @@ -205,3 +205,11 @@ def joint_tour_frequency( print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}") different = True assert not different + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 553b280fe7..b9f801eb5e 100644 --- 
a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -22,6 +22,7 @@ ) from activitysim.core.interaction_simulate import interaction_simulate + logger = logging.getLogger(__name__) @@ -58,37 +59,6 @@ def joint_tour_frequency_composition( logger.info("Running %s with %d households", trace_label, len(choosers)) - # alt preprocessor - alt_preprocessor_settings = model_settings.ALTS_PREPROCESSOR - if alt_preprocessor_settings: - locals_dict = {} - - alts = alts.copy() - - expressions.assign_columns( - state, - df=alts, - model_settings=alt_preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - estimator = estimation.manager.begin_estimation( state, "joint_tour_frequency_composition" ) @@ -101,6 +71,32 @@ def joint_tour_frequency_composition( constants = config.get_model_constants(model_settings) + # preprocess choosers table + locals_dict = { + "persons": persons, + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alts, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="ALTS_PREPROCESSOR", + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) 
@@ -221,3 +217,11 @@ def joint_tour_frequency_composition( label="joint_tour_frequency_composition.joint_tours", slicer="household_id", ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 55d5367b3c..4e834fc628 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -274,22 +274,6 @@ def participants_chooser( return choices, rands -def annotate_jtp( - state: workflow.State, - model_settings: JointTourParticipationSettings, - trace_label: str, -): - # - annotate persons - persons = state.get_dataframe("persons") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) - - def add_null_results( state: workflow.State, model_settings: JointTourParticipationSettings, @@ -305,7 +289,13 @@ def add_null_results( state.add_table("joint_tour_participants", participants) # - run annotations - annotate_jtp(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) class JointTourParticipationSettings(LogitComponentSettings, extra="forbid"): @@ -313,12 +303,6 @@ class JointTourParticipationSettings(LogitComponentSettings, extra="forbid"): Settings for the `joint_tour_participation` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_persons: PreprocessorSettings | None = None - """Instructions for annotating the persons table.""" - participation_choice: str = "participate" max_participation_choice_iterations: int = 5000 @@ -362,25 +346,6 @@ def joint_tour_participation( "Running joint_tours_participation with %d potential participants (candidates)" % candidates.shape[0] ) - - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "person_time_window_overlap": lambda x: person_time_window_overlap( - state, x - ), - "persons": persons_merged, - } - - expressions.assign_columns( - state, - df=candidates, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - # - simple_simulate estimator = estimation.manager.begin_estimation(state, "joint_tour_participation") @@ -394,6 +359,21 @@ def joint_tour_participation( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # preprocess choosers table + locals_dict = { + "persons": persons_merged, + "person_time_window_overlap": lambda x: person_time_window_overlap(state, x), + } + locals_dict.update(constants) + expressions.annotate_preprocessors( + state, + df=candidates, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) @@ -498,9 +478,6 @@ def joint_tour_participation( state.add_table("tours", tours) - # - run annotations - annotate_jtp(state, model_settings, trace_label) - if trace_hh_id: state.tracing.trace_df( participants, label="joint_tour_participation.participants" @@ -509,3 +486,11 @@ def joint_tour_participation( state.tracing.trace_df( joint_tours, label="joint_tour_participation.joint_tours" ) 
+ + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py index 9bdcbe146f..791dd7aaf4 100644 --- a/activitysim/abm/models/joint_tour_scheduling.py +++ b/activitysim/abm/models/joint_tour_scheduling.py @@ -22,20 +22,8 @@ from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place, reindex -logger = logging.getLogger(__name__) - -# class JointTourSchedulingSettings(LogitComponentSettings, extra="forbid"): -# """ -# Settings for the `joint_tour_scheduling` component. -# """ -# -# preprocessor: PreprocessorSettings | None = None -# """Setting for the preprocessor.""" -# -# sharrow_skip: bool = False -# """Setting to skip sharrow""" -# +logger = logging.getLogger(__name__) @workflow.step @@ -85,21 +73,18 @@ def joint_tour_scheduling( constants = config.get_model_constants(model_settings) # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=joint_tours, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) - timetable = state.get_injectable("timetable") + locals_d = {"timetable": timetable} + locals_d.update(constants) + + expressions.annotate_preprocessors( + state, + df=joint_tours, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) estimator = estimation.manager.begin_estimation(state, "joint_tour_scheduling") @@ -172,3 +157,11 @@ def joint_tour_scheduling( state.tracing.trace_df( joint_tours, label="joint_tour_scheduling", slicer="household_id" ) + + expressions.annotate_tables( + state, + locals_dict=locals_d, + skims=None, + 
model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 83e794b2be..f634bb3fdb 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -163,6 +163,27 @@ def _location_sample( } locals_d.update(model_settings.CONSTANTS or {}) + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + spec = simulate.spec_for_segment( state, None, @@ -664,6 +685,27 @@ def run_location_simulate( } locals_d.update(model_settings.CONSTANTS or {}) + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=None, # skims included in locals_d + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + if estimator: # write choosers after annotation estimator.write_choosers(choosers) @@ -1117,33 +1159,18 @@ def iterate_location_choice( ) state.extend_table(sample_table_name, save_sample_df) - # - annotate persons table - if model_settings.annotate_persons: - expressions.assign_columns( - state, - df=persons_df, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - - state.add_table("persons", persons_df) + state.add_table("persons", 
persons_df) - if state.settings.trace_hh_id: - state.tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True) + if state.settings.trace_hh_id: + state.tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True) - # - annotate households table - if model_settings.annotate_households: - households_df = households - expressions.assign_columns( - state, - df=households_df, - model_settings=model_settings.annotate_households, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) - state.add_table("households", households_df) - - if state.settings.trace_hh_id: - state.tracing.trace_df(households_df, label=trace_label, warn_if_empty=True) + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if dc_logsum_column_name: tracing.print_summary( diff --git a/activitysim/abm/models/mandatory_scheduling.py b/activitysim/abm/models/mandatory_scheduling.py index 64fc26215f..a8cb46ecca 100644 --- a/activitysim/abm/models/mandatory_scheduling.py +++ b/activitysim/abm/models/mandatory_scheduling.py @@ -8,8 +8,9 @@ from activitysim.abm.models.util.tour_scheduling import run_tour_scheduling from activitysim.core import timetable as tt -from activitysim.core import tracing, workflow +from activitysim.core import tracing, workflow, expressions from activitysim.core.util import assign_in_place, reindex +from activitysim.abm.models.util.vectorize_tour_scheduling import TourSchedulingSettings logger = logging.getLogger(__name__) @@ -30,6 +31,12 @@ def mandatory_tour_scheduling( model_name = "mandatory_tour_scheduling" trace_label = model_name + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + f"{model_name}.yaml", + mandatory=False, + ) + mandatory_tours = tours[tours.tour_category == "mandatory"] # - if no mandatory_tours @@ -55,11 +62,12 @@ def mandatory_tour_scheduling( choices = run_tour_scheduling( state, - model_name, + 
model_settings, mandatory_tours, persons_merged, tdd_alts, tour_segment_col, + trace_label, ) assign_in_place( @@ -86,3 +94,11 @@ def mandatory_tour_scheduling( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py index 8ab69710f1..41a9806748 100644 --- a/activitysim/abm/models/mandatory_tour_frequency.py +++ b/activitysim/abm/models/mandatory_tour_frequency.py @@ -58,10 +58,8 @@ class MandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `mandatory_tour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_persons: PreprocessorSettings | None = None + # no additional fields are required for this component + pass @workflow.step @@ -95,19 +93,6 @@ def mandatory_tour_frequency( add_null_results(state, trace_label, model_settings) return - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = {} - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - estimator = estimation.manager.begin_estimation(state, "mandatory_tour_frequency") model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) @@ -119,6 +104,16 @@ def mandatory_tour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # - preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, 
model_settings_file_name) @@ -183,13 +178,6 @@ def mandatory_tour_frequency( # need to reindex as we only handled persons with cdap_activity == 'M' persons["mandatory_tour_frequency"] = choices.reindex(persons.index).fillna("") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) tracing.print_summary( @@ -206,3 +194,11 @@ def mandatory_tour_frequency( state.tracing.trace_df( persons, label="mandatory_tour_frequency.persons", warn_if_empty=True ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/non_mandatory_destination.py b/activitysim/abm/models/non_mandatory_destination.py index 496c734cdd..b0c6ea3fb1 100644 --- a/activitysim/abm/models/non_mandatory_destination.py +++ b/activitysim/abm/models/non_mandatory_destination.py @@ -6,8 +6,8 @@ import pandas as pd -from activitysim.abm.models.util import annotate, tour_destination -from activitysim.core import estimation, los, tracing, workflow +from activitysim.abm.models.util import tour_destination +from activitysim.core import estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place @@ -129,9 +129,6 @@ def non_mandatory_tour_destination( state.add_table("tours", tours) - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - if want_sample_table: assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df) # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True) @@ -146,3 +143,11 @@ def non_mandatory_tour_destination( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + 
locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/non_mandatory_scheduling.py b/activitysim/abm/models/non_mandatory_scheduling.py index 4e444107bc..bbb09f7f57 100644 --- a/activitysim/abm/models/non_mandatory_scheduling.py +++ b/activitysim/abm/models/non_mandatory_scheduling.py @@ -8,8 +8,9 @@ from activitysim.abm.models.util.tour_scheduling import run_tour_scheduling from activitysim.core import timetable as tt -from activitysim.core import tracing, workflow +from activitysim.core import tracing, workflow, expressions from activitysim.core.util import assign_in_place +from activitysim.abm.models.util.vectorize_tour_scheduling import TourSchedulingSettings logger = logging.getLogger(__name__) DUMP = False @@ -29,6 +30,13 @@ def non_mandatory_tour_scheduling( model_name = "non_mandatory_tour_scheduling" trace_label = model_name trace_hh_id = state.settings.trace_hh_id + + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + f"{model_name}.yaml", + mandatory=False, + ) + non_mandatory_tours = tours[tours.tour_category == "non_mandatory"] # - if no mandatory_tours @@ -40,11 +48,12 @@ def non_mandatory_tour_scheduling( choices = run_tour_scheduling( state, - model_name, + model_settings, non_mandatory_tours, persons_merged, tdd_alts, tour_segment_col, + trace_label, ) assign_in_place( @@ -71,3 +80,11 @@ def non_mandatory_tour_scheduling( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 69b0524d42..175a4b1bfa 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd -from 
activitysim.abm.models.util import annotate from activitysim.abm.models.util.overlap import ( person_available_periods, person_max_window, @@ -28,7 +27,7 @@ tracing, workflow, ) -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import PydanticReadable, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate @@ -166,27 +165,21 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `non_mandatory_tour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - SEGMENT_COL: str = "ptype" # not used anymore TODO remove if needed SPEC_SEGMENTS: list[NonMandatoryTourSpecSegment] = [] # check the above - annotate_persons: PreprocessorSettings | None = None - """Preprocessor settings to annotate persons""" - - annotate_tours: PreprocessorSettings | None = None - """Preprocessor settings to annotate tours""" - explicit_chunk: float = 0 """ If > 0, use this chunk size instead of adaptive chunking. If less than 1, use this fraction of the total number of rows. 
""" + alts_preprocessor: PreprocessorSettings | None = None + """Settings for the alternatives preprocessor.""" + @workflow.step def non_mandatory_tour_frequency( @@ -233,27 +226,36 @@ def non_mandatory_tour_frequency( choosers = persons_merged choosers = choosers[choosers.cdap_activity.isin(["M", "N"])] - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "person_max_window": lambda x: person_max_window(state, x), - "person_available_periods": lambda persons, start_bin, end_bin, continuous: person_available_periods( - state, persons, start_bin, end_bin, continuous - ), - } - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers)) - + # preprocessing choosers constants = config.get_model_constants(model_settings) + locals_dict = { + "person_max_window": lambda x: person_max_window(state, x), + "person_available_periods": lambda persons, start_bin, end_bin, continuous: person_available_periods( + state, persons, start_bin, end_bin, continuous + ), + } + locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) spec_segments = model_settings.SPEC_SEGMENTS @@ -479,16 +481,6 @@ def non_mandatory_tour_frequency( # need to re-compute tour frequency statistics to account for school escort tours recompute_tour_count_statistics(state) - if model_settings.annotate_tours: 
- annotate.annotate_tours(state, model_settings, trace_label) - - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=trace_label, - ) - state.add_table("persons", persons) tracing.print_summary( @@ -513,3 +505,11 @@ def non_mandatory_tour_frequency( label="non_mandatory_tour_frequency.annotated_persons", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index 674b950aaa..995229b69f 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import warnings from pathlib import Path import numpy as np @@ -325,7 +326,14 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): """The school escort model does not use this setting, see `SPECIFICATION`.""" PREPROCESSOR: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + """Setting for the preprocessor. + Runs before the choosers are filtered by the CHOOSER_FILTER_COLUMN_NAME. + Deprecated name -- use `preprocessor` instead. + """ + + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives (aka landuse zones) preprocessor. + Runs before the alternatives are filtered by the ALTERNATIVE_FILTER_COLUMN_NAME.""" ALT_DEST_COL_NAME: str = "parking_zone" """Parking destination column name.""" @@ -362,6 +370,19 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): If less than 1, use this fraction of the total number of rows. """ + def __init__(self, **data): + # Handle deprecated PREPROCESSOR + if "PREPROCESSOR" in data: + warnings.warn( + "The 'PREPROCESSOR' setting is deprecated. 
Please use 'preprocessor' (lowercase) instead.", + DeprecationWarning, + stacklevel=2, + ) + # If both are provided, prefer the lowercase version + if "preprocessor" not in data: + data["preprocessor"] = data["PREPROCESSOR"] + super().__init__(**data) + @workflow.step def parking_location( @@ -388,8 +409,6 @@ def parking_location( trace_hh_id = state.settings.trace_hh_id alt_destination_col_name = model_settings.ALT_DEST_COL_NAME - preprocessor_settings = model_settings.PREPROCESSOR - trips_df = trips trips_merged_df = trips_merged land_use_df = land_use @@ -416,14 +435,28 @@ def parking_location( if constants is not None: locals_dict.update(constants) - if preprocessor_settings: - expressions.assign_columns( - state, - df=trips_merged_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # putting preprocessor and alts preprocessor here so that they are run before + # the filter columns are applied so the user can use the preprocessor to add filter + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=trips_merged_df, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=land_use_df, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) parking_locations, save_sample_df = run_parking_destination( state, @@ -467,3 +500,11 @@ def parking_location( if state.is_table(sample_table_name): raise RuntimeError("sample table %s already exists" % sample_table_name) state.extend_table(sample_table_name, save_sample_df) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 
a0cf6a3312..e4955dc039 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -319,9 +319,6 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): Settings for the `telecommute_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - ALTS: Any NUM_ESCORTEES: int = 3 @@ -353,6 +350,8 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): preprocessor_outbound: PreprocessorSettings | None = None preprocessor_inbound: PreprocessorSettings | None = None preprocessor_outbound_cond: PreprocessorSettings | None = None + alts_preprocessor: PreprocessorSettings | None = None + """Preprocessor settings for the school escorting model alternatives.""" no_escorting_alterative: int = 1 """The alternative number for no escorting. Used to set the choice for households with no escortees.""" @@ -428,6 +427,16 @@ def school_escorting( constants = config.get_model_constants(model_settings) locals_dict = {} locals_dict.update(constants) + # alternatives preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) school_escorting_stages = ["outbound", "inbound", "outbound_cond"] escort_bundles = [] @@ -476,15 +485,16 @@ def school_escorting( logger.info("Running %s with %d households", stage_trace_label, len(choosers)) - preprocessor_settings = getattr(model_settings, "preprocessor_" + stage, None) - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=stage_trace_label, - ) + preprocessor_setting_name = "preprocessor_" + stage + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + 
trace_label=trace_label, + preprocessor_setting_name=preprocessor_setting_name, + ) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) @@ -655,3 +665,11 @@ def school_escorting( ) timetable.replace_table(state) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 2f0253f219..ec1d4961d1 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -134,21 +134,15 @@ def stop_frequency( simulate.set_skim_wrapper_targets(tours_merged, skims) # this should be pre-slice as some expressions may count tours by type - annotations = expressions.compute_columns( + expressions.annotate_preprocessors( state, df=tours_merged, - model_settings=preprocessor_settings, locals_dict=locals_dict, + skims=None, # skims are already set on tours_merged above + model_settings=model_settings, trace_label=trace_label, ) - assign_in_place( - tours_merged, - annotations, - state.settings.downcast_int, - state.settings.downcast_float, - ) - tracing.print_summary( "stop_frequency segments", tours_merged.primary_purpose, value_counts=True ) @@ -304,10 +298,6 @@ def stop_frequency( trips, label="stop_frequency.trips", slicer="person_id", columns=None ) - state.tracing.trace_df( - annotations, label="stop_frequency.annotations", columns=None - ) - state.tracing.trace_df( tours_merged, label="stop_frequency.tours_merged", @@ -317,3 +307,11 @@ def stop_frequency( if state.is_table("school_escort_trips"): school_escort_tours_trips.merge_school_escort_trips_into_pipeline(state) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py 
index f98791a2a2..195335258a 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -61,20 +61,15 @@ def telecommute_frequency( constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + # choosers preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -127,3 +122,11 @@ def telecommute_frequency( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_mode_choice.py b/activitysim/abm/models/tour_mode_choice.py index befcb0d7a3..d2052f099a 100644 --- a/activitysim/abm/models/tour_mode_choice.py +++ b/activitysim/abm/models/tour_mode_choice.py @@ -7,9 +7,18 @@ import numpy as np import pandas as pd -from activitysim.abm.models.util import annotate, school_escort_tours_trips, trip +from activitysim.abm.models.util import school_escort_tours_trips, trip from activitysim.abm.models.util.mode import run_tour_mode_choice_simulate -from activitysim.core import config, estimation, logit, los, simulate, tracing, workflow +from activitysim.core import ( + config, + estimation, + logit, + los, + simulate, + tracing, + workflow, + expressions, +) from activitysim.core.configuration.logit import 
TourModeComponentSettings from activitysim.core.util import assign_in_place, reindex @@ -450,10 +459,6 @@ def tour_mode_choice_simulate( state.add_table("tours", all_tours) - # - annotate tours table - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - if state.settings.trace_hh_id: state.tracing.trace_df( primary_tours, @@ -462,3 +467,11 @@ def tour_mode_choice_simulate( index_label="tour_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_od_choice.py b/activitysim/abm/models/tour_od_choice.py index b518b36f97..ad9f527d75 100644 --- a/activitysim/abm/models/tour_od_choice.py +++ b/activitysim/abm/models/tour_od_choice.py @@ -7,7 +7,7 @@ import pandas as pd from activitysim.abm.models.util import tour_od -from activitysim.core import estimation, los, workflow +from activitysim.core import estimation, los, workflow, expressions logger = logging.getLogger(__name__) @@ -147,3 +147,11 @@ def tour_od_choice( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_scheduling_probabilistic.py b/activitysim/abm/models/tour_scheduling_probabilistic.py index 324db45667..8dcb1bbff2 100644 --- a/activitysim/abm/models/tour_scheduling_probabilistic.py +++ b/activitysim/abm/models/tour_scheduling_probabilistic.py @@ -7,7 +7,7 @@ import pandas as pd from activitysim.abm.models.util import probabilistic_scheduling as ps -from activitysim.core import chunk, estimation, workflow +from activitysim.core import chunk, estimation, workflow, expressions from activitysim.core.configuration.base import PydanticReadable logger = logging.getLogger(__name__) @@ -175,3 +175,11 @@ def tour_scheduling_probabilistic( assert not 
tours_df["duration"].isnull().any() state.add_table("tours", tours_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 9a34b7b0b4..19ba883212 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -17,6 +17,7 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings + logger = logging.getLogger("activitysim") @@ -25,8 +26,8 @@ class TransitPassOwnershipSettings(LogitComponentSettings, extra="forbid"): Settings for the `transit_pass_ownership` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + # no additional fields are required for this component + pass @workflow.step @@ -56,19 +57,14 @@ def transit_pass_ownership( constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -114,3 +110,11 @@ def transit_pass_ownership( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + 
trace_label=trace_label, + ) diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index 0f71279cdf..328b499929 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -17,6 +17,7 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings + logger = logging.getLogger("activitysim") @@ -25,9 +26,6 @@ class TransitPassSubsidySettings(LogitComponentSettings, extra="forbid"): Settings for the `transit_pass_subsidy` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - CHOOSER_FILTER_COLUMN_NAME: str | None = None """Column name which selects choosers. If None, all persons are choosers.""" @@ -56,20 +54,15 @@ def transit_pass_subsidy( constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + # - preprocessor, running before choosers are filtered so column can be created + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) filter_col = model_settings.CHOOSER_FILTER_COLUMN_NAME if filter_col is not None: @@ -122,3 +115,11 @@ def transit_pass_subsidy( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_departure_choice.py 
b/activitysim/abm/models/trip_departure_choice.py index 236a755dec..0e4dd05d9d 100644 --- a/activitysim/abm/models/trip_departure_choice.py +++ b/activitysim/abm/models/trip_departure_choice.py @@ -191,9 +191,21 @@ def choose_tour_leg_pattern( trace_label="trace_label", *, chunk_sizer: chunk.ChunkSizer, - compute_settings: ComputeSettings | None = None, + model_settings: TripDepartureChoiceSettings, ): alternatives = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index() + + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) + have_trace_targets = state.tracing.has_trace_targets(trip_segment) if have_trace_targets: @@ -245,7 +257,7 @@ def choose_tour_leg_pattern( trace_label, trace_rows, estimator=None, - compute_settings=compute_settings, + compute_settings=model_settings.compute_settings, ) interaction_utilities = pd.concat( @@ -402,7 +414,7 @@ def apply_stage_two_model( trips, chunk_size, trace_label: str, - compute_settings: ComputeSettings | None = None, + model_settings: TripDepartureChoiceSettings, ): if not trips.index.is_monotonic_increasing: trips = trips.sort_index() @@ -473,7 +485,7 @@ def apply_stage_two_model( spec, trace_label=segment_trace_label, chunk_sizer=chunk_sizer, - compute_settings=compute_settings, + model_settings=model_settings, ) choices = pd.merge( @@ -509,6 +521,9 @@ class TripDepartureChoiceSettings(PydanticCompute, extra="forbid"): PREPROCESSOR: PreprocessorSettings | None = None """Setting for the preprocessor.""" + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives preprocessor.""" + SPECIFICATION: str = "trip_departure_choice.csv" """Filename for the trip departure choice (.csv) file.""" @@ -580,7 +595,7 @@ def trip_departure_choice( trips_merged_df, state.settings.chunk_size, trace_label, - 
compute_settings=model_settings.compute_settings, + model_settings=model_settings, ) trips_df = trips @@ -590,3 +605,11 @@ def trip_departure_choice( assert trips_df[trips_df["depart"].isnull()].empty state.add_table("trips", trips_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 2b6b5a2ff2..0e7cfb98dd 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -59,9 +59,6 @@ class TripDestinationSettings(LocationComponentSettings, extra="forbid"): PRIMARY_ORIGIN: str = "origin" PRIMARY_DEST: str = "tour_leg_dest" # must be created in preprocessor REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None - preprocessor: PreprocessorSettings | None = None - alts_preprocessor_sample: PreprocessorSettings | None = None - alts_preprocessor_simulate: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" @@ -203,14 +200,16 @@ def _destination_sample( log_alt_losers = state.settings.log_alt_losers - if model_settings.alts_preprocessor_sample: - expressions.assign_columns( - state, - df=alternatives, - model_settings=model_settings.alts_preprocessor_sample, - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "alts"), - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) choices = interaction_sample( state, @@ -951,14 +950,16 @@ def trip_destination_simulate( ) locals_dict.update(skims) - if model_settings.alts_preprocessor_simulate: - expressions.assign_columns( - state, - 
df=destination_sample, - model_settings=model_settings.alts_preprocessor_simulate, - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "alts"), - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=destination_sample, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) log_alt_losers = state.settings.log_alt_losers destinations = interaction_sample_simulate( @@ -1383,15 +1384,15 @@ def run_trip_destination( } locals_dict.update(model_settings.CONSTANTS) - # - annotate nth_trips - if preprocessor_settings: - expressions.assign_columns( - state, - df=nth_trips, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=nth_trace_label, - ) + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=nth_trips, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if isinstance( nth_trips["trip_period"].dtype, pd.api.types.CategoricalDtype @@ -1677,3 +1678,11 @@ def trip_destination( if state.is_table(sample_table_name): raise RuntimeError("sample table %s already exists" % sample_table_name) state.extend_table(sample_table_name, save_sample_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index b3dd0e7f4f..a942b7af84 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from activitysim.abm.models.util import annotate, school_escort_tours_trips +from activitysim.abm.models.util import school_escort_tours_trips from activitysim.abm.models.util.mode import mode_choice_simulate from activitysim.core import ( chunk, 
@@ -32,9 +32,6 @@ class TripModeChoiceSettings(TemplatedLogitComponentSettings, extra="forbid"): Settings for the `trip_mode_choice` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - MODE_CHOICE_LOGSUM_COLUMN_NAME: str = "mode_choice_logsum" """Column name of the mode choice logsum""" @@ -49,8 +46,11 @@ class TripModeChoiceSettings(TemplatedLogitComponentSettings, extra="forbid"): use_TVPB_constants: bool = True FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True - - annotate_trips: PreprocessorSettings | None = None + """ + If True, overwrite the trip mode of escortee trips to match the mode selected + by the chauffeur. This is useful for school escort tours where the escortee trip + mode (e.g., "transit") should match the chauffeur trip mode. + """ LEGACY_COEFFICIENTS: str | None = None @@ -370,15 +370,6 @@ def trip_mode_choice( state.add_table("trips", trips_df) - if model_settings.annotate_trips: - # need to update locals_dict to access skims that are the same .shape as trips table - locals_dict = {} - locals_dict.update(constants) - simulate.set_skim_wrapper_targets(trips_merged, skims) - locals_dict.update(skims) - locals_dict["timeframe"] = "trip" - annotate.annotate_trips(state, model_settings, trace_label, locals_dict) - if state.settings.trace_hh_id: state.tracing.trace_df( trips_df, @@ -387,3 +378,17 @@ def trip_mode_choice( index_label="trip_id", warn_if_empty=True, ) + + # need to update locals_dict to access skims that are the same .shape as trips table + locals_dict = {} + locals_dict.update(constants) + simulate.set_skim_wrapper_targets(trips_merged, skims) + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index 5f208f514d..695882938d 100644 --- 
a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -253,16 +253,14 @@ def run_trip_purpose( trips_df = trips_df[~last_trip] logger.info("assign purpose to %s intermediate trips", trips_df.shape[0]) - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = config.get_model_constants(model_settings) - expressions.assign_columns( - state, - df=trips_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=trips_df, + locals_dict=config.get_model_constants(model_settings), + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) use_depart_time = model_settings.use_depart_time @@ -303,6 +301,10 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: """ trace_label = "trip_purpose" + model_settings = TripPurposeSettings.read_settings_file( + state.filesystem, "trip_purpose.yaml" + ) + trips_df = trips if state.is_table("school_escort_trips"): @@ -326,6 +328,7 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: state, trips_df, estimator, + model_settings, trace_label=trace_label, ) @@ -359,3 +362,11 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: index_label="trip_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_purpose_and_destination.py b/activitysim/abm/models/trip_purpose_and_destination.py index 66443e0892..02b8a6cbfd 100644 --- a/activitysim/abm/models/trip_purpose_and_destination.py +++ b/activitysim/abm/models/trip_purpose_and_destination.py @@ -12,7 +12,7 @@ cleanup_failed_trips, flag_failed_trip_leg_mates, ) -from activitysim.core import estimation, tracing, workflow +from activitysim.core import estimation, tracing, workflow, expressions from 
activitysim.core.configuration.base import PydanticReadable from activitysim.core.util import assign_in_place @@ -261,3 +261,11 @@ def trip_purpose_and_destination( index_label="trip_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_scheduling.py b/activitysim/abm/models/trip_scheduling.py index fa69567ec9..0e45d463dc 100644 --- a/activitysim/abm/models/trip_scheduling.py +++ b/activitysim/abm/models/trip_scheduling.py @@ -224,7 +224,6 @@ def schedule_trips_in_leg( failfix = model_settings.FAILFIX depart_alt_base = model_settings.DEPART_ALT_BASE scheduling_mode = model_settings.scheduling_mode - preprocessor_settings = model_settings.preprocessor probs_join_cols = model_settings.probs_join_cols if probs_join_cols is None: @@ -286,14 +285,14 @@ def schedule_trips_in_leg( nth_trace_label = tracing.extend_trace_label(trace_label, "num_%s" % i) # - annotate trips - if preprocessor_settings: - expressions.assign_columns( - state, - df=trips, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=nth_trace_label, - ) + expressions.annotate_preprocessors( + state, + df=trips, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if ( outbound @@ -682,3 +681,11 @@ def trip_scheduling( assert not trips_df.depart.isnull().any() state.add_table("trips", trips_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_scheduling_choice.py b/activitysim/abm/models/trip_scheduling_choice.py index 5f58e68ee4..510d4ece8d 100644 --- a/activitysim/abm/models/trip_scheduling_choice.py +++ b/activitysim/abm/models/trip_scheduling_choice.py @@ -280,6 +280,17 @@ def run_trip_scheduling_choice( choosers = choosers.sort_index() schedules 
= generate_schedule_alternatives(choosers).sort_index() + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=schedules, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) + # Assuming we did the max_alt_size calculation correctly, # we should get the same sizes here. assert choosers[NUM_ALTERNATIVES].sum() == schedules.shape[0] @@ -340,6 +351,8 @@ class TripSchedulingChoiceSettings(PydanticReadable, extra="forbid"): PREPROCESSOR: PreprocessorSettings | None = None """Setting for the preprocessor.""" + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives preprocessor.""" SPECIFICATION: str """file name of specification file""" @@ -396,34 +409,32 @@ def trip_scheduling_choice( .reindex(tours.index) ) - preprocessor_settings = model_settings.PREPROCESSOR - # hack: preprocessor adds origin column in place if it does not exist already od_skim_stack_wrapper = skim_dict.wrap("origin", "destination") do_skim_stack_wrapper = skim_dict.wrap("destination", "origin") obib_skim_stack_wrapper = skim_dict.wrap(LAST_OB_STOP, FIRST_IB_STOP) - skims = [od_skim_stack_wrapper, do_skim_stack_wrapper, obib_skim_stack_wrapper] - - locals_dict = { + skims = { "od_skims": od_skim_stack_wrapper, "do_skims": do_skim_stack_wrapper, "obib_skims": obib_skim_stack_wrapper, + } + locals_dict = { "orig_col_name": "origin", "dest_col_name": "destination", "timeframe": "timeless_directional", } + locals_dict.update(skims) - if preprocessor_settings: - simulate.set_skim_wrapper_targets(tours_df, skims) - - expressions.assign_columns( - state, - df=tours_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=tours_df, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + 
trace_label=trace_label, + ) tours_df = run_trip_scheduling_choice( state, @@ -436,3 +447,11 @@ def trip_scheduling_choice( ) state.add_table("tours", tours_df) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py deleted file mode 100644 index 5c0f628219..0000000000 --- a/activitysim/abm/models/util/annotate.py +++ /dev/null @@ -1,136 +0,0 @@ -# ActivitySim -# See full license in LICENSE.txt. -from __future__ import annotations - -import logging - -import pandas as pd - -from activitysim.core import expressions, tracing, workflow -from activitysim.core.configuration import PydanticBase - -""" -Code for annotating tables -""" - -logger = logging.getLogger(__name__) - - -def annotate_households( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the households table in the pipeline according to spec. - - Parameters - ---------- - model_settings : dict - trace_label : str - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - households = state.get_dataframe("households") - expressions.assign_columns( - state, - df=households, - model_settings=model_settings.get("annotate_households"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) - state.add_table("households", households) - - -def annotate_persons( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the persons table in the pipeline according to spec. 
- - Parameters - ---------- - model_settings : dict - trace_label : str - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - persons = state.get_dataframe("persons") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.get("annotate_persons"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) - - -def annotate_tours( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the tours table in the pipeline according to spec. - - Parameters - ---------- - state : workflow.State - model_settings : dict or PydanticBase - trace_label : str - locals_dict : dict, optional - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - tours = state.get_dataframe("tours") - expressions.assign_columns( - state, - df=tours, - model_settings=model_settings.get("annotate_tours"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), - ) - state.add_table("tours", tours) - - -def annotate_trips( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict=None, -): - """ - Add columns to the trips table in the pipeline according to spec. 
- - Parameters - ---------- - state : workflow.State - model_settings : dict or PydanticBase - trace_label : str - locals_dict : dict, optional - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - trips = state.get_dataframe("trips") - expressions.assign_columns( - state, - df=trips, - model_settings=model_settings.get("annotate_trips"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_trips"), - ) - state.add_table("trips", trips) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 0891b8d216..7234355fde 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -9,7 +9,7 @@ from activitysim.abm.models.util import logsums as logsum from activitysim.abm.tables.size_terms import tour_destination_size_terms -from activitysim.core import config, los, simulate, tracing, workflow +from activitysim.core import config, los, simulate, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate @@ -109,6 +109,27 @@ def _destination_sample( log_alt_losers = state.settings.log_alt_losers + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=destination_size_terms, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + choices = interaction_sample( state, choosers, @@ -761,6 +782,27 @@ def 
run_destination_simulate( if constants is not None: locals_d.update(constants) + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=destination_sample, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) + state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") log_alt_losers = state.settings.log_alt_losers diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index 93c624b135..3c64f4bcc2 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -643,8 +643,7 @@ class JointTourFreqCompSettings(LogitComponentSettings, extra="forbid"): """ ALTS_TABLE_STRUCTURE: JointTourFreqCompAlts = JointTourFreqCompAlts() - preprocessor: PreprocessorSettings | None = None - ALTS_PREPROCESSOR: PreprocessorSettings | None = None + ALTS_PREPROCESSOR: PreprocessorSettings | list[PreprocessorSettings] | None = None def create_joint_tours( diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index 7c615142f4..96ec9aba86 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -47,7 +47,6 @@ class TourODSettings(TourLocationComponentSettings): ORIGIN_ATTR_COLS_TO_USE: list[str] = [] ORIG_COL_NAME: str ORIG_FILTER: str | None = None - preprocessor: PreprocessorSettings | None = None def get_od_id_col(origin_col, destination_col): @@ -203,6 +202,17 @@ def _od_sample( elif skims.orig_key not in od_alts_df: logger.error("Alts df is missing origin skim key column.") + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=od_alts_df, + 
locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + choices = interaction_sample( state, choosers, @@ -658,54 +668,6 @@ def od_presample( return maz_choices -# class SizeTermCalculatorOD: # class SizeTermCalculator -# """ -# convenience object to provide size_terms for a selector (e.g. -# non_mandatory) for various segments (e.g. tour_type or purpose) -# returns size terms for specified segment in df or series form. -# """ -# -# def __init__(self, size_term_selector): -# # do this once so they can request size_terms for various segments (tour_type or purpose) -# land_use = state.checkpoint.load_dataframe("land_use") -# self.land_use = land_use -# size_terms = state.get_injectable("size_terms") -# self.destination_size_terms = tour_destination_size_terms( -# self.land_use, size_terms, size_term_selector -# ) -# -# assert not self.destination_size_terms.isna().any(axis=None) -# -# def omnibus_size_terms_df(self): -# return self.destination_size_terms -# -# def dest_size_terms_df(self, segment_name, trace_label): -# # return size terms as df with one column named 'size_term' -# # convenient if creating or merging with alts -# -# size_terms = self.destination_size_terms[[segment_name]].copy() -# size_terms.columns = ["size_term"] -# -# # FIXME - no point in considering impossible alternatives (where dest size term is zero) -# logger.debug( -# f"SizeTermCalculator dropping {(~(size_terms.size_term > 0)).sum()} " -# f"of {len(size_terms)} rows where size_term is zero for {segment_name}" -# ) -# size_terms = size_terms[size_terms.size_term > 0] -# -# if len(size_terms) == 0: -# logger.warning( -# f"SizeTermCalculator: no zones with non-zero size terms for {segment_name} in {trace_label}" -# ) -# -# return size_terms -# -# def dest_size_terms_series(self, segment_name): -# # return size terms as as series -# # convenient (and no copy overhead) if reindexing and 
assigning into alts column -# return self.destination_size_terms[segment_name] - - def run_od_sample( state, spec_segment_name, @@ -1044,6 +1006,17 @@ def run_od_simulate( if constants is not None: locals_d.update(constants) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=od_sample, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) + state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") choices = interaction_sample_simulate( state, @@ -1085,7 +1058,6 @@ def run_tour_od( trace_label, ): size_term_calculator = SizeTermCalculator(state, model_settings.SIZE_TERM_SELECTOR) - preprocessor_settings = model_settings.preprocessor origin_col_name = model_settings.ORIG_COL_NAME chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME @@ -1108,15 +1080,15 @@ def run_tour_od( right_index=True, ) - # - annotate choosers - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - trace_label=trace_label, - ) - + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) # size_term segment is segment_name segment_destination_size_terms = size_term_calculator.dest_size_terms_df( segment_name, trace_label diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index db003786f4..0a7c6675d1 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -16,20 +16,13 @@ def run_tour_scheduling( state: workflow.State, - model_name: str, + model_settings: TourSchedulingSettings, chooser_tours: pd.DataFrame, persons_merged: pd.DataFrame, tdd_alts: pd.DataFrame, tour_segment_col: str, + trace_label: str, ): - trace_label = model_name - 
model_settings_file_name = f"{model_name}.yaml" - - model_settings = TourSchedulingSettings.read_settings_file( - state.filesystem, - model_settings_file_name, - mandatory=False, - ) if model_settings.LOGSUM_SETTINGS: logsum_settings = TourModeComponentSettings.read_settings_file( @@ -52,18 +45,19 @@ def run_tour_scheduling( timetable = state.get_injectable("timetable") # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {"tt": timetable.attach_state(state)} - locals_d.update(config.get_model_constants(model_settings)) - - expressions.assign_columns( - state, - df=chooser_tours, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + locals_d = {"tt": timetable.attach_state(state)} + locals_d.update(config.get_model_constants(model_settings)) + + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=chooser_tours, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + # alts preprocessed in vectorize_tour_scheduling estimators = {} if model_settings.TOUR_SPEC_SEGMENTS: @@ -72,7 +66,7 @@ def run_tour_scheduling( specs = {} compute_settings = {} for spec_segment_name, spec_settings in spec_segment_settings.items(): - bundle_name = f"{model_name}_{spec_segment_name}" + bundle_name = f"{trace_label}_{spec_segment_name}" # estimator for this tour_segment estimator = estimation.manager.begin_estimation( @@ -91,7 +85,7 @@ def run_tour_scheduling( if estimator: estimators[spec_segment_name] = estimator # add to local list - estimator.write_model_settings(model_settings, model_settings_file_name) + estimator.write_model_settings(model_settings, f"{trace_label}.yaml") estimator.write_spec(spec_settings) estimator.write_coefficients(coefficients_df, spec_settings) @@ -121,7 +115,7 @@ def run_tour_scheduling( assert not model_settings.TOUR_SPEC_SEGMENTS assert tour_segment_col is None 
- estimator = estimation.manager.begin_estimation(state, model_name) + estimator = estimation.manager.begin_estimation(state, trace_label) spec_file_name = model_settings.SPEC model_spec = state.filesystem.read_model_spec(file_name=spec_file_name) @@ -132,7 +126,7 @@ def run_tour_scheduling( if estimator: estimators[None] = estimator # add to local list - estimator.write_model_settings(model_settings, model_settings_file_name) + estimator.write_model_settings(model_settings, f"{trace_label}.yaml") estimator.write_spec(model_settings) estimator.write_coefficients(coefficients_df, model_settings) @@ -146,7 +140,7 @@ def run_tour_scheduling( if estimators: timetable.begin_transaction(list(estimators.values())) - logger.info(f"Running {model_name} with %d tours", len(chooser_tours)) + logger.info(f"Running {trace_label} with %d tours", len(chooser_tours)) choices = vts.vectorize_tour_scheduling( state, chooser_tours, diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index dfab8171de..d4593c21fa 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -43,8 +43,6 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"): give the segements. """ SIMULATE_CHOOSER_COLUMNS: list[str] | None = None - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" SPEC_SEGMENTS: dict[str, LogitComponentSettings] = {} diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index a84dfaabf9..a3f04037c0 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -90,10 +90,7 @@ class VehicleAllocationSettings(LogitComponentSettings, extra="forbid"): Settings for the `vehicle_allocation` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - OCCUPANCY_LEVELS: list = [1] # TODO Check this + OCCUPANCY_LEVELS: list = [1] # TODO check this """Occupancy level It will create columns in the tour table selecting a vehicle for each of the @@ -101,9 +98,6 @@ class VehicleAllocationSettings(LogitComponentSettings, extra="forbid"): if not supplied, will default to only one occupancy level of 1 """ - annotate_tours: PreprocessorSettings | None = None - """Preprocessor settings to annotate tours""" - @workflow.step def vehicle_allocation( @@ -212,15 +206,14 @@ def vehicle_allocation( locals_dict.update(skims) # ------ preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) logger.info("Running %s with %d tours", trace_label, len(choosers)) @@ -286,9 +279,13 @@ def vehicle_allocation( "vehicle_allocation", tours[tours_veh_occup_cols], value_counts=True ) - annotate_settings = model_settings.annotate_tours - if annotate_settings: - annotate_vehicle_allocation(state, model_settings, trace_label) - if state.settings.trace_hh_id: state.tracing.trace_df(tours, label="vehicle_allocation", warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 93caae0381..5347b5bb79 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -407,15 +407,16 @@ def iterate_vehicle_type_choice( ) # alts preprocessor - 
alts_preprocessor_settings = model_settings.alts_preprocessor - if alts_preprocessor_settings: - expressions.assign_columns( - state, - df=alts_wide, - model_settings=alts_preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alts_wide, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) # - preparing choosers for iterating vehicles_merged["already_owned_veh"] = "" @@ -434,15 +435,15 @@ def iterate_vehicle_type_choice( # running preprocessor on entire vehicle table to enumerate vehicle types # already owned by the household choosers = vehicles_merged - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) # only make choices for vehicles that have not been selected yet choosers = choosers[choosers["vehicle_num"] == veh_num] @@ -564,7 +565,6 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): VEHICLE_TYPE_DATA_FILE: str | None = None PROBS_SPEC: str | None = None combinatorial_alts: dict | None = None - preprocessor: PreprocessorSettings | None = None alts_preprocessor: PreprocessorSettings | None = None SIMULATION_TYPE: Literal[ "simple_simulate", "interaction_simulate" @@ -576,10 +576,6 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): COLS_TO_INCLUDE_IN_ALTS_TABLE: list[str] = [] """Columns to include in the alternatives table for use in utility calculations.""" - annotate_households: PreprocessorSettings | None = None - annotate_persons: 
PreprocessorSettings | None = None - annotate_vehicles: PreprocessorSettings | None = None - REQUIRE_DATA_FOR_ALL_ALTS: bool = False WRITE_OUT_ALTS_FILE: bool = False @@ -708,14 +704,6 @@ def vehicle_type_choice( vehicles = pd.concat([vehicles, choices], axis=1) state.add_table("vehicles", vehicles) - # - annotate tables - if model_settings.annotate_households: - annotate_vehicle_type_choice_households(state, model_settings, trace_label) - if model_settings.annotate_persons: - annotate_vehicle_type_choice_persons(state, model_settings, trace_label) - if model_settings.annotate_vehicles: - annotate_vehicle_type_choice_vehicles(state, model_settings, trace_label) - tracing.print_summary( "vehicle_type_choice", vehicles.vehicle_type, value_counts=True ) @@ -724,3 +712,11 @@ def vehicle_type_choice( state.tracing.trace_df( vehicles, label="vehicle_type_choice", warn_if_empty=True ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 8b96dafa13..6b8f8d7815 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -18,6 +18,7 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings + logger = logging.getLogger("activitysim") @@ -26,9 +27,6 @@ class WorkFromHomeSettings(LogitComponentSettings, extra="forbid"): Settings for the `work_from_home` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - WORK_FROM_HOME_ALT: int """Value that specify if the person is working from home""" # TODO @@ -88,20 +86,14 @@ def work_from_home( constants = config.get_model_constants(model_settings) work_from_home_alt = model_settings.WORK_FROM_HOME_ALT - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -221,3 +213,11 @@ def work_from_home( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index d03bcab778..7688ac1bb3 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -77,6 +77,31 @@ class BaseLogitComponentSettings(PydanticCompute): CONSTANTS: dict[str, Any] = {} """Named constants usable in the utility expressions.""" + preprocessor: PreprocessorSettings | list[PreprocessorSettings] | None = None + """Chooser preprocessor settings. + + This is a set of expressions to be evaluated on the choosers + before the logit model is run. It is used to prepare the choosers + for the logit model by adding columns that are used in the + utility expressions. 
+ """ + + annotate_households: PreprocessorSettings | None = None + """Annotate households output tables with additional columns. + + These settings are used to add additional columns to the output tables + after the logit model is run. They are typically used to add + additional attributes that are derived from the model results. + """ + annotate_persons: PreprocessorSettings | None = None + """Annotate persons output tables with additional columns.""" + annotate_tours: PreprocessorSettings | None = None + """Annotate tours output tables with additional columns.""" + annotate_trips: PreprocessorSettings | None = None + """Annotate trips output tables with additional columns.""" + annotate_vehicles: PreprocessorSettings | None = None + """Annotate vehicles output tables with additional columns.""" + # sharrow_skip is deprecated in factor of compute_settings.sharrow_skip @model_validator(mode="before") @classmethod @@ -196,6 +221,11 @@ class LocationComponentSettings(BaseLogitComponentSettings): If less than 1, use this fraction of the total number of rows. 
""" + alts_preprocessor_sample: PreprocessorSettings | None = None + """Alternatives preprocessor settings to use when sampling alternatives.""" + alts_preprocessor_simulate: PreprocessorSettings | None = None + """Alternatives preprocessor settings to use when simulating choices.""" + class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): # Logsum-related settings @@ -207,7 +237,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SEGMENTS: list[str] | None = None SIZE_TERM_SELECTOR: str | None = None - annotate_tours: PreprocessorSettings | None = None CHOOSER_FILTER_COLUMN_NAME: str | None = None DEST_CHOICE_COLUMN_NAME: str | None = None @@ -221,8 +250,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SEGMENT_IDS: dict[str, int] | dict[str, str] | dict[str, bool] | None = None SHADOW_PRICE_TABLE: str | None = None MODELED_SIZE_TABLE: str | None = None - annotate_persons: PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None SIMULATE_CHOOSER_COLUMNS: list[str] | None = None ALT_DEST_COL_NAME: str LOGSUM_TOUR_PURPOSE: str | dict[str, str] | None = None @@ -249,8 +276,6 @@ class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid") COMPUTE_TRIP_MODE_CHOICE_LOGSUMS: bool = False tvpb_mode_path_types: dict[str, Any] | None = None FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True - annotate_tours: PreprocessorSettings | None = None - preprocessor: PreprocessorSettings | list[PreprocessorSettings] | None = None nontour_preprocessor: PreprocessorSettings | list[ PreprocessorSettings ] | None = None diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index 413636d3f4..e6ef0b8e69 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -6,7 +6,7 @@ import pandas as pd -from activitysim.core import assign, simulate, tracing, workflow +from activitysim.core import 
config, assign, simulate, tracing, workflow from activitysim.core.configuration.base import PreprocessorSettings, PydanticBase from activitysim.core.util import ( assign_in_place, @@ -184,26 +184,65 @@ def assign_columns( def annotate_preprocessors( state: workflow.State, df: pd.DataFrame, - locals_dict, - skims, + locals_dict: dict, + skims: dict | None, model_settings: PydanticBase | dict, trace_label: str, + preprocessor_setting_name: str = "preprocessor", ): - locals_d = {} - locals_d.update(locals_dict) - locals_d.update(skims) + """ + Look through the preprocessor settings and apply the calculations to the dataframe. + This is generally called before the main model calculations to prepare the data. + + Parameters + ---------- + state : workflow.State + The current state of the workflow. + df : pd.DataFrame + DataFrame to which the preprocessor settings will be applied. + locals_dict : dict + Dictionary of local variables to be used in the expressions. + skims : dict | None + Dictionary of skims to be used in the expressions. + model_settings : PydanticBase | dict + Model settings containing the preprocessor settings. + trace_label : str + Label for tracing the operations. + preprocessor_setting_name : str + Name of the preprocessor settings key in the model settings. 
+ + Returns + ------- + None -- dataframe is modified in place + + """ + if isinstance(model_settings, PydanticBase): + preprocessor_settings = getattr(model_settings, preprocessor_setting_name, []) + elif isinstance(model_settings, dict): + preprocessor_settings = model_settings.get(preprocessor_setting_name, []) + else: + raise ValueError( + f"Expected model_settings to be PydanticBase or dict, got {type(model_settings)}" + ) + + if not preprocessor_settings: + return - try: - preprocessor_settings = model_settings.preprocessor - except AttributeError: - preprocessor_settings = model_settings.get("preprocessor", []) - if preprocessor_settings is None: - preprocessor_settings = [] if not isinstance(preprocessor_settings, list): assert isinstance(preprocessor_settings, dict | PreprocessorSettings) preprocessor_settings = [preprocessor_settings] - simulate.set_skim_wrapper_targets(df, skims) + locals_d = {} + locals_d.update(locals_dict or {}) + if skims: + try: + simulate.set_skim_wrapper_targets(df, skims) + locals_d.update(skims) + except AssertionError as e: + logger.warning( + "Failed to set skim wrapper targets: %s. Skims wrappers may not be used in expressions.", + e, + ) for preproc_settings in preprocessor_settings: results = compute_columns( @@ -211,7 +250,9 @@ def annotate_preprocessors( df=df, model_settings=preproc_settings, locals_dict=locals_d, - trace_label=trace_label, + trace_label=tracing.extend_trace_label( + trace_label, preprocessor_setting_name + ), ) assign_in_place( @@ -219,6 +260,85 @@ def annotate_preprocessors( ) +def annotate_tables( + state: workflow.State, + model_settings: PydanticBase | dict, + trace_label: str, + skims: dict | None = None, + locals_dict: dict | None = None, +): + """ + Look through the annotate settings and apply the calculations to the tables. + This is generally called after the main model calculations to add data to output tables. 
+ + Parameters + ---------- + state : workflow.State + The current state of the workflow. + model_settings : PydanticBase | dict + Model settings containing the annotation settings for various tables. + trace_label : str + Label for tracing the operations. + skims : dict | None + Dictionary of skims to be used in the expressions, if applicable. + locals_dict : dict | None + Dictionary of local variables to be used in the expressions, if applicable. + + Returns + ------- + None -- tables are modified in place + """ + + # process tables in least to most aggregated order + tables = ["trips", "tours", "vehicles", "persons", "households"] + + for table_name in tables: + if isinstance(model_settings, PydanticBase): + annotate_settings = getattr(model_settings, f"annotate_{table_name}", None) + elif isinstance(model_settings, dict): + annotate_settings = model_settings.get(f"annotate_{table_name}", None) + else: + raise ValueError( + f"Expected model_settings to be PydanticBase or dict, got {type(model_settings)}" + ) + + if annotate_settings is None: + continue + assert isinstance( + annotate_settings, (dict, PreprocessorSettings) + ), f"Expected annotate_{table_name} to be dict or PreprocessorSettings, got {type(annotate_settings)}" + + df = state.get_dataframe(table_name) + + locals_d = {} + if skims: + try: + simulate.set_skim_wrapper_targets(df, skims) + locals_d.update(skims) + except AssertionError as e: + logger.warning( + "Failed to set skim wrapper targets: %s. 
Skims wrappers may not be used in expressions.", + e, + ) + if locals_dict: + locals_d.update(locals_dict) + + results = compute_columns( + state, + df=df, + model_settings=annotate_settings, + locals_dict=locals_d, + trace_label=tracing.extend_trace_label(trace_label, f"annotate_{table_name}"), + ) + + assign_in_place( + df, results, state.settings.downcast_int, state.settings.downcast_float + ) + + # write table with new columns back to state + state.add_table(table_name, df) + + def filter_chooser_columns(choosers, chooser_columns): missing_columns = [c for c in chooser_columns if c not in choosers] if missing_columns: diff --git a/activitysim/core/test/configs/preprocessor.csv b/activitysim/core/test/configs/preprocessor.csv new file mode 100644 index 0000000000..e2f5c29b97 --- /dev/null +++ b/activitysim/core/test/configs/preprocessor.csv @@ -0,0 +1,8 @@ +Description,Target,Expression +income from households table,_hh_income,"reindex(households.income, df.household_id)" +income test,is_high_income,_hh_income > 50000 +count persons test,num_persons,persons.groupby('household_id').size().reindex(df.household_id) +skim dict test,od_distance,"skim_dict.lookup(df.origin, df.destination, 'DIST')" +skim wrapper test,od_distance_wrapper,skims2d['DIST'] +sov time,od_sov_time,skims3d['SOV_TIME'] +testing constant from locals_dict,constant_test,test_constant / 2 \ No newline at end of file diff --git a/activitysim/core/test/test_preprocessing.py b/activitysim/core/test/test_preprocessing.py new file mode 100644 index 0000000000..06c7785603 --- /dev/null +++ b/activitysim/core/test/test_preprocessing.py @@ -0,0 +1,197 @@ +# ActivitySim +# See full license in LICENSE.txt. 
+from __future__ import annotations + +import logging +import logging.config +import os.path + +import numpy as np +import pandas as pd +import pytest + +from activitysim.core import workflow, expressions, los +from activitysim.core.configuration.base import PreprocessorSettings + + +def add_canonical_dirs(configs_dir_name): + state = workflow.State() + los_configs_dir = os.path.join(os.path.dirname(__file__), f"los/{configs_dir_name}") + configs_dir = os.path.join(os.path.dirname(__file__), "configs") + data_dir = os.path.join(os.path.dirname(__file__), f"los/data") + output_dir = os.path.join(os.path.dirname(__file__), "output") + state.initialize_filesystem( + working_dir=os.path.dirname(__file__), + configs_dir=(los_configs_dir, configs_dir), + output_dir=output_dir, + data_dir=(data_dir,), + ) + return state + + +@pytest.fixture +def state() -> workflow.State: + state = add_canonical_dirs("configs_1z").load_settings() + network_los = los.Network_LOS(state) + network_los.load_data() + state.set("skim_dict", network_los.get_default_skim_dict()) + return state + + +@pytest.fixture(scope="module") +def households(): + return pd.DataFrame( + { + "household_id": [1, 2, 3], + "home_zone_id": [1, 2, 3], + "income": [50000, 60000, 70000], + } + ).set_index("household_id") + + +@pytest.fixture(scope="module") +def persons(): + return pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5], + "household_id": [1, 1, 2, 2, 3], + "age": [25, 30, 22, 28, 35], + } + ).set_index("person_id") + + +@pytest.fixture(scope="module") +def tours(): + return pd.DataFrame( + { + "tour_id": [1, 2, 3], + "household_id": [1, 2, 3], + "person_id": [1, 2, 3], + "tour_type": ["work", "shopping", "othmaint"], + "origin": [1, 2, 3], + "destination": [2, 3, 1], + "period": ["AM", "PM", "AM"], + } + ).set_index("tour_id") + + +def check_outputs(tours): + """ + Check that the tours DataFrame has the expected new columns and values + according to the preprocessor / annotator expressions. 
+ """ + new_cols = [ + "is_high_income", + "num_persons", + "od_distance", + "od_distance_wrapper", + "od_sov_time", + "constant_test", + ] + + # check all new columns are added + assert all( + col in tours.columns for col in new_cols + ), f"Missing columns: {set(new_cols) - set(tours.columns)}" + + # column with _ shouldn't be in the columns + assert ( + "_hh_income" not in tours.columns + ), f"Unexpected column found: _hh_income in {tours.columns}" + + # check the values in the new columns + exppected_output = pd.DataFrame( + { + "tour_id": [1, 2, 3], + "is_high_income": [False, True, True], + "num_persons": [2, 2, 1], + "od_distance": [0.24, 0.28, 0.57], + "od_distance_wrapper": [0.24, 0.28, 0.57], + "od_sov_time": [0.78, 0.89, 1.76], + "constant_test": [21, 21, 21], + } + ).set_index("tour_id") + pd.testing.assert_frame_equal(tours[new_cols], exppected_output, check_dtype=False) + + +def setup_skims(state: workflow.State): + """Creates a set of skim wrappers to test in expressions.""" + skim_dict = state.get("skim_dict") + skims3d = skim_dict.wrap_3d( + orig_key="origin", dest_key="destination", dim3_key="period" + ) + skims2d = skim_dict.wrap("origin", "destination") + return {"skims3d": skims3d, "skims2d": skims2d} + + +def test_preprocessor(state: workflow.State, households, persons, tours): + # adding dataframes to state so they can be accessed in preprocessor + state.add_table("households", households) + state.add_table("persons", persons) + original_tours = tours.copy() + state.add_table("tours", original_tours) + + # defining preprocessor + preprocessor_settings = PreprocessorSettings( + SPEC="preprocessor.csv", + DF="tours", + TABLES=["persons", "households"], + ) + model_settings = {"preprocessor": preprocessor_settings} + + # annotating preprocessors + expressions.annotate_preprocessors( + state, + df=tours, + locals_dict={"test_constant": 42}, + skims=setup_skims(state), + model_settings=model_settings, + trace_label="ci_test_preprocessor", + ) + + 
check_outputs(tours) + + state_tours = state.get_table("tours") + # check that the state table is not modified + pd.testing.assert_frame_equal(state_tours, original_tours) + + +def test_annotator(state, households, persons, tours): + # adding dataframes to state so they can be accessed in annotator + state.add_table("households", households) + state.add_table("persons", persons) + original_tours = tours.copy() + state.add_table("tours", original_tours) + + # defining annotator + annotator_settings = PreprocessorSettings( + SPEC="preprocessor.csv", + DF="tours", + TABLES=["persons", "households"], + ) + model_settings = {"annotate_tours": annotator_settings} + + # annotating preprocessors + expressions.annotate_tables( + state, + model_settings=model_settings, + trace_label="ci_test_annotator", + skims=setup_skims(state), + locals_dict={"test_constant": 42}, + ) + + # outputs now put directly into the state object + check_outputs(state.get_table("tours")) + + # test what happens if we try to annotate a table that does not exist + model_settings = {"annotate_trips": annotator_settings} + + with pytest.raises(ValueError) as excinfo: + # this should raise an error because "trips" table does not exist in state + expressions.annotate_tables( + state, + model_settings=model_settings, + trace_label="ci_test_annotator", + skims=None, + locals_dict={"test_constant": 42}, + )