96 changes: 79 additions & 17 deletions activitysim/abm/models/location_choice.py
@@ -17,6 +17,7 @@
)
from activitysim.core.interaction_sample import interaction_sample
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
from activitysim.core.util import reindex

from .util import estimation
from .util import logsums as logsum
@@ -138,22 +139,17 @@ def _location_sample(
logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

sample_size = model_settings["SAMPLE_SIZE"]
if config.setting("disable_destination_sampling", False) or (
estimator and estimator.want_unsampled_alternatives
):
# FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
logger.info(
"Estimation mode for %s using unsampled alternatives short_circuit_choices"
% (trace_label,)
)
sample_size = 0
if estimator:
sample_size = model_settings.get("ESTIMATION_SAMPLE_SIZE", 0)
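# ESTIMATION_SAMPLE_SIZE (if provided) allows sampled alternatives even in
# estimation mode; the default of 0 keeps the full unsampled alternative set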

locals_d = {
"skims": skims,
"segment_size": segment_name,
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": inject.get_table("land_use").to_frame(),
}
constants = config.get_model_constants(model_settings)
locals_d.update(constants)
@@ -470,6 +466,38 @@ def run_location_sample(
trace_label=trace_label,
)

# FIXME temporary code to ensure sampled alternative is in choices for estimation
# Hack to get shorter run times when you don't care about creating EDB for location choice models
if estimator:
# grabbing survey values
survey_persons = estimation.manager.get_survey_table("persons")
if "school_location" in trace_label:
survey_choices = survey_persons["school_zone_id"].reset_index()
elif ("workplace_location" in trace_label) and ("external" not in trace_label):
survey_choices = survey_persons["workplace_zone_id"].reset_index()
else:
return choices
survey_choices.columns = ["person_id", "alt_dest"]
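# keep only choosers in this batch that report a valid (positive) survey destination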
survey_choices = survey_choices[
survey_choices["person_id"].isin(choices.index)
& (survey_choices.alt_dest > 0)
]
# merging survey destination into table if not available
joined_data = survey_choices.merge(
choices, on=["person_id", "alt_dest"], how="left", indicator=True
)
missing_rows = joined_data[joined_data["_merge"] == "left_only"].copy()
missing_rows["pick_count"] = 1
if len(missing_rows) > 0:
new_choices = missing_rows[
["person_id", "alt_dest", "prob", "pick_count"]
].set_index("person_id")
choices = pd.concat([choices, new_choices]).sort_index()
# making probability the mean of all other sampled destinations by person
choices["prob"] = choices["prob"].fillna(
choices.groupby("person_id")["prob"].transform("mean")
)

return choices


@@ -601,6 +629,8 @@ def run_location_simulate(
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": inject.get_table("land_use").to_frame(),
}
constants = config.get_model_constants(model_settings)
if constants is not None:
@@ -808,6 +838,24 @@ def run_location_choice(
)
tracing.trace_df(choices_df, estimation_trace_label)

if want_logsums and not skip_choice:
# grabbing index, could be person_id or proto_person_id
index_name = choices_df.index.name
# merging mode choice logsum of chosen alternative to choices
choices_df = (
pd.merge(
choices_df.reset_index(),
location_sample_df.reset_index()[
[index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]
],
how="left",
left_on=[index_name, "choice"],
right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]],
)
.drop(columns=model_settings["ALT_DEST_COL_NAME"])
.set_index(index_name)
)

choices_list.append(choices_df)

if want_sample_table:
@@ -825,7 +873,7 @@
else:
# this will only happen with small samples (e.g. singleton) with no (e.g.) school segs
logger.warning("%s no choices", trace_label)
choices_df = pd.DataFrame(columns=["choice", "logsum"])
choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM])

if len(sample_list) > 0:
save_sample_df = pd.concat(sample_list)
@@ -869,7 +917,8 @@ def iterate_location_choice(
Returns
-------
adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME'] - if provided
adds mode choice logsum column (for the chosen destination) model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME'] - if provided
adds annotations to persons table
"""

@@ -879,7 +928,11 @@
chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"]

dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"]
logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME")
dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME")
mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME")
want_logsums = (dc_logsum_column_name is not None) or (
mc_logsum_column_name is not None
)

sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME")
want_sample_table = (
@@ -929,7 +982,7 @@
persons_merged_df_,
network_los,
shadow_price_calculator=spc,
want_logsums=logsum_column_name is not None,
want_logsums=want_logsums,
want_sample_table=want_sample_table,
estimator=estimator,
model_settings=model_settings,
@@ -1005,10 +1058,15 @@ def iterate_location_choice(
)

# add the dest_choice_logsum column to persons dataframe
if logsum_column_name:
persons_df[logsum_column_name] = (
if dc_logsum_column_name:
persons_df[dc_logsum_column_name] = (
choices_df["logsum"].reindex(persons_df.index).astype("float")
)
# add the mode choice logsum column to persons dataframe
if mc_logsum_column_name:
persons_df[mc_logsum_column_name] = (
choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float")
)

if save_sample_df is not None:
# might be None for tiny samples even if sample_table_name was specified
@@ -1047,9 +1105,13 @@
if trace_hh_id:
tracing.trace_df(households_df, label=trace_label, warn_if_empty=True)

if logsum_column_name:
if dc_logsum_column_name:
tracing.print_summary(
dc_logsum_column_name, choices_df["logsum"], value_counts=True
)
if mc_logsum_column_name:
tracing.print_summary(
logsum_column_name, choices_df["logsum"], value_counts=True
mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True
)

return persons_df
101 changes: 99 additions & 2 deletions activitysim/estimation/larch/location_choice.py
@@ -1,6 +1,8 @@
import os
from pathlib import Path
from typing import Collection
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
@@ -44,6 +46,8 @@ def location_choice_model(
settings_file="{name}_model_settings.yaml",
landuse_file="{name}_landuse.csv",
return_data=False,
alt_values_to_feather=False,
chunking_size=None,
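# alt_values_to_feather: cache the alternatives table as a .fea file after
# the first csv read; chunking_size: approximate max rows per chunk when
# converting alt_values with cv_to_ca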
):
model_selector = name.replace("_location", "")
model_selector = model_selector.replace("_destination", "")
@@ -57,12 +61,42 @@ def _read_csv(filename, **kwargs):
filename = filename.format(name=name)
return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

def _read_feather(filename, **kwargs):
filename = filename.format(name=name)
return pd.read_feather(os.path.join(edb_directory, filename), **kwargs)

def _to_feather(df, filename, **kwargs):
filename = filename.format(name=name)
return df.to_feather(os.path.join(edb_directory, filename), **kwargs)

def _read_pickle(filename, **kwargs):
filename = filename.format(name=name)
return pd.read_pickle(os.path.join(edb_directory, filename), **kwargs)

def _to_pickle(df, filename, **kwargs):
filename = filename.format(name=name)
return df.to_pickle(os.path.join(edb_directory, filename), **kwargs)

def _file_exists(filename):
filename = filename.format(name=name)
return os.path.exists(os.path.join(edb_directory, filename))

coefficients = _read_csv(
coefficients_file,
index_col="coefficient_name",
)
spec = _read_csv(spec_file, comment="#")
alt_values = _read_csv(alt_values_file)

# read alternative values either as csv or feather file
alt_values_fea_file = alt_values_file.replace(".csv", ".fea")
if os.path.exists(
os.path.join(edb_directory, alt_values_fea_file.format(name=name))
):
alt_values = _read_feather(alt_values_fea_file)
else:
alt_values = _read_csv(alt_values_file)
if alt_values_to_feather:
_to_feather(df=alt_values, filename=alt_values_fea_file)
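# feather round-trips much faster than csv parsing, so later runs
# load the (typically large) alternatives table quickly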
chooser_data = _read_csv(chooser_file)
landuse = _read_csv(landuse_file, index_col="zone_id")
master_size_spec = _read_csv(size_spec_file)
@@ -106,6 +140,9 @@ def _read_csv(filename, **kwargs):
.set_index("segment")
)
size_spec = size_spec.loc[:, size_spec.max() > 0]
assert (
len(size_spec) > 0
), f"Empty size_spec, is model_selector {SIZE_TERM_SELECTOR} in your size term file?"

size_coef = size_coefficients_from_spec(size_spec)

@@ -148,7 +185,48 @@ def _read_csv(filename, **kwargs):

chooser_index_name = chooser_data.columns[0]
x_co = chooser_data.set_index(chooser_index_name)
x_ca = cv_to_ca(alt_values.set_index([chooser_index_name, alt_values.columns[1]]))

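# split sequence a into n roughly equal chunks; the first len(a) % n chunks
# get one extra element, e.g. list(split([1, 2, 3, 4, 5], 2)) -> [[1, 2, 3], [4, 5]]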
def split(a, n):
k, m = divmod(len(a), n)
return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n))

# process x_ca with cv_to_ca with or without chunking
x_ca_pickle_file = "{name}_x_ca.pkl"
if chunking_size is None:
x_ca = cv_to_ca(
alt_values.set_index([chooser_index_name, alt_values.columns[1]])
)
elif _file_exists(x_ca_pickle_file):
# if a pickle file from previous x_ca processing exists, load it to save time
time_start = datetime.now()
x_ca = _read_pickle(x_ca_pickle_file)
print(
f"x_ca data loaded from {name}_x_ca.pkl - time elapsed {(datetime.now() - time_start).total_seconds()}"
)
else:
time_start = datetime.now()
# calculate num_chunks based on chunking_size (or max number of rows per chunk)
num_chunks = max(1, (len(alt_values) + chunking_size - 1) // chunking_size)
all_chooser_ids = list(alt_values[chooser_index_name].unique())
split_ids = list(split(all_chooser_ids, num_chunks))
x_ca_list = []
for i, chunk_ids in enumerate(split_ids):
alt_values_i = alt_values[alt_values[chooser_index_name].isin(chunk_ids)]
x_ca_i = cv_to_ca(
alt_values_i.set_index([chooser_index_name, alt_values_i.columns[1]])
)
x_ca_list.append(x_ca_i)
print(
f"x_ca_i compute done for chunk {i + 1}/{num_chunks} - time elapsed {(datetime.now() - time_start).total_seconds()}"
)
x_ca = pd.concat(x_ca_list, axis=0)
# save final x_ca result as pickle file to save time for future data loading
_to_pickle(df=x_ca, filename=x_ca_pickle_file)
print(
f"x_ca compute done - time elapsed {(datetime.now() - time_start).total_seconds()}"
)

if CHOOSER_SEGMENT_COLUMN_NAME is not None:
# label segments with names
@@ -214,6 +292,9 @@ def _read_csv(filename, **kwargs):
else:
av = 1

assert len(x_co) > 0, "Empty chooser dataframe"
assert len(x_ca_1) > 0, "Empty alternatives dataframe"

d = DataFrames(co=x_co, ca=x_ca_1, av=av)

m = Model(dataservice=d)
@@ -331,6 +412,14 @@ def workplace_location_model(**kwargs):
)


def external_workplace_location_model(**kwargs):
unused = kwargs.pop("name", None)
return location_choice_model(
name="external_workplace_location",
**kwargs,
)


def school_location_model(**kwargs):
unused = kwargs.pop("name", None)
return location_choice_model(
@@ -367,6 +456,14 @@ def non_mandatory_tour_destination_model(**kwargs):
)


def external_non_mandatory_destination_model(**kwargs):
unused = kwargs.pop("name", None)
return location_choice_model(
name="external_non_mandatory_destination",
**kwargs,
)


def trip_destination_model(**kwargs):
unused = kwargs.pop("name", None)
return location_choice_model(