12 changes: 7 additions & 5 deletions .github/workflows/core_tests.yml
@@ -36,7 +36,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -128,6 +128,8 @@ jobs:
run: |
uv run pytest --pyargs activitysim.cli

- run: uv run pytest test/test_skim_name_conflicts.py
- run: uv run pytest test/random_seed/test_random_seed.py

builtin_regional_models:
needs: foundation
@@ -166,7 +168,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -235,7 +237,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -277,7 +279,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -310,7 +312,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
65 changes: 63 additions & 2 deletions activitysim/core/skim_dataset.py
@@ -3,6 +3,7 @@
import glob
import logging
import os
import re
import time
from functools import partial
from pathlib import Path
@@ -499,8 +500,6 @@ def _apply_digital_encoding(dataset, digital_encodings):
As modified
"""
if digital_encodings:
import re

# apply once, before saving to zarr, will stick around in cache
for encoding in digital_encodings:
logger.info(f"applying zarr digital-encoding: {encoding}")
@@ -753,6 +752,68 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
d = None # skims are not stored in shared memory, so we need to load them
do_not_save_zarr = False

potential_conflicts = network_los_preload.skims_info.get(skim_tag).skim_conflicts
if potential_conflicts:
# There are some conflicts in the skims, where both time-dependent
# and time-agnostic skims with the same name are present. We need
# to check that we have sufficient ignore rules in place to correct
# this condition.

def _should_ignore(ignore, x):
if isinstance(ignore, str):
ignore = [ignore]
if ignore is not None:
for i in ignore:
if re.match(i, x):
return True
return False

ignore = state.settings.omx_ignore_patterns
problems = []
for time_agnostic, time_dependent in potential_conflicts.items():
# option 1, ignore all the time-dependent skims
# if this is fulfilled, we are ok and can proceed
if all((_should_ignore(ignore, i)) for i in time_dependent):
continue
# option 2, ignore the time-agnostic skim
# if this is fulfilled, we are ok and can proceed
if _should_ignore(ignore, time_agnostic):
continue
# otherwise, we have a problem. collect all the problems
# and raise an error at the end listing all of them
problems.append(time_agnostic)
if problems:
solution_1 = "__.+'\n - '^".join(problems)
solution_2 = "$'\n - '^".join(problems)
# we have a problem, raise an error
error_message = (
f"skims {problems} are present in both time-dependent and time-agnostic formats.\n"
"Please add ignore rules to the omx_ignore_patterns setting to resolve this issue.\n"
"To ignore the time dependent skims, add the following to your settings file:\n"
"\n"
"omx_ignore_patterns:\n"
f" - '^{solution_1}__.+'\n"
"\n"
"To ignore the time agnostic skims, add the following to your settings file:\n"
"\n"
"omx_ignore_patterns:\n"
f" - '^{solution_2}$'\n"
"\n"
"You can also do some variation or combination of the two, as long as you resolve\n"
"the conflict(s). In addition, note that minor edits to model spec files may be\n"
"needed to accommodate these changes in how skim data is represented (e.g. changing\n"
"`odt_skims` to `od_skims`, or similar modifications wherever the offending variable\n"
"names are used). Alternatively, you can modify the skim data in the source files to\n"
"remove the naming conflicts, which is typically done upstream of ActivitySim in\n"
"whatever tool you are using to create the skims in the first place.\n"
"\n"
"See [https://activitysim.github.io/?q=skims] for more information.\n"
)
# write the error message to the log
logger.error(error_message)
# raise an error to stop the run, put the entire error message there also
raise ValueError(error_message)

if d is None:
time_periods = _dedupe_time_periods(network_los_preload)
if zarr_file:
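As an aside on the conflict-resolution guidance in the error message above, the short sketch below (not part of the diff) illustrates how the two suggested omx_ignore_patterns styles behave. The skim names DIST, DIST__AM, DIST__PM, and DISTWALK are hypothetical examples, and should_ignore simply mirrors the _should_ignore helper added in this file.

import re

# Hypothetical skim names showing the conflict: a time-agnostic "DIST"
# alongside time-dependent "DIST__AM" / "DIST__PM" ("DISTWALK" is included
# to show a name that does not conflict).
skim_names = ["DIST", "DIST__AM", "DIST__PM", "DISTWALK"]

# Option 1 from the error message: ignore the time-dependent variants.
ignore_time_dependent = ["^DIST__.+"]

# Option 2 from the error message: ignore the time-agnostic variant.
ignore_time_agnostic = ["^DIST$"]


def should_ignore(patterns, name):
    # Mirrors the _should_ignore helper above; re.match anchors at the start
    # of the string, so the leading '^' is merely explicit.
    return any(re.match(p, name) for p in patterns)


print([n for n in skim_names if not should_ignore(ignore_time_dependent, n)])
# -> ['DIST', 'DISTWALK']  (only the time-agnostic skim survives)

print([n for n in skim_names if not should_ignore(ignore_time_agnostic, n)])
# -> ['DIST__AM', 'DIST__PM', 'DISTWALK']  (only the time-dependent skims survive)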
65 changes: 58 additions & 7 deletions activitysim/core/skim_dict_factory.py
@@ -8,6 +8,7 @@
import os
import warnings
from abc import ABC
from collections import defaultdict

import numpy as np
import openmatrix as omx
@@ -86,6 +87,7 @@ def __init__(self, state, skim_tag, network_los):
self.offset_map_name = None
self.offset_map = None
self.omx_keys = None
self.skim_conflicts = None
self.base_keys = None
self.block_offsets = None

@@ -117,7 +119,12 @@ def load_skim_info(self, state, skim_tag):
logger.debug(f"load_skim_info {skim_tag} reading {omx_file_path}")

with omx.open_file(omx_file_path, mode="r") as omx_file:
# fixme call to omx_file.shape() failing in windows p3.5

# Check the shape of the skims. All skim files loaded within this
# loop need to have the same shape. For the first file, the shape
# is stored in the omx_shape attribute, so we know what shape to
# expect. For subsequent files, we check that the shape matches
# the first file's; if not, we raise an error.
if self.omx_shape is None:
self.omx_shape = tuple(
int(i) for i in omx_file.shape()
@@ -127,13 +134,24 @@
int(i) for i in omx_file.shape()
), f"Mismatch shape {self.omx_shape} != {omx_file.shape()}"

# Check that all the matrix names are unique across all the
# omx files. This check only looks at the name as stored in the
# file, and does not process any time period transformations.
# If duplicate names are found, a warning is issued. This is not
# a fatal error, but it is inefficient and may be a symptom of a
# deeper problem.
for skim_name in omx_file.listMatrices():
if skim_name in self.omx_manifest:
warnings.warn(
f"duplicate skim '{skim_name}' found in {self.omx_manifest[skim_name]} and {omx_file.filename}"
)
self.omx_manifest[skim_name] = omx_file_path

# We load the offset map if it exists. This is expected to be
# a 1D array of integers that gives ID values for each TAZ
# in the skims. ActivitySim expects there to be only one such
# mapping, although it can appear multiple times (e.g. once in
# each file).
for m in omx_file.listMappings():
if self.offset_map is None:
self.offset_map_name = m
@@ -146,21 +164,54 @@
f"Multiple mappings in omx file: {self.offset_map_name} != {m}"
)

# - omx_keys dict maps skim key to omx_key
# DISTWALK: DISTWALK
# ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM, ...
# Create the `omx_keys` mapping, which connects skim key to omx_key.
# The skim key is either a single string that names a skim that is not
# time-dependent, or a 2-tuple of strings which names a skim and a time
# period. The omx_key is the original name of the skim in the omx file.
# For non-time-dependent skims, the omx_key is the same as the skim key,
# e.g. DISTWALK: DISTWALK. For time-dependent skims, the omx_key is the
# skim key with the time period appended,
# e.g. ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM.
self.omx_keys = dict()
for skim_name in self.omx_manifest.keys():
key1, sep, key2 = skim_name.partition("__")

# - ignore composite tags not in dim3_tags_to_load
if dim3_tags_to_load and sep and key2 not in dim3_tags_to_load:
# If a skim is found that has a time period that is not one of
# the known named time periods, a warning is issued, and that
# skim is ignored. This is not a fatal error, but it may be a
# symptom of a deeper problem.
warnings.warn(f"skim '{key1}' has unknown time period '{key2}'")
continue

skim_key = (key1, key2) if sep else key1

self.omx_keys[skim_key] = skim_name

# Create the skim_conflicts mapping, which identifies any skims that have
# both time-dependent and time-agnostic versions. This condition in and of
# itself is not a fatal error, as it is possible to have both types of skims
# in the same data when using the legacy codebase. When using skim_dataset
# instead of skim_dictionary (the former is required when using sharrow),
# this condition is no longer allowed, although we can potentially recover
# if the user has specified instructions that certain skim variables are not
# to be loaded. That recovery option is checked later, in the skim_dataset
# module, as that is where the skim variables are actually loaded.
time_dependent_skims = defaultdict(set)
time_agnostic_skims = set()
for k, v in self.omx_keys.items():
if isinstance(k, tuple):
time_dependent_skims[k[0]].add(v)
else:
time_agnostic_skims.add(k)
self.skim_conflicts = {
k: v for k, v in time_dependent_skims.items() if k in time_agnostic_skims
}
if self.skim_conflicts:
msg = "some skims have both time-dependent and time-agnostic versions:"
for k in self.skim_conflicts:
msg += f"\n- {k}"
warnings.warn(msg)

# Count the number of skims found across all the omx files
self.num_skims = len(self.omx_keys)

# - key1_subkeys dict maps key1 to dict of subkeys with that key1
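For additional context on the new conflict detection in load_skim_info, here is a minimal standalone sketch (not part of the diff) of how matrix names are split on the "__" separator into omx_keys and how the skim_conflicts mapping is derived; the matrix names used are hypothetical.

from collections import defaultdict

# Hypothetical matrix names as they might appear across the OMX files:
# SOV_TIME exists both with and without time-period suffixes.
matrix_names = ["DISTWALK", "SOV_TIME__AM", "SOV_TIME__PM", "SOV_TIME"]

omx_keys = {}
for skim_name in matrix_names:
    key1, sep, key2 = skim_name.partition("__")
    # a 2-tuple key for time-dependent skims, a plain string otherwise
    skim_key = (key1, key2) if sep else key1
    omx_keys[skim_key] = skim_name

time_dependent_skims = defaultdict(set)
time_agnostic_skims = set()
for k, v in omx_keys.items():
    if isinstance(k, tuple):
        time_dependent_skims[k[0]].add(v)
    else:
        time_agnostic_skims.add(k)

skim_conflicts = {
    k: v for k, v in time_dependent_skims.items() if k in time_agnostic_skims
}
print(skim_conflicts)
# -> {'SOV_TIME': {'SOV_TIME__AM', 'SOV_TIME__PM'}}  (set ordering may vary)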