12 changes: 7 additions & 5 deletions .github/workflows/core_tests.yml
@@ -36,7 +36,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -128,6 +128,8 @@ jobs:
run: |
uv run pytest --pyargs activitysim.cli

- run: uv run pytest test/test_skim_name_conflicts.py
- run: uv run pytest test/random_seed/test_random_seed.py

builtin_regional_models:
needs: foundation
@@ -166,7 +168,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -235,7 +237,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -277,7 +279,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
@@ -310,7 +312,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

- name: Install activitysim
run: |
uv sync --locked --only-group github-action
65 changes: 63 additions & 2 deletions activitysim/core/skim_dataset.py
@@ -3,6 +3,7 @@
import glob
import logging
import os
import re
import time
from functools import partial
from pathlib import Path
@@ -499,8 +500,6 @@ def _apply_digital_encoding(dataset, digital_encodings):
As modified
"""
if digital_encodings:
import re

# apply once, before saving to zarr, will stick around in cache
for encoding in digital_encodings:
logger.info(f"applying zarr digital-encoding: {encoding}")
@@ -753,6 +752,68 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
d = None # skims are not stored in shared memory, so we need to load them
do_not_save_zarr = False

potential_conflicts = network_los_preload.skims_info.get(skim_tag).skim_conflicts
if potential_conflicts:
# There are some conflicts in the skims, where both time-dependent
# and time-agnostic skims with the same name are present. We need
# to check that we have sufficient ignore rules in place to correct
# this condition.

def _should_ignore(ignore, x):
if isinstance(ignore, str):
ignore = [ignore]
if ignore is not None:
for i in ignore:
if re.match(i, x):
return True
return False

ignore = state.settings.omx_ignore_patterns
problems = []
for time_agnostic, time_dependent in potential_conflicts.items():
# option 1, ignore all the time-dependent skims
# if this is fulfilled, we are ok and can proceed
if all((_should_ignore(ignore, i)) for i in time_dependent):
continue
# option 2, ignore the time-agnostic skim
# if this is fulfilled, we are ok and can proceed
if _should_ignore(ignore, time_agnostic):
continue
# otherwise, we have a problem. collect all the problems
# and raise an error at the end listing all of them
problems.append(time_agnostic)
if problems:
solution_1 = "__.+'\n - '^".join(problems)
solution_2 = "$'\n - '^".join(problems)
# we have a problem, raise an error
error_message = (
f"skims {problems} are present in both time-dependent and time-agnostic formats.\n"
"Please add ignore rules to the omx_ignore_patterns setting to resolve this issue.\n"
"To ignore the time dependent skims, add the following to your settings file:\n"
"\n"
"omx_ignore_patterns:\n"
f" - '^{solution_1}__.+'\n"
"\n"
"To ignore the time agnostic skims, add the following to your settings file:\n"
"\n"
"omx_ignore_patterns:\n"
f" - '^{solution_2}$'\n"
"\n"
"You can also do some variation or combination of the two, as long as you resolve\n"
"the conflict(s). In addition, note that minor edits to model spec files may be\n"
"needed to accommodate these changes in how skim data is represented (e.g. changing\n"
"`odt_skims` to `od_skims`, or similar modifications wherever the offending variable\n"
"names are used). Alternatively, you can modify the skim data in the source files to\n"
"remove the naming conflicts, which is typically done upstream of ActivitySim in\n"
"whatever tool you are using to create the skims in the first place.\n"
"\n"
"See [https://activitysim.github.io/?q=skims] for more information.\n"
)
# write the error message to the log
logger.error(error_message)
# raise an error to stop the run, put the entire error message there also
raise ValueError(error_message)

if d is None:
time_periods = _dedupe_time_periods(network_los_preload)
if zarr_file:
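As an aside on the conflict-resolution guidance in the error message above, the short sketch below (not part of the diff) illustrates how the two suggested omx_ignore_patterns styles behave. The skim names DIST, DIST__AM, DIST__PM, and DISTWALK are hypothetical examples, and should_ignore simply mirrors the _should_ignore helper added in this file.

import re

# Hypothetical skim names showing the conflict: a time-agnostic "DIST"
# alongside time-dependent "DIST__AM" / "DIST__PM" ("DISTWALK" is included
# to show a name that does not conflict).
skim_names = ["DIST", "DIST__AM", "DIST__PM", "DISTWALK"]

# Option 1 from the error message: ignore the time-dependent variants.
ignore_time_dependent = ["^DIST__.+"]

# Option 2 from the error message: ignore the time-agnostic variant.
ignore_time_agnostic = ["^DIST$"]


def should_ignore(patterns, name):
    # Mirrors the _should_ignore helper above; re.match anchors at the start
    # of the string, so the leading '^' is merely explicit.
    return any(re.match(p, name) for p in patterns)


print([n for n in skim_names if not should_ignore(ignore_time_dependent, n)])
# -> ['DIST', 'DISTWALK']  (only the time-agnostic skim survives)

print([n for n in skim_names if not should_ignore(ignore_time_agnostic, n)])
# -> ['DIST__AM', 'DIST__PM', 'DISTWALK']  (only the time-dependent skims survive)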
65 changes: 58 additions & 7 deletions activitysim/core/skim_dict_factory.py
@@ -8,6 +8,7 @@
import os
import warnings
from abc import ABC
from collections import defaultdict

import numpy as np
import openmatrix as omx
@@ -86,6 +87,7 @@ def __init__(self, state, skim_tag, network_los):
self.offset_map_name = None
self.offset_map = None
self.omx_keys = None
self.skim_conflicts = None
self.base_keys = None
self.block_offsets = None

@@ -117,7 +119,12 @@ def load_skim_info(self, state, skim_tag):
logger.debug(f"load_skim_info {skim_tag} reading {omx_file_path}")

with omx.open_file(omx_file_path, mode="r") as omx_file:
# fixme call to omx_file.shape() failing in windows p3.5

# Check the shape of the skims. All skim files loaded within this
# loop need to have the same shape. For the first file, the shape
# is stored in the omx_shape attribute, so we know what shape to
# expect. For subsequent files, we check that the shape matches
# the first file's; if not, we raise an error.
if self.omx_shape is None:
self.omx_shape = tuple(
int(i) for i in omx_file.shape()
@@ -127,13 +134,24 @@
int(i) for i in omx_file.shape()
), f"Mismatch shape {self.omx_shape} != {omx_file.shape()}"

# Check that all the matrix names are unique across all the
# omx files. This check only looks at the name as stored in the
# file, and does not process any time period transformations.
# If duplicate names are found, a warning is issued. This is not
# a fatal error, but it is inefficient and may be a symptom of a
# deeper problem.
for skim_name in omx_file.listMatrices():
if skim_name in self.omx_manifest:
warnings.warn(
f"duplicate skim '{skim_name}' found in {self.omx_manifest[skim_name]} and {omx_file.filename}"
)
self.omx_manifest[skim_name] = omx_file_path

# We load the offset map if it exists. This is expected to be
# a 1D array of integers that gives ID values for each TAZ
# in the skims. ActivitySim expects there to be only one such
# mapping, although it can appear multiple times (e.g. once in
# each file).
for m in omx_file.listMappings():
if self.offset_map is None:
self.offset_map_name = m
@@ -146,21 +164,54 @@
f"Multiple mappings in omx file: {self.offset_map_name} != {m}"
)

# - omx_keys dict maps skim key to omx_key
# DISTWALK: DISTWALK
# ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM, ...
# Create the `omx_keys` mapping, which connects skim key to omx_key.
# The skim key is either a single string that names a skim that is not
# time-dependent, or a 2-tuple of strings which names a skim and a time
# period. The omx_key is the original name of the skim in the omx file.
# For non-time-dependent skims, the omx_key is the same as the skim key,
# e.g. DISTWALK: DISTWALK. For time-dependent skims, the omx_key is the
# skim key with the time period appended,
# e.g. ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM.
self.omx_keys = dict()
for skim_name in self.omx_manifest.keys():
key1, sep, key2 = skim_name.partition("__")

# - ignore composite tags not in dim3_tags_to_load
if dim3_tags_to_load and sep and key2 not in dim3_tags_to_load:
# If a skim is found that has a time period that is not one of
# the known named time periods, a warning is issued, and that
# skim is ignored. This is not a fatal error, but it may be a
# symptom of a deeper problem.
warnings.warn(f"skim '{key1}' has unknown time period '{key2}'")
continue

skim_key = (key1, key2) if sep else key1

self.omx_keys[skim_key] = skim_name

# Create the skim_conflicts mapping, which identifies any skims that have
# both time-dependent and time-agnostic versions. This condition in and of
# itself is not a fatal error, as it is possible to have both types of skims
# in the same data when using the legacy codebase. When using skim_dataset
# instead of skim_dictionary (the former is required when using sharrow),
# this condition is no longer allowed, although we can potentially recover
# if the user has specified instructions that certain skim variables are not
# to be loaded. That recovery option is checked later, in the skim_dataset
# module, as that is where the skim variables are actually loaded.
time_dependent_skims = defaultdict(set)
time_agnostic_skims = set()
for k, v in self.omx_keys.items():
if isinstance(k, tuple):
time_dependent_skims[k[0]].add(v)
else:
time_agnostic_skims.add(k)
self.skim_conflicts = {
k: v for k, v in time_dependent_skims.items() if k in time_agnostic_skims
}
if self.skim_conflicts:
msg = "some skims have both time-dependent and time-agnostic versions:"
for k in self.skim_conflicts:
msg += f"\n- {k}"
warnings.warn(msg)

# Count the number of skims found across all the omx files
self.num_skims = len(self.omx_keys)

# - key1_subkeys dict maps key1 to dict of subkeys with that key1
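For additional context on the new conflict detection in load_skim_info, here is a minimal standalone sketch (not part of the diff) of how matrix names are split on the "__" separator into omx_keys and how the skim_conflicts mapping is derived; the matrix names used are hypothetical.

from collections import defaultdict

# Hypothetical matrix names as they might appear across the OMX files:
# SOV_TIME exists both with and without time-period suffixes.
matrix_names = ["DISTWALK", "SOV_TIME__AM", "SOV_TIME__PM", "SOV_TIME"]

omx_keys = {}
for skim_name in matrix_names:
    key1, sep, key2 = skim_name.partition("__")
    # a 2-tuple key for time-dependent skims, a plain string otherwise
    skim_key = (key1, key2) if sep else key1
    omx_keys[skim_key] = skim_name

time_dependent_skims = defaultdict(set)
time_agnostic_skims = set()
for k, v in omx_keys.items():
    if isinstance(k, tuple):
        time_dependent_skims[k[0]].add(v)
    else:
        time_agnostic_skims.add(k)

skim_conflicts = {
    k: v for k, v in time_dependent_skims.items() if k in time_agnostic_skims
}
print(skim_conflicts)
# -> {'SOV_TIME': {'SOV_TIME__AM', 'SOV_TIME__PM'}}  (set ordering may vary)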