diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 1afe9270a6..134242cd67 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -10,7 +10,7 @@ on: - '*' env: - CACHE_NUMBER: 0 # increase to reset cache manually + CACHE_NUMBER: 2 # increase to reset cache manually jobs: foundation: @@ -24,10 +24,10 @@ jobs: name: linux-64-py${{ matrix.python-version }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -38,7 +38,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: /usr/share/miniconda3/envs/asim-test key: linux-64-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -47,19 +47,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -109,12 +96,10 @@ jobs: include: - os: macos-latest label: macOS - prefix: /Users/runner/miniconda3/envs/asim-test python-version: "3.10" - os: windows-latest label: win-64 - prefix: C:\Miniconda3\envs\asim-test python-version: "3.10" defaults: @@ -124,10 +109,10 @@ jobs: name: ${{ matrix.label }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -138,28 +123,15 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: - path: ${{ matrix.prefix }} + path: ${{ env.CONDA }}/envs key: ${{ matrix.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -199,7 +171,6 @@ jobs: builtin_regional_models: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 strategy: @@ -221,10 +192,10 @@ jobs: name: ${{ matrix.region }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -235,28 +206,15 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - 
uses: actions/cache@v3 + - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -301,14 +259,19 @@ jobs: external_regional_models: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 strategy: matrix: - region: - - prototype_mtc - - prototype_psrc_in_development + include: + - region: Standard 1-Zone Example (MTC) + region-org: ActivitySim + region-repo: activitysim-prototype-mtc + region-branch: extended + - region: Standard 2-Zone Example (SANDAG) + region-org: ActivitySim + region-repo: sandag-abm3-example + region-branch: main fail-fast: false defaults: run: @@ -316,10 +279,11 @@ jobs: name: ${{ matrix.region }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Checkout ActivitySim + uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -330,10 +294,10 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: | - ${{ env.mamba-env-prefix }} + ${{ env.CONDA }}/envs ~/.cache/ActivitySim key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache @@ -341,19 +305,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -361,21 +312,28 @@ jobs: # are in the conda environment defined above. Also, this avoids pip getting # confused and reinstalling tables (pytables). run: | - python -m pip install -e . --no-deps + python -m pip install . --no-deps - name: Conda checkup run: | mamba info -a mamba list + - name: Checkout Example + uses: actions/checkout@v4 + with: + repository: '${{ matrix.region-org }}/${{ matrix.region-repo }}' + ref: '${{ matrix.region-branch }}' + path: '${{ matrix.region-repo }}' + - name: Test ${{ matrix.region }} run: | - python -m activitysim test ${{ matrix.region }} + cd ${{ matrix.region-repo }}/test + python -m pytest . 
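The reworked external_regional_models job above no longer runs `python -m activitysim test <region>`; it checks out each example model repository named in the matrix and runs that repository's own pytest suite. A rough local equivalent of those steps, assuming git and pytest are on PATH and activitysim is already installed in the active environment (the org/repo/branch values are the ones from the workflow matrix):

```python
import subprocess

EXAMPLES = [
    # (org, repo, branch) taken from the workflow matrix above
    ("ActivitySim", "activitysim-prototype-mtc", "extended"),
    ("ActivitySim", "sandag-abm3-example", "main"),
]

for org, repo, branch in EXAMPLES:
    # shallow-clone the example model, like actions/checkout with a ref
    subprocess.run(
        ["git", "clone", "--depth=1", "--branch", branch,
         f"https://github.com/{org}/{repo}.git", repo],
        check=True,
    )
    # each example repo carries its own regression tests in a `test` directory
    subprocess.run(["python", "-m", "pytest", "."], cwd=f"{repo}/test", check=True)
```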
random_seed_generation: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 defaults: @@ -385,10 +343,10 @@ runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -399,28 +357,15 @@ - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -442,7 +387,6 @@ jobs: estimation_mode: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 defaults: @@ -451,10 +395,10 @@ name: estimation_mode_test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -465,28 +409,15 @@ - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install Larch @@ -517,7 +448,7 @@ run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # get all tags, lets setuptools_scm do its thing - name: Set up Python 3.10 @@ -525,7 +456,7 @@ with: python-version: "3.10" - name: Install dependencies - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index 4c87eddb78..3c6f98ae29 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -23,10 +23,25 @@ class AccessibilitySettings(PydanticReadable): CONSTANTS: dict[str, Any] = {} land_use_columns: list[str] = [] - """Only include the these columns in the computational tables + """Only include these columns in the computational 
tables. + + This setting joins land use columns to the accessibility destinations. + + Memory usage is reduced by only listing the minimum columns needed by + the SPEC, and nothing extra. + """ + + land_use_columns_orig: list[str] = [] + """Join these land use columns to the origin zones. + + This setting joins land use columns to the accessibility origins. + To disambiguate from the destination land use columns, the names of the + columns added will be prepended with 'landuse_orig_'. Memory usage is reduced by only listing the minimum columns needed by the SPEC, and nothing extra. + + .. versionadded:: 1.3 """ SPEC: str = "accessibility.csv" @@ -58,6 +73,7 @@ def compute_accessibilities_for_zones( state: workflow.State, accessibility_df: pd.DataFrame, land_use_df: pd.DataFrame, + orig_land_use_df: pd.DataFrame | None, assignment_spec: dict, constants: dict, network_los: los.Network_LOS, @@ -72,6 +88,7 @@ def compute_accessibilities_for_zones( state : workflow.State accessibility_df : pd.DataFrame land_use_df : pd.DataFrame + orig_land_use_df : pd.DataFrame | None assignment_spec : dict constants : dict network_los : los.Network_LOS @@ -104,6 +121,12 @@ def compute_accessibilities_for_zones( logger.debug(f"{trace_label}: tiling land_use_columns into od_data") for c in land_use_df.columns: od_data[c] = np.tile(land_use_df[c].to_numpy(), orig_zone_count) + if orig_land_use_df is not None: + logger.debug(f"{trace_label}: repeating orig_land_use_columns into od_data") + for c in orig_land_use_df: + od_data[f"landuse_orig_{c}"] = np.repeat( + orig_land_use_df[c], dest_zone_count + ) logger.debug(f"{trace_label}: converting od_data to DataFrame") od_df = pd.DataFrame(od_data) logger.debug(f"{trace_label}: dropping od_data") @@ -236,6 +259,11 @@ def compute_accessibility( land_use_df = land_use land_use_df = land_use_df[land_use_columns] + if model_settings.land_use_columns_orig: + orig_land_use_df = land_use[model_settings.land_use_columns_orig] + else: + orig_land_use_df = None + logger.info( f"Running {trace_label} with {len(accessibility_df.index)} orig zones " f"{len(land_use_df)} dest zones" @@ -252,10 +280,15 @@ def compute_accessibility( ) in chunk.adaptive_chunked_choosers( state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size ): + if orig_land_use_df is not None: + orig_land_use_df_chunk = orig_land_use_df.loc[chooser_chunk.index] + else: + orig_land_use_df_chunk = None accessibilities = compute_accessibilities_for_zones( state, chooser_chunk, land_use_df, + orig_land_use_df_chunk, assignment_spec, constants, network_los, diff --git a/activitysim/abm/tables/landuse.py b/activitysim/abm/tables/landuse.py index 9abc0c2e71..1668e7b356 100644 --- a/activitysim/abm/tables/landuse.py +++ b/activitysim/abm/tables/landuse.py @@ -21,6 +21,13 @@ def land_use(state: workflow.State): df = read_input_table(state, "land_use") + # try to make life easy for everybody by keeping everything in canonical order + # but as long as coalesce_pipeline doesn't sort tables it coalesces, it might not stay in order + # so even though we do this, anyone downstream who depends on it, should look out for themselves... 
+ if not df.index.is_monotonic_increasing: + logger.info("sorting land_use index") + df = df.sort_index() + sharrow_enabled = state.settings.sharrow if sharrow_enabled: err_msg = ( @@ -34,12 +41,6 @@ def land_use(state: workflow.State): assert df.index[-1] == len(df.index) - 1, err_msg assert df.index.dtype.kind == "i", err_msg - # try to make life easy for everybody by keeping everything in canonical order - # but as long as coalesce_pipeline doesn't sort tables it coalesces, it might not stay in order - # so even though we do this, anyone downstream who depends on it, should look out for themselves... - if not df.index.is_monotonic_increasing: - df = df.sort_index() - logger.info("loaded land_use %s" % (df.shape,)) buffer = io.StringIO() df.info(buf=buffer) @@ -65,7 +66,11 @@ def land_use_taz(state: workflow.State): "no land_use_taz defined in input_table_list, constructing " "from discovered TAZ values in land_use" ) - unique_tazs = np.unique(land_use["TAZ"]) + # use original TAZ values if available, otherwise use current TAZ values + if state.settings.recode_pipeline_columns and "_original_TAZ" in land_use: + unique_tazs = np.unique(land_use["_original_TAZ"]) + else: + unique_tazs = np.unique(land_use["TAZ"]) if state.settings.recode_pipeline_columns: df = pd.Series( unique_tazs, diff --git a/activitysim/abm/test/test_agg_accessibility.py b/activitysim/abm/test/test_agg_accessibility.py index 4015e35cb0..2c00252dc5 100644 --- a/activitysim/abm/test/test_agg_accessibility.py +++ b/activitysim/abm/test/test_agg_accessibility.py @@ -61,3 +61,48 @@ def test_agg_accessibility_explicit_chunking(state, dataframe_regression): ) df = state.get_dataframe("accessibility") dataframe_regression.check(df, basename="simple_agg_accessibility") + + +@pytest.mark.parametrize("explicit_chunk", [0, 5]) +def test_agg_accessibility_orig_land_use( + state, dataframe_regression, tmp_path, explicit_chunk +): + # set top level settings + state.settings.chunk_size = 0 + state.settings.sharrow = False + state.settings.chunk_training_mode = "explicit" + + # read the accessibility settings and override the explicit chunk size with the parametrized value + model_settings = AccessibilitySettings.read_settings_file( + state.filesystem, "accessibility.yaml" + ) + model_settings.explicit_chunk = explicit_chunk + model_settings.land_use_columns = ["RETEMPN", "TOTEMP", "TOTACRE"] + model_settings.land_use_columns_orig = ["TOTACRE"] + + land_use = state.get_dataframe("land_use") + accessibility = state.get_dataframe("accessibility") + + tmp_spec = tmp_path / "tmp-accessibility.csv" + tmp_spec.open("w").write( + """Description,Target,Expression +orig_acreage,orig_acreage,df.landuse_orig_TOTACRE +dest_acreage,dest_acreage,df.TOTACRE +""" + ) + model_settings.SPEC = str(tmp_spec) + + compute_accessibility( + state, + land_use, + accessibility, + state.get("network_los"), + model_settings, + model_settings_file_name="accessibility.yaml", + trace_label="compute_accessibility", + output_table_name="accessibility", + ) + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility_orig_land_use") diff --git a/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv new file mode 100644 index 0000000000..6e269bec4c --- /dev/null +++ b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv @@ 
-0,0 +1,26 @@ +zone_id,orig_acreage,dest_acreage +0,6.2314652154886145,7.3737508868303339 +1,6.657368991274053,7.3737508868303339 +2,5.909440711629391,7.3737508868303339 +3,6.1810513148933497,7.3737508868303339 +4,7.1842500057933423,7.3737508868303339 +5,6.5875500148247959,7.3737508868303339 +6,7.026426808699636,7.3737508868303339 +7,7.1514854639047352,7.3737508868303339 +8,7.9377317752601089,7.3737508868303339 +9,7.5167053007413269,7.3737508868303339 +10,7.6138186848086287,7.3737508868303339 +11,7.1955623436220684,7.3737508868303339 +12,6.4975288537722626,7.3737508868303339 +13,6.6411821697405919,7.3737508868303339 +14,6.5701824369168911,7.3737508868303339 +15,8.034631032923107,7.3737508868303339 +16,8.2449906898128429,7.3737508868303339 +17,7.8948771916168834,7.3737508868303339 +18,8.0507033814702993,7.3737508868303339 +19,7.8073066868519945,7.3737508868303339 +20,7.5875638951029023,7.3737508868303339 +21,7.6932537206062692,7.3737508868303339 +22,7.7279755421055585,7.3737508868303339 +23,6.8834625864130921,7.3737508868303339 +24,6.2653012127377101,7.3737508868303339 diff --git a/activitysim/core/configuration/filesystem.py b/activitysim/core/configuration/filesystem.py index ce50becd4d..27496b1c73 100644 --- a/activitysim/core/configuration/filesystem.py +++ b/activitysim/core/configuration/filesystem.py @@ -425,9 +425,12 @@ def persist_sharrow_cache(self) -> None: -------- FileSystem.sharrow_cache_dir """ + import sharrow as sh + + sharrow_minor_version = ".".join(sh.__version__.split(".")[:2]) self.sharrow_cache_dir = Path( platformdirs.user_cache_dir(appname="ActivitySim") - ).joinpath(f"numba-{numba.__version__}") + ).joinpath(f"sharrow-{sharrow_minor_version}-numba-{numba.__version__}") self.sharrow_cache_dir.mkdir(parents=True, exist_ok=True) def _cascading_input_file_path( diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 0b61210373..024f878a46 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Any, Literal -from pydantic import validator +from pydantic import model_validator, validator from activitysim.core.configuration.base import PydanticBase, Union @@ -476,6 +476,26 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): True will disable the use of zarr. """ + store_skims_in_shm: bool = True + """ + Store skim dataset in shared memory. + + .. versionadded:: 1.3 + + By default, if sharrow is enabled (any setting other than false), ActivitySim + stores the skim dataset in shared memory. This can be changed by setting this + option to False, in which case skims are stored in "typical" process-local + memory. Note that storing skims in shared memory is effectively required for + multiprocessing, unless you have a very small model or an absurdly large amount + of RAM. + """ + + @model_validator(mode="after") + def _check_store_skims_in_shm(self): + if not self.store_skims_in_shm and self.multiprocess: + raise ValueError("store_skims_in_shm=False requires multiprocess to be False") + return self + instrument: bool = False """ Use `pyinstrument` to profile component performance. @@ -585,6 +605,18 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): compatible with using :py:attr:`Settings.sharrow`. """ + omx_ignore_patterns: list[str] = [] + """ + List of regex patterns to ignore when reading OMX files. + + This is useful if you have tables in your OMX file that you don't want to + read in. 
For example, if you have both time-of-day values and time-independent + values (e.g., "BIKE_TIME" and "BIKE_TIME__AM"), you can ignore the time-of-day + values by setting this to ["BIKE_TIME__.+"]. + + .. versionadded:: 1.3 + """ + keep_mem_logs: bool = False pipeline_complib: str = "NOTSET" @@ -614,6 +646,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): "trace_hh_id", "memory_profile", "instrument", + "sharrow", ) """ Setting to log on startup. diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 0a863b2e7f..f6f97dd200 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -174,7 +174,7 @@ def replace_in_index_level(mi, level, *repls): for i1, i2 in zip(exprs, labels): logger.debug(f" - expr: {i1}: {i2}") - timelogger.mark("sharrow preamble", True, logger, trace_label) + timelogger.mark("sharrow interact preamble", True, logger, trace_label) sh_util, sh_flow, sh_tree = apply_flow( state, @@ -197,10 +197,10 @@ def replace_in_index_level(mi, level, *repls): # if not testing sharrow, we are done with this object now. del sh_util - timelogger.mark("sharrow flow", True, logger, trace_label) + timelogger.mark("sharrow interact flow", True, logger, trace_label) else: sh_util, sh_flow, sh_tree = None, None, None - timelogger.mark("sharrow flow", False) + timelogger.mark("sharrow interact flow", False) if ( utilities is None @@ -425,7 +425,7 @@ def to_series(x): dtype=np.float32, ) logger.info("finish sh_flow load dataarray") - sh_utility_fat = sh_utility_fat[trace_rows, :] + # sh_utility_fat = sh_utility_fat[trace_rows, :] # trace selection above, do not repeat sh_utility_fat = sh_utility_fat.to_dataframe("vals") try: sh_utility_fat = sh_utility_fat.unstack("expressions") @@ -541,53 +541,74 @@ def to_series(x): retrace_eval_parts = {} re_trace_df = df.iloc[re_trace] - for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]): - if expr.startswith("_"): - target = expr[: expr.index("@")] - rhs = expr[expr.index("@") + 1 :] - v = to_series(eval(rhs, globals(), locals_d)) - locals_d[target] = v - if trace_eval_results is not None: - trace_eval_results[expr] = v.iloc[re_trace] - continue - if expr.startswith("@"): - v = to_series(eval(expr[1:], globals(), locals_d)) - else: - v = df.eval(expr, resolvers=[locals_d]) - if check_for_variability and v.std() == 0: - logger.info( - "%s: no variability (%s) in: %s" - % (trace_label, v.iloc[0], expr) + with compute_settings.pandas_option_context(): + for expr, label, coefficient in zip( + exprs, labels, spec.iloc[:, 0] + ): + if expr.startswith("_"): + target = expr[: expr.index("@")] + rhs = expr[expr.index("@") + 1 :] + v = to_series(eval(rhs, globals(), locals_d)) + locals_d[target] = v + if trace_eval_results is not None: + trace_eval_results[expr] = v.iloc[re_trace] + continue + if expr.startswith("@"): + v = to_series(eval(expr[1:], globals(), locals_d)) + else: + v = df.eval(expr, resolvers=[locals_d]) + if check_for_variability and v.std() == 0: + logger.info( + "%s: no variability (%s) in: %s" + % (trace_label, v.iloc[0], expr) + ) + no_variability += 1 + retrace_eval_data[expr] = v.iloc[re_trace] + k = "partial utility (coefficient = %s) for %s" % ( + coefficient, + expr, + ) + retrace_eval_parts[k] = ( + v.iloc[re_trace] * coefficient + ).astype("float") + retrace_eval_data_ = pd.concat(retrace_eval_data, axis=1) + retrace_eval_parts_ = pd.concat(retrace_eval_parts, axis=1) + + re_sh_flow_load = 
sh_flow.load(sh_tree, dtype=np.float32) + re_sh_flow_load_ = re_sh_flow_load[re_trace] + + use_bottleneck = pd.get_option("compute.use_bottleneck") + use_numexpr = pd.get_option("compute.use_numexpr") + use_numba = pd.get_option("compute.use_numba") + + look_for_problems_here = np.where( + ~np.isclose( + re_sh_flow_load_[ + :, + ~spec.index.get_level_values(0).str.startswith("_"), + ], + retrace_eval_data_.values.astype(np.float32), ) - no_variability += 1 - retrace_eval_data[expr] = v.iloc[re_trace] - k = "partial utility (coefficient = %s) for %s" % ( - coefficient, - expr, - ) - retrace_eval_parts[k] = (v.iloc[re_trace] * coefficient).astype( - "float" - ) - retrace_eval_data_ = pd.concat(retrace_eval_data, axis=1) - retrace_eval_parts_ = pd.concat(retrace_eval_parts, axis=1) - - re_sh_flow_load = sh_flow.load(sh_tree, dtype=np.float32) - re_sh_flow_load_ = re_sh_flow_load[re_trace] - - use_bottleneck = pd.get_option("compute.use_bottleneck") - use_numexpr = pd.get_option("compute.use_numexpr") - use_numba = pd.get_option("compute.use_numba") - - look_for_problems_here = np.where( - ~np.isclose( - re_sh_flow_load_[ - :, ~spec.index.get_level_values(0).str.startswith("_") - ], - retrace_eval_data_.values.astype(np.float32), ) - ) - raise # enter debugger now to see what's up + ) + if len(look_for_problems_here) == 2: + # the first index is the row index, which probably spans many different rows + # the second is the column index, hopefully with only a few unique values + problem_col_indexes = np.unique(look_for_problems_here[1]) + problem_cols = list( + retrace_eval_data_.columns[problem_col_indexes] + ) + print("problem expressions:\n", "\n".join(problem_cols)) + + MISMATCH_sharrow = re_sh_flow_load_[ + :, + ~spec.index.get_level_values(0).str.startswith("_"), + ][:, problem_col_indexes] + MISMATCH_legacy = retrace_eval_data_.iloc[ + :, problem_col_indexes + ] + + raise # enter debugger now to see what's up timelogger.mark("sharrow interact test", True, logger, trace_label) logger.info(f"utilities.dtypes {trace_label}\n{utilities.dtypes}") diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index 8421cb6c7d..1ed871ec0f 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -714,6 +714,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: omx_file_paths = state.filesystem.expand_input_file_list( network_los_preload.omx_file_names(skim_tag), ) + omx_file_handles = [] zarr_file = network_los_preload.zarr_file_name(skim_tag) if state.settings.disable_zarr: @@ -746,7 +747,10 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: else: remapper = None - d = _use_existing_backing_if_valid(backing, omx_file_paths, skim_tag) + if state.settings.store_skims_in_shm: + d = _use_existing_backing_if_valid(backing, omx_file_paths, skim_tag) + else: + d = None # skims are not stored in shared memory, so we need to load them do_not_save_zarr = False if d is None: @@ -768,8 +772,11 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: if d is None: if zarr_file and not do_not_save_zarr: logger.info("did not find zarr skims, loading omx") + omx_file_handles = [ + openmatrix.open_file(f, mode="r") for f in omx_file_paths + ] d = sh.dataset.from_omx_3d( - [openmatrix.open_file(f, mode="r") for f in omx_file_paths], + omx_file_handles, index_names=( ("otap", "dtap", "time_period") if skim_tag == "tap" @@ -777,6 +784,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> 
xr.Dataset: ), time_periods=time_periods, max_float_precision=max_float_precision, + ignore=state.settings.omx_ignore_patterns, ) if zarr_file: @@ -835,6 +843,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: else: land_use_zone_id = None + dask_required = False if network_los_preload.zone_system == ONE_ZONE: # check TAZ alignment for ONE_ZONE system. # other systems use MAZ for most lookups, which dynamically @@ -845,6 +854,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: except AssertionError as err: logger.info(f"otaz realignment required\n{err}") d = d.reindex(otaz=land_use_zone_id) + dask_required = True else: logger.info("otaz alignment ok") d["otaz"] = land_use.index.to_numpy() @@ -858,6 +868,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: except AssertionError as err: logger.info(f"dtaz realignment required\n{err}") d = d.reindex(dtaz=land_use_zone_id) + dask_required = True else: logger.info("dtaz alignment ok") d["dtaz"] = land_use.index.to_numpy() @@ -866,10 +877,34 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: np.testing.assert_array_equal(land_use.index, d.dtaz) if d.shm.is_shared_memory: + for f in omx_file_handles: + f.close() + return d + elif not state.settings.store_skims_in_shm: + logger.info( + "store_skims_in_shm is False, keeping skims in process-local memory" + ) return d else: logger.info("writing skims to shared memory") - return d.shm.to_shared_memory(backing, mode="r") + if dask_required: + # setting `load` to True uses dask to load the data into memory + d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=True) + else: + # setting `load` to false then calling `reload_from_omx_3d` avoids + # using dask to load the data into memory, which is not performant + # on Windows for large datasets, but this only works if the data + # requires no realignment (i.e. the land use table and skims match + # exactly in order and length). + d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=False) + sh.dataset.reload_from_omx_3d( + d_shared_mem, + [str(i) for i in omx_file_paths], + ignore=state.settings.omx_ignore_patterns, + ) + for f in omx_file_handles: + f.close() + return d_shared_mem @workflow.cached_object diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py index 97f50a57f5..b9d7cc13d6 100644 --- a/activitysim/core/steps/output.py +++ b/activitysim/core/steps/output.py @@ -370,6 +370,18 @@ def write_tables(state: workflow.State) -> None: decode_instruction = decode_instruction.strip() else: decode_filter = None + + if decode_instruction == "time_period": + map_col = list(state.network_settings.skim_time_periods.labels) + map_func = map_col.__getitem__ + revised_col = ( + pd.Series(dt.column(colname)).astype(int).map(map_func) + ) + dt = dt.drop([colname]).append_column( + colname, pa.array(revised_col) + ) + continue + if "." not in decode_instruction: lookup_col = decode_instruction source_table = table_name diff --git a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml index ff04d214e9..9ecc711f99 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml +++ b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml @@ -1,5 +1,5 @@ # Some data values in the spec file will refer to missing values stored -# as NaN in the data. 
This requires the `sharrow_fastmath` setting to +# as NaN in the data. This requires the `fastmath` setting to # be set to `false` to avoid errors in the sharrow implementation. compute_settings: fastmath: false diff --git a/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml b/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml index 19f5014fcc..6550c28e5e 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml +++ b/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml @@ -196,3 +196,6 @@ LOGSUM_CHOOSER_COLUMNS: MODE_CHOICE_LOGSUM_COLUMN_NAME: mode_choice_logsum + +compute_settings: + fastmath: false # use of isnan in utility functions requires fastmath=False diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index 4b6d5f1849..f7aa5c735b 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -37,7 +37,7 @@ dependencies: - nbconvert - nbformat - nbmake -- numba = 0.56.* +- numba = 0.57.* - numexpr - numpy = 1.23.* - numpydoc @@ -77,4 +77,4 @@ dependencies: - zstandard - pip: - - autodoc_pydantic \ No newline at end of file + - autodoc_pydantic diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index 33a5856fbf..107b4b355e 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -33,7 +33,7 @@ dependencies: - nbconvert - nbformat - nbmake -- numba = 0.56.* +- numba = 0.57.* - numexpr - numpy = 1.23.* - numpydoc @@ -61,7 +61,7 @@ dependencies: - ruff - setuptools_scm - scikit-learn = 1.2.* -- sharrow >= 2.6.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - snakeviz # for profiling - sparse diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index 6a9b88ef29..2553fbb17f 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -27,7 +27,7 @@ dependencies: - matplotlib - myst-nb - myst-parser -- numba >= 0.56.4 +- numba >= 0.57 - numpy >= 1.16.1, <2 - numpydoc - openmatrix >= 0.3.4.1 @@ -46,7 +46,7 @@ dependencies: - pyyaml >= 5.1 - requests >= 2.7 - scikit-learn >= 1.1 -- sharrow >= 2.6.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - sparse - sphinx-argparse diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index 3aa8ae1155..692ee11928 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -13,7 +13,7 @@ dependencies: - dask = 2023.3.2 - isort = 5.12.0 - nbmake = 1.4.6 -- numba = 0.56.4 +- numba = 0.57.* - numpy = 1.23.5 - openmatrix = 0.3.5.0 - orca = 1.8 @@ -32,7 +32,7 @@ dependencies: - requests = 2.28.* - ruff - scikit-learn = 1.2.* -- sharrow >= 2.6.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - sparse - xarray = 2023.2.* diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md index e2b0093d43..04894d769c 100644 --- a/docs/dev-guide/using-sharrow.md +++ b/docs/dev-guide/using-sharrow.md @@ -174,6 +174,16 @@ memory, as the variable is computed and stored for every row in the entire dataf before it can be used in other expressions. In sharrow, temporary variables are allocated, used, and freed for each row separately, so no extra memory is required. 
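To make the memory point concrete, the sketch below (hypothetical spec values, not ActivitySim source) contrasts how a temporary variable such as `_cost@df.dist * cost_per_mile` is evaluated in the two modes:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"dist": np.linspace(1.0, 20.0, 1_000_000)})
cost_per_mile = 0.6

# legacy: the temporary is materialized as a full million-row Series and
# stays alive until the whole spec has been evaluated
_cost = df["dist"] * cost_per_mile
util_legacy = -0.05 * _cost

# sharrow: the compiled row loop is morally equivalent to this, where the
# temporary is a scalar that exists only while its row is processed
util_sharrow = np.empty(len(df))
for i, dist in enumerate(df["dist"].to_numpy()):
    _cost_row = dist * cost_per_mile  # freed after each iteration
    util_sharrow[i] = -0.05 * _cost_row
```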
+### Pandas-only Expressions + +In legacy mode, expressions can tap into the full pandas library, including the +ability to call pandas functions and methods directly. This is not possible in +sharrow, as the expressions are compiled into numba code, which does not have +access to the pandas library. If a pandas function is needed, it must be called +in a pre-processor. However, many pandas functions can be replaced with numpy +functions, which are available in numba. For example, `df.income.fillna(0)` can +be replaced with `np.nan_to_num(df.income)`. + ### Switchable Expressions As a general rule, it is best to write each utility expression in a manner that @@ -193,13 +203,23 @@ in several examples: `@np.log1p(size_terms.get(df.alt_dest, df.purpose)) # sharrow: np.log1p(size_terms['sizearray'])` Here, `size_terms` is a DataFrameMatrix class instance, a special class written into -ActivitySim to facilitate reading from a DataFrame as it it were a 2-d array. As it +ActivitySim to facilitate reading from a DataFrame as if it were a 2-d array. As it is a special purpose class written in Python, the numba compiler cannot handle it directly. Fortunately, sharrow provides an alternative: passing the size terms as a xarray `DataArray`. This has a slightly different interface, so the sharrow and legacy evaluation modes require different expressions. The switching expression is used to handle the DataFrameMatrix on the left (before "# sharrow:") and the DataArray on the right. +### Optional Variables + +In some cases, a variable may be used where it is available but is not strictly +necessary for the model to run. For example, a model may reference mode choice +logsums, yet still run without them if it executes before logsums are calculated. +In this case, the variable can be accessed using the `get` method, which allows +for a default value if the variable is not found. + + `@df.get('mode_choice_logsum', 0)` + ### Performance Considerations Sharrow is usually expected to bring significant performance gains to ActivitySim. @@ -222,6 +242,17 @@ compute_settings: in the component's configuration yaml file. +In addition, by default sharrow also tries to optimize performance by setting the +`fastmath` flag to True in the numba compiler. This makes the compiler generate +faster code by assuming that all variables have finite values (not NaN or Inf). 
+If the model has expressions that use variables that can contain NaN or Inf +values, the `fastmath` flag can be disabled by setting: + +```yaml +compute_settings: + fastmath: false +``` + ### Multiprocessing Performance Sharrow leverages a number of performance enhancing techniques, including diff --git a/pyproject.toml b/pyproject.toml index 07ec493eca..acd8e38e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ name = "activitysim" dynamic = ["version"] dependencies = [ "cytoolz >= 0.8.1", - "numba >= 0.55.2", + "numba >= 0.57", "numpy >= 1.16.1, <2", "openmatrix >= 0.3.4.1", "orca >= 1.6", @@ -24,7 +24,7 @@ dependencies = [ "pyyaml >= 5.1", "requests >= 2.7", "scikit-learn >= 1.1", - "sharrow >= 2.6", + "sharrow >= 2.9.1", "simwrapper > 1.7", "sparse", "tables >= 3.5.1", diff --git a/test/joint_tours/configs/joint_tour_frequency_composition.csv b/test/joint_tours/configs/joint_tour_frequency_composition.csv index 2cace7b25e..aa63d0be26 100644 --- a/test/joint_tours/configs/joint_tour_frequency_composition.csv +++ b/test/joint_tours/configs/joint_tour_frequency_composition.csv @@ -47,16 +47,16 @@ Label,Description,Expression,Coefficient ,Shopping HOV accessibility for 2 Tours,((autos>num_workers)*shop_hov_oversufficient_accessibility)*(num_joint_tours==2)*shopping,coef_shopping_hov_accessibility_for_2_tours ,Maintenance HOV Accessibility,((autos>num_workers)*maint_hov_oversufficient_accessibility)*othmaint,coef_maintenance_hov_accessibility ,Discretionary HOV Accessibility,((autos>num_workers)*discr_hov_oversufficient_accessibility)*othdiscr,coef_discretionary_hov_accessibility -,Constant for Children Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_constant_for_children_party_shopping_tour -,Constant for Children Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==2)+(df.purpose2==6)*(df.party2==2),coef_constant_for_children_party_maintenance_tour -,Constant for Children Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==2)+(df.purpose2==7)*(df.party2==2),coef_constant_for_children_party_eating_out_tour -,Constant for Children Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==2)+(df.purpose2==8)*(df.party2==2),coef_constant_for_children_party_visiting_tour -,Constant for Children Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==2)+(df.purpose2==9)*(df.party2==2),coef_constant_for_children_party_discretionary_tour -,Constant for Mixed Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_constant_for_mixed_party_shopping_tour -,Constant for Mixed Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==3)+(df.purpose2==6)*(df.party2==3),coef_constant_for_mixed_party_maintenance_tour -,Constant for Mixed Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==3)+(df.purpose2==7)*(df.party2==3),coef_constant_for_mixed_party_eating_out_tour -,Constant for Mixed Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==3)+(df.purpose2==8)*(df.party2==3),coef_constant_for_mixed_party_visiting_tour -,Constant for Mixed Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==3)+(df.purpose2==9)*(df.party2==3),coef_constant_for_mixed_party_discretionary_tour +,Constant for Children Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==2)).astype(int)+@((df.purpose2==5)*(df.party2==2)).astype(int),coef_constant_for_children_party_shopping_tour +,Constant for Children Party/ Maintenance 
Tour,@((df.purpose1==6)*(df.party1==2)).astype(int)+@((df.purpose2==6)*(df.party2==2)).astype(int),coef_constant_for_children_party_maintenance_tour +,Constant for Children Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==2)).astype(int)+@((df.purpose2==7)*(df.party2==2)).astype(int),coef_constant_for_children_party_eating_out_tour +,Constant for Children Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==2)).astype(int)+@((df.purpose2==8)*(df.party2==2)).astype(int),coef_constant_for_children_party_visiting_tour +,Constant for Children Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==2)).astype(int)+@((df.purpose2==9)*(df.party2==2)).astype(int),coef_constant_for_children_party_discretionary_tour +,Constant for Mixed Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==3)).astype(int)+@((df.purpose2==5)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_shopping_tour +,Constant for Mixed Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==3)).astype(int)+@((df.purpose2==6)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_maintenance_tour +,Constant for Mixed Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==3)).astype(int)+@((df.purpose2==7)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_eating_out_tour +,Constant for Mixed Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==3)).astype(int)+@((df.purpose2==8)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_visiting_tour +,Constant for Mixed Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==3)).astype(int)+@((df.purpose2==9)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_discretionary_tour ,Number of Active Full time workers /Adult Party,num_travel_active_full_time_workers * (party1==1) + num_travel_active_full_time_workers * (party2==1),coef_number_of_active_full_time_workers_adult_party ,Number of Active Part time workers /Adult Party,num_travel_active_part_time_workers * (party1==1) + num_travel_active_part_time_workers * (party2==1),coef_number_of_active_part_time_workers_adult_party ,Number of Active University Students /Adult Party,num_travel_active_university_students * (party1==1) + num_travel_active_university_students * (party2==1),coef_number_of_active_university_students_adult_party @@ -78,16 +78,16 @@ Label,Description,Expression,Coefficient ,Not more than 1 travel active adult in HH,@(df.num_travel_active_adults < 2)*(((df.party1==1)+(df.party2==1))>0),coef_unavailable ,Not more than 1 travel active child in HH,@(df.num_travel_active_children < 2)*(((df.party1==2)+(df.party2==2))>0),coef_unavailable ,No travel-active pair adult-child in HH ,@((df.num_travel_active_adults*df.num_travel_active_children) ==0)*(((df.party1==3)+(df.party2==3))>0),coef_unavailable -,Adjustment for Children Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_adjustment_for_children_party_shopping_tour -,Adjustment for Children Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==2)+(df.purpose2==6)*(df.party2==2),coef_adjustment_for_children_party_maintenance_tour -,Adjustment for Children Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==2)+(df.purpose2==7)*(df.party2==2),coef_adjustment_for_children_party_eating_out_tour -,Adjustment for Children Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==2)+(df.purpose2==8)*(df.party2==2),coef_adjustment_for_children_party_visiting_tour -,Adjustment for Children Party/ Discretionary 
Tour,@(df.purpose1==9)*(df.party1==2)+(df.purpose2==9)*(df.party2==2),coef_adjustment_for_children_party_discretionary_tour -,Adjustment for Mixed Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_adjustment_for_mixed_party_shopping_tour -,Adjustment for Mixed Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==3)+(df.purpose2==6)*(df.party2==3),coef_adjustment_for_mixed_party_maintenance_tour -,Adjustment for Mixed Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==3)+(df.purpose2==7)*(df.party2==3),coef_adjustment_for_mixed_party_eating_out_tour -,Adjustment for Mixed Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==3)+(df.purpose2==8)*(df.party2==3),coef_adjustment_for_mixed_party_visiting_tour -,Adjustment for Mixed Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==3)+(df.purpose2==9)*(df.party2==3),coef_adjustment_for_mixed_party_discretionary_tour +,Adjustment for Children Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==2)).astype(int)+@((df.purpose2==5)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_shopping_tour +,Adjustment for Children Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==2)).astype(int)+@((df.purpose2==6)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_maintenance_tour +,Adjustment for Children Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==2)).astype(int)+@((df.purpose2==7)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_eating_out_tour +,Adjustment for Children Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==2)).astype(int)+@((df.purpose2==8)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_visiting_tour +,Adjustment for Children Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==2)).astype(int)+@((df.purpose2==9)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_discretionary_tour +,Adjustment for Mixed Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==3)).astype(int)+@((df.purpose2==5)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_shopping_tour +,Adjustment for Mixed Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==3)).astype(int)+@((df.purpose2==6)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_maintenance_tour +,Adjustment for Mixed Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==3)).astype(int)+@((df.purpose2==7)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_eating_out_tour +,Adjustment for Mixed Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==3)).astype(int)+@((df.purpose2==8)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_visiting_tour +,Adjustment for Mixed Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==3)).astype(int)+@((df.purpose2==9)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_discretionary_tour ,Adjustment for shopping tour,shopping,coef_adjustment_for_shopping_tour ,Adjustment for Maintenance tour,othmaint,coef_adjustment_for_maintenance_tour ,Adjustment for eating out tour,eatout,coef_adjustment_for_eating_out_tour
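A note on the joint_tour_frequency_composition.csv edits above: wrapping each indicator term in `.astype(int)` matters because `+` on numpy boolean arrays is a logical OR, not integer addition, so a household matching both tour terms formerly scored 1 (True) rather than 2. A standalone demonstration (not ActivitySim code):

```python
import numpy as np

purpose1, party1 = np.array([5]), np.array([2])
purpose2, party2 = np.array([5]), np.array([2])

# with boolean arrays, * acts as logical AND and + acts as logical OR,
# so the sum of two true indicators saturates at True (i.e. 1)
raw = (purpose1 == 5) * (party1 == 2) + (purpose2 == 5) * (party2 == 2)
print(raw, raw.dtype)  # [ True] bool

# casting each indicator to int first restores ordinary addition
fixed = ((purpose1 == 5) * (party1 == 2)).astype(int) + (
    (purpose2 == 5) * (party2 == 2)
).astype(int)
print(fixed, fixed.dtype)  # [2] int64 (platform-dependent integer)
```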