From 0d69abbd0a9496a8306c6fa703c44386639947f8 Mon Sep 17 00:00:00 2001 From: Nick Fournier Date: Wed, 26 Apr 2023 14:43:41 -0700 Subject: [PATCH 1/3] added stricter joining of annotated fields --- activitysim/core/util.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 8a0f29b117..4148d220e1 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -373,8 +373,22 @@ def assign_in_place(df, df2): df[new_columns] = df2[new_columns] -def df_from_dict(values, index=None): +def reindex_if_series(values, index): + if index is not None: + return values + + if isinstance(values, pd.Series): + assert len(set(values.index).intersection(index)) == len(index) + + if all(values.index != index): + return values.reindex(index=index) + +def df_from_dict(values, index=None): + + # If value object is a series and has out of order index, reindex it + values = {k: reindex_if_series(v, index) for k, v in values.items()} + df = pd.DataFrame.from_dict(values) if index is not None: df.index = index From 6b3a90c4e6e89ea2cd44a5c074b9b32692d5c6a7 Mon Sep 17 00:00:00 2001 From: Nick Fournier Date: Wed, 26 Apr 2023 15:17:00 -0700 Subject: [PATCH 2/3] lint blacked --- activitysim/core/util.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 4148d220e1..b7d611ec26 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -26,7 +26,6 @@ def si_units(x, kind="B", digits=3, shift=1000): - # nano micro milli kilo mega giga tera peta exa zeta yotta tiers = ["n", "µ", "m", "", "K", "M", "G", "T", "P", "E", "Z", "Y"] @@ -342,7 +341,6 @@ def assign_in_place(df, df2): # this is a hack fix for a bug in pandas.update # github.com/pydata/pandas/issues/4094 for c, old_dtype in zip(common_columns, old_dtypes): - # if both df and df2 column were same type, but result is not if (old_dtype == 
df2[c].dtype) and (df[c].dtype != old_dtype): try: @@ -376,19 +374,18 @@ def assign_in_place(df, df2): def reindex_if_series(values, index): if index is not None: return values - + if isinstance(values, pd.Series): assert len(set(values.index).intersection(index)) == len(index) - + if all(values.index != index): - return values.reindex(index=index) + return values.reindex(index=index) def df_from_dict(values, index=None): - # If value object is a series and has out of order index, reindex it - values = {k: reindex_if_series(v, index) for k, v in values.items()} - + values = {k: reindex_if_series(v, index) for k, v in values.items()} + df = pd.DataFrame.from_dict(values) if index is not None: df.index = index From a3f60a4531be7f47bbbe469a5cd2666f978006ad Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 8 Feb 2024 11:33:51 -0800 Subject: [PATCH 3/3] added unit test for df_from_dict indexing --- activitysim/core/test/test_util.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/activitysim/core/test/test_util.py b/activitysim/core/test/test_util.py index ae9b4fa83b..415ec1f9ee 100644 --- a/activitysim/core/test/test_util.py +++ b/activitysim/core/test/test_util.py @@ -7,7 +7,7 @@ import pandas.testing as pdt import pytest -from ..util import other_than, quick_loc_df, quick_loc_series, reindex +from ..util import other_than, quick_loc_df, quick_loc_series, reindex, df_from_dict @pytest.fixture(scope="module") @@ -62,3 +62,30 @@ def test_quick_loc_series(): assert list(quick_loc_series(loc_list, series)) == attrib_list assert list(quick_loc_series(loc_list, series)) == list(series.loc[loc_list]) + + +def test_df_from_dict(): + + index = [1, 2, 3, 4, 5] + df = pd.DataFrame({"attrib": [1, 2, 2, 3, 1]}, index=index) + + # scramble index order for one expression and not the other + sorted = df.eval("attrib.sort_values()") + not_sorted = df.eval("attrib * 1") + + # check above expressions + pdt.assert_series_equal( + 
sorted, pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4]), check_names=False + ) + pdt.assert_series_equal(not_sorted, df.attrib, check_names=False) + + # create a new dataframe from the above expressions + values = {"sorted": sorted, "not_sorted": not_sorted} + new_df = df_from_dict(values, index) + + # index should become unscrambled and back to the same order as before + expected_df = pd.DataFrame( + {"sorted": [1, 2, 2, 3, 1], "not_sorted": [1, 2, 2, 3, 1]}, index=index + ) + + pdt.assert_frame_equal(new_df, expected_df)