def test_df_from_dict():
    """Check that df_from_dict returns rows in the order of the given index.

    One input Series has its index scrambled by a sort and the other keeps
    the original order; the resulting frame is expected to have both columns
    in the original index order.
    """
    index = [1, 2, 3, 4, 5]
    df = pd.DataFrame({"attrib": [1, 2, 2, 3, 1]}, index=index)

    # scramble index order for one expression and not the other
    # (local names avoid shadowing the ``sorted`` builtin)
    sorted_attrib = df.eval("attrib.sort_values()")
    unsorted_attrib = df.eval("attrib * 1")

    # check above expressions
    pdt.assert_series_equal(
        sorted_attrib,
        pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4]),
        check_names=False,
    )
    pdt.assert_series_equal(unsorted_attrib, df.attrib, check_names=False)

    # create a new dataframe from the above expressions
    values = {"sorted": sorted_attrib, "not_sorted": unsorted_attrib}
    new_df = df_from_dict(values, index)

    # index should become unscrambled and back to the same order as before
    expected_df = pd.DataFrame(
        {"sorted": [1, 2, 2, 3, 1], "not_sorted": [1, 2, 2, 3, 1]}, index=index
    )

    pdt.assert_frame_equal(new_df, expected_df)
def reindex_if_series(values, index):
    """
    Put a Series into the row order of ``index``; pass anything else through.

    Parameters
    ----------
    values : pandas.Series or any other object
        Candidate column data. Only ``pandas.Series`` objects are reordered.
    index : array-like, pandas.Index or None
        Target row labels, in the desired order. ``None`` disables
        reordering.

    Returns
    -------
    object
        ``values`` itself when ``index`` is None, when ``values`` is not a
        Series, or when its index already equals ``index``; otherwise a new
        Series reindexed to ``index``.

    Raises
    ------
    ValueError
        If ``values`` is a Series lacking one or more labels of ``index``.
    """
    # Nothing to align against, or the object carries no index of its own.
    if index is None or not isinstance(values, pd.Series):
        return values

    target = pd.Index(index)

    # Already in the requested order: hand back the same object untouched.
    if values.index.equals(target):
        return values

    # Every requested label must exist, otherwise reindex() would silently
    # fill the gaps with NaN.  Raise explicitly (an assert is stripped by -O).
    present = target.isin(values.index)
    if not present.all():
        raise ValueError(
            f"cannot reindex series: labels {list(target[~present])} "
            "are missing from its index"
        )

    return values.reindex(index=target)