Skip to content

Commit 052bdbc

Browse files
committed
preallocate output dfs list and concat outside loop for speed
1 parent 2e9a978 commit 052bdbc

File tree

1 file changed

+10
-4
lines changed
  • google_symptoms/delphi_google_symptoms

1 file changed

+10
-4
lines changed

google_symptoms/delphi_google_symptoms/geo.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,11 @@ def geo_map(df, geo_res, namescols = None):
7979
return df
8080

8181
map_df = generate_transition_matrix(geo_res)
82-
converted_df = pd.DataFrame(columns = df.columns)
83-
for _date in df["timestamp"].unique():
82+
83+
dates_list = df["timestamp"].unique()
84+
dfs_list = [pd.DataFrame()] * len(dates_list)
85+
86+
for i, _date in enumerate(dates_list):
8487
val_lists = df[df["timestamp"] == _date].merge(
8588
map_df["geo_id"], how="right"
8689
)[namescols].fillna(0)
@@ -92,5 +95,8 @@ def geo_map(df, geo_res, namescols = None):
9295
newdf["geo_id"] = list(map_df.keys())[1:]
9396
mask = (newdf == 0)
9497
newdf[mask] = np.nan
95-
converted_df = pd.concat([converted_df, newdf])
96-
return converted_df
98+
dfs_list[i] = newdf
99+
100+
# Reindex to make sure output has same columns as input df. Filled with
101+
# NaN values if column doesn't already exist.
102+
return pd.concat(dfs_list).reindex(df.columns, axis=1)

0 commit comments

Comments
 (0)