Skip to content

Commit 528c2ce

Browse files
committed
use numpy indexing in cdap.make_interactions
Once again, we get significant speedups by working with NumPy arrays instead of pandas structures.
1 parent 43a13bc commit 528c2ce

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

activitysim/cdap/cdap.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,24 +48,42 @@ def make_interactions(people, hh_id_col, p_type_col):
4848
three_fmt = '{}{}{}'.format
4949
two = []
5050
three = []
51+
two_perm_cache = {}
52+
three_combo_cache = {}
53+
54+
for hh_id, df in people.groupby(hh_id_col, sort=False):
55+
hh_size = len(df)
5156

52-
for hh, df in people.groupby(hh_id_col, sort=False):
5357
# skip households with only one person
54-
if len(df) == 1:
58+
if hh_size == 1:
5559
continue
5660

57-
ptypes = df[p_type_col]
61+
ptypes = df[p_type_col].values
62+
hh_idx = df.index.values
5863

59-
for pA, pB in itertools.permutations(df.index, 2):
60-
two.append((pA, two_fmt(*ptypes[[pA, pB]])))
64+
if hh_size in two_perm_cache:
65+
two_perms = two_perm_cache[hh_size]
66+
else:
67+
two_perms = list(itertools.permutations(np.arange(hh_size), 2))
68+
two_perm_cache[hh_size] = two_perms
69+
70+
two.extend(
71+
(hh_idx[pA], two_fmt(*ptypes[[pA, pB]])) for pA, pB in two_perms)
6172

6273
# now skip households with two people
63-
if len(df) == 2:
74+
if hh_size == 2:
6475
continue
6576

66-
for idx in itertools.combinations(df.index, 3):
67-
combo = three_fmt(*ptypes[list(idx)])
68-
three.extend((p, combo) for p in idx)
77+
if hh_size in three_combo_cache:
78+
three_combos = three_combo_cache[hh_size]
79+
else:
80+
three_combos = list(itertools.combinations(np.arange(hh_size), 3))
81+
three_combo_cache[hh_size] = three_combos
82+
83+
three.extend(
84+
(hh_idx[p], three_fmt(*ptypes.take(idx)))
85+
for idx in three_combos
86+
for p in idx)
6987

7088
if two:
7189
two_idx, two_val = zip(*two)

0 commit comments

Comments
 (0)