diff --git a/matching/LICENSE b/matching/LICENSE deleted file mode 100644 index ef03b1c..0000000 --- a/matching/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2024 Jing Wen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/matching/README.md b/matching/README.md index 6938b7a..d3c024c 100644 --- a/matching/README.md +++ b/matching/README.md @@ -28,31 +28,31 @@ mentor_df = pd.DataFrame({ # 2. Define similarity functions def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series): '''You can define any similarity function you want, as long as you return a number (you might be - able to return other comparable objects but I haven't tested it). Smaller is more similar. + able to return other comparable objects but I haven't tested it). Larger is more similar. Args: mentors: list of pd.Series, each representing a mentor mentee: pd.Series, representing a single mentee ''' acc = 0 - acc += sum( + acc -= sum( # Penalize any differences (alternatively, you can and should use Word Error Rate for more sensible comparison) np.abs(mentor['feat1'] - mentee['feat1']) for mentor in mentors ) - acc += sum( + acc -= sum( # Penalize any differences (alternatively, you can and should use Word Error Rate for more sensible comparison) np.abs(mentor['feat2'] - mentee['feat2']) for mentor in mentors ) return acc def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series): - '''Again, you can define any similarity function you want, as long as you return a number. Smaller is more similar. + '''Again, you can define any similarity function you want, as long as you return a number. Larger is more similar. Args: mentor1: pd.Series, representing a single mentor mentor2: pd.Series, representing a single mentor ''' - return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15 + return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15 # 3. Run the matching assignments_by_mentor, assignments_by_mentee = manytomany.match( diff --git a/matching/example.py b/matching/example.py index d2e83ed..5f53293 100644 --- a/matching/example.py +++ b/matching/example.py @@ -19,18 +19,18 @@ def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series): '''You can define any similarity function you want, as long as you return a number (you might be able to return other comparable objects but I haven't tested it). - Smaller is more similar. + Larger is more similar. Args: mentors: list of pd.Series, each representing a mentor mentee: pd.Series, representing a single mentee ''' acc = 0 - acc += sum( + acc -= sum( # Penalize every difference np.abs(mentor['feat1'] - mentee['feat1']) for mentor in mentors ) - acc += sum( + acc -= sum( # Penalize every difference np.abs(mentor['feat2'] - mentee['feat2']) for mentor in mentors ) @@ -39,13 +39,13 @@ def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series): def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series): '''Again, you can define any similarity function you want, as long as you return a number. - Smaller is more similar. + Larger is more similar. Args: mentor1: pd.Series, representing a single mentor mentor2: pd.Series, representing a single mentor ''' - return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15 + return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15 # 3. Run the matching assignments_by_mentor, assignments_by_mentee = manytomany.match( diff --git a/matching/manytomany/__init__.py b/matching/manytomany/__init__.py index dfc444d..139deb8 100644 --- a/matching/manytomany/__init__.py +++ b/matching/manytomany/__init__.py @@ -11,7 +11,7 @@ def group_mentors(mentors: pd.DataFrame, Args: mentors: pd.DataFrame, representing the mentors mentors_per_mentee: int, the number of mentors per mentee - similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar. + similarity_func: callable, a function that takes two pd.Series and returns a number. Larger is more similar. Returns: dict, mapping mentor group IDs to lists of mentor IDs @@ -23,7 +23,7 @@ def group_mentors(mentors: pd.DataFrame, if mentor_id1 == mentor_id2: similarity_matrix.loc[mentor_id1, mentor_id2] = np.inf continue - similarity_matrix.loc[mentor_id1, mentor_id2] = similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2]) + similarity_matrix.loc[mentor_id1, mentor_id2] = -similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2]) # Cluster mentors n_clusters = len(mentors.index) // mentors_per_mentee @@ -46,7 +46,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame, mentees: pd.DataFrame, representing the mentees mentor_groups: dict, mapping mentor group IDs to lists of mentor IDs mentees_per_mentor: int, the number of mentees per mentor - similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar. + similarity_func: callable, a function that takes two pd.Series and returns a number. Larger is more similar. Returns: pd.DataFrame, representing the assignments from mentor POV. @@ -56,7 +56,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame, similarity_matrix = pd.DataFrame(index=mentor_groups.keys(), columns=mentees.index) for mentor_group_id, mentor_group in mentor_groups.items(): for mentee_id, mentee in mentees.iterrows(): - similarity_matrix.loc[mentor_group_id, mentee_id] = similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee) + similarity_matrix.loc[mentor_group_id, mentee_id] = -similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee) assignments = pd.DataFrame(index=mentor_groups.keys(), columns=[f'assigned_{i}' for i in range(mentees_per_mentor)])