Skip to content

Commit 95fc814

Browse files
authored
Group matcher algorithm (#385)
1 parent 7d7f5e7 commit 95fc814

13 files changed

Lines changed: 356 additions & 1 deletion

File tree

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
[submodule "simulation_cache"]
22
path = simulation_cache
33
url = https://github.com/Teamable-Analytics/algorithms-simulation-cache.git
4+
[submodule "api/ai/external_algorithms/group_matcher_algorithm/group-matcher"]
5+
path = api/ai/external_algorithms/group_matcher_algorithm/group-matcher
6+
url = git@github.com:ketphan02/group-matcher.git

api/ai/algorithm_runner.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
from typing import List, TYPE_CHECKING
22

3+
from api.ai.external_algorithms.group_matcher_algorithm.group_matcher_algorithm import (
4+
GroupMatcherAlgorithm,
5+
)
36
from api.ai.geg_algorithm.geg_algorithm import GeneralizedEnvyGraphAlgorithm
47
from api.ai.interfaces.algorithm_config import (
58
AlgorithmConfig,
69
RandomAlgorithmConfig,
710
WeightAlgorithmConfig,
811
SocialAlgorithmConfig,
912
PriorityAlgorithmConfig,
13+
GroupMatcherAlgorithmConfig,
1014
)
1115
from api.ai.interfaces.algorithm_options import (
1216
RandomAlgorithmOptions,
@@ -16,6 +20,7 @@
1620
MultipleRoundRobinAlgorithmOptions,
1721
GeneralizedEnvyGraphAlgorithmOptions,
1822
DoubleRoundRobinAlgorithmOptions,
23+
GroupMatcherAlgorithmOptions,
1924
)
2025
from api.ai.interfaces.team_generation_options import TeamGenerationOptions
2126
from api.ai.multiple_round_robin_with_adjusted_winner_algorithm.mrr_algorithm import (
@@ -78,6 +83,8 @@ def get_algorithm_from_type(algorithm_type: AlgorithmType):
7883
return GeneralizedEnvyGraphAlgorithm
7984
if algorithm_type == AlgorithmType.DRR:
8085
return DoubleRoundRobinAlgorithm
86+
if algorithm_type == AlgorithmType.GROUP_MATCHER:
87+
return GroupMatcherAlgorithm
8188

8289
raise NotImplementedError(
8390
f"Algorithm type {algorithm_type} is not associated with an algorithm class!"
@@ -99,6 +106,8 @@ def get_algorithm_option_class(algorithm_type: AlgorithmType):
99106
return GeneralizedEnvyGraphAlgorithmOptions
100107
if algorithm_type == AlgorithmType.DRR:
101108
return DoubleRoundRobinAlgorithmOptions
109+
if algorithm_type == AlgorithmType.GROUP_MATCHER:
110+
return GroupMatcherAlgorithmOptions
102111

103112
raise NotImplementedError(
104113
f"Algorithm type {algorithm_type} is not associated with an algorithm options class!"
@@ -120,6 +129,8 @@ def get_algorithm_config_class(algorithm_type: AlgorithmType):
120129
return None
121130
if algorithm_type == AlgorithmType.DRR:
122131
return None
132+
if algorithm_type == AlgorithmType.GROUP_MATCHER:
133+
return GroupMatcherAlgorithmConfig
123134

124135
raise NotImplementedError(
125136
f"Algorithm type {algorithm_type} is not associated with an algorithm config class!"

api/ai/external_algorithms/__init__.py

Whitespace-only changes.

api/ai/external_algorithms/group_matcher_algorithm/__init__.py

Whitespace-only changes.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from dataclasses import dataclass
2+
from typing import Dict, Iterator
3+
4+
import faker
5+
from pandas import DataFrame
6+
7+
from api.ai.external_algorithms.group_matcher_algorithm.utils import (
8+
fromYearLevelToAlYearLevel,
9+
fromNumbersToTimeSlots,
10+
fromGenderToAlGender,
11+
fromRaceToAlRace,
12+
)
13+
from api.dataclasses.enums import ScenarioAttribute, Gender, Race
14+
from api.dataclasses.student import Student
15+
from api.dataclasses.team import Team
16+
from api.dataclasses.team_set import TeamSet
17+
18+
19+
@dataclass
class GroupMatcherStudent(Student):
    """A ``Student`` that can render itself as one row of group-matcher input.

    The instance copies the fields of an existing ``Student`` and adds a
    generated e-mail address (and a generated name when the student has
    none), because the row format produced by ``get_formatted_data``
    contains both.
    """

    def __init__(self, student: Student):
        super().__init__(
            student.id,
            student.name,
            student.attributes,
            student.relationships,
            student.project_preferences,
            student.team,
        )
        # A single Faker instance serves both placeholder fields.
        fake = faker.Faker()
        self.email = fake.email()
        if not self.name:
            self.name = fake.name()

    def get_formatted_data(self):
        """Return this student as a dict keyed by the group-matcher CSV columns.

        Raises:
            KeyError: if the YEAR_LEVEL or GENDER attribute is missing
                (both are read without a default).
        """
        # Names are not guaranteed to contain two words; fall back to an
        # empty string instead of raising IndexError.
        name_parts = self.name.split()
        first_name = name_parts[0] if name_parts else ""
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        return {
            "Email Address": self.email,
            "SID": self.id,
            "First name": first_name,
            "Last name": last_name,
            "What year are you": fromYearLevelToAlYearLevel(
                self.attributes[ScenarioAttribute.YEAR_LEVEL.value][0]
            ).value,
            "Would you like to be part of a course study group?": "Yes",
            "Do you have an existing study group of size 2-6 in mind": "No",
            "timezone offset": "-7",  # all the same timezone
            "Would you like to attend the same discussion": "Yes",
            "discussion section times": fromNumbersToTimeSlots(
                self.attributes.get(
                    ScenarioAttribute.TIMESLOT_AVAILABILITY.value, ["1"]
                )
            ),
            "Will you be on the Berkeley campus": "Yes",  # No remote students
            "Which of these options best describes your race?": fromRaceToAlRace(
                Race(self.attributes.get(ScenarioAttribute.RACE.value, [Race.Other])[0])
            ).value,
            "How do you self-identify?": fromGenderToAlGender(
                Gender(self.attributes[ScenarioAttribute.GENDER.value][0])
            ).value,
        }

    @staticmethod
    def transform_output_data_to_team_set(
        output_data: DataFrame,
        team_trace: Dict[int, Team],
        student_trace: Dict[int, Student],
        team_cycler: Iterator[Team],
    ) -> TeamSet:
        """Build a ``TeamSet`` from the group-matcher output table.

        Args:
            output_data: table with one row per student; the ``sid`` and
                ``group_num`` columns are read.
            team_trace: teams keyed by 1-based group number. It is mutated:
                a new team is inserted for every group number not present.
            student_trace: students keyed by the id found in ``sid``.
            team_cycler: iterator of template teams; the next template
                supplies ``requirements`` and ``project_id`` for each newly
                created team.

        Returns:
            A ``TeamSet`` holding every team in ``team_trace`` that ended up
            with at least one student.
        """
        for _, row in output_data.iterrows():
            student_id = row["sid"]
            # group_num in the output is 0-based; team_trace keys are 1-based.
            group_num = int(row["group_num"]) + 1

            if group_num not in team_trace:
                template_team = next(team_cycler)
                next_team_number = len(team_trace) + 1
                team_trace[group_num] = Team(
                    _id=next_team_number,
                    name=f"Team {next_team_number}",
                    requirements=template_team.requirements,
                    project_id=template_team.project_id,
                    students=[],
                )

            student = student_trace[student_id]
            team = team_trace[group_num]

            student.add_team(team)
            team.add_student(student)

        return TeamSet(
            teams=[team for team in team_trace.values() if len(team.students) > 0]
        )
Submodule group-matcher added at dc00ecc
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import csv
2+
from itertools import cycle
3+
import os
4+
import time
5+
from pathlib import Path
6+
from typing import List, Dict, Iterator
7+
8+
import pandas as pd
9+
10+
from api.ai.external_algorithms.group_matcher_algorithm.custom_dataclasses import (
11+
GroupMatcherStudent,
12+
)
13+
from api.ai.interfaces.algorithm import Algorithm
14+
from api.ai.interfaces.algorithm_config import GroupMatcherAlgorithmConfig
15+
from api.ai.interfaces.algorithm_options import GroupMatcherAlgorithmOptions
16+
from api.ai.interfaces.team_generation_options import TeamGenerationOptions
17+
from api.dataclasses.student import Student
18+
from api.dataclasses.team import Team
19+
from api.dataclasses.team_set import TeamSet
20+
21+
22+
class GroupMatcherAlgorithm(Algorithm):
    """
    From paper: https://sigcse2023.sigcse.org/details/sigcse-ts-2023-papers/163/Inclusive-study-group-formation-at-scale

    Wraps the external group-matcher script: students are exported to a CSV
    file, the script is run as a child process, and the CSV it writes is
    converted back into a ``TeamSet``.
    """

    student_trace: Dict[int, Student]
    team_trace: Dict[int, Team]
    team_cycler: Iterator[Team]

    group_matcher_input_data_file_path: Path
    group_matcher_output_data_file_path: Path
    group_matcher_config_path: Path

    # Upper bound (seconds) on how long generate() waits for the output file
    # to appear after the child process has exited.
    OUTPUT_WAIT_TIMEOUT_SECONDS = 60

    def __init__(
        self,
        algorithm_options: GroupMatcherAlgorithmOptions,
        team_generation_options: TeamGenerationOptions,
        algorithm_config: GroupMatcherAlgorithmConfig,
    ):
        super().__init__(algorithm_options, team_generation_options, algorithm_config)
        self.group_matcher_input_data_file_path = Path(algorithm_config.csv_input_path)
        self.group_matcher_run_path = algorithm_config.group_matcher_run_path

        self.prepare_file_environment()

        # Teams are keyed by their 1-based position in self.teams.
        self.team_trace = {
            team_idx + 1: team for team_idx, team in enumerate(self.teams)
        }
        self.team_cycler = cycle(self.teams)

    def prepare_file_environment(self):
        """Derive the output/config paths and make the file system ready.

        The input file stem must start with the class size followed by ``-``
        (it is parsed with ``int``); the expected output file is
        ``out-private-<class_size>.csv`` in the current working directory.
        Any stale output file is removed so a later existence check only
        sees the file written by the upcoming run.
        """
        class_size = int(self.group_matcher_input_data_file_path.stem.split("-")[0])
        self.group_matcher_output_data_file_path = (
            Path.cwd() / f"out-private-{class_size}.csv"
        )
        if self.group_matcher_output_data_file_path.exists():
            self.group_matcher_output_data_file_path.unlink()
        self.group_matcher_config_path = (
            Path(self.group_matcher_run_path).parent / "example_config.py"
        )
        self.group_matcher_input_data_file_path.parent.mkdir(
            parents=True, exist_ok=True
        )

    def export_students_data_to_group_matcher_format_csv(
        self, students: List[Student]
    ) -> None:
        """Write ``students`` to the input CSV (``;``-delimited) and index them by id.

        Raises:
            ValueError: if ``students`` is empty, since the CSV header is
                taken from the first student's row.
        """
        if not students:
            raise ValueError(
                "Cannot export an empty student list to the group matcher CSV"
            )
        student_data = [
            GroupMatcherStudent(student).get_formatted_data() for student in students
        ]
        self.student_trace = {student.id: student for student in students}
        # newline="" lets the csv module control line endings itself.
        with open(self.group_matcher_input_data_file_path, "w", newline="") as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=student_data[0].keys(), delimiter=";"
            )
            writer.writeheader()
            writer.writerows(student_data)

    def generate(self, students: List[Student]) -> TeamSet:
        """Run the external group matcher on ``students`` and return the teams.

        Raises:
            ValueError: if ``students`` is empty.
            FileNotFoundError: if the output file has not appeared within
                ``OUTPUT_WAIT_TIMEOUT_SECONDS`` after the script exited.
        """
        # Local import: keeps this block self-contained with respect to the
        # module's import list.
        import subprocess

        self.export_students_data_to_group_matcher_format_csv(students)

        # Run the group matcher algorithm. An argument list (no shell) keeps
        # paths containing spaces or shell metacharacters intact.
        completed = subprocess.run(
            [
                "python3",
                str(self.group_matcher_run_path),
                str(self.group_matcher_config_path),
                str(self.group_matcher_input_data_file_path),
            ],
            check=False,
        )

        # This only happens when class size is small and the system I/O speed is not as fast as the runtime
        deadline = time.monotonic() + self.OUTPUT_WAIT_TIMEOUT_SECONDS
        while not self.group_matcher_output_data_file_path.exists():
            if time.monotonic() >= deadline:
                # The child process has already exited, so waiting longer
                # would loop forever when the script failed.
                raise FileNotFoundError(
                    f"Group matcher did not produce "
                    f"{self.group_matcher_output_data_file_path} "
                    f"(exit code {completed.returncode})"
                )
            print("Not found file " + str(self.group_matcher_output_data_file_path))
            time.sleep(1)
        # Read the output csv file and create a TeamSet
        df = pd.read_csv(self.group_matcher_output_data_file_path)

        return GroupMatcherStudent.transform_output_data_to_team_set(
            df, self.team_trace, self.student_trace, self.team_cycler
        )
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
from typing import List
2+
3+
from api.dataclasses.enums import AttributeValueEnum, Gender, Race
4+
5+
6+
class AlRace(AttributeValueEnum):
    """Race labels returned by ``fromRaceToAlRace``.

    The member values are the strings written to the group-matcher input row
    (``GroupMatcherStudent.get_formatted_data`` emits ``.value``).
    """

    White = "White"
    Asian = "Asian"
    Hispanic = "Hispanic"
    Black_Or_African_American = "Black/African American"
    # NOTE(review): "Indegenous" looks like a misspelling of "Indigenous", but
    # the value is a runtime string that fromAlRaceToRace also matches on.
    # Confirm what the external tool expects before changing it.
    Indegenous = "Indegenous"
    Middle_Eastern = "Middle-Eastern"
    Multiple_Races = "Multiple races"
14+
15+
16+
class AlYearLevel(AttributeValueEnum):
    """Year-level labels returned by ``fromYearLevelToAlYearLevel``.

    Declared from ``Freshman`` to ``Graduate``; ``fromYearLevelToAlYearLevel``
    maps the integers 0-3 to the first four members and everything else to
    ``Graduate``.
    """

    Freshman = "freshman"
    Sophomore = "sophomore"
    Junior = "junior"
    Senior = "senior"
    Graduate = "graduate"
22+
23+
24+
class AlGender(AttributeValueEnum):
    """Gender labels returned by ``fromGenderToAlGender``.

    The member values are the strings written to the group-matcher input row.
    """

    Female = "Female"
    Male = "Male"
    Other = "Other"
28+
29+
30+
def fromGenderToAlGender(gender: Gender) -> AlGender:
    """Translate a project ``Gender`` into the matching ``AlGender``.

    ``Gender.MALE`` and ``Gender.FEMALE`` map to their namesakes; any other
    value maps to ``AlGender.Other``.
    """
    known_pairs = (
        (Gender.MALE, AlGender.Male),
        (Gender.FEMALE, AlGender.Female),
    )
    for project_gender, al_gender in known_pairs:
        if gender == project_gender:
            return al_gender
    return AlGender.Other
36+
37+
38+
def fromAlGenderToGender(alGenderNum: int or str) -> Gender:
    """Translate a group-matcher gender (numeric code or label) into ``Gender``.

    Accepts either a numeric code or the label string. ``0`` / ``"Female"``
    give ``Gender.FEMALE``, ``1`` / ``"Male"`` give ``Gender.MALE``, and
    anything else gives ``Gender.OTHER``.

    The label strings are paired so that this function is the inverse of
    ``fromGenderToAlGender`` (``Gender.MALE`` <-> ``"Male"``); previously the
    two labels were swapped. The numeric codes are unchanged.
    """
    # NOTE(review): the numeric codes follow AlGender declaration order
    # (Female, Male) - confirm against the external tool's encoding.
    if alGenderNum == 0 or alGenderNum == "Female":
        return Gender.FEMALE
    if alGenderNum == 1 or alGenderNum == "Male":
        return Gender.MALE
    return Gender.OTHER
44+
45+
46+
def fromRaceToAlRace(race: Race) -> AlRace:
    """Translate a project ``Race`` into the ``AlRace`` label used for export.

    The three Asian sub-categories collapse into ``AlRace.Asian`` and
    ``Race.Other`` becomes ``AlRace.Multiple_Races``. Returns ``None`` when
    ``race`` matches none of the listed values.
    """
    conversions = (
        ((Race.European,), AlRace.White),
        (
            (Race.South_Asian, Race.East_Asian, Race.South_East_Asian),
            AlRace.Asian,
        ),
        ((Race.Hispanic_or_Latin_American,), AlRace.Hispanic),
        ((Race.African,), AlRace.Black_Or_African_American),
        ((Race.First_Nations_or_Indigenous,), AlRace.Indegenous),
        ((Race.Middle_Eastern,), AlRace.Middle_Eastern),
        ((Race.Other,), AlRace.Multiple_Races),
    )
    for project_races, al_race in conversions:
        if any(race == candidate for candidate in project_races):
            return al_race
    return None
65+
66+
67+
def fromAlRaceToRace(alRaceNum: int or str) -> Race:
    """Translate a group-matcher race (numeric code or label) into ``Race``.

    Each row of the table below pairs a numeric code and its label string
    with the project ``Race`` it stands for; ``"Asian"`` always maps to
    ``Race.South_Asian``. Returns ``None`` when nothing matches.
    """
    decoding_table = (
        (0, "White", Race.European),
        (1, "Asian", Race.South_Asian),
        (2, "Hispanic", Race.Hispanic_or_Latin_American),
        (3, "Black/African American", Race.African),
        (4, "Indegenous", Race.First_Nations_or_Indigenous),
        (5, "Middle-Eastern", Race.Middle_Eastern),
        (6, "Multiple races", Race.Other),
    )
    for code, label, project_race in decoding_table:
        if alRaceNum == code or alRaceNum == label:
            return project_race
    return None
82+
83+
84+
def fromYearLevelToAlYearLevel(yearLevel: int) -> AlYearLevel:
    """Translate a numeric year level into an ``AlYearLevel``.

    0-3 map to Freshman, Sophomore, Junior and Senior respectively; every
    other value maps to ``AlYearLevel.Graduate``.
    """
    undergraduate_levels = (
        AlYearLevel.Freshman,
        AlYearLevel.Sophomore,
        AlYearLevel.Junior,
        AlYearLevel.Senior,
    )
    for level_number, al_level in enumerate(undergraduate_levels):
        if yearLevel == level_number:
            return al_level
    return AlYearLevel.Graduate
94+
95+
96+
def fromAlYearLevelToYearLevel(alYearLevel: str) -> int:
    """Translate a year-level label into its numeric year level.

    The match is a case-insensitive substring test, checked in the order
    freshman (0), sophomore (1), junior (2), senior (3); a label containing
    none of those words yields 4.
    """
    lowered = alYearLevel.lower()
    keywords = ("freshman", "sophomore", "junior", "senior")
    for level_number, keyword in enumerate(keywords):
        if keyword in lowered:
            return level_number
    return 4
106+
107+
108+
def fromNumbersToTimeSlots(numbers: List[int]) -> List[str]:
    """Convert every number in ``numbers`` with ``fromNumberToTimeslot``, keeping order."""
    return list(map(fromNumberToTimeslot, numbers))
110+
111+
112+
def fromNumberToTimeslot(number: int) -> str:
    """Return the timeslot label for ``number``, i.e. its ``str`` form."""
    timeslot = str(number)
    return timeslot
114+
115+
116+
def fromTimeslotToNumber(timeslot: str) -> int:
    """Return the number for a timeslot label by parsing it with ``int``.

    Raises:
        ValueError: if ``timeslot`` is not a valid integer literal.
    """
    number = int(timeslot)
    return number
118+
119+
120+
def fromTimeslotsToNumbers(timeslots: List[str]) -> List[int]:
    """Convert every label in ``timeslots`` with ``fromTimeslotToNumber``, keeping order."""
    return list(map(fromTimeslotToNumber, timeslots))

api/ai/interfaces/algorithm_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from abc import ABC, abstractmethod
22
from dataclasses import dataclass, field
33
from enum import Enum
4+
from pathlib import Path
45
from typing import Callable, Tuple, List
56

67
from api.dataclasses.student import Student
@@ -121,3 +122,16 @@ def __init__(self, utility_function: Callable[[Student, TeamShell], float]):
121122

122123
def validate(self):
123124
super().validate()
125+
126+
127+
class GroupMatcherAlgorithmConfig(AlgorithmConfig):
    """Configuration for ``GroupMatcherAlgorithm``: where its CSV and run script live."""

    # Path of the CSV file that GroupMatcherAlgorithm writes the students to.
    csv_input_path: Path
    # Path of the group-matcher script that GroupMatcherAlgorithm executes.
    group_matcher_run_path: Path

    def __init__(self, csv_output_path: str, group_matcher_run_path: str):
        """Store both paths as ``Path`` objects.

        Args:
            csv_output_path: stored as ``csv_input_path``.
                NOTE(review): the parameter name says "output" while the
                attribute says "input"; the name is kept for caller
                compatibility.
            group_matcher_run_path: location of the group-matcher script.
        """
        super().__init__()
        self.csv_input_path = Path(csv_output_path)
        self.group_matcher_run_path = Path(group_matcher_run_path)

    def validate(self):
        # No checks of its own; defers entirely to the base class.
        super().validate()

0 commit comments

Comments
 (0)