Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.24.0

require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/UTDNebula/nebula-api/api v0.0.0-20260226052950-dbe2edfa3cfe //points to the compound-key branch of the nebula-api.
github.com/UTDNebula/nebula-api/api v0.0.0-20260302171502-90e1baaaf4e1 //points to the compound-key branch of the nebula-api.
github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8
github.com/chromedp/chromedp v0.12.1
github.com/google/go-cmp v0.7.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0=
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/UTDNebula/nebula-api/api v0.0.0-20260226052950-dbe2edfa3cfe h1:QUcltcZLKB+kbP3xa7h+sbOwf7vxofRGPKXHo+B35Q4=
github.com/UTDNebula/nebula-api/api v0.0.0-20260226052950-dbe2edfa3cfe/go.mod h1:vWwnuoXFE/Lo9yW6Z6DJguCtAHu0xMym+6r2IEru1v0=
github.com/UTDNebula/nebula-api/api v0.0.0-20260302171502-90e1baaaf4e1 h1:xdceHH3Y2AiC0DV0bp47zN9FvM7JdzcpJrod7SIOR5w=
github.com/UTDNebula/nebula-api/api v0.0.0-20260302171502-90e1baaaf4e1/go.mod h1:vWwnuoXFE/Lo9yW6Z6DJguCtAHu0xMym+6r2IEru1v0=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
Expand Down
9 changes: 1 addition & 8 deletions parser/courseParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,8 @@ func getCourse(internalCourseNumber string, session schema.AcademicSession, rowI
CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo)
catalogYear := getCatalogYear(session)

courseKey := schema.CourseKey{
Subject_prefix: CoursePrefix,
Course_number: CourseNumber,
Catalog_year: catalogYear,
}

course := schema.Course{
Id: primitive.NewObjectID(),
Key: courseKey,
Course_number: CourseNumber,
Subject_prefix: CoursePrefix,
Title: utils.TrimWhitespace(rowInfo["Course Title:"].Text()),
Expand All @@ -72,7 +65,7 @@ func getCourse(internalCourseNumber string, session schema.AcademicSession, rowI
Activity_type: classInfo["Activity Type:"],
Grading: classInfo["Grading:"],
Internal_course_number: internalCourseNumber,
Catalog_year: getCatalogYear(session),
Catalog_year: catalogYear,
}

// Try to get lecture/lab contact hours and offering frequency from course description
Expand Down
2 changes: 1 addition & 1 deletion parser/courseParser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func TestGetCourse(t *testing.T) {
output := *getCourse(courseNum, testCase.Section.Academic_session, testCase.RowInfo, testCase.ClassInfo)
expected := testCase.Course

diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id", "Enrollment_reqs", "Prerequisites", "Key", "Section_keys"))
diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id", "Sections", "Enrollment_reqs", "Prerequisites"))

if diff != "" {
t.Errorf("Failed (-expected +got)\n %s", diff)
Expand Down
6 changes: 3 additions & 3 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ func TestParse(t *testing.T) {
t.Run(key, func(t *testing.T) {
if outputCourse, ok := CoursesByKey[key]; ok {
diff := cmp.Diff(expectedCourse, outputCourse,
cmpopts.IgnoreFields(schema.Course{}, "Id", "Key", "Section_keys"),
cmpopts.IgnoreFields(schema.Course{}, "Id"),
)

if diff != "" {
Expand Down Expand Up @@ -361,7 +361,7 @@ func TestParse(t *testing.T) {
if outputProfessor, ok := ProfessorsByKey[key]; ok {

diff := cmp.Diff(expectedProfessor, outputProfessor,
cmpopts.IgnoreFields(schema.Professor{}, "Id", "Key", "Section_keys"),
cmpopts.IgnoreFields(schema.Professor{}, "Id"),
cmp.Transformer("Sections", func(sections []primitive.ObjectID) []string {
result := make([]string, 0, len(sections))
for _, id := range sections {
Expand Down Expand Up @@ -411,7 +411,7 @@ func TestParse(t *testing.T) {
t.Run(key, func(t *testing.T) {
if outputSection, ok := SectionsByClass[key]; ok {
diff := cmp.Diff(expectedSection, outputSection,
cmpopts.IgnoreFields(schema.Section{}, "Id", "Key", "Course_key", "Professor_keys"),
cmpopts.IgnoreFields(schema.Section{}, "Id"),
cmp.Transformer("Professors", func(profIds []primitive.ObjectID) []string {
result := make([]string, 0, len(profIds))
for _, id := range profIds {
Expand Down
7 changes: 3 additions & 4 deletions parser/professorParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"go.mongodb.org/mongo-driver/bson/primitive"
)

func parseProfessors(sectionId schema.SectionKey, rowInfo map[string]*goquery.Selection) []schema.ProfessorKey {
func parseProfessors(sectionKey schema.ProfSectionKey, rowInfo map[string]*goquery.Selection) []schema.ProfessorKey {
professorText := utils.TrimWhitespace(rowInfo["Instructor(s):"].Text())
professorMatches := personRegexp.FindAllStringSubmatch(professorText, -1)
var profRefs []schema.ProfessorKey = make([]schema.ProfessorKey, 0, len(professorMatches))
Expand All @@ -36,7 +36,7 @@ func parseProfessors(sectionId schema.SectionKey, rowInfo map[string]*goquery.Se

prof, profExists := Professors[profKey]
if profExists {
prof.Section_keys = append(prof.Section_keys, sectionId)
prof.Sections = append(prof.Sections, sectionKey)
profRefs = append(profRefs, professorKey)
continue
}
Expand All @@ -45,10 +45,9 @@ func parseProfessors(sectionId schema.SectionKey, rowInfo map[string]*goquery.Se
prof.Id = primitive.NewObjectID()
prof.First_name = firstName
prof.Last_name = lastName
prof.Key = professorKey
prof.Titles = []string{utils.TrimWhitespace(match[2])}
prof.Email = utils.TrimWhitespace(match[3])
prof.Section_keys = []schema.SectionKey{sectionId}
prof.Sections = []schema.ProfSectionKey{sectionKey}
profRefs = append(profRefs, professorKey)
Professors[profKey] = prof
ProfessorIDMap[prof.Id] = profKey
Expand Down
7 changes: 0 additions & 7 deletions parser/profileLoader.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ func loadProfiles(inDir string) {
}

profKey := prof.First_name + prof.Last_name

professorKey := schema.ProfessorKey {
First_name: prof.First_name,
Last_name: prof.Last_name,
}

prof.Key = professorKey
Professors[profKey] = &prof
ProfessorIDMap[prof.Id] = profKey
}
Expand Down
29 changes: 14 additions & 15 deletions parser/sectionParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,36 +42,35 @@ func parseSection(rowInfo map[string]*goquery.Selection, classInfo map[string]st
classNum, courseNum := getInternalClassAndCourseNum(classInfo)
session := getAcademicSession(rowInfo)
courseRef := parseCourse(courseNum, session, rowInfo, classInfo)

sectionNumber := getSectionNumber(classInfo)

id := primitive.NewObjectID()

// Build compound keys
courseKey := courseRef.Key
courseKey := schema.CourseKey{
Subject_prefix: courseRef.Subject_prefix,
Course_number: courseRef.Course_number,
Catalog_year: courseRef.Catalog_year,
}

if (courseKey == schema.CourseKey{}) {
courseKey = schema.CourseKey{
Subject_prefix: courseRef.Subject_prefix,
Course_number: courseRef.Course_number,
Catalog_year: courseRef.Catalog_year,
}
courseSectionKey := schema.CourseSectionKey{
Section_number: sectionNumber,
Term: session.Name,
}
sectionKey := schema.SectionKey{

profSectionKey := schema.ProfSectionKey{
Subject_prefix: courseRef.Subject_prefix,
Course_number: courseRef.Course_number,
Catalog_year: courseRef.Catalog_year,
Term: session.Name,
Section_number: sectionNumber,
Term: session.Name,
}

section := schema.Section{
Id: id,
Key: sectionKey,
Section_number: sectionNumber,
Course_key: courseKey,
Course: courseKey,
Academic_session: session,
Professor_keys: parseProfessors(sectionKey, rowInfo),
Professors: parseProfessors(profSectionKey, rowInfo),
Teaching_assistants: getTeachingAssistants(rowInfo),
Internal_class_number: classNum,
Instruction_mode: getInstructionMode(classInfo),
Expand All @@ -85,7 +84,7 @@ func parseSection(rowInfo map[string]*goquery.Selection, classInfo map[string]st
Sections[section.Id] = &section

// Append new section to course's section listing
courseRef.Section_keys = append(courseRef.Section_keys, sectionKey)
courseRef.Sections = append(courseRef.Sections, courseSectionKey)
}

// getInternalClassAndCourseNum returns a sections internal course and class number,
Expand Down
5 changes: 4 additions & 1 deletion parser/testdata/case_000/course.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
"corequisites": null,
"co_or_pre_requisites": null,
"sections": [
"67d07ee0c972c18731e23bd8"
{
"section_number": "003",
"term": "25S"
}
],
"lecture_contact_hours": "3",
"laboratory_contact_hours": "0",
Expand Down
14 changes: 12 additions & 2 deletions parser/testdata/case_000/professors.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
"image_uri": "",
"office_hours": null,
"sections": [
"67d07ee0c972c18731e23bd8"
{
"subject_prefix": "ACCT",
"course_number": "2301",
"section_number": "003",
"term": "25S"
}
]
},
{
Expand All @@ -38,7 +43,12 @@
"image_uri": "",
"office_hours": null,
"sections": [
"67d07ee0c972c18731e23bd8"
{
"subject_prefix": "ACCT",
"course_number": "2301",
"section_number": "003",
"term": "25S"
}
]
}
]
16 changes: 13 additions & 3 deletions parser/testdata/case_000/section.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
{
"_id": "67d07ee0c972c18731e23bd8",
"section_number": "003",
"course_reference": "67d07ee0c972c18731e23bd7",
"course_key": {
"subject_prefix": "ACCT",
"course_number": "2301",
"catalog_year": "24"
},
"section_corequisites": null,
"academic_session": {
"name": "25S",
"start_date": "2025-01-21T00:00:00-06:00",
"end_date": "2025-05-16T00:00:00-05:00"
},
"professors": [
"67d07ee0c972c18731e23bd9",
"67d07ee0c972c18731e23bda"
{
"first_name": "Naim Bugra",
"last_name": "Ozel"
},
{
"first_name": "Jieying",
"last_name": "Zhang"
}
],
"teaching_assistants": [
{
Expand Down
5 changes: 4 additions & 1 deletion parser/testdata/case_001/course.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
"corequisites": null,
"co_or_pre_requisites": null,
"sections": [
"67d07ee0c972c18731e23bdc"
{
"section_number": "001",
"term": "25S"
}
],
"lecture_contact_hours": "3",
"laboratory_contact_hours": "0",
Expand Down
14 changes: 12 additions & 2 deletions parser/testdata/case_001/professors.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
"image_uri": "",
"office_hours": null,
"sections": [
"67d07ee0c972c18731e23bdc"
{
"subject_prefix": "ACCT",
"course_number": "2301",
"section_number": "001",
"term": "25S"
}
]
},
{
Expand All @@ -38,7 +43,12 @@
"image_uri": "",
"office_hours": null,
"sections": [
"67d07ee0c972c18731e23bdc"
{
"subject_prefix": "ACCT",
"course_number": "2301",
"section_number": "001",
"term": "25S"
}
]
}
]
16 changes: 13 additions & 3 deletions parser/testdata/case_001/section.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
{
"_id": "67d07ee0c972c18731e23bdc",
"section_number": "001",
"course_reference": "67d07ee0c972c18731e23bdb",
"course_key": {
"subject_prefix": "ACCT",
"course_number": "2301",
"catalog_year": "24"
},
"section_corequisites": null,
"academic_session": {
"name": "25S",
"start_date": "2025-01-21T00:00:00-06:00",
"end_date": "2025-05-16T00:00:00-05:00"
},
"professors": [
"67d07ee0c972c18731e23bdd",
"67d07ee0c972c18731e23bde"
{
"first_name": "Jieying",
"last_name": "Zhang"
},
{
"first_name": "Naim Bugra",
"last_name": "Ozel"
}
],
"teaching_assistants": [
{
Expand Down
5 changes: 4 additions & 1 deletion parser/testdata/case_002/course.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
"corequisites": null,
"co_or_pre_requisites": null,
"sections": [
"67d07ee0c972c18731e23be0"
{
"section_number": "501",
"term": "25S"
}
],
"lecture_contact_hours": "3",
"laboratory_contact_hours": "0",
Expand Down
7 changes: 6 additions & 1 deletion parser/testdata/case_002/professors.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
"image_uri": "",
"office_hours": null,
"sections": [
"67d07ee0c972c18731e23be0"
{
"subject_prefix": "BA",
"course_number": "1320",
"section_number": "501",
"term": "25S"
}
]
}
]
11 changes: 9 additions & 2 deletions parser/testdata/case_002/section.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
{
"_id": "67d07ee0c972c18731e23be0",
"section_number": "501",
"course_reference": "67d07ee0c972c18731e23bdf",
"course_key": {
"subject_prefix": "BA",
"course_number": "1320",
"catalog_year": "24"
},
"section_corequisites": null,
"academic_session": {
"name": "25S",
"start_date": "2025-01-21T00:00:00-06:00",
"end_date": "2025-05-16T00:00:00-05:00"
},
"professors": [
"67d07ee0c972c18731e23be1"
{
"first_name": "Peter",
"last_name": "Lewin"
}
],
"teaching_assistants": [
{
Expand Down
5 changes: 4 additions & 1 deletion parser/testdata/case_003/course.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
"corequisites": null,
"co_or_pre_requisites": null,
"sections": [
"67d07ee0c972c18731e23be3"
{
"section_number": "016",
"term": "25S"
}
],
"lecture_contact_hours": "1",
"laboratory_contact_hours": "0",
Expand Down
Loading