Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions parser/astraParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ import (
"github.com/UTDNebula/nebula-api/api/schema"
)

// InputData describes the raw Astra export payload containing fields metadata and row values.
type InputData struct {
// Fields is decoded from the "fields" JSON key as a single string
// (presumably a delimited list of column names — confirm against ParseAstra).
Fields string `json:"fields"`
// Data is decoded from the "data" JSON key as a list of rows, each row being a list of untyped values.
Data [][]interface{} `json:"data"`
}

// ParseAstra reads Astra scrape output and produces structured multi-building event JSON files.
func ParseAstra(inDir string, outDir string) {

astraFile, err := os.ReadFile(inDir + "/astraScraped.json")
Expand Down
3 changes: 3 additions & 0 deletions parser/courseParser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/UTDNebula/nebula-api/api/schema"
)

// TestGetCourse checks course parsing from HTML fixtures.
func TestGetCourse(t *testing.T) {
t.Parallel()

Expand All @@ -28,6 +29,7 @@ func TestGetCourse(t *testing.T) {
}
}

// TestGetCatalogYear ensures catalog year derivation matches expected academic sessions.
func TestGetCatalogYear(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -88,6 +90,7 @@ func TestGetCatalogYear(t *testing.T) {
}
}

// TestGetPrefixAndCourseNum verifies extraction of subject prefixes and course numbers.
func TestGetPrefixAndCourseNum(t *testing.T) {
t.Parallel()

Expand Down
3 changes: 2 additions & 1 deletion parser/mapParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ import (
"github.com/UTDNebula/nebula-api/api/schema"
)

// BUILDINGS_CATEGORY_IDS lists category identifiers for academic, administrative, and housing buildings on Concept3D.
// The IDs are found under "Academic & Administrative" and "Housing" on https://api.concept3d.com/categories/?map=1772&key=0001085cc708b9cef47080f064612ca5
var BUILDINGS_CATEGORY_IDS = []int{42138, 42141}

// acronymRegex matches text containing a parenthesized part and captures what is inside the parentheses,
// e.g. for "Student Services Addition (SSA)" the capture group is "SSA".
// Both `.*` are greedy, so the capture runs from the last "(" that still has a ")" after it up to the final ")".
var acronymRegex = regexp.MustCompile(`.*\((.*)\)`)

// ParseMapLocations filters Concept3D location exports to building records and writes normalized JSON output.
func ParseMapLocations(inDir string, outDir string) {
mapFile, err := os.ReadFile(inDir + "/mapLocationsScraped.json")
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions parser/mazevoParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ var buildingRenames = map[string]string{
"Student Services Addition (SSA)": "SSA",
}

// SourceData represents the Mazevo API response containing booking records.
type SourceData struct {
// Bookings is decoded from the "bookings" JSON key; each booking is kept as an untyped key/value map.
Bookings []map[string]interface{} `json:"bookings"`
}

// ParseMazevo reads Mazevo scrape output and emits normalized multi-building event JSON.
func ParseMazevo(inDir string, outDir string) {

mazevoFile, err := os.ReadFile(inDir + "/mazevoScraped.json")
Expand Down
19 changes: 10 additions & 9 deletions parser/parser.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Package parser converts scraped course and scheduling inputs into structured Nebula API schema documents.
package parser

import (
Expand All @@ -14,32 +15,32 @@ import (
)

var (
// Sections maps section IDs (primitive.ObjectID) to the associated *schema.Section records.
Sections = make(map[primitive.ObjectID]*schema.Section)

// Courses maps catalog keys (Internal_course_number + Catalog_year) to *schema.Course definitions.
Courses = make(map[string]*schema.Course)

// Professors maps name keys (First_name + Last_name) to *schema.Professor documents.
Professors = make(map[string]*schema.Professor)

// CourseIDMap is an auxiliary map from course IDs to string keys
// (presumably the keys used in Courses — confirm against callers).
CourseIDMap = make(map[primitive.ObjectID]string)

// ProfessorIDMap is an auxiliary map from professor IDs to string keys
// (presumably the keys used in Professors — confirm against callers).
ProfessorIDMap = make(map[primitive.ObjectID]string)

// ReqParsers maps course IDs to the func() that parses that course's requisites.
ReqParsers = make(map[primitive.ObjectID]func())

// GradeMap stores section grade distributions; the mapping is
// MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION.
// This declaration has no initializer, so the map is nil until it is assigned.
GradeMap map[string]map[string][]int

// timeLocation is the America/Chicago tz database zone used for dates (it accounts for daylight saving);
// timeError holds the error returned by time.LoadLocation when loading it.
timeLocation, timeError = time.LoadLocation("America/Chicago")
)

// Parse Externally exposed parse function
// Parse loads scraped course artifacts, applies parsing and validation, and persists structured results.
func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {

// Panic if timeLocation didn't load properly
Expand Down
13 changes: 5 additions & 8 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"go.mongodb.org/mongo-driver/bson/primitive"
)

// TestData bundles a parser test input with its expected artifacts.
type TestData struct {
Input string
RowInfo map[string]*goquery.Selection
Expand All @@ -33,14 +34,7 @@ type TestData struct {
// testData is a package-level dictionary holding the data loaded from /testdata, keyed by folder name.
var testData map[string]TestData

// TestMain entry point for all tests in the parser package.
// The function will load `./testdata` into memory before running
// the tests so that test can run in parallel.
//
// You can optionally provide the flag `update`, which will run
// updateTestData. Example usage
//
// `go test -v ./parser -args -update`
// TestMain loads parser fixtures and handles the -update flag for regenerating expectations.
func TestMain(m *testing.M) {
update := flag.Bool("update", false, "Regenerates the expected output for the provided test inputs. Should only be used when you are 100% sure your code is correct! It will make all test pass :)")

Expand Down Expand Up @@ -247,6 +241,7 @@ func clearGlobals() {
ReqParsers = make(map[primitive.ObjectID]func())
}

// TestParse verifies that parsing input fixtures generates the expected JSON exports.
func TestParse(t *testing.T) {
tempDir := t.TempDir()
// todo fix grade data, csvPath = ./grade-data panics
Expand Down Expand Up @@ -496,6 +491,7 @@ func unmarshallFile[T any](path string) (T, error) {
return result, nil
}

// TestGetClassInfo validates extraction of class metadata from course pages.
func TestGetClassInfo(t *testing.T) {
t.Parallel()

Expand All @@ -519,6 +515,7 @@ func TestGetClassInfo(t *testing.T) {
}
}

// TestGetRowInfo confirms table rows are mapped to labels and content correctly.
func TestGetRowInfo(t *testing.T) {
t.Parallel()
// don't include any weird characters in the content, it's not a bug with getRowInfo but
Expand Down
28 changes: 20 additions & 8 deletions parser/requisiteParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
It's worth noting that I say stack in quotes above because it's not treated as strictly LIFO like a stack would normally be.
*/

// Regex matcher object for requisite group parsing
// Matcher defines a regex-driven handler used during requisite group parsing.
type Matcher struct {
Regex *regexp.Regexp
Handler func(string, []string) interface{}
Expand All @@ -31,6 +31,7 @@ type Matcher struct {

// ANDRegex matches the word "and" surrounded by whitespace, case-insensitively.
// ANDMatcher splits requisite text on it.
var ANDRegex = regexp.MustCompile(`(?i)\s+and\s+`)

// ANDMatcher parses conjunction-separated requisites into an AND collection requirement.
func ANDMatcher(group string, subgroups []string) interface{} {
// Split text along " and " boundaries, then parse subexpressions as groups into an "AND" CollectionRequirement
subExpressions := ANDRegex.Split(group, -1)
Expand All @@ -52,12 +53,8 @@ func ANDMatcher(group string, subgroups []string) interface{} {
}
}

// First regex subgroup represents the text to be subgrouped and parsed with parseFnc
// Ex: Text is: "(OPRE 3360 or STAT 3360 or STAT 4351), and JSOM majors and minors only"
// Regex is: "(JSOM majors and minors only)"
// Resulting substituted text would be: "(OPRE 3360 or STAT 3360 or STAT 4351), and @N", where N is some group number
// When @N is dereferenced from the requisite list, it will have a value equivalent to the result of parseFnc(group, subgroups)

// SubstitutionMatcher returns a matcher that replaces a subgroup with parseFnc's result before parsing the outer group.
// For example, "(OPRE 3360 or STAT 3360 or STAT 4351), and JSOM majors and minors only" becomes "... and @N".
func SubstitutionMatcher(parseFnc func(string, []string) interface{}) func(string, []string) interface{} {
// Return a closure that uses parseFnc to substitute subgroups[1]
return func(group string, subgroups []string) interface{} {
Expand All @@ -72,6 +69,7 @@ func SubstitutionMatcher(parseFnc func(string, []string) interface{}) func(strin

// ORRegex matches the word "or" surrounded by whitespace, case-insensitively.
// ORMatcher splits requisite text on it.
var ORRegex = regexp.MustCompile(`(?i)\s+or\s+`)

// ORMatcher parses disjunction-separated requisites into an OR collection requirement.
func ORMatcher(group string, subgroups []string) interface{} {
// Split text along " or " boundaries, then parse subexpressions as groups into an "OR" CollectionRequirement
subExpressions := ORRegex.Split(group, -1)
Expand All @@ -93,6 +91,7 @@ func ORMatcher(group string, subgroups []string) interface{} {
}
}

// CourseMinGradeMatcher returns a course requirement enforcing a minimum grade when an ICN is found.
func CourseMinGradeMatcher(group string, subgroups []string) interface{} {
icn, err := findICN(subgroups[1], subgroups[2])
if err != nil {
Expand All @@ -102,6 +101,7 @@ func CourseMinGradeMatcher(group string, subgroups []string) interface{} {
return schema.NewCourseRequirement(icn, subgroups[3])
}

// CourseMatcher returns a course requirement with the default minimum grade expectation.
func CourseMatcher(group string, subgroups []string) interface{} {
icn, err := findICN(subgroups[1], subgroups[2])
if err != nil {
Expand All @@ -111,10 +111,12 @@ func CourseMatcher(group string, subgroups []string) interface{} {
return schema.NewCourseRequirement(icn, "D")
}

// ConsentMatcher builds a consent requirement from a matched requisite group.
// group is the full matched text and is not used; subgroups[1] (presumably the
// consent grantor captured by the matcher's regex) is passed to
// schema.NewConsentRequirement. subgroups is indexed without a length check.
func ConsentMatcher(group string, subgroups []string) interface{} {
	grantor := subgroups[1]
	return schema.NewConsentRequirement(grantor)
}

// LimitMatcher produces a limit requirement that caps allowable credit hours.
func LimitMatcher(group string, subgroups []string) interface{} {
hourLimit, err := strconv.Atoi(subgroups[1])
if err != nil {
Expand All @@ -123,18 +125,22 @@ func LimitMatcher(group string, subgroups []string) interface{} {
return schema.NewLimitRequirement(hourLimit)
}

// MajorMatcher builds a major requirement from a matched requisite group.
// group is not used; subgroups[1] is handed to schema.NewMajorRequirement
// as-is. subgroups is indexed without a length check.
func MajorMatcher(group string, subgroups []string) interface{} {
	major := subgroups[1]
	return schema.NewMajorRequirement(major)
}

// MinorMatcher builds a minor requirement from a matched requisite group.
// group is not used; subgroups[1] is handed to schema.NewMinorRequirement
// as-is. subgroups is indexed without a length check.
func MinorMatcher(group string, subgroups []string) interface{} {
	minor := subgroups[1]
	return schema.NewMinorRequirement(minor)
}

// MajorMinorMatcher builds an "OR" collection requirement (with the count
// argument set to 1) whose options are a major requirement and a minor
// requirement, both created from subgroups[1]. group is not used.
func MajorMinorMatcher(group string, subgroups []string) interface{} {
	subject := subgroups[1]
	major := schema.NewMajorRequirement(subject)
	minor := schema.NewMinorRequirement(subject)
	// The collection stores the requirement values, not the pointers.
	options := []interface{}{*major, *minor}
	return schema.NewCollectionRequirement("OR", 1, options)
}

// CoreMatcher creates a core requirement for a core category together with a required number of hours.
func CoreMatcher(group string, subgroups []string) interface{} {
hourReq, err := strconv.Atoi(subgroups[1])
if err != nil {
Expand All @@ -143,10 +149,12 @@ func CoreMatcher(group string, subgroups []string) interface{} {
return schema.NewCoreRequirement(subgroups[2], hourReq)
}

// CoreCompletionMatcher builds a core requirement for the category named in
// subgroups[1], passing -1 as the hour value to indicate that no hour
// requirement applies. group is not used.
func CoreCompletionMatcher(group string, subgroups []string) interface{} {
	const noHourRequirement = -1
	category := subgroups[1]
	return schema.NewCoreRequirement(category, noHourRequirement)
}

// ChoiceMatcher parses the first subgroup into a collection requirement and wraps it in a choice requirement.
func ChoiceMatcher(group string, subgroups []string) interface{} {
collectionReq, ok := parseGroup(subgroups[1]).(*schema.CollectionRequirement)
if !ok {
Expand All @@ -156,6 +164,7 @@ func ChoiceMatcher(group string, subgroups []string) interface{} {
return schema.NewChoiceRequirement(collectionReq)
}

// GPAMatcher represents GPA-based prerequisites.
func GPAMatcher(group string, subgroups []string) interface{} {
GPAFloat, err := strconv.ParseFloat(subgroups[1], 32)
if err != nil {
Expand All @@ -164,13 +173,15 @@ func GPAMatcher(group string, subgroups []string) interface{} {
return schema.NewGPARequirement(GPAFloat, "")
}

// ThrowawayMatcher marks matched text as ignorable: it returns a
// schema.Requirement value whose Type is "throwaway" and whose other fields
// are left at their zero values. Neither argument is used.
func ThrowawayMatcher(group string, subgroups []string) interface{} {
	var ignored schema.Requirement
	ignored.Type = "throwaway"
	return ignored
}

// groupTagRegex matches group tags of the form "@N", where N is one or more digits, and captures the digits.
var groupTagRegex = regexp.MustCompile(`@(\d+)`)

// GroupTagMatcher resolves stack-referenced groups by index.
func GroupTagMatcher(group string, subgroups []string) interface{} {
groupIndex, err := strconv.Atoi(subgroups[1])
if err != nil {
Expand All @@ -185,13 +196,14 @@ func GroupTagMatcher(group string, subgroups []string) interface{} {
return parsedGrp
}

// OtherMatcher wraps unmatched text in an OtherRequirement.
func OtherMatcher(group string, subgroups []string) interface{} {
return schema.NewOtherRequirement(ungroupText(group), "")
}

/////////////////////// END MATCHER FUNCS ///////////////////////

// Matchers contains the ordered collection of matcher rules applied during requisite parsing.
// Matchers must be listed in order of precedence.
// NOTE: PARENTHESES ARE OF HIGHEST PRECEDENCE! (This is due to groupParens() handling grouping of parenthesized text before parsing begins)
var Matchers []Matcher

Expand Down
3 changes: 3 additions & 0 deletions parser/sectionParser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/google/go-cmp/cmp"
)

// TestGetInternalClassAndCourseNum checks parsing of internal course identifiers.
func TestGetInternalClassAndCourseNum(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -59,6 +60,7 @@ func TestGetInternalClassAndCourseNum(t *testing.T) {
}
}

// TestGetAcademicSession ensures term metadata is parsed correctly.
func TestGetAcademicSession(t *testing.T) {
t.Parallel()

Expand All @@ -78,6 +80,7 @@ func TestGetAcademicSession(t *testing.T) {
}
}

// TestGetSectionNumber validates extraction of section numbers.
func TestGetSectionNumber(t *testing.T) {
t.Parallel()

Expand Down
Loading