diff --git a/parser/astraParser.go b/parser/astraParser.go index 15a2c2d..f2e7890 100644 --- a/parser/astraParser.go +++ b/parser/astraParser.go @@ -11,11 +11,13 @@ import ( "github.com/UTDNebula/nebula-api/api/schema" ) +// InputData describes the raw Astra export payload containing fields metadata and row values. type InputData struct { Fields string `json:"fields"` Data [][]interface{} `json:"data"` } +// ParseAstra reads Astra scrape output and produces structured multi-building event JSON files. func ParseAstra(inDir string, outDir string) { astraFile, err := os.ReadFile(inDir + "/astraScraped.json") diff --git a/parser/courseParser_test.go b/parser/courseParser_test.go index 0bc22a3..a72ede8 100644 --- a/parser/courseParser_test.go +++ b/parser/courseParser_test.go @@ -9,6 +9,7 @@ import ( "github.com/UTDNebula/nebula-api/api/schema" ) +// TestGetCourse checks course parsing from HTML fixtures. func TestGetCourse(t *testing.T) { t.Parallel() @@ -28,6 +29,7 @@ func TestGetCourse(t *testing.T) { } } +// TestGetCatalogYear ensures catalog year derivation matches expected academic sessions. func TestGetCatalogYear(t *testing.T) { t.Parallel() @@ -88,6 +90,7 @@ func TestGetCatalogYear(t *testing.T) { } } +// TestGetPrefixAndCourseNum verifies extraction of subject prefixes and course numbers. func TestGetPrefixAndCourseNum(t *testing.T) { t.Parallel() diff --git a/parser/mapParser.go b/parser/mapParser.go index 56db69e..875569c 100644 --- a/parser/mapParser.go +++ b/parser/mapParser.go @@ -12,11 +12,12 @@ import ( "github.com/UTDNebula/nebula-api/api/schema" ) -// Found under "Academic & Administrative" and "Housing" on https://api.concept3d.com/categories/?map=1772&key=0001085cc708b9cef47080f064612ca5 +// BUILDINGS_CATEGORY_IDS lists category identifiers for academic, administrative, and housing buildings on Concept3D. 
var BUILDINGS_CATEGORY_IDS = []int{42138, 42141} var acronymRegex = regexp.MustCompile(`.*\((.*)\)`) +// ParseMapLocations filters Concept3D location exports to building records and writes normalized JSON output. func ParseMapLocations(inDir string, outDir string) { mapFile, err := os.ReadFile(inDir + "/mapLocationsScraped.json") if err != nil { diff --git a/parser/mazevoParser.go b/parser/mazevoParser.go index 5e3a03b..a4cca30 100644 --- a/parser/mazevoParser.go +++ b/parser/mazevoParser.go @@ -16,10 +16,12 @@ var buildingRenames = map[string]string{ "Student Services Addition (SSA)": "SSA", } +// SourceData represents the Mazevo API response containing booking records. type SourceData struct { Bookings []map[string]interface{} `json:"bookings"` } +// ParseMazevo reads Mazevo scrape output and emits normalized multi-building event JSON. func ParseMazevo(inDir string, outDir string) { mazevoFile, err := os.ReadFile(inDir + "/mazevoScraped.json") diff --git a/parser/parser.go b/parser/parser.go index 82b1d08..2b7049e 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,3 +1,4 @@ +// Package parser converts scraped course and scheduling inputs into structured Nebula API schema documents. package parser import ( @@ -14,32 +15,32 @@ import ( ) var ( - // Sections dictionary for mapping UUIDs to a *schema.Section + // Sections maps section IDs to the associated section records. Sections = make(map[primitive.ObjectID]*schema.Section) - // Courses dictionary for keys (Internal_course_number + Catalog_year) to a *schema.Course + // Courses maps catalog identifiers to course definitions. Courses = make(map[string]*schema.Course) - // Professors dictionary for keys (First_name + Last_name) to a *schema.Professor + // Professors maps professor names to professor documents. Professors = make(map[string]*schema.Professor) - //CourseIDMap auxiliary dictionary for mapping UUIDs to a *schema.Course + // CourseIDMap maps course IDs to their catalog keys. 
CourseIDMap = make(map[primitive.ObjectID]string) - //ProfessorIDMap auxiliary dictionary for mapping UUIDs to a *schema.Professor + // ProfessorIDMap maps professor IDs to their lookup keys. ProfessorIDMap = make(map[primitive.ObjectID]string) - // ReqParsers dictionary mapping course UUIDs to the func() that parsers its Reqs + // ReqParsers maps course IDs to requisite parser functions. ReqParsers = make(map[primitive.ObjectID]func()) - // GradeMap mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION + // GradeMap stores grade distributions keyed by semester and section identifier. GradeMap map[string]map[string][]int - // timeLocation Time location for dates (uses America/Chicago tz database zone for CDT which accounts for daylight saving) + // timeLocation captures the America/Chicago location for timestamp normalization. timeLocation, timeError = time.LoadLocation("America/Chicago") ) -// Parse Externally exposed parse function +// Parse loads scraped course artifacts, applies parsing and validation, and persists structured results. func Parse(inDir string, outDir string, csvPath string, skipValidation bool) { // Panic if timeLocation didn't load properly diff --git a/parser/parser_test.go b/parser/parser_test.go index 95ccf86..cee8873 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -21,6 +21,7 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) +// TestData bundles a parser test input with its expected artifacts. type TestData struct { Input string RowInfo map[string]*goquery.Selection @@ -33,14 +34,7 @@ type TestData struct { // testData global dictionary containing the data from /testdata by folder name var testData map[string]TestData -// TestMain entry point for all tests in the parser package. -// The function will load `./testdata` into memory before running -// the tests so that test can run in parallel. 
-// -// You can optionally provide the flag `update`, which will run -// updateTestData. Example usage -// -// `go test -v ./parser -args -update` +// TestMain loads parser fixtures and handles the -update flag for regenerating expectations. func TestMain(m *testing.M) { update := flag.Bool("update", false, "Regenerates the expected output for the provided test inputs. Should only be used when you are 100% sure your code is correct! It will make all test pass :)") @@ -247,6 +241,7 @@ func clearGlobals() { ReqParsers = make(map[primitive.ObjectID]func()) } +// TestParse verifies that parsing input fixtures generates the expected JSON exports. func TestParse(t *testing.T) { tempDir := t.TempDir() // todo fix grade data, csvPath = ./grade-data panics @@ -496,6 +491,7 @@ func unmarshallFile[T any](path string) (T, error) { return result, nil } +// TestGetClassInfo validates extraction of class metadata from course pages. func TestGetClassInfo(t *testing.T) { t.Parallel() @@ -519,6 +515,7 @@ func TestGetClassInfo(t *testing.T) { } } +// TestGetRowInfo confirms table rows are mapped to labels and content correctly. func TestGetRowInfo(t *testing.T) { t.Parallel() // don't include any weird characters in the content, it's not a bug with getRowInfo but diff --git a/parser/requisiteParser.go b/parser/requisiteParser.go index 743521a..88f6ff9 100644 --- a/parser/requisiteParser.go +++ b/parser/requisiteParser.go @@ -21,7 +21,7 @@ import ( It's worth noting that I say stack in quotes above because it's not treated as strictly LIFO like a stack would normally be. */ -// Regex matcher object for requisite group parsing +// Matcher defines a regex-driven handler used during requisite group parsing. type Matcher struct { Regex *regexp.Regexp Handler func(string, []string) interface{} @@ -31,6 +31,7 @@ type Matcher struct { var ANDRegex = regexp.MustCompile(`(?i)\s+and\s+`) +// ANDMatcher parses conjunction-separated requisites into an AND collection requirement. 
func ANDMatcher(group string, subgroups []string) interface{} { // Split text along " and " boundaries, then parse subexpressions as groups into an "AND" CollectionRequirement subExpressions := ANDRegex.Split(group, -1) @@ -52,12 +53,8 @@ func ANDMatcher(group string, subgroups []string) interface{} { } } -// First regex subgroup represents the text to be subgrouped and parsed with parseFnc -// Ex: Text is: "(OPRE 3360 or STAT 3360 or STAT 4351), and JSOM majors and minors only" -// Regex is: "(JSOM majors and minors only)" -// Resulting substituted text would be: "(OPRE 3360 or STAT 3360 or STAT 4351), and @N", where N is some group number -// When @N is dereferenced from the requisite list, it will have a value equivalent to the result of parseFnc(group, subgroups) - +// SubstitutionMatcher returns a matcher that replaces a subgroup with parseFnc's result before parsing the outer group. +// For example, "(OPRE 3360 or STAT 3360 or STAT 4351), and JSOM majors and minors only" becomes "... and @N". func SubstitutionMatcher(parseFnc func(string, []string) interface{}) func(string, []string) interface{} { // Return a closure that uses parseFnc to substitute subgroups[1] return func(group string, subgroups []string) interface{} { @@ -72,6 +69,7 @@ func SubstitutionMatcher(parseFnc func(string, []string) interface{}) func(strin var ORRegex = regexp.MustCompile(`(?i)\s+or\s+`) +// ORMatcher parses disjunction-separated requisites into an OR collection requirement. func ORMatcher(group string, subgroups []string) interface{} { // Split text along " or " boundaries, then parse subexpressions as groups into an "OR" CollectionRequirement subExpressions := ORRegex.Split(group, -1) @@ -93,6 +91,7 @@ func ORMatcher(group string, subgroups []string) interface{} { } } +// CourseMinGradeMatcher returns a course requirement enforcing a minimum grade when an ICN is found. 
func CourseMinGradeMatcher(group string, subgroups []string) interface{} { icn, err := findICN(subgroups[1], subgroups[2]) if err != nil { @@ -102,6 +101,7 @@ func CourseMinGradeMatcher(group string, subgroups []string) interface{} { return schema.NewCourseRequirement(icn, subgroups[3]) } +// CourseMatcher returns a course requirement with the default minimum grade expectation. func CourseMatcher(group string, subgroups []string) interface{} { icn, err := findICN(subgroups[1], subgroups[2]) if err != nil { @@ -111,10 +111,12 @@ func CourseMatcher(group string, subgroups []string) interface{} { return schema.NewCourseRequirement(icn, "D") } +// ConsentMatcher captures grantor consent requirements from requisite text. func ConsentMatcher(group string, subgroups []string) interface{} { return schema.NewConsentRequirement(subgroups[1]) } +// LimitMatcher produces a limit requirement that caps allowable credit hours. func LimitMatcher(group string, subgroups []string) interface{} { hourLimit, err := strconv.Atoi(subgroups[1]) if err != nil { @@ -123,18 +125,22 @@ func LimitMatcher(group string, subgroups []string) interface{} { return schema.NewLimitRequirement(hourLimit) } +// MajorMatcher produces a major-specific requirement. func MajorMatcher(group string, subgroups []string) interface{} { return schema.NewMajorRequirement(subgroups[1]) } +// MinorMatcher produces a minor-specific requirement. func MinorMatcher(group string, subgroups []string) interface{} { return schema.NewMinorRequirement(subgroups[1]) } +// MajorMinorMatcher builds an OR collection spanning both major and minor requirements. func MajorMinorMatcher(group string, subgroups []string) interface{} { return schema.NewCollectionRequirement("OR", 1, []interface{}{*schema.NewMajorRequirement(subgroups[1]), *schema.NewMinorRequirement(subgroups[1])}) } +// CoreMatcher creates a core requirement from a parsed hour requirement and core identifier.
func CoreMatcher(group string, subgroups []string) interface{} { hourReq, err := strconv.Atoi(subgroups[1]) if err != nil { @@ -143,10 +149,12 @@ func CoreMatcher(group string, subgroups []string) interface{} { return schema.NewCoreRequirement(subgroups[2], hourReq) } +// CoreCompletionMatcher indicates completion of a specific core category without an hour requirement. func CoreCompletionMatcher(group string, subgroups []string) interface{} { return schema.NewCoreRequirement(subgroups[1], -1) } +// ChoiceMatcher converts a subgroup collection into a mutually exclusive choice requirement. func ChoiceMatcher(group string, subgroups []string) interface{} { collectionReq, ok := parseGroup(subgroups[1]).(*schema.CollectionRequirement) if !ok { @@ -156,6 +164,7 @@ func ChoiceMatcher(group string, subgroups []string) interface{} { return schema.NewChoiceRequirement(collectionReq) } +// GPAMatcher represents GPA-based prerequisites. func GPAMatcher(group string, subgroups []string) interface{} { GPAFloat, err := strconv.ParseFloat(subgroups[1], 32) if err != nil { @@ -164,6 +173,7 @@ func GPAMatcher(group string, subgroups []string) interface{} { return schema.NewGPARequirement(GPAFloat, "") } +// ThrowawayMatcher marks text that should be ignored during requisite evaluation. func ThrowawayMatcher(group string, subgroups []string) interface{} { return schema.Requirement{Type: "throwaway"} } @@ -171,6 +181,7 @@ func ThrowawayMatcher(group string, subgroups []string) interface{} { // Regex for group tags var groupTagRegex = regexp.MustCompile(`@(\d+)`) +// GroupTagMatcher resolves stack-referenced groups by index. func GroupTagMatcher(group string, subgroups []string) interface{} { groupIndex, err := strconv.Atoi(subgroups[1]) if err != nil { @@ -185,13 +196,14 @@ func GroupTagMatcher(group string, subgroups []string) interface{} { return parsedGrp } +// OtherMatcher wraps unmatched text in an OtherRequirement. 
func OtherMatcher(group string, subgroups []string) interface{} { return schema.NewOtherRequirement(ungroupText(group), "") } /////////////////////// END MATCHER FUNCS /////////////////////// -// Matcher container, matchers must be in order of precedence +// Matchers contains the ordered collection of matcher rules applied during requisite parsing. // NOTE: PARENTHESES ARE OF HIGHEST PRECEDENCE! (This is due to groupParens() handling grouping of parenthesized text before parsing begins) var Matchers []Matcher diff --git a/parser/sectionParser_test.go b/parser/sectionParser_test.go index 100b431..a920b4d 100644 --- a/parser/sectionParser_test.go +++ b/parser/sectionParser_test.go @@ -8,6 +8,7 @@ import ( "github.com/google/go-cmp/cmp" ) +// TestGetInternalClassAndCourseNum checks parsing of internal course identifiers. func TestGetInternalClassAndCourseNum(t *testing.T) { t.Parallel() @@ -59,6 +60,7 @@ func TestGetInternalClassAndCourseNum(t *testing.T) { } } +// TestGetAcademicSession ensures term metadata is parsed correctly. func TestGetAcademicSession(t *testing.T) { t.Parallel() @@ -78,6 +80,7 @@ func TestGetAcademicSession(t *testing.T) { } } +// TestGetSectionNumber validates extraction of section numbers. func TestGetSectionNumber(t *testing.T) { t.Parallel() diff --git a/parser/validator_test.go b/parser/validator_test.go index 551e7ba..432b11f 100644 --- a/parser/validator_test.go +++ b/parser/validator_test.go @@ -57,6 +57,7 @@ func init() { } // Test duplicate courses. Designed for fail cases +// TestDuplicateCoursesFail expects duplicates to trigger validation panic. func TestDuplicateCoursesFail(t *testing.T) { for i := range len(testCourses) { t.Run(fmt.Sprintf("Duplicate course %v", i), func(t *testing.T) { @@ -66,6 +67,7 @@ func TestDuplicateCoursesFail(t *testing.T) { } // Test duplicate sections. Designed for fail cases +// TestDuplicateSectionsFail ensures duplicate sections are rejected. 
func TestDuplicateSectionsFail(t *testing.T) { for i := range len(testSections) { t.Run(fmt.Sprintf("Duplicate section %v", i), func(t *testing.T) { @@ -75,6 +77,7 @@ func TestDuplicateSectionsFail(t *testing.T) { } // Test duplicate professors . Designed for fail cases +// TestDuplicateProfFail ensures duplicate professors fail validation. func TestDuplicateProfFail(t *testing.T) { for i := range len(testProfessors) { t.Run(fmt.Sprintf("Duplicate professor %v", i), func(t *testing.T) { @@ -84,6 +87,7 @@ func TestDuplicateProfFail(t *testing.T) { } // Test duplicate courses. Designed for pass case +// TestDuplicateCoursesPass confirms unique courses validate successfully. func TestDuplicateCoursesPass(t *testing.T) { for i := range len(testCourses) - 1 { t.Run(fmt.Sprintf("Duplicate courses %v, %v", i, i+1), func(t *testing.T) { @@ -93,6 +97,7 @@ func TestDuplicateCoursesPass(t *testing.T) { } // Test duplicate sections. Designed for pass cases +// TestDuplicateSectionsPass confirms unique sections validate successfully. func TestDuplicateSectionsPass(t *testing.T) { for i := range len(testSections) - 1 { t.Run(fmt.Sprintf("Duplicate sections %v, %v", i, i+1), func(t *testing.T) { @@ -102,6 +107,7 @@ func TestDuplicateSectionsPass(t *testing.T) { } // Test duplicate professors. Designed for pass cases +// TestDuplicateProfPass confirms unique professors validate successfully. func TestDuplicateProfPass(t *testing.T) { for i := range len(testProfessors) - 1 { t.Run(fmt.Sprintf("Duplicate professors %v, %v", i, i+1), func(t *testing.T) { @@ -111,6 +117,7 @@ func TestDuplicateProfPass(t *testing.T) { } // Test if course references to anything nonexistent. Designed for pass case +// TestCourseReferencePass ensures section references to courses succeed. 
func TestCourseReferencePass(t *testing.T) { sectionMap := make(map[primitive.ObjectID]*schema.Section) for _, section := range testSections { @@ -144,6 +151,7 @@ func TestCourseReferencePass(t *testing.T) { // - Section doesn't reference back to same course // // This is fail: missing +// TestCourseReferenceFail1 detects missing course references during validation. func TestCourseReferenceFail1(t *testing.T) { for key, value := range indexMap { t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) { @@ -153,6 +161,7 @@ func TestCourseReferenceFail1(t *testing.T) { } // This is fail: modified +// TestCourseReferenceFail2 detects mismatched section-course references. func TestCourseReferenceFail2(t *testing.T) { for key, value := range indexMap { t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) { @@ -162,6 +171,7 @@ func TestCourseReferenceFail2(t *testing.T) { } // Test section reference to professor, designed for pass case +// TestSectionReferenceProfPass ensures section professor references are mutual. func TestSectionReferenceProfPass(t *testing.T) { // Build profIDMap & profs profIDMap := make(map[primitive.ObjectID]string) @@ -192,6 +202,7 @@ func TestSectionReferenceProfPass(t *testing.T) { } // Test section reference to professors, designed for fail case +// TestSectionReferenceProfFail catches missing professor back-references. func TestSectionReferenceProfFail(t *testing.T) { profIDMap := make(map[primitive.ObjectID]string) @@ -234,6 +245,7 @@ func TestSectionReferenceProfFail(t *testing.T) { } // Test section reference to course +// TestSectionReferenceCourse verifies section-course reference validation. 
func TestSectionReferenceCourse(t *testing.T) { courseIDMap := make(map[primitive.ObjectID]string) for _, course := range testCourses { diff --git a/scrapers/astra.go b/scrapers/astra.go index d1c10ee..849f2d8 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -16,8 +16,10 @@ import ( "github.com/valyala/fastjson" ) +// MAX_EVENTS_PER_DAY caps the Astra API results to guard against truncated responses. var MAX_EVENTS_PER_DAY = 5000 +// ScrapeAstra iterates day-by-day through Astra events and persists the raw JSON output. func ScrapeAstra(outDir string) { // Start chromedp chromedpCtx, cancel := utils.InitChromeDp() diff --git a/scrapers/calendar.go b/scrapers/calendar.go index 4429c2e..11b56e0 100644 --- a/scrapers/calendar.go +++ b/scrapers/calendar.go @@ -19,18 +19,19 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) -// Structure of the API response +// RawEvent mirrors the nested event payload returned by the calendar API. type RawEvent struct { Event map[string]interface{} `json:"event"` } +// APICalendarResponse models the calendar API pagination envelope. type APICalendarResponse struct { Events []RawEvent `json:"events"` Page map[string]int `json:"page"` Date map[string]string `json:"date"` } -// Get the calendar data through API instead of scraping from website +// ScrapeCalendar retrieves calendar events through the API and writes normalized JSON output. func ScrapeCalendar(outDir string) { err := os.MkdirAll(outDir, 0777) if err != nil { @@ -139,7 +140,7 @@ func ScrapeCalendar(outDir string) { log.Printf("Finished parsing %d events successfully!\n\n", len(events)) } -// Scrape the data from the api and unmarshal it to response data +// scrapeAndUnmarshal fetches a calendar page and decodes it into data. 
func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { // Call API to get the byte data calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page) @@ -167,7 +168,7 @@ func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse return nil } -// Casting an interface{} to an slice of interface{} +// toSlice attempts to convert data into a slice of interface{}. func toSlice(data interface{}) []interface{} { if array, ok := data.([]interface{}); ok { return array @@ -175,7 +176,7 @@ func toSlice(data interface{}) []interface{} { return nil } -// Casting an interface{} to map from string to interface{} +// toMap attempts to convert data into a map keyed by string. func toMap(data interface{}) map[string]interface{} { if dataMap, ok := data.(map[string]interface{}); ok { return dataMap @@ -183,7 +184,7 @@ func toMap(data interface{}) map[string]interface{} { return nil } -// Casting an interface{} to string, if the data is nil then string is "" +// toString returns the string form of data or empty string when nil. func toString(data interface{}) string { if data != nil { if dataString, ok := data.(string); ok { @@ -193,7 +194,7 @@ func toString(data interface{}) string { return "" } -// Parse string time +// parseTime converts an RFC3339 timestamp string to a time.Time. func parseTime(stringTime string) time.Time { parsedTime, err := time.Parse(time.RFC3339, stringTime) if err != nil { diff --git a/scrapers/map.go b/scrapers/map.go index 0736e2b..c4f4008 100644 --- a/scrapers/map.go +++ b/scrapers/map.go @@ -15,15 +15,19 @@ import ( //See API documentation https://devcms.concept3d.com/swagger/dist/ and https://api.concept3d.com/documentation/?map=1772&key=0001085cc708b9cef47080f064612ca5 -// Found in dev tools on https://map.utdallas.edu/ in any call to https://api.concept3d.com/ +// API_KEY is the Concept3D API key observed from map.utdallas.edu traffic. 
const API_KEY string = "0001085cc708b9cef47080f064612ca5" -// Found in https://map.concept3d.com/?id=1772 +// UTD_MAP_ID references the Concept3D map identifier for the UTD campus map. const UTD_MAP_ID string = "1772" +// START_URL points to the Concept3D API host. const START_URL string = "https://api.concept3d.com" + +// END_URL appends the map and key query parameters for Concept3D requests. const END_URL string = "/?map=" + UTD_MAP_ID + "&key=" + API_KEY +// ScrapeMapLocations downloads Concept3D responses and writes raw map data to disk. func ScrapeMapLocations(outDir string) { // Make output folder err := os.MkdirAll(outDir, 0777) diff --git a/scrapers/mazevo.go b/scrapers/mazevo.go index 5c21f42..fb50df0 100644 --- a/scrapers/mazevo.go +++ b/scrapers/mazevo.go @@ -17,6 +17,7 @@ import ( "github.com/UTDNebula/api-tools/utils" ) +// ScrapeMazevo pulls Mazevo calendar events via the public API and stores the raw response. func ScrapeMazevo(outDir string) { apikey, err := utils.GetEnv("MAZEVO_API_KEY") if err != nil { diff --git a/scrapers/organizations.go b/scrapers/organizations.go index 3c1f41e..46aa833 100644 --- a/scrapers/organizations.go +++ b/scrapers/organizations.go @@ -42,6 +42,7 @@ var ( emailRegex = regexp.MustCompile(fmt.Sprintf(`%s@%s%s`, localPartPattern, subdomainPattern, topdomainPattern)) ) +// ScrapeOrganizations authenticates with SharePoint and exports the student organization directory CSV. func ScrapeOrganizations(outdir string) { log.Println("Scraping SOC ...") ctx, cancel := utils.InitChromeDp() diff --git a/scrapers/profiles.go b/scrapers/profiles.go index bbdfffc..59ec59d 100644 --- a/scrapers/profiles.go +++ b/scrapers/profiles.go @@ -23,6 +23,7 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) +// BASE_URL is the root listing endpoint for UTD professor profiles. 
const BASE_URL string = "https://profiles.utdallas.edu/browse?page=" var primaryLocationRegex *regexp.Regexp = regexp.MustCompile(`^(\w+)\s+(\d+\.\d{3}[A-z]?)$`) @@ -141,6 +142,7 @@ func scrapeProfessorLinks(chromedpCtx context.Context) []string { return professorLinks } +// ScrapeProfiles navigates UTD profile listings and writes professor metadata to JSON. func ScrapeProfiles(outDir string) { chromedpCtx, cancel := utils.InitChromeDp() diff --git a/uploader/eventsUploader.go b/uploader/eventsUploader.go index 9d67830..70d43df 100644 --- a/uploader/eventsUploader.go +++ b/uploader/eventsUploader.go @@ -20,6 +20,7 @@ import ( var eventsFilesToUpload [2]string = [2]string{"astra.json", "mazevo.json"} +// UploadEvents loads event JSON files and replaces the corresponding MongoDB collections. func UploadEvents(inDir string) { //Load env vars diff --git a/uploader/mapUploader.go b/uploader/mapUploader.go index 052be14..6262abe 100644 --- a/uploader/mapUploader.go +++ b/uploader/mapUploader.go @@ -20,6 +20,7 @@ import ( var mapFilesToUpload [1]string = [1]string{"mapLocations.json"} +// UploadMapLocations replaces the map locations collection with the generated map JSON. func UploadMapLocations(inDir string) { //Load env vars diff --git a/uploader/pipelines/events.go b/uploader/pipelines/events.go index 96faccb..540e1bc 100644 --- a/uploader/pipelines/events.go +++ b/uploader/pipelines/events.go @@ -1,3 +1,4 @@ +// Package pipelines defines reusable MongoDB aggregation pipelines for derived data. package pipelines import ( @@ -5,7 +6,7 @@ import ( "go.mongodb.org/mongo-driver/mongo" ) -// Pipeline for aggregating sections->events +// EventsPipeline aggregates section meetings into building and room event summaries. 
var EventsPipeline = mongo.Pipeline{ //separate each meeting {{Key: "$unwind", Value: "$meetings"}}, diff --git a/uploader/pipelines/trends_course_sections.go b/uploader/pipelines/trends_course_sections.go index f3d768b..87fce4d 100644 --- a/uploader/pipelines/trends_course_sections.go +++ b/uploader/pipelines/trends_course_sections.go @@ -5,6 +5,7 @@ import ( "go.mongodb.org/mongo-driver/mongo" ) +// TrendsCourseSectionsPipeline links course documents to their section records for trend reporting. var TrendsCourseSectionsPipeline = mongo.Pipeline{ bson.D{ {Key: "$lookup", diff --git a/uploader/pipelines/trends_prof_sections.go b/uploader/pipelines/trends_prof_sections.go index add5b25..2961c97 100644 --- a/uploader/pipelines/trends_prof_sections.go +++ b/uploader/pipelines/trends_prof_sections.go @@ -5,6 +5,7 @@ import ( "go.mongodb.org/mongo-driver/mongo" ) +// TrendsProfSectionsPipeline denormalizes professor records with their taught sections for analytics. var TrendsProfSectionsPipeline = mongo.Pipeline{ bson.D{ {Key: "$lookup", diff --git a/uploader/uploader.go b/uploader/uploader.go index dc34561..050c55c 100644 --- a/uploader/uploader.go +++ b/uploader/uploader.go @@ -1,7 +1,4 @@ -/* - This file is responsible for handling uploading of parsed data to MongoDB. -*/ - +// Package uploader writes parsed datasets and derived aggregations into MongoDB collections. package uploader import ( @@ -31,6 +28,7 @@ import ( var filesToUpload [3]string = [3]string{"courses.json", "professors.json", "sections.json"} +// Upload sends parsed JSON files to MongoDB and refreshes static aggregations. 
func Upload(inDir string, replace bool, staticOnly bool) { //Connect to mongo client := connectDB() @@ -72,10 +70,8 @@ func Upload(inDir string, replace bool, staticOnly bool) { log.Print("Done building static aggregations!") } -// Generic upload function to upload parsed JSON data to the Mongo database -// Make sure that the name of the file being parsed matches with the name of the collection you are uploading to! -// For example, your file should be named courses.json if you want to upload courses -// As of right now, courses, professors, and sections are available to upload. +// UploadData uploads parsed JSON documents to a MongoDB collection. +// Make sure the file name matches the collection name (e.g., courses.json for the courses collection). func UploadData[T any](client *mongo.Client, ctx context.Context, fptr *os.File, replace bool) { fileName := fptr.Name()[strings.LastIndex(fptr.Name(), "/")+1 : len(fptr.Name())-5] log.Println("Uploading " + fileName + ".json ...") diff --git a/utils/logger.go b/utils/logger.go index 53f761c..12f19a0 100644 --- a/utils/logger.go +++ b/utils/logger.go @@ -13,17 +13,17 @@ import ( "log" ) -// Custom io.Writer for routing writing to multiple sub-writers +// SplitWriter routes writes to multiple underlying writers. type SplitWriter struct { writers []io.Writer } -// Constructor for utils.SplitWriter +// NewSplitWriter constructs a SplitWriter that fans out writes to the provided writers. func NewSplitWriter(writers ...io.Writer) *SplitWriter { return &SplitWriter{writers: writers} } -// Writes the specified bytes to every sub-writer of the SplitWriter +// Write copies the provided bytes to each underlying writer. 
func (splitWriter *SplitWriter) Write(p []byte) (n int, err error) { type writeResult struct { n int @@ -49,19 +49,20 @@ func (splitWriter *SplitWriter) Write(p []byte) (n int, err error) { return n, err } -// Verbose logging flag, only works with the utils.Logger verbose functions +// Lverbose enables verbose logging on Logger instances and global loggers. const Lverbose = 1 << 7 -// Extension of log.Logger that supports a verbose logging flag; verbose printing functions start with 'V' +// Logger extends log.Logger with helper methods that respect the verbose flag. type Logger struct { log.Logger } +// NewLogger constructs a Logger that writes to out with the given prefix and flags. func NewLogger(out io.Writer, prefix string, flag int) *Logger { return &Logger{*log.New(out, prefix, flag)} } -// Verbose-only variant of Logger.Printf +// VPrintf prints using fmt.Printf semantics when the verbose flag is set. func (logger *Logger) VPrintf(format string, vars ...any) { flags := logger.Flags() if flags&Lverbose != 0 { @@ -69,7 +70,7 @@ func (logger *Logger) VPrintf(format string, vars ...any) { } } -// Verbose-only variant of Logger.Print +// VPrint prints text when the verbose flag is set. func (logger *Logger) VPrint(text string) { flags := logger.Flags() if flags&Lverbose != 0 { @@ -77,7 +78,7 @@ func (logger *Logger) VPrint(text string) { } } -// Verbose-only variant of Logger.Println +// VPrintln prints text with a newline when the verbose flag is set. func (logger *Logger) VPrintln(text string) { flags := logger.Flags() if flags&Lverbose != 0 { @@ -85,7 +86,7 @@ func (logger *Logger) VPrintln(text string) { } } -// Verbose-only variant of log.Printf +// VPrintf prints through the package-level logger when the verbose flag is set. 
func VPrintf(format string, vars ...any) { flags := log.Flags() if flags&Lverbose != 0 { @@ -93,7 +94,7 @@ func VPrintf(format string, vars ...any) { } } -// Verbose-only variant of log.Print +// VPrint prints text through the package-level logger when the verbose flag is set. func VPrint(text string) { flags := log.Flags() if flags&Lverbose != 0 { @@ -101,7 +102,7 @@ func VPrint(text string) { } } -// Verbose-only variant of log.Println +// VPrintln prints text with a newline through the package-level logger when the verbose flag is set. func VPrintln(text string) { flags := log.Flags() if flags&Lverbose != 0 { diff --git a/utils/methods.go b/utils/methods.go index dbaa4ca..90712e2 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -1,7 +1,4 @@ -/* - This file contains utility methods used throughout various files in this repo. -*/ - +// Package utils provides shared helpers for scraping, parsing, and uploading workflows. package utils import ( @@ -24,9 +21,10 @@ import ( "github.com/chromedp/chromedp" ) +// Headless toggles whether chromedp runs without a visible browser window. var Headless = true -// Finds .env value and produces proper error if not found +// GetEnv finds an environment variable and returns an error when it is unset. func GetEnv(name string) (string, error) { value, exists := os.LookupEnv(name) if !exists || value == "" { @@ -35,7 +33,7 @@ func GetEnv(name string) (string, error) { return value, nil } -// Initializes Chrome DevTools Protocol +// InitChromeDp configures and returns a chromedp context with optional headless settings. 
func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) { log.Printf("Initializing chromedp...") if Headless { @@ -55,7 +53,7 @@ func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) return chromedpCtx, cancelFnc } -// This function generates a fresh auth token and returns the new headers +// RefreshToken logs into CourseBook and returns headers containing a fresh session token. func RefreshToken(chromedpCtx context.Context) map[string][]string { netID, err := GetEnv("LOGIN_NETID") if err != nil { @@ -140,7 +138,7 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string { } } -// This function signs into Astra +// RefreshAstraToken signs into Astra and returns headers containing authentication cookies. func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { // Get username and password username, err := GetEnv("LOGIN_ASTRA_USERNAME") @@ -217,7 +215,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { } } -// Encodes and writes the given data as tab-indented JSON to the given filepath. +// WriteJSON encodes data as indented JSON and writes it to filepath. func WriteJSON(filepath string, data interface{}) error { fptr, err := os.Create(filepath) if err != nil { @@ -230,7 +228,7 @@ func WriteJSON(filepath string, data interface{}) error { return nil } -// Recursively gets the filepath of every file with the given extension, using the given directory as the root. +// GetAllFilesWithExtension recursively gathers file paths within inDir that match extension. func GetAllFilesWithExtension(inDir string, extension string) []string { var filePaths []string err := filepath.WalkDir(inDir, func(path string, d fs.DirEntry, err error) error { @@ -249,12 +247,12 @@ func GetAllFilesWithExtension(inDir string, extension string) []string { return filePaths } -// Removes standard whitespace characters (space, tab, newline, carriage return) from a given string. 
+// TrimWhitespace removes leading and trailing spaces, tabs, newlines, and carriage returns from the provided string. func TrimWhitespace(text string) string { return strings.Trim(text, " \t\n\r") } -// Gets all of the values from a given map. +// GetMapValues returns a slice of all map values. func GetMapValues[M ~map[K]V, K comparable, V any](m M) []V { r := make([]V, 0, len(m)) for _, v := range m { @@ -263,7 +261,7 @@ func GetMapValues[M ~map[K]V, K comparable, V any](m M) []V { return r } -// Gets all of the keys from a given map. +// GetMapKeys returns a slice of all map keys. func GetMapKeys[M ~map[K]V, K comparable, V any](m M) []K { r := make([]K, 0, len(m)) for k := range m { @@ -272,12 +270,12 @@ func GetMapKeys[M ~map[K]V, K comparable, V any](m M) []K { return r } -// Creates a regexp with MustCompile() using a sprintf input. +// Regexpf formats and compiles a regular expression pattern using fmt.Sprintf semantics. func Regexpf(format string, vars ...interface{}) *regexp.Regexp { return regexp.MustCompile(fmt.Sprintf(format, vars...)) } -// Attempts to retry running the given error-returning function up to a maximum number of retries, at which point the last error is returned. A callback is called between each retry. +// Retry calls action until it succeeds or exceeds maxRetries, invoking retryCallback between attempts. func Retry(action func() error, maxRetries int, retryCallback func(numRetries int)) error { for retries := 1; ; retries++ { // Perform the action @@ -289,7 +287,7 @@ func Retry(action func() error, maxRetries int, retryCallback func(numRetries in } } -// Get all the available course prefixes +// GetCoursePrefixes retrieves all course prefix values from CourseBook.
func GetCoursePrefixes(chromedpCtx context.Context) []string { // Might need to refresh the token every time we get new course prefixes in the future // refreshToken(chromedpCtx) @@ -316,7 +314,7 @@ func GetCoursePrefixes(chromedpCtx context.Context) []string { return coursePrefixes } -// Convert the value of any type to either string or float64 +// ConvertFromInterface attempts to convert a value into the requested type and returns a pointer when successful. func ConvertFromInterface[T string | float64](value any) *T { if parsed, ok := value.(T); ok { return &parsed diff --git a/utils/regexes.go b/utils/regexes.go index dde039e..cbf9ade 100644 --- a/utils/regexes.go +++ b/utils/regexes.go @@ -4,36 +4,36 @@ package utils -// Subject, i.e. HIST +// R_SUBJECT matches a subject prefix such as HIST. const R_SUBJECT string = `[A-Z]{2,4}` -// Course code, i.e. 2252. -// The first digit of a course code is the course level, the second digit is the # of credit hours. +// R_COURSE_CODE matches a four-character course number like 2252 or V001. +// The first digit of a course code is the course level, the second digit is the number of credit hours. const R_COURSE_CODE string = `[0-9vV]{4}` -// Subject + Course, captured +// R_SUBJ_COURSE_CAP captures both subject and course number components. const R_SUBJ_COURSE_CAP string = `([A-Z]{2,4})\s*([0-9vV]{4})` -// Subject + Course, uncaptured +// R_SUBJ_COURSE matches subject and course combinations without capturing groups. const R_SUBJ_COURSE string = `[A-Z]{2,4}\s*[0-9vV]{4}` -// Section code, i.e. 101 +// R_SECTION_CODE matches section identifiers such as 101 or A1. const R_SECTION_CODE string = `[0-9A-z]+` -// Term/Semester code, i.e. 22s +// R_TERM_CODE matches term codes like 22S or 23f. const R_TERM_CODE string = `[0-9]{2}[sufSUF]` -// Grade, i.e. C- +// R_GRADE matches letter grades with optional modifiers, such as C-. const R_GRADE string = `[ABCFabcf][+-]?` -// Date in format, i.e. 
January 5, 2022 +// R_DATE_MDY matches dates formatted like January 5, 2022. const R_DATE_MDY string = `[A-z]+\s+[0-9]+,\s+[0-9]{4}` -// Day of week, i.e. Monday +// R_WEEKDAY matches full weekday names like Monday or Thursday. const R_WEEKDAY string = `(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day` -// Time in 12-hour AM/PM format, i.e. 5:22pm +// R_TIME_AM_PM matches 12-hour times such as 5:22pm. const R_TIME_AM_PM string = `[0-9]+:[0-9]+\s*(?:am|pm)` -// Year statuses +// R_YEARS matches class standing descriptors like freshmen or seniors. const R_YEARS string = `(?:freshm[ae]n|sophomores?|juniors?|seniors?)` diff --git a/utils/utils_test.go b/utils/utils_test.go index 94a7bf1..aeb5ba2 100644 --- a/utils/utils_test.go +++ b/utils/utils_test.go @@ -8,6 +8,7 @@ import ( "github.com/joho/godotenv" ) +// TestMain loads environment variables for utils package tests. func TestMain(m *testing.M) { // Load .env vars for testing godotenv.Load("../.env") @@ -15,6 +16,7 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// TestInitChromeDp ensures chromedp contexts initialize in both headed and headless modes. func TestInitChromeDp(t *testing.T) { // Test with head Headless = false @@ -32,6 +34,7 @@ func TestInitChromeDp(t *testing.T) { cancel() } +// TestRefreshToken confirms coursebook tokens refresh under both headless settings. func TestRefreshToken(t *testing.T) { // Get a chromedp context ctx, cancel := InitChromeDp()