Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Run the tool by changing directory using `cd` to the `api-tools` directory and r
| Command | Description |
|---------|-------------|
| `./api-tools -parse -astra` | Parses Astra data. |
| `./api-tools -parse -calendar` | Parses calendar data. |
| `./api-tools -parse -csv [directory]` | Outputs grade data CSVs (default: `./grade-data`). |
| `./api-tools -parse -map` | Parses UTD Map data. |
| `./api-tools -parse -mazevo` | Parses Mazevo data. |
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/valyala/fastjson v1.6.4
go.mongodb.org/mongo-driver v1.17.3
golang.org/x/net v0.36.0
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
)

require (
Expand Down Expand Up @@ -57,6 +58,8 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
Expand All @@ -65,6 +68,7 @@ require (
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJ
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk=
github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -121,8 +122,11 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
Expand All @@ -144,10 +148,12 @@ github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhA
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
Expand Down
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ func main() {
scrapeProfiles := flag.Bool("profiles", false, "Alongside -scrape, signifies that professor profiles should be scraped.")
// Flag for soc scraping
scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.")
// Flag for calendar scraping
scrapeCalendar := flag.Bool("calendar", false, "Alongside -scrape, signifies that calendar should be scraped.")
// Flag for calendar scraping and parsing
calendar := flag.Bool("calendar", false, "Alongside -scrape or -parse, signifies that calendar should be scraped.")
// Flag for astra scraping and parsing
astra := flag.Bool("astra", false, "Alongside -scrape or -parse, signifies that Astra should be scraped/parsed.")
// Flag for mazevo scraping and parsing
Expand Down Expand Up @@ -106,7 +106,7 @@ func main() {
scrapers.ScrapeCoursebook(*term, *startPrefix, *outDir, *resume)
case *scrapeOrganizations:
scrapers.ScrapeOrganizations(*outDir)
case *scrapeCalendar:
case *calendar:
scrapers.ScrapeCalendar(*outDir)
case *astra:
scrapers.ScrapeAstra(*outDir)
Expand All @@ -119,6 +119,8 @@ func main() {
}
case *parse:
switch {
case *calendar:
parser.ParseCalendar(*inDir, *outDir)
case *astra:
parser.ParseAstra(*inDir, *outDir)
case *mazevo:
Expand Down
241 changes: 241 additions & 0 deletions parser/calendarParser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
package parser

import (
"encoding/json"
"fmt"
"log"
"os"
"regexp"
"slices"
"strings"

"github.com/UTDNebula/api-tools/utils"
"github.com/UTDNebula/nebula-api/api/schema"
)

// buildingAbbreviations maps a full building name to its abbreviation.
// It is consulted when an event location spells out the building instead of
// using the abbreviation. More than one name may map to the same
// abbreviation (for example, both "Synergy Park North 2" and "SP/N Gallery"
// map to SP2).
var buildingAbbreviations = map[string]string{
	"Activity Center": "AB",
	"Activity Center Bookstore": "ACB",
	"Administration": "AD",
	"Edith and Peter O’Donnell Jr. Athenaeum": "APC",
	"Edith O'Donnell Arts and Technology Building": "ATC",
	"Lloyd V. Berkner Hall": "BE",
	"Bioengineering and Sciences Building": "BSB",
	"Classroom Building": "CB",
	"Callier Center Richardson": "CR",
	"Callier Center Addition": "CRA",
	"Davidson-Gundy Alumni Center": "DGA",
	"Dining Hall West": "DHW",
	"Engineering and Computer Science North": "ECSN",
	"Engineering and Computer Science South": "ECSS",
	"Engineering and Computer Science West": "ECSW",
	"Energy Plant": "EP",
	"Founders Annex": "FA",
	"Facilities Management": "FM",
	"Founders North": "FN",
	"Founders Building": "FO",
	"Cecil H. Green Hall": "GR",
	"Karl Hoblitzelle Hall": "HH",
	"Erik Jonsson Academic Center": "JO",
	"Naveen Jindal School of Management": "JSOM",
	"Eugene McDermott Library": "MC",
	"Modular Lab 1": "ML1",
	"Modular Lab 2": "ML2",
	"North Office Building": "NB",
	"North Lab": "NL",
	"Police": "PD",
	"Physics Annex": "PHA",
	"Physics Building": "PHY",
	"Natural Science and Engineering Research Lab": "RL",
	"Research and Operations Center": "ROC",
	"Research and Operations Center West": "ROW",
	"Service Building": "SB",
	"Sciences Building": "SCI",
	"Safety and Grounds": "SG",
	"Student Learning Center": "SLC",
	"Student Services Building Addition": "SSA",
	"Student Services Building": "SSB",
	"Student Union": "SU",
	"Student Union Food Court": "SUFC",
	"Synergy Park North": "SPN",
	"Synergy Park North 2": "SP2",
	"University Theatre": "TH",
	"Visitor Center": "VC",
	"Waterview Science and Technology Center": "WSTC",
	"Andromeda Hall & University Housing Office": "RHA",
	"Capella Hall": "RHC",
	"Helix Hall": "RHH",
	"Sirius Hall": "RHS",
	"Vega Hall": "RHV",
	"Recreation Center West": "RCW",
	"SP/N Gallery": "SP2",
}

// Valid building abreviations for checking
var validAbbreviations []string = []string{
"AB",
"ACB",
"AD",
"APC",
"ATC",
"BE",
"BSB",
"CB",
"CR",
"CRA",
"DGA",
"DHW",
"ECSN",
"ECSS",
"ECSW",
"EP",
"FA",
"FM",
"FN",
"FO",
"GR",
"HH",
"JO",
"JSOM",
"MC",
"ML1",
"ML2",
"NB",
"NL",
"PD",
"PHA",
"PHY",
"RL",
"ROC",
"ROW",
"SB",
"SCI",
"SG",
"SLC",
"SSA",
"SSB",
"SU",
"SUFC",
"SPN",
"SP2",
"TH",
"VC",
"WSTC",
"RHA",
"RHC",
"RHH",
"RHS",
"RHV",
"RCW",
}

// calendarBuildingRegexp matches runs of capital letters. Runs of two to four
// letters are candidate building abbreviations (e.g., ECSS in "ECSS 2.410").
var calendarBuildingRegexp = regexp.MustCompile(`[A-Z]+`)

// calendarRoomRegexp matches room numbers such as 2.410 or 12.201A.
var calendarRoomRegexp = regexp.MustCompile(`([0-9]{1,2}\.[0-9]{3})([A-Z])?`)

// ParseCalendar reads the scraped events from inDir/eventScraped.json, groups
// them by date, then building, then room, and writes the grouped result to
// outDir/events.json.
//
// Events whose building cannot be determined are grouped under the building
// "Other"; events whose room cannot be determined are grouped under the room
// "Other". Dates, buildings and rooms are written in ascending order so that
// the output is reproducible. An empty input yields an empty JSON array.
//
// ParseCalendar panics if the input cannot be read or decoded, or if the
// output cannot be written.
func ParseCalendar(inDir string, outDir string) {
	calendarFile, err := os.ReadFile(inDir + "/eventScraped.json")
	if err != nil {
		panic(err)
	}

	var allEvents []schema.Event
	if err = json.Unmarshal(calendarFile, &allEvents); err != nil {
		panic(err)
	}

	// date -> building -> room -> events
	multiBuildingMap := make(map[string]map[string]map[string][]schema.Event)

	for _, event := range allEvents {
		// The date key is the first 10 characters of the start time's string
		// form.
		// NOTE(review): presumably this is YYYY-MM-DD — confirm against the
		// type of schema.Event.StartTime.
		date := event.StartTime.String()[:10]

		// A missing location is treated like an empty one instead of
		// dereferencing a nil pointer.
		var locationText string
		if location := utils.ConvertFromInterface[string](event.Location); location != nil {
			locationText = *location
		}
		building, room := parseCalendarLocation(locationText)

		buildings, ok := multiBuildingMap[date]
		if !ok {
			buildings = make(map[string]map[string][]schema.Event)
			multiBuildingMap[date] = buildings
		}

		rooms, ok := buildings[building]
		if !ok {
			rooms = make(map[string][]schema.Event)
			buildings[building] = rooms
		}

		rooms[room] = append(rooms[room], event)
	}

	result := make([]schema.MultiBuildingEvents[schema.Event], 0, len(multiBuildingMap))

	for _, date := range sortedCalendarKeys(multiBuildingMap) {
		buildings := multiBuildingMap[date]
		singleBuildings := make([]schema.SingleBuildingEvents[schema.Event], 0, len(buildings))

		for _, building := range sortedCalendarKeys(buildings) {
			rooms := buildings[building]
			roomEvents := make([]schema.RoomEvents[schema.Event], 0, len(rooms))

			for _, room := range sortedCalendarKeys(rooms) {
				roomEvents = append(roomEvents, schema.RoomEvents[schema.Event]{
					Room:   room,
					Events: rooms[room],
				})
			}

			singleBuildings = append(singleBuildings, schema.SingleBuildingEvents[schema.Event]{
				Building: building,
				Rooms:    roomEvents,
			})
		}

		result = append(result, schema.MultiBuildingEvents[schema.Event]{
			Date:      date,
			Buildings: singleBuildings,
		})
	}

	// Report success only after the output has actually been written.
	if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), result); err != nil {
		panic(err)
	}

	log.Print("Parsed Calendar!")
}

// parseCalendarLocation extracts the building abbreviation and the room from
// an event location string. Either value is "Other" when it cannot be
// determined (empty location, off-campus location, or no room given).
func parseCalendarLocation(location string) (building string, room string) {
	building = findCalendarBuildingAbbreviation(location)

	// If the location has no valid abbreviation, check for the full building
	// name instead.
	if building == "" {
		building = findCalendarBuildingByName(location)
	}

	room = calendarRoomRegexp.FindString(location)

	// If the location has no room number but the building is known, treat
	// whatever follows the first comma as the room.
	if room == "" && building != "" {
		if _, afterComma, found := strings.Cut(location, ","); found {
			room = strings.TrimSpace(afterComma)
		}
	}

	if building == "" {
		building = "Other"
	}
	if room == "" {
		room = "Other"
	}

	return building, room
}

// findCalendarBuildingAbbreviation returns the first valid building
// abbreviation that appears in location, or "" if there is none.
func findCalendarBuildingAbbreviation(location string) string {
	for _, span := range calendarBuildingRegexp.FindAllStringIndex(location, -1) {
		start, end := span[0], span[1]

		// Only whole runs of 2-4 capital letters are candidates, so a slice of
		// a longer capitalized word is never mistaken for a building.
		if length := end - start; length < 2 || length > 4 {
			continue
		}

		// A run might not be a building at all (e.g., UTD); keep looking
		// rather than giving up on the first one.
		candidate := location[start:end]
		if slices.Contains(validAbbreviations, candidate) {
			return candidate
		}

		// Some abbreviations end in a digit (ML1, ML2, SP2).
		if end < len(location) && location[end] >= '0' && location[end] <= '9' {
			withDigit := location[start : end+1]
			if slices.Contains(validAbbreviations, withDigit) {
				return withDigit
			}
		}
	}

	return ""
}

// findCalendarBuildingByName returns the abbreviation of the building whose
// full name appears in location (case-insensitively), or "" if no name
// matches. When several names match, the longest one wins, so that
// "Student Services Building Addition" is not reported as
// "Student Services Building"; ties are broken alphabetically. This keeps the
// result independent of map iteration order.
func findCalendarBuildingByName(location string) string {
	lowercaseLocation := strings.ToLower(location)

	bestName, bestAbbreviation := "", ""
	for name, abbreviation := range buildingAbbreviations {
		if !strings.Contains(lowercaseLocation, strings.ToLower(name)) {
			continue
		}
		if len(name) > len(bestName) || (len(name) == len(bestName) && name < bestName) {
			bestName, bestAbbreviation = name, abbreviation
		}
	}

	return bestAbbreviation
}

// sortedCalendarKeys returns the keys of m in ascending order.
func sortedCalendarKeys[V any](m map[string]V) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	slices.Sort(keys)
	return keys
}
2 changes: 1 addition & 1 deletion scrapers/calendar.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func ScrapeCalendar(outDir string) {
log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
}

if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), events); err != nil {
if err := utils.WriteJSON(fmt.Sprintf("%s/eventScraped.json", outDir), events); err != nil {
panic(err)
}
log.Printf("Finished parsing %d events successfully!\n\n", len(events))
Expand Down