Skip to content

Commit

Permalink
Added dynamic retrieval of course prefix list for coursebook scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
mohammadmehrab committed Oct 16, 2024
1 parent c1e3cad commit f2d5813
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 32 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/UTDNebula/api-tools

go 1.23
go 1.23.0

require (
github.com/PuerkitoBio/goquery v1.8.1
Expand Down
32 changes: 1 addition & 31 deletions scrapers/coursebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,37 +28,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
chromedpCtx, cancel := utils.InitChromeDp()
defer cancel()

// Refresh the token
refreshToken(chromedpCtx)

log.Printf("Finding course prefix nodes...")

var coursePrefixes []string
var coursePrefixNodes []*cdp.Node

// Get option elements for course prefix dropdown
err := chromedp.Run(chromedpCtx,
chromedp.Navigate("https://coursebook.utdallas.edu"),
chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll),
)

if err != nil {
log.Panic(err)
}

log.Println("Found the course prefix nodes!")

log.Println("Finding course prefixes...")

// Remove the first option due to it being empty
coursePrefixNodes = coursePrefixNodes[1:]

// Get the value of each option and append to coursePrefixes
for _, node := range coursePrefixNodes {
coursePrefixes = append(coursePrefixes, node.AttributeValue("value"))
}

log.Println("Found the course prefixes!")
coursePrefixes := utils.GetCoursePrefixes(chromedpCtx)

// Find index of starting prefix, if one has been given
startPrefixIndex := 0
Expand Down
37 changes: 37 additions & 0 deletions utils/methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strings"

"github.com/chromedp/cdproto/network"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/chromedp"
)

Expand Down Expand Up @@ -251,3 +252,39 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa
}
return res, err
}

// GetCoursePrefixes navigates the given chromedp context to
// https://coursebook.utdallas.edu and returns the "value" attribute of every
// option in the course prefix dropdown (select#combobox_cp), skipping the
// first option, which is the empty placeholder entry.
//
// The result is nil when the dropdown yields no options beyond the
// placeholder. If the chromedp actions fail, the error is reported through
// log.Panic.
//
// NOTE(review): the inline code this helper was extracted from called
// refreshToken before navigating. That call was left commented out here, so
// no token refresh happens in this function — confirm whether callers need
// to refresh the token themselves.
func GetCoursePrefixes(chromedpCtx context.Context) []string {
	log.Printf("Finding course prefix nodes...")

	var coursePrefixNodes []*cdp.Node

	// Get option elements for course prefix dropdown
	err := chromedp.Run(chromedpCtx,
		chromedp.Navigate("https://coursebook.utdallas.edu"),
		chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll),
	)
	if err != nil {
		log.Panic(err)
	}

	log.Println("Found the course prefix nodes!")

	log.Println("Finding course prefixes...")

	var coursePrefixes []string

	// The first option is the empty placeholder, so only the options after it
	// carry real prefixes. Checking the length first keeps an unexpectedly
	// empty node list from triggering a slice-bounds panic.
	if n := len(coursePrefixNodes); n > 1 {
		coursePrefixes = make([]string, 0, n-1)
		for _, node := range coursePrefixNodes[1:] {
			coursePrefixes = append(coursePrefixes, node.AttributeValue("value"))
		}
	}

	log.Println("Found the course prefixes!")

	return coursePrefixes
}

0 comments on commit f2d5813

Please sign in to comment.