From f2d5813dd2dd4c6c33e633f1f26f26a02b21f372 Mon Sep 17 00:00:00 2001 From: mohammadmehrab <108484416+mohammadmehrab@users.noreply.github.com> Date: Wed, 16 Oct 2024 00:43:48 -0500 Subject: [PATCH] Added dynamic retrieval of course prefix list for coursebook scraping --- go.mod | 2 +- scrapers/coursebook.go | 32 +------------------------------- utils/methods.go | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/go.mod b/go.mod index e7e353d..f695df8 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/UTDNebula/api-tools -go 1.23 +go 1.23.0 require ( github.com/PuerkitoBio/goquery v1.8.1 diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index ecd28c2..e9e1f91 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -28,37 +28,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) { chromedpCtx, cancel := utils.InitChromeDp() defer cancel() - // Refresh the token - refreshToken(chromedpCtx) - - log.Printf("Finding course prefix nodes...") - - var coursePrefixes []string - var coursePrefixNodes []*cdp.Node - - // Get option elements for course prefix dropdown - err := chromedp.Run(chromedpCtx, - chromedp.Navigate("https://coursebook.utdallas.edu"), - chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll), - ) - - if err != nil { - log.Panic(err) - } - - log.Println("Found the course prefix nodes!") - - log.Println("Finding course prefixes...") - - // Remove the first option due to it being empty - coursePrefixNodes = coursePrefixNodes[1:] - - // Get the value of each option and append to coursePrefixes - for _, node := range coursePrefixNodes { - coursePrefixes = append(coursePrefixes, node.AttributeValue("value")) - } - - log.Println("Found the course prefixes!") + coursePrefixes := utils.GetCoursePrefixes(chromedpCtx) // Find index of starting prefix, if one has been given startPrefixIndex := 0 diff --git a/utils/methods.go 
b/utils/methods.go
index a6e42e3..da9193c 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -19,6 +19,7 @@ import (
 	"strings"
 
+	"github.com/chromedp/cdproto/cdp"
 	"github.com/chromedp/cdproto/network"
 	"github.com/chromedp/chromedp"
 )
 
@@ -251,3 +252,48 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa
 	}
 	return res, err
 }
+
+// GetCoursePrefixes navigates the browser session bound to chromedpCtx to the
+// coursebook home page and returns the "value" attribute of every option in
+// the course prefix dropdown (select#combobox_cp) except the first one.
+//
+// It calls log.Panic if the navigation or the node query returns an error.
+func GetCoursePrefixes(chromedpCtx context.Context) []string {
+	// NOTE(review): the scraper used to call refreshToken(chromedpCtx) here
+	// before navigating; the call is disabled in this helper, so confirm that
+	// callers refresh the token themselves if it is still required.
+	// refreshToken(chromedpCtx)
+
+	log.Println("Finding course prefix nodes...")
+
+	var coursePrefixes []string
+	var coursePrefixNodes []*cdp.Node
+
+	// Get option elements for course prefix dropdown
+	err := chromedp.Run(chromedpCtx,
+		chromedp.Navigate("https://coursebook.utdallas.edu"),
+		chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll),
+	)
+	if err != nil {
+		log.Panic(err)
+	}
+
+	log.Println("Found the course prefix nodes!")
+
+	log.Println("Finding course prefixes...")
+
+	// Remove the first option due to it being empty. The length guard keeps
+	// the slice expression from panicking if no options were returned.
+	if len(coursePrefixNodes) > 0 {
+		coursePrefixNodes = coursePrefixNodes[1:]
+	}
+
+	// Get the value of each option and append to coursePrefixes
+	for _, node := range coursePrefixNodes {
+		coursePrefixes = append(coursePrefixes, node.AttributeValue("value"))
+	}
+
+	log.Println("Found the course prefixes!")
+
+	return coursePrefixes
+}