Skip to content

Commit

Permalink
Added dynamic retrieval of course prefix list for coursebook scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
mohammadmehrab authored Oct 17, 2024
1 parent 6b96e8d commit 77c54da
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 132 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/UTDNebula/api-tools

go 1.23
go 1.23.0

require (
github.com/PuerkitoBio/goquery v1.8.1
Expand Down
133 changes: 2 additions & 131 deletions scrapers/coursebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
chromedpCtx, cancel := utils.InitChromeDp()
defer cancel()

coursePrefixes := utils.GetCoursePrefixes(chromedpCtx)

// Find index of starting prefix, if one has been given
startPrefixIndex := 0
if startPrefix != "" && startPrefix != coursePrefixes[0] {
Expand Down Expand Up @@ -183,136 +185,5 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
totalSections += sectionsInCoursePrefix
}
log.Printf("\nDone scraping term! Scraped a total of %d sections.", totalSections)
}

// coursePrefixes is a hard-coded list of course prefix identifiers, kept in
// alphabetical order. Every entry is a lowercase code carrying the "cp_"
// prefix.
var coursePrefixes = []string{
"cp_acct",
"cp_acn",
"cp_acts",
"cp_aero",
"cp_ahst",
"cp_ams",
"cp_arab",
"cp_arhm",
"cp_arts",
"cp_atcm",
"cp_aud",
"cp_ba",
"cp_bbsu",
"cp_bcom",
"cp_biol",
"cp_bis",
"cp_blaw",
"cp_bmen",
"cp_bps",
"cp_buan",
"cp_ce",
"cp_cgs",
"cp_chem",
"cp_chin",
"cp_cldp",
"cp_comd",
"cp_comm",
"cp_crim",
"cp_crwt",
"cp_cs",
"cp_danc",
"cp_econ",
"cp_ecs",
"cp_ecsc",
"cp_ed",
"cp_ee",
"cp_eebm",
"cp_eecs",
"cp_eect",
"cp_eedg",
"cp_eegr",
"cp_eemf",
"cp_eeop",
"cp_eepe",
"cp_eerf",
"cp_eesc",
"cp_engr",
"cp_engy",
"cp_entp",
"cp_envr",
"cp_epcs",
"cp_epps",
"cp_film",
"cp_fin",
"cp_fren",
"cp_ftec",
"cp_geog",
"cp_geos",
"cp_germ",
"cp_gisc",
"cp_govt",
"cp_gst",
"cp_hcs",
"cp_hdcd",
"cp_hist",
"cp_hlth",
"cp_hmgt",
"cp_hons",
"cp_huas",
"cp_huhi",
"cp_huma",
"cp_idea",
"cp_ims",
"cp_ipec",
"cp_isae",
"cp_isah",
"cp_isis",
"cp_isns",
"cp_itss",
"cp_japn",
"cp_kore",
"cp_lang",
"cp_lats",
"cp_lit",
"cp_mais",
"cp_mas",
"cp_math",
"cp_mech",
"cp_meco",
"cp_mils",
"cp_mis",
"cp_mkt",
"cp_msen",
"cp_mthe",
"cp_musi",
"cp_nats",
"cp_nsc",
"cp_ob",
"cp_obhr",
"cp_opre",
"cp_pa",
"cp_phil",
"cp_phin",
"cp_phys",
"cp_ppol",
"cp_pppe",
"cp_psci",
"cp_psy",
"cp_psyc",
"cp_real",
"cp_rels",
"cp_rhet",
"cp_rmis",
"cp_sci",
"cp_se",
"cp_smed",
"cp_soc",
"cp_span",
"cp_spau",
"cp_stat",
"cp_syse",
"cp_sysm",
"cp_te",
"cp_thea",
"cp_univ",
"cp_utd",
"cp_utsw",
"cp_viet",
"cp_vpas",
}
37 changes: 37 additions & 0 deletions utils/methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strings"

"github.com/chromedp/cdproto/network"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/chromedp"
)

Expand Down Expand Up @@ -251,3 +252,39 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa
}
return res, err
}

// GetCoursePrefixes navigates the browser session bound to chromedpCtx to the
// Coursebook home page and returns the "value" attribute of every option in
// the course prefix dropdown (select#combobox_cp), excluding the first option.
//
// It panics via log.Panic if navigation or the node query fails, or if the
// dropdown yields no option elements at all.
func GetCoursePrefixes(chromedpCtx context.Context) []string {
	// Refresh the token
	// refreshToken(chromedpCtx)

	log.Printf("Finding course prefix nodes...")

	var coursePrefixNodes []*cdp.Node

	// Get option elements for course prefix dropdown
	err := chromedp.Run(chromedpCtx,
		chromedp.Navigate("https://coursebook.utdallas.edu"),
		chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll),
	)
	if err != nil {
		log.Panic(err)
	}

	// Fail with a descriptive message instead of an opaque "slice bounds out
	// of range" runtime panic when the [1:] slice below has nothing to drop.
	if len(coursePrefixNodes) == 0 {
		log.Panic("no course prefix options found in select#combobox_cp")
	}

	log.Println("Found the course prefix nodes!")

	log.Println("Finding course prefixes...")

	// Remove the first option due to it being empty
	coursePrefixNodes = coursePrefixNodes[1:]

	// Get the value of each option; the final length is known, so pre-size.
	coursePrefixes := make([]string, 0, len(coursePrefixNodes))
	for _, node := range coursePrefixNodes {
		coursePrefixes = append(coursePrefixes, node.AttributeValue("value"))
	}

	log.Println("Found the course prefixes!")

	return coursePrefixes
}

0 comments on commit 77c54da

Please sign in to comment.