diff --git a/go.mod b/go.mod index e7e353d..f695df8 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/UTDNebula/api-tools -go 1.23 +go 1.23.0 require ( github.com/PuerkitoBio/goquery v1.8.1 diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index 3b484e4..e9e1f91 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -28,6 +28,8 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) { chromedpCtx, cancel := utils.InitChromeDp() defer cancel() + coursePrefixes := utils.GetCoursePrefixes(chromedpCtx) + // Find index of starting prefix, if one has been given startPrefixIndex := 0 if startPrefix != "" && startPrefix != coursePrefixes[0] { @@ -183,136 +185,5 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) { totalSections += sectionsInCoursePrefix } log.Printf("\nDone scraping term! Scraped a total of %d sections.", totalSections) -} -var coursePrefixes = []string{ - "cp_acct", - "cp_acn", - "cp_acts", - "cp_aero", - "cp_ahst", - "cp_ams", - "cp_arab", - "cp_arhm", - "cp_arts", - "cp_atcm", - "cp_aud", - "cp_ba", - "cp_bbsu", - "cp_bcom", - "cp_biol", - "cp_bis", - "cp_blaw", - "cp_bmen", - "cp_bps", - "cp_buan", - "cp_ce", - "cp_cgs", - "cp_chem", - "cp_chin", - "cp_cldp", - "cp_comd", - "cp_comm", - "cp_crim", - "cp_crwt", - "cp_cs", - "cp_danc", - "cp_econ", - "cp_ecs", - "cp_ecsc", - "cp_ed", - "cp_ee", - "cp_eebm", - "cp_eecs", - "cp_eect", - "cp_eedg", - "cp_eegr", - "cp_eemf", - "cp_eeop", - "cp_eepe", - "cp_eerf", - "cp_eesc", - "cp_engr", - "cp_engy", - "cp_entp", - "cp_envr", - "cp_epcs", - "cp_epps", - "cp_film", - "cp_fin", - "cp_fren", - "cp_ftec", - "cp_geog", - "cp_geos", - "cp_germ", - "cp_gisc", - "cp_govt", - "cp_gst", - "cp_hcs", - "cp_hdcd", - "cp_hist", - "cp_hlth", - "cp_hmgt", - "cp_hons", - "cp_huas", - "cp_huhi", - "cp_huma", - "cp_idea", - "cp_ims", - "cp_ipec", - "cp_isae", - "cp_isah", - "cp_isis", - "cp_isns", - "cp_itss", - "cp_japn", - "cp_kore", - "cp_lang", 
-	"cp_lats",
-	"cp_lit",
-	"cp_mais",
-	"cp_mas",
-	"cp_math",
-	"cp_mech",
-	"cp_meco",
-	"cp_mils",
-	"cp_mis",
-	"cp_mkt",
-	"cp_msen",
-	"cp_mthe",
-	"cp_musi",
-	"cp_nats",
-	"cp_nsc",
-	"cp_ob",
-	"cp_obhr",
-	"cp_opre",
-	"cp_pa",
-	"cp_phil",
-	"cp_phin",
-	"cp_phys",
-	"cp_ppol",
-	"cp_pppe",
-	"cp_psci",
-	"cp_psy",
-	"cp_psyc",
-	"cp_real",
-	"cp_rels",
-	"cp_rhet",
-	"cp_rmis",
-	"cp_sci",
-	"cp_se",
-	"cp_smed",
-	"cp_soc",
-	"cp_span",
-	"cp_spau",
-	"cp_stat",
-	"cp_syse",
-	"cp_sysm",
-	"cp_te",
-	"cp_thea",
-	"cp_univ",
-	"cp_utd",
-	"cp_utsw",
-	"cp_viet",
-	"cp_vpas",
 }
diff --git a/utils/methods.go b/utils/methods.go
index a6e42e3..da9193c 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -19,6 +19,7 @@ import (
 	"strings"
 
+	"github.com/chromedp/cdproto/cdp"
 	"github.com/chromedp/cdproto/network"
 	"github.com/chromedp/chromedp"
 )
 
@@ -251,3 +252,45 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa
 	}
 	return res, err
 }
+
+// GetCoursePrefixes navigates the browser bound to chromedpCtx to the
+// Coursebook home page and returns the "value" attribute of every option in
+// the course prefix dropdown (select#combobox_cp), skipping the first option.
+//
+// It panics (via log.Panic) if the page cannot be loaded or queried, or if the
+// dropdown query yields no option nodes at all.
+func GetCoursePrefixes(chromedpCtx context.Context) []string {
+	log.Println("Finding course prefix nodes...")
+
+	var coursePrefixNodes []*cdp.Node
+
+	// Get option elements for course prefix dropdown
+	err := chromedp.Run(chromedpCtx,
+		chromedp.Navigate("https://coursebook.utdallas.edu"),
+		chromedp.Nodes("select#combobox_cp option", &coursePrefixNodes, chromedp.ByQueryAll),
+	)
+	if err != nil {
+		log.Panic(err)
+	}
+
+	// Fail with a clear message rather than a slice-bounds panic below.
+	if len(coursePrefixNodes) == 0 {
+		log.Panic("no options found in course prefix dropdown")
+	}
+
+	log.Println("Found the course prefix nodes!")
+	log.Println("Finding course prefixes...")
+
+	// Remove the first option due to it being empty
+	coursePrefixNodes = coursePrefixNodes[1:]
+
+	// Get the value of each option and append to coursePrefixes
+	coursePrefixes := make([]string, 0, len(coursePrefixNodes))
+	for _, node := range coursePrefixNodes {
+		coursePrefixes = append(coursePrefixes, node.AttributeValue("value"))
+	}
+
+	log.Println("Found the course prefixes!")
+
+	return coursePrefixes
+}