From d6750ab2ed7a89505d0255894a04991e1a83eb76 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Tue, 24 Sep 2024 23:46:21 -0500 Subject: [PATCH 01/10] Log in to Astra --- .env.template | 2 ++ main.go | 4 ++++ scrapers/astra.go | 26 ++++++++++++++++++++++++++ utils/methods.go | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 scrapers/astra.go diff --git a/.env.template b/.env.template index 2ead8de..a560670 100644 --- a/.env.template +++ b/.env.template @@ -1,6 +1,8 @@ #Scrapers LOGIN_NETID= LOGIN_PASSWORD= +LOGIN_ASTRA_USERNAME= +LOGIN_ASTRA_PASSWORD= HEADLESS_MODE=false #Uploader diff --git a/main.go b/main.go index 04e4271..ff46c56 100644 --- a/main.go +++ b/main.go @@ -36,6 +36,8 @@ func main() { scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.") // Flag for event scraping scrapeEvents := flag.Bool("events", false, "Alongside -scrape, signifies that events should be scraped.") + // Flag for astra scraping + scrapeAstra := flag.Bool("astra", false, "Alongside -scrape, signifies that Astra should be scraped.") // Flags for parsing parse := flag.Bool("parse", false, "Puts the tool into parsing mode.") @@ -92,6 +94,8 @@ func main() { scrapers.ScrapeOrganizations(*outDir) case *scrapeEvents: scrapers.ScrapeEvents(*outDir) + case *scrapeAstra: + scrapers.ScrapeAstra(*outDir) default: log.Panic("You must specify which type of scraping you would like to perform with one of the scraping flags!") } diff --git a/scrapers/astra.go b/scrapers/astra.go new file mode 100644 index 0000000..23794ec --- /dev/null +++ b/scrapers/astra.go @@ -0,0 +1,26 @@ +/* + This file contains the code for the Astra scraper. 
+*/ + +package scrapers + +import ( + "log" + + "github.com/UTDNebula/api-tools/utils" + "github.com/joho/godotenv" +) + +func ScrapeAstra(outDir string) { + + // Load env vars + if err := godotenv.Load(); err != nil { + log.Panic("Error loading .env file") + } + + // Start chromedp + chromedpCtx, cancel := utils.InitChromeDp() + defer cancel() + + utils.SignInAstra(chromedpCtx) +} diff --git a/utils/methods.go b/utils/methods.go index cebe6b2..12a83af 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -100,6 +100,39 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string { } } +// This function signs into Astra +func SignInAstra(chromedpCtx context.Context) error { + // Get username and password + username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME") + if !present { + log.Panic("LOGIN_ASTRA_USERNAME is missing from .env!") + } + password, present := os.LookupEnv("LOGIN_ASTRA_PASSWORD") + if !present { + log.Panic("LOGIN_ASTRA_PASSWORD is missing from .env!") + } + + // Sign in + VPrintf("Signing in...") + _, err := chromedp.RunResponse(chromedpCtx, + chromedp.ActionFunc(func(ctx context.Context) error { + err := network.ClearBrowserCookies().Do(ctx) + return err + }), + chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx?ReturnUrl=%2futxdallas%2fcalendars%2fdailygridcalendar.aspx`), + chromedp.WaitVisible(`input#userNameField-inputEl`), + chromedp.SendKeys(`input#userNameField-inputEl`, username), + chromedp.SendKeys(`input#textfield-1029-inputEl`, password), + chromedp.WaitVisible(`a#logonButton`), + chromedp.Click(`a#logonButton`), + chromedp.WaitVisible(`body`), + ) + if err != nil { + panic(err) + } + return nil +} + // Encodes and writes the given data as tab-indented JSON to the given filepath. 
func WriteJSON(filepath string, data interface{}) error { fptr, err := os.Create(filepath) From 012f0ded6b2554f09a8032ccb6b4c209ddc7a023 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Wed, 25 Sep 2024 19:02:06 -0500 Subject: [PATCH 02/10] Attempt to call backend Astra API --- scrapers/astra.go | 35 ++++++++++++++++++++++++++++++++++- utils/methods.go | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 23794ec..3be3fef 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -5,7 +5,11 @@ package scrapers import ( + "fmt" "log" + "net/http" + "strings" + "time" "github.com/UTDNebula/api-tools/utils" "github.com/joho/godotenv" @@ -21,6 +25,35 @@ func ScrapeAstra(outDir string) { // Start chromedp chromedpCtx, cancel := utils.InitChromeDp() defer cancel() + fmt.Println("1") + // Init http client + tr := &http.Transport{ + MaxIdleConns: 10, + IdleConnTimeout: 30 * time.Second, + DisableCompression: true, + } + fmt.Println("2") + cli := &http.Client{Transport: tr} + fmt.Println("3") - utils.SignInAstra(chromedpCtx) + /*astraHeaders := */ + utils.RefreshAstraToken(chromedpCtx) + fmt.Println("4") + url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET", time.Now().UnixMilli()) + body := 
"start=0&limit=5000&isForWeekView=false&fields=ActivityId%2CActivityPk%2CActivityName%2CParentActivityId%2CParentActivityName%2CMeetingType%2CDescription%2CStartDate%2CEndDate%2CDayOfWeek%2CStartMinute%2CEndMinute%2CActivityTypeCode%2CResourceId%2CCampusName%2CBuildingCode%2CRoomNumber%2CRoomName%2CLocationName%2CInstitutionId%2CSectionId%2CSectionPk%2CIsExam%2CIsCrosslist%2CIsAllDay%2CIsPrivate%2CEventId%2CEventPk%2CCurrentState%2CNotAllowedUsageMask%2CUsageColor%2CUsageColorIsPrimary%2CEventTypeColor%2CMaxAttendance%2CActualAttendance%2CCapacity&filter=(((StartDate%3C%3D%222024-09-26T23%3A00%3A00%22)%26%26(EndDate%3E%3D%222024-09-26T00%3A00%3A00%22))%26%26((((((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((EventMeetingByActivityId.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D2)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resour
ce.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(ActivityTypeCode%3D%3D1)))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((PrePostMeetingByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D252)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((SetupTeardownWindowByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d
19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D251)))))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26((ActivityTypeCode%3D%3D9)%26%26(ActivityId%3D%3Dnull)))%7C%7C((ActivityTypeCode%3D%3D356)%7C%7C(ActivityTypeCode%3D%3D357))))%7C%7C(ActivityTypeCode%3D%3D255)))&sortOrder=%2BStartDate%2C%2BStartMinute&page=1&group=%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D&sort=%5B%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D%2C%7B%22property%22%3A%22StartMinute%22%2C%22direction%22%3A%22ASC%22%7D%5D" + req, err := http.NewRequest("POST", url, strings.NewReader(body)) + if err != nil { + panic(err) + } + fmt.Println("5") + //req.Header = astraHeaders + res, err := cli.Do(req) + if err != nil { + panic(err) + } + fmt.Println("6") + if res.StatusCode != 200 { + log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) + } + fmt.Println("7") } diff --git a/utils/methods.go b/utils/methods.go index 12a83af..f9cd66f 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -101,7 +101,7 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string { } // 
This function signs into Astra -func SignInAstra(chromedpCtx context.Context) error { +func RefreshAstraToken(chromedpCtx context.Context) error /*map[string][]string*/ { // Get username and password username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME") if !present { @@ -130,6 +130,41 @@ func SignInAstra(chromedpCtx context.Context) error { if err != nil { panic(err) } + + /*var cookieStrs []string + _, err = chromedp.RunResponse(chromedpCtx, + chromedp.Navigate(`https://coursebook.utdallas.edu/`), + chromedp.ActionFunc(func(ctx context.Context) error { + cookies, err := network.GetCookies().Do(ctx) + cookieStrs = make([]string, len(cookies)) + gotToken := false + for i, cookie := range cookies { + cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value) + if cookie.Name == "PTGSESSID" { + VPrintf("Got new token: PTGSESSID = %s", cookie.Value) + gotToken = true + } + } + if !gotToken { + return errors.New("failed to get a new token") + } + return err + }), + ) + if err != nil { + panic(err) + } + + return map[string][]string{ + "Host": {"www.aaiscloud.com"}, + "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, + "Accept": {"/*"}, // add back star + "Accept-Encoding": {"gzip, deflate, br, zstd"}, + "Accept-Language": {"en-US,en;q=0.5"}, + "Content-Type": {"application/x-www-form-urlencoded; charset=UTF-8"}, + "Cookie": cookieStrs, + "Connection": {"keep-alive"}, + }*/ return nil } From bf2503a0ffc87995f7ef53c9f02ea74ba1e5485c Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Thu, 26 Sep 2024 00:29:38 -0500 Subject: [PATCH 03/10] Uncomment cookie code --- scrapers/astra.go | 14 ++++++-------- utils/methods.go | 15 +++++++-------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 3be3fef..9c67721 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -25,28 +25,26 @@ func ScrapeAstra(outDir string) { // Start chromedp chromedpCtx, cancel := 
utils.InitChromeDp() defer cancel() - fmt.Println("1") + // Init http client tr := &http.Transport{ MaxIdleConns: 10, IdleConnTimeout: 30 * time.Second, DisableCompression: true, } - fmt.Println("2") cli := &http.Client{Transport: tr} - fmt.Println("3") - /*astraHeaders := */ - utils.RefreshAstraToken(chromedpCtx) - fmt.Println("4") + astraHeaders := utils.RefreshAstraToken(chromedpCtx) + time.Sleep(500 * time.Millisecond) + url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET", time.Now().UnixMilli()) body := "start=0&limit=5000&isForWeekView=false&fields=ActivityId%2CActivityPk%2CActivityName%2CParentActivityId%2CParentActivityName%2CMeetingType%2CDescription%2CStartDate%2CEndDate%2CDayOfWeek%2CStartMinute%2CEndMinute%2CActivityTypeCode%2CResourceId%2CCampusName%2CBuildingCode%2CRoomNumber%2CRoomName%2CLocationName%2CInstitutionId%2CSectionId%2CSectionPk%2CIsExam%2CIsCrosslist%2CIsAllDay%2CIsPrivate%2CEventId%2CEventPk%2CCurrentState%2CNotAllowedUsageMask%2CUsageColor%2CUsageColorIsPrimary%2CEventTypeColor%2CMaxAttendance%2CActualAttendance%2CCapacity&filter=(((StartDate%3C%3D%222024-09-26T23%3A00%3A00%22)%26%26(EndDate%3E%3D%222024-09-26T00%3A00%3A00%22))%26%26((((((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((EventMeetingByActivityId.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f93
47-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D2)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(ActivityTypeCode%3D%3D1)))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((PrePostMeetingByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D252)))
%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((SetupTeardownWindowByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D251)))))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26((ActivityTypeCode%3D%3D9)%26%26(ActivityId%3D%3Dnull)))%7C%7C((ActivityTypeCode%3D%3D356)%7C%7C(ActivityTypeCode%3D%3D357))))%7C%7C(ActivityTypeCode%3D%3D255)))&sortOrder=%2BStartDate%2C%2BStartMinute&page=1&group=%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D&sort=%5B%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D%2C%7B%22property%22%3A%22StartMinute%22%2C%22direction%22%3A%22ASC%22%7D%5D" req, err := http.NewRequest("POST", url, 
strings.NewReader(body)) if err != nil { panic(err) } - fmt.Println("5") - //req.Header = astraHeaders + + req.Header = astraHeaders res, err := cli.Do(req) if err != nil { panic(err) diff --git a/utils/methods.go b/utils/methods.go index f9cd66f..e25b54f 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -101,7 +101,7 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string { } // This function signs into Astra -func RefreshAstraToken(chromedpCtx context.Context) error /*map[string][]string*/ { +func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { // Get username and password username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME") if !present { @@ -120,27 +120,27 @@ func RefreshAstraToken(chromedpCtx context.Context) error /*map[string][]string* return err }), chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx?ReturnUrl=%2futxdallas%2fcalendars%2fdailygridcalendar.aspx`), + //chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx`), chromedp.WaitVisible(`input#userNameField-inputEl`), chromedp.SendKeys(`input#userNameField-inputEl`, username), chromedp.SendKeys(`input#textfield-1029-inputEl`, password), chromedp.WaitVisible(`a#logonButton`), chromedp.Click(`a#logonButton`), - chromedp.WaitVisible(`body`), ) if err != nil { panic(err) } - /*var cookieStrs []string + var cookieStrs []string _, err = chromedp.RunResponse(chromedpCtx, - chromedp.Navigate(`https://coursebook.utdallas.edu/`), + //chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/Calendars/DailyGridCalendar.aspx`), chromedp.ActionFunc(func(ctx context.Context) error { cookies, err := network.GetCookies().Do(ctx) cookieStrs = make([]string, len(cookies)) gotToken := false for i, cookie := range cookies { cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value) - if cookie.Name == "PTGSESSID" { + if cookie.Name == "UTXDallas.ASPXFORMSAUTH" { VPrintf("Got new token: PTGSESSID = %s", cookie.Value) gotToken = true } @@ -158,14 
+158,13 @@ func RefreshAstraToken(chromedpCtx context.Context) error /*map[string][]string* return map[string][]string{ "Host": {"www.aaiscloud.com"}, "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, - "Accept": {"/*"}, // add back star + "Accept": {"*/*"}, "Accept-Encoding": {"gzip, deflate, br, zstd"}, "Accept-Language": {"en-US,en;q=0.5"}, "Content-Type": {"application/x-www-form-urlencoded; charset=UTF-8"}, "Cookie": cookieStrs, "Connection": {"keep-alive"}, - }*/ - return nil + } } // Encodes and writes the given data as tab-indented JSON to the given filepath. From f1cccc9fc11b0e25e4f1679d1759453988ed9528 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Thu, 26 Sep 2024 14:54:56 -0500 Subject: [PATCH 04/10] Successful scrape! TODOs: sorting scrape each day look into login inputting user/pass in wrong sometimes --- scrapers/astra.go | 36 ++++++++++++++++++++++++++++++------ utils/methods.go | 31 +++++++++++++++++++------------ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 9c67721..bec9b99 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -6,9 +6,10 @@ package scrapers import ( "fmt" + "io" "log" "net/http" - "strings" + "os" "time" "github.com/UTDNebula/api-tools/utils" @@ -26,6 +27,13 @@ func ScrapeAstra(outDir string) { chromedpCtx, cancel := utils.InitChromeDp() defer cancel() + err := os.MkdirAll(outDir, 0777) + if err != nil { + panic(err) + } + + //days := []string + // Init http client tr := &http.Transport{ MaxIdleConns: 10, @@ -37,9 +45,10 @@ func ScrapeAstra(outDir string) { astraHeaders := utils.RefreshAstraToken(chromedpCtx) time.Sleep(500 * time.Millisecond) - url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET", time.Now().UnixMilli()) - body := 
"start=0&limit=5000&isForWeekView=false&fields=ActivityId%2CActivityPk%2CActivityName%2CParentActivityId%2CParentActivityName%2CMeetingType%2CDescription%2CStartDate%2CEndDate%2CDayOfWeek%2CStartMinute%2CEndMinute%2CActivityTypeCode%2CResourceId%2CCampusName%2CBuildingCode%2CRoomNumber%2CRoomName%2CLocationName%2CInstitutionId%2CSectionId%2CSectionPk%2CIsExam%2CIsCrosslist%2CIsAllDay%2CIsPrivate%2CEventId%2CEventPk%2CCurrentState%2CNotAllowedUsageMask%2CUsageColor%2CUsageColorIsPrimary%2CEventTypeColor%2CMaxAttendance%2CActualAttendance%2CCapacity&filter=(((StartDate%3C%3D%222024-09-26T23%3A00%3A00%22)%26%26(EndDate%3E%3D%222024-09-26T00%3A00%3A00%22))%26%26((((((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((EventMeetingByActivityId.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D2)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resour
ce.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(ActivityTypeCode%3D%3D1)))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((PrePostMeetingByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D252)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((SetupTeardownWindowByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d
19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D251)))))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26((ActivityTypeCode%3D%3D9)%26%26(ActivityId%3D%3Dnull)))%7C%7C((ActivityTypeCode%3D%3D356)%7C%7C(ActivityTypeCode%3D%3D357))))%7C%7C(ActivityTypeCode%3D%3D255)))&sortOrder=%2BStartDate%2C%2BStartMinute&page=1&group=%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D&sort=%5B%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D%2C%7B%22property%22%3A%22StartMinute%22%2C%22direction%22%3A%22ASC%22%7D%5D" - req, err := http.NewRequest("POST", url, strings.NewReader(body)) + //Request daily events + date := time.Now().Format("2006-01-02") + url := 
fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), date, date) + req, err := http.NewRequest("GET", url, nil) if err != nil { panic(err) } @@ -49,9 +58,24 @@ func ScrapeAstra(outDir string) { if err != nil { panic(err) } - fmt.Println("6") if res.StatusCode != 200 { log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) } - fmt.Println("7") + + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + panic(err) + } + + // Write event data to output file + fptr, err := os.Create(fmt.Sprintf("%s/reservations.json", outDir)) + if err != nil { + panic(err) + } + _, err = fptr.Write(body) + if err != nil { + panic(err) + } + fptr.Close() } diff --git a/utils/methods.go b/utils/methods.go index e25b54f..37c8691 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -126,20 +126,22 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { chromedp.SendKeys(`input#textfield-1029-inputEl`, password), chromedp.WaitVisible(`a#logonButton`), chromedp.Click(`a#logonButton`), + chromedp.WaitVisible(`body`, chromedp.ByQuery), ) if err != nil { panic(err) } - var cookieStrs []string + cookieStr := "" _, err = chromedp.RunResponse(chromedpCtx, //chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/Calendars/DailyGridCalendar.aspx`), + 
chromedp.WaitVisible(`body`, chromedp.ByQuery), chromedp.ActionFunc(func(ctx context.Context) error { cookies, err := network.GetCookies().Do(ctx) - cookieStrs = make([]string, len(cookies)) gotToken := false - for i, cookie := range cookies { - cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value) + for _, cookie := range cookies { + cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value) + //log.Println(cookieStr) if cookie.Name == "UTXDallas.ASPXFORMSAUTH" { VPrintf("Got new token: PTGSESSID = %s", cookie.Value) gotToken = true @@ -156,14 +158,19 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { } return map[string][]string{ - "Host": {"www.aaiscloud.com"}, - "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, - "Accept": {"*/*"}, - "Accept-Encoding": {"gzip, deflate, br, zstd"}, - "Accept-Language": {"en-US,en;q=0.5"}, - "Content-Type": {"application/x-www-form-urlencoded; charset=UTF-8"}, - "Cookie": cookieStrs, - "Connection": {"keep-alive"}, + "Host": {"www.aaiscloud.com"}, + "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, + "Accept": {"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"}, + "Accept-Language": {"en-US,en;q=0.5"}, + "Accept-Encoding": {"gzip, deflate, br, zstd"}, + "Connection": {"keep-alive"}, + "Cookie": {cookieStr}, + "Upgrade-Insecure-Requests": {"1"}, + "Sec-Fetch-Dest": {"document"}, + "Sec-Fetch-Mode": {"navigate"}, + "Sec-Fetch-Site": {"none"}, + "Sec-Fetch-User": {"?1"}, + "Priority": {"u=0, i"}, } } From b9b4bdab936de806c04e6edc150382fe01c20244 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Thu, 26 Sep 2024 18:22:44 -0500 Subject: [PATCH 05/10] Request in loop --- scrapers/astra.go | 57 ++++++++++++++++++++++++++++++----------------- utils/methods.go | 5 ++--- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git 
a/scrapers/astra.go b/scrapers/astra.go index bec9b99..6df4fec 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -32,7 +32,8 @@ func ScrapeAstra(outDir string) { panic(err) } - //days := []string + days := "{" + firstLoop := true // Init http client tr := &http.Transport{ @@ -45,35 +46,49 @@ func ScrapeAstra(outDir string) { astraHeaders := utils.RefreshAstraToken(chromedpCtx) time.Sleep(500 * time.Millisecond) - //Request daily events - date := time.Now().Format("2006-01-02") - url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), date, date) - req, err := http.NewRequest("GET", url, nil) - if err != nil { - panic(err) - } + //Starting date + date := time.Now() - req.Header = astraHeaders - res, err := cli.Do(req) - if err != nil { - panic(err) - } - if res.StatusCode != 200 { - log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) - } + for i := 0; i < 10; i++ { - defer res.Body.Close() - body, err := io.ReadAll(res.Body) - if err != nil { - panic(err) + //Request daily events + formattedDate := date.Format("2006-01-02") + url := 
fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), formattedDate, formattedDate) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + panic(err) + } + req.Header = astraHeaders + res, err := cli.Do(req) + if err != nil { + panic(err) + } + if res.StatusCode != 200 { + log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) + } + + //Save to days JSON + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + panic(err) + } + comma := "," + if firstLoop { + comma = "" + firstLoop = false + } + days = fmt.Sprintf("%s%s\"%s\":%s", days, comma, formattedDate, string(body)) + date = date.Add(time.Hour * 24) } // Write event data to output file + days = fmt.Sprintf("%s}", days) fptr, err := os.Create(fmt.Sprintf("%s/reservations.json", outDir)) if err != nil { panic(err) } - _, err = fptr.Write(body) + _, err = fptr.Write([]byte(days)) if err != nil { panic(err) } diff --git a/utils/methods.go b/utils/methods.go index 37c8691..442b16f 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -120,7 +120,6 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { return err }), chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx?ReturnUrl=%2futxdallas%2fcalendars%2fdailygridcalendar.aspx`), - 
//chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx`), chromedp.WaitVisible(`input#userNameField-inputEl`), chromedp.SendKeys(`input#userNameField-inputEl`, username), chromedp.SendKeys(`input#textfield-1029-inputEl`, password), @@ -132,16 +131,15 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { panic(err) } + //Save all cookies to string cookieStr := "" _, err = chromedp.RunResponse(chromedpCtx, - //chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/Calendars/DailyGridCalendar.aspx`), chromedp.WaitVisible(`body`, chromedp.ByQuery), chromedp.ActionFunc(func(ctx context.Context) error { cookies, err := network.GetCookies().Do(ctx) gotToken := false for _, cookie := range cookies { cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value) - //log.Println(cookieStr) if cookie.Name == "UTXDallas.ASPXFORMSAUTH" { VPrintf("Got new token: PTGSESSID = %s", cookie.Value) gotToken = true @@ -157,6 +155,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { panic(err) } + //Return headers, copied from a request the actual site made return map[string][]string{ "Host": {"www.aaiscloud.com"}, "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, From 90a97e502f2aaa3a7dffc7075ee954e636e4e61d Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Thu, 26 Sep 2024 23:28:38 -0500 Subject: [PATCH 06/10] Scrape until 90 days of less than 10 events After this semester and the next, there seem to be only ever 2 events: one in FO 3.616 with no time (?), and one with no location that always shows up after the current semester and says either the holiday or "Events for Future Terms", as well as "No Events Allowed". This just scrapes 90 days into that stretch and stops at about a year and 2 months out.
--- go.mod | 1 + go.sum | 2 ++ scrapers/astra.go | 34 ++++++++++++++++++++++++++-------- utils/methods.go | 4 ++-- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index d6acab6..e7e353d 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/chromedp/cdproto v0.0.0-20240801214329-3f85d328b335 github.com/chromedp/chromedp v0.10.0 github.com/joho/godotenv v1.5.1 + github.com/valyala/fastjson v1.6.4 go.mongodb.org/mongo-driver v1.15.0 ) diff --git a/go.sum b/go.sum index 8fbfdd0..bab34d8 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXVQ= +github.com/valyala/fastjson v1.6.4/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= diff --git a/scrapers/astra.go b/scrapers/astra.go index 6df4fec..9c9c01a 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -14,6 +14,7 @@ import ( "github.com/UTDNebula/api-tools/utils" "github.com/joho/godotenv" + "github.com/valyala/fastjson" ) func ScrapeAstra(outDir string) { @@ -27,13 +28,14 @@ func ScrapeAstra(outDir string) { chromedpCtx, cancel := utils.InitChromeDp() defer cancel() + // Make output folder err := os.MkdirAll(outDir, 0777) if err != nil { panic(err) } - days := "{" - firstLoop := true + days := "{" // String JSON for storing results by day + firstLoop := true // To avoid adding a comma to the 
JSON on the first loop // Init http client tr := &http.Transport{ @@ -46,13 +48,18 @@ func ScrapeAstra(outDir string) { astraHeaders := utils.RefreshAstraToken(chromedpCtx) time.Sleep(500 * time.Millisecond) - //Starting date + // Starting date date := time.Now() - for i := 0; i < 10; i++ { + // Stop condition + lt10EventsCount := 0 - //Request daily events + // Run until 90 days of no events + for lt10EventsCount < 90 { formattedDate := date.Format("2006-01-02") + log.Printf("Scraping %s...", formattedDate) + + // Request daily events url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), formattedDate, formattedDate) req, err := http.NewRequest("GET", url, nil) if err != nil { @@ -66,19 +73,30 @@ func ScrapeAstra(outDir string) { if res.StatusCode != 200 { log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) } - - //Save to days JSON defer res.Body.Close() body, err := io.ReadAll(res.Body) if err != nil { panic(err) } + stringBody := string(body) + + // Check for no events + if fastjson.GetInt(body, "totalRecords") < 10 { + lt10EventsCount += 1 + if lt10EventsCount > 30 { + log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount) + } + } else { + lt10EventsCount = 0 + } + + // Add to record comma := "," if firstLoop { 
comma = "" firstLoop = false } - days = fmt.Sprintf("%s%s\"%s\":%s", days, comma, formattedDate, string(body)) + days = fmt.Sprintf("%s%s\"%s\":%s", days, comma, formattedDate, stringBody) date = date.Add(time.Hour * 24) } diff --git a/utils/methods.go b/utils/methods.go index 442b16f..5ab5ee7 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -131,7 +131,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { panic(err) } - //Save all cookies to string + // Save all cookies to string cookieStr := "" _, err = chromedp.RunResponse(chromedpCtx, chromedp.WaitVisible(`body`, chromedp.ByQuery), @@ -155,7 +155,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string { panic(err) } - //Return headers, copied from a request the actual site made + // Return headers, copied from a request the actual site made return map[string][]string{ "Host": {"www.aaiscloud.com"}, "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, From 5ed69079dfd888cf656ddcd991f00b662b4914b9 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Thu, 26 Sep 2024 23:46:46 -0500 Subject: [PATCH 07/10] Sort by start time --- scrapers/astra.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 9c9c01a..2cd5c3e 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -60,7 +60,7 @@ func ScrapeAstra(outDir string) { log.Printf("Scraping %s...", formattedDate) // Request daily events - url := 
fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), formattedDate, formattedDate) + url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), formattedDate, formattedDate) req, err := http.NewRequest("GET", url, nil) if err != nil { panic(err) From 12cd58d4957ea935f94239def0c2be6e92621dc8 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Sat, 28 Sep 2024 15:03:09 -0500 Subject: [PATCH 08/10] Check if max events exceeded --- scrapers/astra.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 2cd5c3e..8fa40ee 100644 --- a/scrapers/astra.go +++ 
b/scrapers/astra.go @@ -17,6 +17,8 @@ import ( "github.com/valyala/fastjson" ) +var MAX_EVENTS_PER_DAY = 5000 + func ScrapeAstra(outDir string) { // Load env vars @@ -60,7 +62,7 @@ func ScrapeAstra(outDir string) { log.Printf("Scraping %s...", formattedDate) // Request daily events - url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), formattedDate, formattedDate) + url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=%d&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), MAX_EVENTS_PER_DAY, formattedDate, formattedDate) req, err := http.NewRequest("GET", url, nil) if err != nil { panic(err) @@ -81,7 +83,11 @@ func 
ScrapeAstra(outDir string) { stringBody := string(body) // Check for no events - if fastjson.GetInt(body, "totalRecords") < 10 { + numEvents := fastjson.GetInt(body, "totalRecords") + if numEvents >= MAX_EVENTS_PER_DAY { + log.Panic("ERROR: Max events per day exceeded!") + } + if numEvents < 10 { lt10EventsCount += 1 if lt10EventsCount > 30 { log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount) From 6b98eb5d8a1a93c30ff77e19f645533bde426393 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Sat, 28 Sep 2024 15:06:42 -0500 Subject: [PATCH 09/10] Run close commands when not needed Closes chromedp when not necessary --- scrapers/astra.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapers/astra.go b/scrapers/astra.go index 8fa40ee..e0e5d56 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -28,7 +28,6 @@ func ScrapeAstra(outDir string) { // Start chromedp chromedpCtx, cancel := utils.InitChromeDp() - defer cancel() // Make output folder err := os.MkdirAll(outDir, 0777) @@ -47,8 +46,10 @@ func ScrapeAstra(outDir string) { } cli := &http.Client{Transport: tr} + // Get cookies for auth astraHeaders := utils.RefreshAstraToken(chromedpCtx) time.Sleep(500 * time.Millisecond) + cancel() // Don't need chromedp anymore // Starting date date := time.Now() @@ -75,11 +76,11 @@ func ScrapeAstra(outDir string) { if res.StatusCode != 200 { log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) } - defer res.Body.Close() body, err := io.ReadAll(res.Body) if err != nil { panic(err) } + res.Body.Close() stringBody := string(body) // Check for no events From e4d250131ea44c422352e1f7a5991ea741089cf7 Mon Sep 17 00:00:00 2001 From: Tyler Hill Date: Sat, 28 Sep 2024 15:11:30 -0500 Subject: [PATCH 10/10] Start on previous day --- scrapers/astra.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapers/astra.go b/scrapers/astra.go index e0e5d56..d2cf5b1 100644 --- 
a/scrapers/astra.go +++ b/scrapers/astra.go @@ -53,6 +53,8 @@ func ScrapeAstra(outDir string) { // Starting date date := time.Now() + // Start on previous date to make sure we have today's data, regardless of what timezone the scraper is in + date = date.Add(time.Hour * -24) // Stop condition lt10EventsCount := 0