Skip to content

Commit

Permalink
Successful scrape!
Browse files Browse the repository at this point in the history
TODOs:
sorting
scrape each day
investigate why the login step sometimes enters the username/password incorrectly
  • Loading branch information
TyHil committed Sep 26, 2024
1 parent bf2503a commit f1cccc9
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 18 deletions.
36 changes: 30 additions & 6 deletions scrapers/astra.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ package scrapers

import (
"fmt"
"io"
"log"
"net/http"
"strings"
"os"
"time"

"github.com/UTDNebula/api-tools/utils"
Expand All @@ -26,6 +27,13 @@ func ScrapeAstra(outDir string) {
chromedpCtx, cancel := utils.InitChromeDp()
defer cancel()

err := os.MkdirAll(outDir, 0777)
if err != nil {
panic(err)
}

//days := []string

// Init http client
tr := &http.Transport{
MaxIdleConns: 10,
Expand All @@ -37,9 +45,10 @@ func ScrapeAstra(outDir string) {
astraHeaders := utils.RefreshAstraToken(chromedpCtx)
time.Sleep(500 * time.Millisecond)

url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET", time.Now().UnixMilli())
body := "start=0&limit=5000&isForWeekView=false&fields=ActivityId%2CActivityPk%2CActivityName%2CParentActivityId%2CParentActivityName%2CMeetingType%2CDescription%2CStartDate%2CEndDate%2CDayOfWeek%2CStartMinute%2CEndMinute%2CActivityTypeCode%2CResourceId%2CCampusName%2CBuildingCode%2CRoomNumber%2CRoomName%2CLocationName%2CInstitutionId%2CSectionId%2CSectionPk%2CIsExam%2CIsCrosslist%2CIsAllDay%2CIsPrivate%2CEventId%2CEventPk%2CCurrentState%2CNotAllowedUsageMask%2CUsageColor%2CUsageColorIsPrimary%2CEventTypeColor%2CMaxAttendance%2CActualAttendance%2CCapacity&filter=(((StartDate%3C%3D%222024-09-26T23%3A00%3A00%22)%26%26(EndDate%3E%3D%222024-09-26T00%3A00%3A00%22))%26%26((((((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((EventMeetingByActivityId.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D2)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%2
6(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(ActivityTypeCode%3D%3D1)))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((PrePostMeetingByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D252)))%7C%7C((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26(((SetupTeardownWindowByActivityId.EventMeeting.Event.EventTypeId%20in%20(%221a7720e9-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ea-8d19-11e9-b19f-0556148ced27%22%2C%221a7720eb-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ec-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ed-8d19-11e9-b19f-0556148ced27%22%2C%221a7
720ee-8d19-11e9-b19f-0556148ced27%22%2C%221a7720ef-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f0-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f1-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f2-8d19-11e9-b19f-0556148ced27%22%2C%22874f9347-10f4-4367-ab1e-d697b187e9cb%22%2C%221a7720f4-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f5-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f6-8d19-11e9-b19f-0556148ced27%22%2C%221a7720e8-8d19-11e9-b19f-0556148ced27%22%2C%220494ce20-15e1-11ee-9d2b-ff74be387a2d%22%2C%221a7720f8-8d19-11e9-b19f-0556148ced27%22%2C%221a7720f9-8d19-11e9-b19f-0556148ced27%22))%26%26(CurrentState%20in%20(%22Incomplete%22%2C%22Requested%22%2C%22Scheduled%22)))%26%26(ActivityTypeCode%3D%3D251)))))%7C%7C(((((Resource.Building.CampusId%20in%20(%2203c9d930-7343-11e9-8a0c-35dcbeb1edcd%22))%26%26(Resource.Regions.Id%20in%20(%223578b3b0-9dab-11e9-bb13-b5bc7e192516%22)))%26%26(Resource.RoomTypeId%20in%20(%22fe74a890-65f8-11e9-991a-ff0e0065dfaa%22)))%26%26((ActivityTypeCode%3D%3D9)%26%26(ActivityId%3D%3Dnull)))%7C%7C((ActivityTypeCode%3D%3D356)%7C%7C(ActivityTypeCode%3D%3D357))))%7C%7C(ActivityTypeCode%3D%3D255)))&sortOrder=%2BStartDate%2C%2BStartMinute&page=1&group=%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D&sort=%5B%7B%22property%22%3A%22StartDate%22%2C%22direction%22%3A%22ASC%22%7D%2C%7B%22property%22%3A%22StartMinute%22%2C%22direction%22%3A%22ASC%22%7D%5D"
req, err := http.NewRequest("POST", url, strings.NewReader(body))
//Request daily events
date := time.Now().Format("2006-01-02")
url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=5000&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1", time.Now().UnixMilli(), date, date)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
panic(err)
}
Expand All @@ -49,9 +58,24 @@ func ScrapeAstra(outDir string) {
if err != nil {
panic(err)
}
fmt.Println("6")
if res.StatusCode != 200 {
log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status)
}
fmt.Println("7")

defer res.Body.Close()
body, err := io.ReadAll(res.Body)
if err != nil {
panic(err)
}

// Write event data to output file
fptr, err := os.Create(fmt.Sprintf("%s/reservations.json", outDir))
if err != nil {
panic(err)
}
_, err = fptr.Write(body)
if err != nil {
panic(err)
}
fptr.Close()
}
31 changes: 19 additions & 12 deletions utils/methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,20 +126,22 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
chromedp.SendKeys(`input#textfield-1029-inputEl`, password),
chromedp.WaitVisible(`a#logonButton`),
chromedp.Click(`a#logonButton`),
chromedp.WaitVisible(`body`, chromedp.ByQuery),
)
if err != nil {
panic(err)
}

var cookieStrs []string
cookieStr := ""
_, err = chromedp.RunResponse(chromedpCtx,
//chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/Calendars/DailyGridCalendar.aspx`),
chromedp.WaitVisible(`body`, chromedp.ByQuery),
chromedp.ActionFunc(func(ctx context.Context) error {
cookies, err := network.GetCookies().Do(ctx)
cookieStrs = make([]string, len(cookies))
gotToken := false
for i, cookie := range cookies {
cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
for _, cookie := range cookies {
cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value)
//log.Println(cookieStr)
if cookie.Name == "UTXDallas.ASPXFORMSAUTH" {
VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
gotToken = true
Expand All @@ -156,14 +158,19 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
}

return map[string][]string{
"Host": {"www.aaiscloud.com"},
"User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"},
"Accept": {"*/*"},
"Accept-Encoding": {"gzip, deflate, br, zstd"},
"Accept-Language": {"en-US,en;q=0.5"},
"Content-Type": {"application/x-www-form-urlencoded; charset=UTF-8"},
"Cookie": cookieStrs,
"Connection": {"keep-alive"},
"Host": {"www.aaiscloud.com"},
"User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"},
"Accept": {"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"},
"Accept-Language": {"en-US,en;q=0.5"},
"Accept-Encoding": {"gzip, deflate, br, zstd"},
"Connection": {"keep-alive"},
"Cookie": {cookieStr},
"Upgrade-Insecure-Requests": {"1"},
"Sec-Fetch-Dest": {"document"},
"Sec-Fetch-Mode": {"navigate"},
"Sec-Fetch-Site": {"none"},
"Sec-Fetch-User": {"?1"},
"Priority": {"u=0, i"},
}
}

Expand Down

0 comments on commit f1cccc9

Please sign in to comment.