-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathastra.go
123 lines (105 loc) · 3.45 KB
/
astra.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
This file contains the code for the Astra scraper.
*/
package scrapers
import (
"fmt"
"io"
"log"
"net/http"
"os"
"time"
"github.com/UTDNebula/api-tools/utils"
"github.com/joho/godotenv"
"github.com/valyala/fastjson"
)
// MAX_EVENTS_PER_DAY is the value sent as the `limit` query parameter of each
// daily calendar request made by ScrapeAstra. ScrapeAstra panics when a day's
// reported totalRecords reaches this value.
var MAX_EVENTS_PER_DAY = 5000
// ScrapeAstra downloads the Astra calendar one day at a time and writes the
// collected responses to "<outDir>/reservations.json".
//
// The output is a single JSON object keyed by date ("2006-01-02" layout) whose
// values are the raw response bodies returned for that day. Scraping starts at
// yesterday's date and moves forward one calendar day per request; it stops
// after 90 consecutive days that each report fewer than 10 events.
//
// outDir is created (with parents) if it does not exist.
//
// ScrapeAstra panics if the .env file cannot be loaded, if the output folder
// or file cannot be written, if a request fails or returns a non-200 status,
// or if a day's totalRecords reaches MAX_EVENTS_PER_DAY.
func ScrapeAstra(outDir string) {
	// Load env vars
	if err := godotenv.Load(); err != nil {
		log.Panic("Error loading .env file")
	}

	// Start chromedp
	chromedpCtx, cancel := utils.InitChromeDp()

	// Make output folder
	if err := os.MkdirAll(outDir, 0777); err != nil {
		panic(err)
	}

	// JSON object holding the results by day. It is accumulated as bytes so
	// each day is appended once instead of re-copying everything gathered so
	// far on every iteration.
	days := []byte{'{'}
	firstLoop := true // To avoid adding a comma to the JSON on the first loop

	// Init http client
	tr := &http.Transport{
		MaxIdleConns:       10,
		IdleConnTimeout:    30 * time.Second,
		DisableCompression: true,
	}
	cli := &http.Client{Transport: tr}

	// Get the headers (cookies) used to authenticate the API requests
	astraHeaders := utils.RefreshAstraToken(chromedpCtx)
	time.Sleep(500 * time.Millisecond)
	cancel() // Don't need chromedp anymore

	// Start on the previous date to make sure we have today's data, regardless
	// of what timezone the scraper is in. AddDate steps by calendar day; adding
	// 24 hours instead can skip or repeat a date across a DST transition.
	date := time.Now().AddDate(0, 0, -1)

	// Stop condition: number of consecutive days with fewer than 10 events
	lt10EventsCount := 0

	// Run until there have been 90 days in a row with fewer than 10 events
	for lt10EventsCount < 90 {
		formattedDate := date.Format("2006-01-02")
		log.Printf("Scraping %s...", formattedDate)

		// Request daily events; _dc carries the current time in milliseconds
		url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=%d&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), MAX_EVENTS_PER_DAY, formattedDate, formattedDate)
		req, err := http.NewRequest(http.MethodGet, url, nil)
		if err != nil {
			panic(err)
		}
		req.Header = astraHeaders

		res, err := cli.Do(req)
		if err != nil {
			panic(err)
		}
		if res.StatusCode != http.StatusOK {
			res.Body.Close()
			log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status)
		}

		// Close the body whether or not the read succeeded
		body, err := io.ReadAll(res.Body)
		res.Body.Close()
		if err != nil {
			panic(err)
		}

		// Check the number of events reported for this day
		numEvents := fastjson.GetInt(body, "totalRecords")
		if numEvents >= MAX_EVENTS_PER_DAY {
			log.Panic("ERROR: Max events per day exceeded!")
		}
		if numEvents < 10 {
			lt10EventsCount++
			if lt10EventsCount > 30 {
				log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount)
			}
		} else {
			lt10EventsCount = 0
		}

		// Add to record as `"<date>":<raw response body>`
		if firstLoop {
			firstLoop = false
		} else {
			days = append(days, ',')
		}
		days = append(days, '"')
		days = append(days, formattedDate...)
		days = append(days, `":`...)
		days = append(days, body...)

		date = date.AddDate(0, 0, 1)
	}

	// Write event data to output file. os.WriteFile creates or truncates the
	// file and reports write and close errors alike.
	days = append(days, '}')
	if err := os.WriteFile(fmt.Sprintf("%s/reservations.json", outDir), days, 0666); err != nil {
		panic(err)
	}
}