Skip to content

Commit 87c94cd

Browse files
Merge pull request #78 from metakgp/sanitizeurl
Sanitize url
2 parents 7cb5a29 + 5516db9 commit 87c94cd

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

crawler/crawler.go

+10-5
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,11 @@ func downloadFile(new_qp qpRaw) {
5858
defer file.Close()
5959
}
6060

61+
// sanitizeFilename returns s with every space replaced by an underscore.
//
// Both the percent-encoded form ("%20"), as it appears in URLs, and literal
// space characters are replaced. Every other character, including any other
// percent-escape, is returned unchanged.
func sanitizeFilename(s string) string {
	// A single Replacer handles both spellings of a space in one pass.
	return strings.NewReplacer("%20", "_", " ", "_").Replace(s)
}
65+
6166
func main() {
6267

6368
c := colly.NewCollector(
@@ -81,13 +86,13 @@ func main() {
8186
return
8287
}
8388
link := e.Attr("href")
84-
url := e.Request.AbsoluteURL(link)
89+
file_url := e.Request.AbsoluteURL(link)
8590
var name string
8691
var year int
8792
var exam_type string
8893

89-
if strings.Contains(url, ".pdf") {
90-
temp := strings.Split(url, "/")
94+
if strings.Contains(file_url, ".pdf") {
95+
temp := strings.Split(file_url, "/")
9196
name = temp[len(temp)-1]
9297
year, _ = strconv.Atoi(temp[4])
9398
exam_type = strings.ToLower(temp[5])
@@ -105,13 +110,13 @@ func main() {
105110
}
106111
}
107112

108-
new_qp = append(new_qp, qpRaw{strings.Join(temp[4:], "_"), name, year, exam_type, url})
113+
new_qp = append(new_qp, qpRaw{sanitizeFilename(strings.Join(temp[4:], "_")), name, year, exam_type, file_url})
109114
}
110115

111116
c.Visit(e.Request.AbsoluteURL(link))
112117
})
113118

114-
c.Visit("http://10.18.24.75/peqp")
119+
c.Visit("http://10.18.24.75/peqp/2024")
115120
c.Wait()
116121

117122
file, err := os.Create("qp.csv")

0 commit comments

Comments
 (0)