forked from wolstena/varnish-bad-bot-detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbad_bot_detection.vcl
132 lines (130 loc) · 6.49 KB
/
bad_bot_detection.vcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# {{ ansible_managed }}
# bad_bot_detection
#
# Inspects the request's User-Agent header and, when it matches any entry
# below, stops processing with a synthetic "403 Forbidden Bots" response.
# Requests that match nothing fall through and the subroutine returns
# normally to its caller.
#
# Two kinds of checks are used:
#   ~   regular-expression match; unless anchored with ^ or $ the pattern
#       may match anywhere in the header. Matching is case-sensitive.
#   ==  exact comparison against the complete header value.
#
# NOTE(review): this subroutine uses return(synth(...)), so it is presumably
# meant to be called from vcl_recv -- confirm against the including VCL.
sub bad_bot_detection {
    if (
        # --- Pattern matches -------------------------------------------------
        # Empty User-Agent.
        # NOTE(review): an absent header is expected to match "^$" as well --
        # confirm on the Varnish version in use.
        req.http.user-agent ~ "^$"
        # Generic HTTP libraries, toolkits and malformed agent strings.
        || req.http.user-agent ~ "^Java"
        || req.http.user-agent ~ "^Jakarta"
        || req.http.user-agent ~ "IDBot"
        || req.http.user-agent ~ "id-search"
        # The literal header name inside the value (client sent it twice).
        || req.http.user-agent ~ "User-Agent"
        || req.http.user-agent ~ "compatible ;"
        || req.http.user-agent ~ "ConveraCrawler"
        # The bare word "Mozilla" with nothing else.
        || req.http.user-agent ~ "^Mozilla$"
        || req.http.user-agent ~ "libwww"
        || req.http.user-agent ~ "lwp-trivial"
        || req.http.user-agent ~ "PHP/"
        || req.http.user-agent ~ "urllib"
        || req.http.user-agent ~ "GT:WWW"
        || req.http.user-agent ~ "Snoopy"
        || req.http.user-agent ~ "MFC_Tear_Sample"
        || req.http.user-agent ~ "HTTP::Lite"
        || req.http.user-agent ~ "PHPCrawl"
        || req.http.user-agent ~ "URI::Fetch"
        || req.http.user-agent ~ "Zend_Http_Client"
        || req.http.user-agent ~ "http client"
        || req.http.user-agent ~ "PECL::HTTP"
        || req.http.user-agent ~ "panscient.com"
        || req.http.user-agent ~ "IBM EVV"
        || req.http.user-agent ~ "Bork-edition"
        || req.http.user-agent ~ "Fetch API Request"
        || req.http.user-agent ~ "PleaseCrawl"
        # Three space-separated words: one capitalised word of 4+ letters
        # followed by two all-lowercase words of 4+ letters each.
        || req.http.user-agent ~ "[A-Z][a-z]{3,} [a-z]{4,} [a-z]{4,}"
        || req.http.user-agent ~ "layeredtech.com"
        || req.http.user-agent ~ "WEP Search"
        || req.http.user-agent ~ "Wells Search II"
        || req.http.user-agent ~ "Missigua Locator"
        || req.http.user-agent ~ "ISC Systems iRc Search 2.1"
        || req.http.user-agent ~ "Microsoft URL Control"
        || req.http.user-agent ~ "Indy Library"

        # --- Exact matches ---------------------------------------------------
        # Complete User-Agent strings. A few of them (e.g. "Wells Search II",
        # "ISC Systems iRc Search 2.1") are also caught by a pattern above;
        # they are kept so this list stays complete on its own.
        || req.http.user-agent == "8484 Boston Project v 1.0"
        || req.http.user-agent == "Atomic_Email_Hunter/4.0"
        || req.http.user-agent == "atSpider/1.0"
        || req.http.user-agent == "autoemailspider"
        || req.http.user-agent == "China Local Browse 2.6"
        || req.http.user-agent == "ContactBot/0.2"
        || req.http.user-agent == "ContentSmartz"
        || req.http.user-agent == "DataCha0s/2.0"
        || req.http.user-agent == "DBrowse 1.4b"
        || req.http.user-agent == "DBrowse 1.4d"
        || req.http.user-agent == "Demo Bot DOT 16b"
        || req.http.user-agent == "Demo Bot Z 16b"
        || req.http.user-agent == "DSurf15a 01"
        || req.http.user-agent == "DSurf15a 71"
        || req.http.user-agent == "DSurf15a 81"
        || req.http.user-agent == "DSurf15a VA"
        || req.http.user-agent == "EBrowse 1.4b"
        || req.http.user-agent == "Educate Search VxB"
        || req.http.user-agent == "EmailSiphon"
        || req.http.user-agent == "EmailWolf 1.00"
        || req.http.user-agent == "ESurf15a 15"
        || req.http.user-agent == "ExtractorPro"
        || req.http.user-agent == "Franklin Locator 1.8"
        || req.http.user-agent == "FSurf15a 01"
        || req.http.user-agent == "Full Web Bot 0416B"
        || req.http.user-agent == "Full Web Bot 0516B"
        || req.http.user-agent == "Full Web Bot 2816B"
        || req.http.user-agent == "Guestbook Auto Submitter"
        || req.http.user-agent == "Industry Program 1.0.x"
        || req.http.user-agent == "ISC Systems iRc Search 2.1"
        || req.http.user-agent == "IUPUI Research Bot v 1.9a"
        # The e-mail address in this entry had been replaced by the
        # "[email protected]" placeholder that e-mail obfuscation leaves in
        # web pages, so the comparison could never succeed. Restored from the
        # published bad-bot list this entry comes from.
        || req.http.user-agent == "LARBIN-EXPERIMENTAL (efp@gmx.net)"
        || req.http.user-agent == "LetsCrawl.com/1.0 +http://letscrawl.com/"
        || req.http.user-agent == "Lincoln State Web Browser"
        || req.http.user-agent == "LMQueueBot/0.2"
        || req.http.user-agent == "LWP::Simple/5.803"
        || req.http.user-agent == "Mac Finder 1.0.xx"
        || req.http.user-agent == "MFC Foundation Class Library 4.0"
        || req.http.user-agent == "Microsoft URL Control - 6.00.8xxx"
        || req.http.user-agent == "Missauga Locate 1.0.0"
        || req.http.user-agent == "Missigua Locator 1.9"
        || req.http.user-agent == "Missouri College Browse"
        || req.http.user-agent == "Mizzu Labs 2.2"
        || req.http.user-agent == "Mo College 1.9"
        || req.http.user-agent == "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)"
        || req.http.user-agent == "Mozilla/3.0 (compatible; Indy Library)"
        || req.http.user-agent == "Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)"
        || req.http.user-agent == "Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)"
        # No closing parenthesis: this literal is compared as written.
        || req.http.user-agent == "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent"
        # Restored from the "[email protected]" placeholder (see LARBIN entry).
        || req.http.user-agent == "Mozilla/4.0 efp@gmx.net"
        || req.http.user-agent == "Mozilla/5.0 (Version: xxxx Type:xx)"
        || req.http.user-agent == "MVAClient"
        || req.http.user-agent == "NameOfAgent (CMS Spider)"
        || req.http.user-agent == "NASA Search 1.0"
        || req.http.user-agent == "Nsauditor/1.x"
        || req.http.user-agent == "PBrowse 1.4b"
        || req.http.user-agent == "PEval 1.4b"
        || req.http.user-agent == "Poirot"
        || req.http.user-agent == "Port Huron Labs"
        || req.http.user-agent == "Production Bot 0116B"
        || req.http.user-agent == "Production Bot 2016B"
        || req.http.user-agent == "Production Bot DOT 3016B"
        || req.http.user-agent == "Program Shareware 1.0.2"
        || req.http.user-agent == "PSurf15a 11"
        || req.http.user-agent == "PSurf15a 51"
        || req.http.user-agent == "PSurf15a VA"
        || req.http.user-agent == "psycheclone"
        || req.http.user-agent == "RSurf15a 41"
        || req.http.user-agent == "RSurf15a 51"
        || req.http.user-agent == "RSurf15a 81"
        # Restored from the "[email protected]" placeholder (see LARBIN entry).
        || req.http.user-agent == "searchbot admin@google.com"
        || req.http.user-agent == "ShablastBot 1.0"
        || req.http.user-agent == "snap.com beta crawler v0"
        || req.http.user-agent == "Snapbot/1.0"
        || req.http.user-agent == "sogou develop spider"
        || req.http.user-agent == "Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
        || req.http.user-agent == "sogou spider"
        || req.http.user-agent == "Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
        || req.http.user-agent == "sohu agent"
        || req.http.user-agent == "SSurf15a 11"
        || req.http.user-agent == "TSurf15a 11"
        || req.http.user-agent == "Under the Rainbow 2.2"
        || req.http.user-agent == "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
        || req.http.user-agent == "VadixBot"
        || req.http.user-agent == "WebVulnCrawl.blogspot.com/1.0 libwww-perl/5.803"
        || req.http.user-agent == "Wells Search II"
        || req.http.user-agent == "WEP Search 00"
    ) {
        # Answer directly from Varnish; the request never reaches a backend.
        return(synth(403, "Forbidden Bots"));
    }
}