-
Notifications
You must be signed in to change notification settings - Fork 372
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved referrer parsing. Match hostname against blacklist using community-maintained blacklist file graciously provided by Matomo (https://github.com/matomo-org/referrer-spam-blacklist). Closes #170, relates to #154.
- Loading branch information
1 parent
9589072
commit bca066b
Showing
8 changed files
with
1,360 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,54 @@ | ||
package aggregator | ||
|
||
import ( | ||
"net/url" | ||
"testing" | ||
) | ||
|
||
func TestProcess(t *testing.T) { | ||
func TestParseReferrer(t *testing.T) { | ||
testsValid := map[string]*url.URL{ | ||
"https://www.usefathom.com/?utm_source=github": &url.URL{ | ||
Scheme: "https", | ||
Host: "www.usefathom.com", | ||
Path: "/", | ||
}, | ||
"https://www.usefathom.com/privacy/amp/?utm_source=github": &url.URL{ | ||
Scheme: "https", | ||
Host: "www.usefathom.com", | ||
Path: "/privacy/", | ||
}, | ||
} | ||
testsErr := []string{ | ||
"mysite.com", | ||
"foobar", | ||
"", | ||
} | ||
|
||
for r, e := range testsValid { | ||
v, err := parseReferrer(r) | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
|
||
if v.Host != e.Host { | ||
t.Errorf("Invalid Host: expected %s, got %s", e.Host, v.Host) | ||
} | ||
|
||
if v.Scheme != e.Scheme { | ||
t.Errorf("Invalid Scheme: expected %s, got %s", e.Scheme, v.Scheme) | ||
} | ||
|
||
if v.Path != e.Path { | ||
t.Errorf("Invalid Path: expected %s, got %s", e.Path, v.Path) | ||
} | ||
|
||
} | ||
|
||
for _, r := range testsErr { | ||
v, err := parseReferrer(r) | ||
if err == nil { | ||
t.Errorf("Expected err, got %#v", v) | ||
} | ||
} | ||
|
||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package aggregator | ||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"strings" | ||
) | ||
|
||
// blacklist holds the raw contents of a domain blacklist.
type blacklist struct {
	// data is the unparsed blacklist text; Has scans it line by line,
	// treating each line as one domain.
	data []byte
}
|
||
func newBlacklist() (*blacklist, error) { | ||
var err error | ||
b := &blacklist{} | ||
b.data, err = Asset("blacklist.txt") | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return b, nil | ||
} | ||
|
||
// Has returns true if the given domain appears on the blacklist | ||
// Uses sub-string matching, so if usesfathom.com is blacklisted then this function will also return true for danny.usesfathom.com | ||
func (b *blacklist) Has(r string) bool { | ||
if r == "" { | ||
return false | ||
} | ||
|
||
scanner := bufio.NewScanner(bytes.NewReader(b.data)) | ||
domain := "" | ||
|
||
for scanner.Scan() { | ||
domain = scanner.Text() | ||
if strings.HasSuffix(r, domain) { | ||
return true | ||
} | ||
} | ||
|
||
return false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package aggregator | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestBlacklistHas(t *testing.T) { | ||
b, err := newBlacklist() | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
|
||
table := map[string]bool{ | ||
"03e.info": true, | ||
"zvetki.ru": true, | ||
"usefathom.com": false, | ||
"foo.03e.info": true, // sub-string match | ||
} | ||
|
||
for r, e := range table { | ||
if v := b.Has(r); v != e { | ||
t.Errorf("Expected %v, got %v", e, v) | ||
} | ||
} | ||
} |
Oops, something went wrong.
bca066b
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Awesome!! Thanks @dannyvankooten