Skip to content

Commit 6ce890f

Browse files
authored
Merge pull request #100 from lavafroth/refactor
Refactor to dry up code
2 parents: e75ad3d + 6ad0592 · commit 6ce890f

File tree

12 files changed

+77
-122
lines changed

12 files changed

+77
-122
lines changed

go.mod

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
module github.com/lc/gau/v2
22

3-
go 1.17
3+
go 1.20
44

55
require (
66
github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89
7+
github.com/deckarep/golang-set/v2 v2.3.0
78
github.com/json-iterator/go v1.1.12
89
github.com/lynxsecurity/pflag v1.1.3
910
github.com/lynxsecurity/viper v1.10.0

go.sum

+2
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89/go.mod h1:/0
55
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
66
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
77
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
8+
github.com/deckarep/golang-set/v2 v2.3.0 h1:qs18EKUfHm2X9fA50Mr/M5hccg2tNnVqsiBImnyDs0g=
9+
github.com/deckarep/golang-set/v2 v2.3.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
810
github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI=
911
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
1012
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=

pkg/httpclient/client.go

+1 -2
Original file line number | Diff line number | Diff line change
@@ -36,10 +36,9 @@ func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint,
3636
req.SetRequestURI(url)
3737
respBody, err = doReq(c, req, timeout)
3838
if err == nil {
39-
goto done
39+
break
4040
}
4141
}
42-
done:
4342
if err != nil {
4443
return nil, err
4544
}

pkg/output/output.go

+19 -41
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package output
22

33
import (
4+
mapset "github.com/deckarep/golang-set/v2"
45
jsoniter "github.com/json-iterator/go"
56
"github.com/valyala/bytebufferpool"
67
"io"
@@ -13,40 +14,24 @@ type JSONResult struct {
1314
Url string `json:"url"`
1415
}
1516

16-
func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) error {
17-
lastURL := make(map[string]struct{})
17+
func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) error {
18+
lastURL := mapset.NewThreadUnsafeSet[string]()
1819
for result := range results {
1920
buf := bytebufferpool.Get()
20-
if len(blacklistMap) != 0 {
21-
u, err := url.Parse(result)
22-
if err != nil {
23-
continue
24-
}
25-
base := strings.Split(path.Base(u.Path), ".")
26-
ext := base[len(base)-1]
27-
if ext != "" {
28-
_, ok := blacklistMap[strings.ToLower(ext)]
29-
if ok {
30-
continue
31-
}
32-
}
21+
u, err := url.Parse(result)
22+
if err != nil {
23+
continue
3324
}
34-
if RemoveParameters {
35-
u, err := url.Parse(result)
36-
if err != nil {
37-
continue
38-
}
39-
if _, ok := lastURL[u.Host+u.Path]; ok {
40-
continue
41-
} else {
42-
lastURL[u.Host+u.Path] = struct{}{} ;
43-
}
44-
25+
if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
26+
continue
27+
}
28+
if RemoveParameters && !lastURL.Add(u.Host+u.Path) {
29+
continue
4530
}
4631

4732
buf.B = append(buf.B, []byte(result)...)
4833
buf.B = append(buf.B, "\n"...)
49-
_, err := writer.Write(buf.B)
34+
_, err = writer.Write(buf.B)
5035
if err != nil {
5136
return err
5237
}
@@ -55,23 +40,16 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
5540
return nil
5641
}
5742

58-
func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) {
43+
func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) {
5944
var jr JSONResult
6045
enc := jsoniter.NewEncoder(writer)
6146
for result := range results {
62-
if len(blacklistMap) != 0 {
63-
u, err := url.Parse(result)
64-
if err != nil {
65-
continue
66-
}
67-
base := strings.Split(path.Base(u.Path), ".")
68-
ext := base[len(base)-1]
69-
if ext != "" {
70-
_, ok := blacklistMap[strings.ToLower(ext)]
71-
if ok {
72-
continue
73-
}
74-
}
47+
u, err := url.Parse(result)
48+
if err != nil {
49+
continue
50+
}
51+
if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
52+
continue
7553
}
7654
jr.Url = result
7755
if err := enc.Encode(jr); err != nil {

pkg/providers/commoncrawl/commoncrawl.go

+9 -18
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ type Client struct {
2929
}
3030

3131
func New(c *providers.Config, filters providers.Filters) (*Client, error) {
32-
client := &Client{config: c, filters: filters}
3332
// Fetch the list of available CommonCrawl Api URLs.
3433
resp, err := httpclient.MakeRequest(c.Client, "http://index.commoncrawl.org/collinfo.json", c.MaxRetries, c.Timeout)
3534
if err != nil {
@@ -45,8 +44,7 @@ func New(c *providers.Config, filters providers.Filters) (*Client, error) {
4544
return nil, errors.New("failed to grab latest commoncrawl index")
4645
}
4746

48-
client.apiURL = r[0].API
49-
return client, nil
47+
return &Client{config: c, filters: filters, apiURL: r[0].API}, nil
5048
}
5149

5250
func (c *Client) Name() string {
@@ -62,9 +60,7 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
6260
}
6361
// 0 pages means no results
6462
if p.Pages == 0 {
65-
if c.config.Verbose {
66-
logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain)
67-
}
63+
logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain)
6864
return nil
6965
}
7066

@@ -74,9 +70,7 @@ paginate:
7470
case <-ctx.Done():
7571
break paginate
7672
default:
77-
if c.config.Verbose {
78-
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
79-
}
73+
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
8074
apiURL := c.formatURL(domain, page)
8175
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
8276
if err != nil {
@@ -111,18 +105,15 @@ func (c *Client) formatURL(domain string, page uint) string {
111105
}
112106

113107
// Fetch the number of pages.
114-
func (c *Client) getPagination(domain string) (paginationResult, error) {
108+
func (c *Client) getPagination(domain string) (r paginationResult, err error) {
115109
url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0))
110+
var resp []byte
116111

117-
resp, err := httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
112+
resp, err = httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
118113
if err != nil {
119-
return paginationResult{}, err
120-
}
121-
122-
var r paginationResult
123-
if err = jsoniter.Unmarshal(resp, &r); err != nil {
124-
return r, err
114+
return
125115
}
126116

127-
return r, nil
117+
err = jsoniter.Unmarshal(resp, &r)
118+
return
128119
}

pkg/providers/otx/otx.go

+12 -16
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,12 @@ func (c *Client) Name() string {
4747

4848
func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
4949
paginate:
50-
for page := 1; ; page++ {
50+
for page := uint(1); ; page++ {
5151
select {
5252
case <-ctx.Done():
5353
break paginate
5454
default:
55-
if c.config.Verbose {
56-
logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain)
57-
}
55+
logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain)
5856
apiURL := c.formatURL(domain, page)
5957
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
6058
if err != nil {
@@ -77,20 +75,18 @@ paginate:
7775
return nil
7876
}
7977

80-
func (c *Client) formatURL(domain string, page int) string {
78+
func (c *Client) formatURL(domain string, page uint) string {
79+
category := "hostname"
8180
if !domainutil.HasSubdomain(domain) {
82-
return fmt.Sprintf(_BaseURL+"api/v1/indicators/domain/%s/url_list?limit=100&page=%d",
83-
domain, page,
84-
)
85-
} else if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains {
86-
return fmt.Sprintf(_BaseURL+"api/v1/indicators/domain/%s/url_list?limit=100&page=%d",
87-
domainutil.Domain(domain), page,
88-
)
89-
} else {
90-
return fmt.Sprintf(_BaseURL+"api/v1/indicators/hostname/%s/url_list?limit=100&page=%d",
91-
domain, page,
92-
)
81+
category = "domain"
82+
}
83+
if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains {
84+
domain = domainutil.Domain(domain)
85+
category = "domain"
9386
}
87+
88+
return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page)
89+
9490
}
9591

9692
var _BaseURL = "https://otx.alienvault.com/"

pkg/providers/providers.go

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package providers
22

33
import (
44
"context"
5+
mapset "github.com/deckarep/golang-set/v2"
56
"github.com/valyala/fasthttp"
67
)
78

@@ -21,13 +22,12 @@ type URLScan struct {
2122
type Config struct {
2223
Threads uint
2324
Timeout uint
24-
Verbose bool
2525
MaxRetries uint
2626
IncludeSubdomains bool
2727
RemoveParameters bool
2828
Client *fasthttp.Client
2929
Providers []string
30-
Blacklist map[string]struct{}
30+
Blacklist mapset.Set[string]
3131
Output string
3232
JSON bool
3333
URLScan URLScan

pkg/providers/urlscan/types.go

+4 -7
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package urlscan
22

33
import (
4-
"reflect"
54
"strings"
65
)
76

@@ -29,12 +28,10 @@ type archivedPage struct {
2928

3029
func parseSort(sort []interface{}) string {
3130
var sortParam []string
32-
for i := 0; i < len(sort); i++ {
33-
t := reflect.TypeOf(sort[i])
34-
v := reflect.ValueOf(sort[i])
35-
switch t.Kind() {
36-
case reflect.String:
37-
sortParam = append(sortParam, v.String())
31+
for _, t := range sort {
32+
switch t.(type) {
33+
case string:
34+
sortParam = append(sortParam, t.(string))
3835
}
3936
}
4037
return strings.Join(sortParam, ",")

pkg/providers/urlscan/urlscan.go

+5 -11
Original file line number | Diff line number | Diff line change
@@ -41,16 +41,13 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
4141
header.Value = c.config.URLScan.APIKey
4242
}
4343

44-
page := 0
4544
paginate:
46-
for {
45+
for page := uint(0); ; page++ {
4746
select {
4847
case <-ctx.Done():
4948
break paginate
5049
default:
51-
if c.config.Verbose {
52-
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
53-
}
50+
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
5451
apiURL := c.formatURL(domain, searchAfter)
5552
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout, header)
5653
if err != nil {
@@ -64,9 +61,7 @@ paginate:
6461
}
6562
// rate limited
6663
if result.Status == 429 {
67-
if c.config.Verbose {
68-
logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429")
69-
}
64+
logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429, probably being rate limited")
7065
break paginate
7166
}
7267

@@ -89,18 +84,17 @@ paginate:
8984
if !result.HasMore {
9085
break paginate
9186
}
92-
page++
9387
}
9488
}
9589
return nil
9690
}
9791

9892
func (c *Client) formatURL(domain string, after string) string {
9993
if after != "" {
100-
return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + "&search_after=" + after
94+
after = "&search_after=" + after
10195
}
10296

103-
return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain)
97+
return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + after
10498
}
10599

106100
func setBaseURL(baseURL string) {

pkg/providers/wayback/wayback.go

+7 -13
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,8 @@ type Client struct {
2222
config *providers.Config
2323
}
2424

25-
func New(c *providers.Config, filters providers.Filters) *Client {
26-
return &Client{
27-
filters: filters,
28-
config: c,
29-
}
25+
func New(config *providers.Config, filters providers.Filters) *Client {
26+
return &Client{filters, config}
3027
}
3128

3229
func (c *Client) Name() string {
@@ -43,14 +40,13 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
4340
if err != nil {
4441
return fmt.Errorf("failed to fetch wayback pagination: %s", err)
4542
}
43+
4644
for page := uint(0); page < pages; page++ {
4745
select {
4846
case <-ctx.Done():
4947
return nil
5048
default:
51-
if c.config.Verbose {
52-
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
53-
}
49+
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
5450
apiURL := c.formatURL(domain, page)
5551
// make HTTP request
5652
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
@@ -70,11 +66,9 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
7066
}
7167

7268
// output results
73-
for i, entry := range result {
74-
// Skip first result by default
75-
if i != 0 {
76-
results <- entry[0]
77-
}
69+
// Slicing as [1:] to skip first result by default
70+
for _, entry := range result[1:] {
71+
results <- entry[0]
7872
}
7973
}
8074
}

0 commit comments

Comments
 (0)