Skip to content

Commit eb99c7c

Browse files
authored
Merge pull request #138 from lc/lc/fix-wayback
fix(wayback): wayback API broke gau
2 parents d556483 + 69833dd commit eb99c7c

File tree

9 files changed, with 34 additions and 51 deletions.

cmd/gau/main.go

+5-9
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ func main() {
3131

3232
results := make(chan string)
3333

34-
var out = os.Stdout
34+
out := os.Stdout
3535
// Handle results in background
3636
if config.Output != "" {
37-
out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
37+
out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
3838
if err != nil {
3939
log.Fatalf("Could not open output file: %v\n", err)
4040
}
@@ -54,7 +54,6 @@ func main() {
5454

5555
workChan := make(chan runner.Work)
5656
gau.Start(workChan, results)
57-
5857
domains := flags.Args()
5958
if len(domains) > 0 {
6059
for _, provider := range gau.Providers {
@@ -67,15 +66,12 @@ func main() {
6766
for _, provider := range gau.Providers {
6867
for sc.Scan() {
6968
workChan <- runner.NewWork(sc.Text(), provider)
70-
71-
if err := sc.Err(); err != nil {
72-
log.Fatal(err)
73-
}
69+
}
70+
if err := sc.Err(); err != nil {
71+
log.Fatal(err)
7472
}
7573
}
76-
7774
}
78-
7975
close(workChan)
8076

8177
// wait for providers to fetch URLS

pkg/httpclient/client.go

+10-6
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@ package httpclient
22

33
import (
44
"errors"
5-
"github.com/valyala/fasthttp"
65
"math/rand"
76
"time"
7+
8+
"github.com/valyala/fasthttp"
89
)
910

10-
var ErrNilResponse = errors.New("unexpected nil response")
11-
var ErrNon200Response = errors.New("API responded with non-200 status code")
11+
var (
12+
ErrNilResponse = errors.New("unexpected nil response")
13+
ErrNon200Response = errors.New("API responded with non-200 status code")
14+
ErrBadRequest = errors.New("API responded with 400 status code")
15+
)
1216

1317
type Header struct {
1418
Key string
@@ -54,9 +58,11 @@ func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, err
5458
return nil, err
5559
}
5660
if resp.StatusCode() != 200 {
61+
if resp.StatusCode() == 400 {
62+
return nil, ErrBadRequest
63+
}
5764
return nil, ErrNon200Response
5865
}
59-
6066
if resp.Body() == nil {
6167
return nil, ErrNilResponse
6268
}
@@ -79,9 +85,7 @@ func getUserAgent() string {
7985
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
8086
}
8187

82-
rand.Seed(time.Now().UnixNano())
8388
randomIndex := rand.Intn(len(payload))
84-
8589
pick := payload[randomIndex]
8690

8791
return pick

pkg/output/output.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
package output
22

33
import (
4-
mapset "github.com/deckarep/golang-set/v2"
5-
jsoniter "github.com/json-iterator/go"
6-
"github.com/valyala/bytebufferpool"
74
"io"
85
"net/url"
96
"path"
107
"strings"
8+
9+
mapset "github.com/deckarep/golang-set/v2"
10+
jsoniter "github.com/json-iterator/go"
11+
"github.com/valyala/bytebufferpool"
1112
)
1213

1314
type JSONResult struct {

pkg/providers/otx/otx.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package otx
33
import (
44
"context"
55
"fmt"
6+
67
"github.com/bobesa/go-domain-util/domainutil"
78
jsoniter "github.com/json-iterator/go"
89
"github.com/lc/gau/v2/pkg/httpclient"
@@ -84,7 +85,6 @@ func (c *Client) formatURL(domain string, page uint) string {
8485
}
8586

8687
return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page)
87-
8888
}
8989

9090
var _BaseURL = "https://otx.alienvault.com/"

pkg/providers/providers.go

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package providers
22

33
import (
44
"context"
5+
56
mapset "github.com/deckarep/golang-set/v2"
67
"github.com/valyala/fasthttp"
78
)

pkg/providers/urlscan/types.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@ import (
44
"strings"
55
)
66

7-
var (
8-
_BaseURL = "https://urlscan.io/"
9-
)
7+
var _BaseURL = "https://urlscan.io/"
108

119
type apiResponse struct {
1210
Status int `json:"status"`

pkg/providers/urlscan/urlscan.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ import (
44
"bytes"
55
"context"
66
"fmt"
7+
"strings"
8+
79
jsoniter "github.com/json-iterator/go"
810
"github.com/lc/gau/v2/pkg/httpclient"
911
"github.com/lc/gau/v2/pkg/providers"
1012
"github.com/sirupsen/logrus"
11-
"strings"
1213
)
1314

1415
const (
@@ -30,6 +31,7 @@ func New(c *providers.Config) *Client {
3031
func (c *Client) Name() string {
3132
return Name
3233
}
34+
3335
func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
3436
var searchAfter string
3537
var header httpclient.Header

pkg/providers/wayback/wayback.go

+7-27
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package wayback
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
7+
68
jsoniter "github.com/json-iterator/go"
79
"github.com/lc/gau/v2/pkg/httpclient"
810
"github.com/lc/gau/v2/pkg/providers"
@@ -36,12 +38,7 @@ type waybackResult [][]string
3638
// Fetch fetches all urls for a given domain and sends them to a channel.
3739
// It returns an error should one occur.
3840
func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
39-
pages, err := c.getPagination(domain)
40-
if err != nil {
41-
return fmt.Errorf("failed to fetch wayback pagination: %s", err)
42-
}
43-
44-
for page := uint(0); page < pages; page++ {
41+
for page := uint(0); ; page++ {
4542
select {
4643
case <-ctx.Done():
4744
return nil
@@ -51,9 +48,11 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
5148
// make HTTP request
5249
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
5350
if err != nil {
51+
if errors.Is(err, httpclient.ErrBadRequest) {
52+
return nil
53+
}
5454
return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err)
5555
}
56-
5756
var result waybackResult
5857
if err = jsoniter.Unmarshal(resp, &result); err != nil {
5958
return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err)
@@ -72,7 +71,6 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
7271
}
7372
}
7473
}
75-
return nil
7674
}
7775

7876
// formatUrl returns a formatted URL for the Wayback API
@@ -82,25 +80,7 @@ func (c *Client) formatURL(domain string, page uint) string {
8280
}
8381
filterParams := c.filters.GetParameters(true)
8482
return fmt.Sprintf(
85-
"https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&page=%d",
83+
"https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d",
8684
domain, page,
8785
) + filterParams
8886
}
89-
90-
// getPagination returns the number of pages for Wayback
91-
func (c *Client) getPagination(domain string) (uint, error) {
92-
url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0))
93-
resp, err := httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
94-
95-
if err != nil {
96-
return 0, err
97-
}
98-
99-
var paginationResult uint
100-
101-
if err = jsoniter.Unmarshal(resp, &paginationResult); err != nil {
102-
return 0, err
103-
}
104-
105-
return paginationResult, nil
106-
}

runner/runner.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ package runner
33
import (
44
"context"
55
"fmt"
6+
"sync"
7+
68
"github.com/lc/gau/v2/pkg/providers"
79
"github.com/lc/gau/v2/pkg/providers/commoncrawl"
810
"github.com/lc/gau/v2/pkg/providers/otx"
911
"github.com/lc/gau/v2/pkg/providers/urlscan"
1012
"github.com/lc/gau/v2/pkg/providers/wayback"
1113
"github.com/sirupsen/logrus"
12-
"sync"
1314
)
1415

1516
type Runner struct {

0 commit comments

Comments
 (0)