@@ -2,7 +2,9 @@ package wayback
2
2
3
3
import (
4
4
"context"
5
+ "errors"
5
6
"fmt"
7
+
6
8
jsoniter "github.com/json-iterator/go"
7
9
"github.com/lc/gau/v2/pkg/httpclient"
8
10
"github.com/lc/gau/v2/pkg/providers"
@@ -36,12 +38,7 @@ type waybackResult [][]string
36
38
// Fetch fetches all urls for a given domain and sends them to a channel.
37
39
// It returns an error should one occur.
38
40
func (c * Client ) Fetch (ctx context.Context , domain string , results chan string ) error {
39
- pages , err := c .getPagination (domain )
40
- if err != nil {
41
- return fmt .Errorf ("failed to fetch wayback pagination: %s" , err )
42
- }
43
-
44
- for page := uint (0 ); page < pages ; page ++ {
41
+ for page := uint (0 ); ; page ++ {
45
42
select {
46
43
case <- ctx .Done ():
47
44
return nil
@@ -51,9 +48,11 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
51
48
// make HTTP request
52
49
resp , err := httpclient .MakeRequest (c .config .Client , apiURL , c .config .MaxRetries , c .config .Timeout )
53
50
if err != nil {
51
+ if errors .Is (err , httpclient .ErrBadRequest ) {
52
+ return nil
53
+ }
54
54
return fmt .Errorf ("failed to fetch wayback results page %d: %s" , page , err )
55
55
}
56
-
57
56
var result waybackResult
58
57
if err = jsoniter .Unmarshal (resp , & result ); err != nil {
59
58
return fmt .Errorf ("failed to decode wayback results for page %d: %s" , page , err )
@@ -72,7 +71,6 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
72
71
}
73
72
}
74
73
}
75
- return nil
76
74
}
77
75
78
76
// formatURL returns a formatted URL for the Wayback API.
@@ -82,25 +80,7 @@ func (c *Client) formatURL(domain string, page uint) string {
82
80
}
83
81
filterParams := c .filters .GetParameters (true )
84
82
return fmt .Sprintf (
85
- "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&page=%d" ,
83
+ "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100& page=%d" ,
86
84
domain , page ,
87
85
) + filterParams
88
86
}
89
-
90
- // getPagination returns the number of pages for Wayback
91
- func (c * Client ) getPagination (domain string ) (uint , error ) {
92
- url := fmt .Sprintf ("%s&showNumPages=true" , c .formatURL (domain , 0 ))
93
- resp , err := httpclient .MakeRequest (c .config .Client , url , c .config .MaxRetries , c .config .Timeout )
94
-
95
- if err != nil {
96
- return 0 , err
97
- }
98
-
99
- var paginationResult uint
100
-
101
- if err = jsoniter .Unmarshal (resp , & paginationResult ); err != nil {
102
- return 0 , err
103
- }
104
-
105
- return paginationResult , nil
106
- }
0 commit comments