Skip to content

Commit

Permalink
make standard request when it's form
Browse files Browse the repository at this point in the history
  • Loading branch information
dogancanbakir committed Jun 6, 2024
1 parent ab8e870 commit 2c59b60
Showing 1 changed file with 94 additions and 0 deletions.
94 changes: 94 additions & 0 deletions pkg/engine/hybrid/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package hybrid

import (
"bytes"
"context"
"io"
"net/http"
"net/http/httputil"
Expand All @@ -26,6 +27,10 @@ import (
)

func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Request) (*navigation.Response, error) {
if request.Tag == "form" {
return c.makeStandardRequest(s, request)
}

depth := request.Depth + 1
response := &navigation.Response{
Depth: depth,
Expand Down Expand Up @@ -233,6 +238,95 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
return response, nil
}

// makeStandardRequest performs the navigation request with the session's plain
// HTTP client instead of the headless browser and converts the result into a
// navigation.Response. It is a copy of the makeRequest method from
// standard/crawl.go.
//
// The returned response always carries Depth and RootHostname. On success it
// additionally carries the (size-limited) body, the parsed document, status
// code, flattened headers, detected technologies, optionally the extracted
// forms, and the raw response dump. request.Raw is populated with the dump of
// the outgoing request as a side effect.
//
// An empty navigation.Response with a nil error is returned when
// UniqueFilter.UniqueContent returns false for the body. A 101 Switching
// Protocols answer returns the bare response without reading the body.
func (c *Crawler) makeStandardRequest(s *common.CrawlSession, request *navigation.Request) (*navigation.Response, error) {
	response := &navigation.Response{
		Depth:        request.Depth + 1,
		RootHostname: s.Hostname,
	}
	ctx := context.WithValue(s.Ctx, navigation.Depth{}, request.Depth)
	httpReq, err := http.NewRequestWithContext(ctx, request.Method, request.URL, nil)
	if err != nil {
		return response, err
	}
	if request.Body != "" && request.Method != "GET" {
		httpReq.Body = io.NopCloser(strings.NewReader(request.Body))
	}
	req, err := retryablehttp.FromRequest(httpReq)
	if err != nil {
		return response, err
	}
	req.Header.Set("User-Agent", utils.WebUserAgent())

	// Set the headers for the request. Crawler-level headers are applied last
	// so they override per-request headers with the same name.
	for k, v := range request.Headers {
		req.Header.Set(k, v)
		if k == "Host" {
			req.Host = v
		}
	}
	for k, v := range c.Headers {
		req.Header.Set(k, v)
		if k == "Host" {
			req.Host = v
		}
	}

	resp, err := s.HttpClient.Do(req)
	if resp != nil && resp.Body != nil {
		// Capture the network body now: resp.Body is swapped for an in-memory
		// reader further down (and again by httputil.DumpResponse), so a
		// deferred closure that dereferenced resp.Body at exit would drain and
		// close the replacement and leak the real connection body.
		netBody := resp.Body
		switchingProtocols := resp.StatusCode == http.StatusSwitchingProtocols
		defer func() {
			if !switchingProtocols {
				// Best-effort drain so the transport can reuse the connection.
				_, _ = io.Copy(io.Discard, netBody)
			}
			_ = netBody.Close()
		}()
	}

	// The dump is recorded even when the request failed; a dump error only
	// leaves request.Raw empty.
	rawRequestBytes, _ := req.Dump()
	request.Raw = string(rawRequestBytes)

	if err != nil {
		return response, err
	}
	if resp.StatusCode == http.StatusSwitchingProtocols {
		return response, nil
	}
	// Read at most BodyReadSize bytes of the body.
	limitReader := io.LimitReader(resp.Body, int64(c.Options.Options.BodyReadSize))
	data, err := io.ReadAll(limitReader)
	if err != nil {
		return response, err
	}
	if !c.Options.UniqueFilter.UniqueContent(data) {
		return &navigation.Response{}, nil
	}

	technologies := c.Options.Wappalyzer.Fingerprint(resp.Header, data)
	response.Technologies = mapsutil.GetKeys(technologies)

	// Expose the already-read bytes as the response body so later consumers
	// (including the dump below) see the same content.
	resp.Body = io.NopCloser(bytes.NewReader(data))

	response.Body = string(data)
	response.Resp = resp

	// Parse the document but only touch it when parsing succeeded; the
	// parse error is reported at the end, after the remaining response fields
	// have been filled in.
	doc, docErr := goquery.NewDocumentFromReader(bytes.NewReader(data))
	if docErr == nil && doc != nil {
		doc.Url, _ = url.Parse(request.URL)
		response.Reader = doc
	}
	response.StatusCode = resp.StatusCode
	response.Headers = utils.FlattenHeaders(resp.Header)
	if c.Options.Options.FormExtraction && response.Reader != nil {
		response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...)
	}

	// Make Content-Length in the dump match the bytes actually kept.
	resp.ContentLength = int64(len(data))

	rawResponseBytes, _ := httputil.DumpResponse(resp, true)
	response.Raw = string(rawResponseBytes)

	if docErr != nil {
		return response, errorutil.NewWithTag("hybrid", "could not make document from reader").Wrap(docErr)
	}

	return response, nil
}

func (c *Crawler) addHeadersToPage(page *rod.Page) {
if len(c.Headers) == 0 {
return
Expand Down

0 comments on commit 2c59b60

Please sign in to comment.