diff --git a/modules/go_modules/getLinks.go b/modules/go_modules/getLinks.go
new file mode 100644
index 00000000..1da3949f
--- /dev/null
+++ b/modules/go_modules/getLinks.go
@@ -0,0 +1,111 @@
+// Package main exposes GetLinks to C callers via cgo. GetLinks collects .onion
+// links from a page fetched through a SOCKS5 proxy and probes each of them.
+package main
+
+import "C"
+
+import (
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"regexp"
+	"sync"
+	"time"
+
+	"golang.org/x/net/html"
+)
+
+// onionURLPattern matches http(s) URLs whose host is an alphanumeric .onion
+// name, optionally prefixed with "www.", followed by a path or the end of the
+// string. It is compiled once here instead of on every validOnionUrl call.
+var onionURLPattern = regexp.MustCompile(`^https?://(www\.)?[a-zA-Z0-9]+\.onion(/|$)`)
+
+// wg tracks the reachability probes started by GetLinks.
+var wg sync.WaitGroup
+
+// validOnionUrl reports whether rawURL is an http or https URL whose host is
+// a .onion name, with or without a path.
+func validOnionUrl(rawURL string) bool {
+	return onionURLPattern.MatchString(rawURL)
+}
+
+// asyncHead requests link with client and prints whether it is reachable. A
+// link counts as reachable when the request succeeds with a status below 400.
+// It calls wg.Done when finished, so the caller must wg.Add(1) beforehand.
+func asyncHead(client *http.Client, link string) {
+	defer wg.Done()
+	resp, err := client.Get(link)
+	if err != nil {
+		fmt.Printf("%v is not reachable.\n", link)
+		return
+	}
+	// Close the body on every path so the connection is not leaked.
+	defer resp.Body.Close()
+	if resp.StatusCode < 400 {
+		fmt.Printf("%v is reachable.\n", link)
+	} else {
+		fmt.Printf("%v is not reachable.\n", link)
+	}
+}
+
+// GetLinks fetches searchUrl through the SOCKS5 proxy at addr:port, prints the
+// number of .onion links found in the href attributes of its <a> tags, and
+// then probes each link concurrently, printing whether it is reachable.
+// timeout is the per-request timeout in seconds.
+//
+// Failures are logged and GetLinks returns early: exiting the process here
+// would also terminate the program that loaded this library.
+//
+//export GetLinks
+func GetLinks(searchUrl string, addr string, port string, timeout int) {
+	torProxyURL, err := url.Parse("socks5://" + addr + ":" + port)
+	if err != nil {
+		log.Print("Error parsing URL: ", err)
+		return
+	}
+	client := &http.Client{
+		Transport: &http.Transport{Proxy: http.ProxyURL(torProxyURL)},
+		Timeout:   time.Duration(timeout) * time.Second,
+	}
+	resp, err := client.Get(searchUrl)
+	if err != nil {
+		log.Print("Error with GET request: ", err)
+		return
+	}
+	defer resp.Body.Close()
+
+	var foundURLs []string
+	tokenizer := html.NewTokenizer(resp.Body)
+	// Next returns ErrorToken at end of input as well as on a read error;
+	// either way there is nothing more to scan.
+	for tokenType := tokenizer.Next(); tokenType != html.ErrorToken; tokenType = tokenizer.Next() {
+		if tokenType != html.StartTagToken {
+			continue
+		}
+		token := tokenizer.Token()
+		if token.Data != "a" {
+			continue
+		}
+		for _, attr := range token.Attr {
+			if attr.Key == "href" && validOnionUrl(attr.Val) {
+				foundURLs = append(foundURLs, attr.Val)
+			}
+		}
+	}
+	fmt.Printf("Number of URLs found: %v\n", len(foundURLs))
+
+	for _, link := range foundURLs {
+		if _, err := url.ParseRequestURI(link); err != nil {
+			continue
+		}
+		wg.Add(1)
+		go asyncHead(client, link)
+	}
+	wg.Wait()
+}
+
+// main is required for package main; the package's functionality is reached
+// through the exported GetLinks.
+func main() {
+}
diff --git a/modules/go_modules/getLinks.py b/modules/go_modules/getLinks.py
new file mode 100644
index 00000000..9d63e82d
--- /dev/null
+++ b/modules/go_modules/getLinks.py
@@ -0,0 +1,21 @@
+from ctypes import cdll, c_char_p, c_longlong, c_int, Structure
+import sys
+
+goBot = cdll.LoadLibrary("./goBot.so")
+
+
+class GoString(Structure):
+    # Mirrors the GoString struct declared in goBot.h.
+    _fields_ = [("p", c_char_p), ("n", c_longlong)]
+
+
+# The timeout parameter is a GoInt, which goBot.h declares as long long,
+# so it is passed as c_longlong rather than c_int.
+goBot.GetLinks.argtypes = [GoString, GoString, GoString, c_longlong]
+url = sys.argv[1].encode('utf-8')
+addr = b"127.0.0.1"
+port = b"9050"
+goBot.GetLinks(GoString(url, len(url)),
+               GoString(addr, len(addr)),
+               GoString(port, len(port)),
+               15)
diff --git a/modules/go_modules/goBot.h b/modules/go_modules/goBot.h
new file mode 100644
index 00000000..cdac4b0f
--- /dev/null
+++ b/modules/go_modules/goBot.h
@@ -0,0 +1,60 @@
+/* Created by "go tool cgo" - DO NOT EDIT. */
+
+/* package command-line-arguments */
+
+/* Start of preamble from import "C" comments. */
+
+
+
+
+/* End of preamble from import "C" comments. */
+
+
+/* Start of boilerplate cgo prologue. */
+#line 1 "cgo-gcc-export-header-prolog"
+
+#ifndef GO_CGO_PROLOGUE_H
+#define GO_CGO_PROLOGUE_H
+
+typedef signed char GoInt8;
+typedef unsigned char GoUint8;
+typedef short GoInt16;
+typedef unsigned short GoUint16;
+typedef int GoInt32;
+typedef unsigned int GoUint32;
+typedef long long GoInt64;
+typedef unsigned long long GoUint64;
+typedef GoInt64 GoInt;
+typedef GoUint64 GoUint;
+typedef __SIZE_TYPE__ GoUintptr;
+typedef float GoFloat32;
+typedef double GoFloat64;
+typedef float _Complex GoComplex64;
+typedef double _Complex GoComplex128;
+
+/*
+  static assertion to make sure the file is being used on architecture
+  at least with matching size of GoInt.
+*/
+typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
+
+typedef struct { const char *p; GoInt n; } GoString;
+typedef void *GoMap;
+typedef void *GoChan;
+typedef struct { void *t; void *v; } GoInterface;
+typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
+
+#endif
+
+/* End of boilerplate cgo prologue. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+extern void GetLinks(GoString p0, GoString p1, GoString p2, GoInt p3);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/modules/go_modules/goBot.so b/modules/go_modules/goBot.so
new file mode 100644
index 00000000..8bc358ac
Binary files /dev/null and b/modules/go_modules/goBot.so differ