-
-
Notifications
You must be signed in to change notification settings - Fork 546
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #68 from KingAkeem/goBot
Added Go version of getweblinks getLinks function
- Loading branch information
Showing
4 changed files
with
159 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package main | ||
|
||
import "C" | ||
|
||
import ( | ||
"fmt" | ||
"golang.org/x/net/html" | ||
"log" | ||
"net/http" | ||
"net/url" | ||
"regexp" | ||
"sync" | ||
"time" | ||
) | ||
|
||
// onionURLPattern matches absolute http(s) URLs whose host is a single
// alphanumeric label (optionally prefixed with "www.") under the .onion TLD,
// followed either by a path or by the end of the string. It is compiled once
// at package initialisation rather than on every call.
var onionURLPattern = regexp.MustCompile(`^https?://(www\.)?[a-zA-Z0-9]+\.onion(/|$)`)

// validOnionUrl reports whether rawURL is an absolute http or https URL that
// points at a .onion host.
//
// The host label must be non-empty and purely alphanumeric; a URL without a
// trailing slash or path (for example "http://abc.onion") is accepted.
func validOnionUrl(rawURL string) bool {
	return onionURLPattern.MatchString(rawURL)
}
|
||
// asyncHead probes link with a GET request through client and prints whether
// it is reachable. A link counts as reachable when the request succeeds and
// the response status code is below 400.
//
// It is meant to run in its own goroutine: the caller must call wg.Add(1)
// beforehand, and asyncHead calls wg.Done when it returns.
func asyncHead(client *http.Client, link string) {
	defer wg.Done()
	resp, err := client.Get(link)
	if err != nil {
		fmt.Printf("%v is not reachable.\n", link)
		return
	}
	// The body is never read, but it must still be closed so the underlying
	// connection is released instead of leaking once per probed link.
	defer resp.Body.Close()
	if resp.StatusCode < 400 {
		fmt.Printf("%v is reachable.\n", link)
	} else {
		fmt.Printf("%v is not reachable.\n", link)
	}
}

// wg tracks the in-flight asyncHead goroutines; GetLinks adds to it before
// starting each probe and waits on it before returning.
var wg sync.WaitGroup
|
||
//export GetLinks | ||
func GetLinks(searchUrl string, addr string, port string, timeout int) { | ||
var torProxy string = "socks5://" + addr + ":" + port | ||
torProxyUrl, err := url.Parse(torProxy) | ||
if err != nil { | ||
log.Fatal("Error parsing URL: ", err) | ||
} | ||
torTransport := &http.Transport{Proxy: http.ProxyURL(torProxyUrl)} | ||
client := &http.Client{Transport: torTransport, Timeout: time.Second * time.Duration(timeout)} | ||
resp, err := client.Get(searchUrl) | ||
if err != nil { | ||
log.Fatal("Error with GET request", err) | ||
} | ||
defer resp.Body.Close() | ||
bytes := resp.Body | ||
tokenizer := html.NewTokenizer(bytes) | ||
found_urls := make([]string, 0) | ||
for not_end := true; not_end; { | ||
currentTokenType := tokenizer.Next() | ||
switch { | ||
case currentTokenType == html.ErrorToken: | ||
not_end = false | ||
case currentTokenType == html.StartTagToken: | ||
token := tokenizer.Token() | ||
if token.Data == "a" { | ||
attributes := token.Attr | ||
for i := 0; i < len(attributes); i++ { | ||
if attributes[i].Key == "href" && validOnionUrl(attributes[i].Val) { | ||
found_urls = append(found_urls, attributes[i].Val) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
fmt.Printf("Number of URLs found: %v\n", len(found_urls)) | ||
for _, link := range found_urls { | ||
_, err := url.ParseRequestURI(link) | ||
if err != nil { | ||
continue | ||
} | ||
wg.Add(1) | ||
go asyncHead(client, link) | ||
} | ||
wg.Wait() | ||
} | ||
|
||
// main is intentionally empty: package main must declare it, while the
// functionality of this file is reached through the exported GetLinks.
func main() {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
"""Command-line driver that calls the Go GetLinks function via ctypes.

Usage: pass the URL to crawl as the first command-line argument. The Tor
SOCKS proxy address and port are fixed to 127.0.0.1:9050 and the request
timeout to 15 seconds.
"""
from ctypes import cdll, c_char_p, c_longlong, Structure
import sys

goBot = cdll.LoadLibrary("./goBot.so")


class GoString(Structure):
    """ctypes mirror of cgo's GoString: a byte pointer plus its length."""
    _fields_ = [("p", c_char_p), ("n", c_longlong)]


# The generated header declares the last parameter as GoInt, which is a
# 64-bit integer there, so it must be passed as c_longlong rather than c_int.
goBot.GetLinks.argtypes = [GoString, GoString, GoString, c_longlong]
# GetLinks is declared `void`; without this ctypes would assume an int result.
goBot.GetLinks.restype = None

if len(sys.argv) < 2:
    sys.exit("usage: {} <url>".format(sys.argv[0]))

url = sys.argv[1].encode('utf-8')
addr = b"127.0.0.1"
port = b"9050"
goBot.GetLinks(GoString(url, len(url)),
               GoString(addr, len(addr)),
               GoString(port, len(port)),
               15)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* Created by "go tool cgo" - DO NOT EDIT. */ | ||
|
||
/* package command-line-arguments */ | ||
|
||
/* Start of preamble from import "C" comments. */ | ||
|
||
|
||
|
||
|
||
/* End of preamble from import "C" comments. */ | ||
|
||
|
||
/* Start of boilerplate cgo prologue. */ | ||
#line 1 "cgo-gcc-export-header-prolog" | ||
|
||
#ifndef GO_CGO_PROLOGUE_H | ||
#define GO_CGO_PROLOGUE_H | ||
|
||
typedef signed char GoInt8; | ||
typedef unsigned char GoUint8; | ||
typedef short GoInt16; | ||
typedef unsigned short GoUint16; | ||
typedef int GoInt32; | ||
typedef unsigned int GoUint32; | ||
typedef long long GoInt64; | ||
typedef unsigned long long GoUint64; | ||
typedef GoInt64 GoInt; | ||
typedef GoUint64 GoUint; | ||
typedef __SIZE_TYPE__ GoUintptr; | ||
typedef float GoFloat32; | ||
typedef double GoFloat64; | ||
typedef float _Complex GoComplex64; | ||
typedef double _Complex GoComplex128; | ||
|
||
/* | ||
static assertion to make sure the file is being used on architecture | ||
at least with matching size of GoInt. | ||
*/ | ||
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1]; | ||
|
||
typedef struct { const char *p; GoInt n; } GoString; | ||
typedef void *GoMap; | ||
typedef void *GoChan; | ||
typedef struct { void *t; void *v; } GoInterface; | ||
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice; | ||
|
||
#endif | ||
|
||
/* End of boilerplate cgo prologue. */ | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
|
||
/* Exported Go function GetLinks. In the Go source the parameters are, in
 * order: p0 = searchUrl, p1 = addr (proxy host), p2 = port (proxy port),
 * p3 = timeout (multiplied by one second for the HTTP client timeout). */
extern void GetLinks(GoString p0, GoString p1, GoString p2, GoInt p3);
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Binary file not shown.