Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Go version of getweblinks getLinks function #68

Merged
merged 4 commits into from
Mar 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions modules/go_modules/getLinks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package main

import "C"

import (
"fmt"
"golang.org/x/net/html"
"log"
"net/http"
"net/url"
"regexp"
"sync"
"time"
)

// onionURLPattern matches an http(s) URL whose host is an optional "www."
// prefix followed by a non-empty alphanumeric label and the ".onion" suffix,
// with an optional path. It is compiled once at package scope instead of on
// every call.
var onionURLPattern = regexp.MustCompile(`^https?://(www\.)?[a-zA-Z0-9]+\.onion(/.*)?$`)

// validOnionUrl reports whether url looks like a link to a .onion service.
//
// The host label must be non-empty and purely alphanumeric (commas are not
// accepted), and the URL may end right after ".onion" or continue with a
// "/"-prefixed path. Anything else after ".onion" (for example
// ".onion.example.com") is rejected.
func validOnionUrl(url string) bool {
	return onionURLPattern.MatchString(url)
}

// asyncHead issues a GET for link through client and prints whether the link
// is reachable. A link counts as reachable when the request succeeds and the
// status code is below 400.
//
// It is meant to run as a goroutine: the caller must have called wg.Add(1)
// beforehand, and wg.Done is signalled when the check finishes.
func asyncHead(client *http.Client, link string) {
	defer wg.Done()
	resp, err := client.Get(link)
	if err != nil {
		fmt.Printf("%v is not reachable.\n", link)
		return
	}
	// Always release the response body, otherwise every checked link leaks
	// its underlying connection.
	defer resp.Body.Close()
	if resp.StatusCode < 400 {
		fmt.Printf("%v is reachable.\n", link)
		return
	}
	fmt.Printf("%v is not reachable.\n", link)
}

// wg tracks the asyncHead goroutines started by GetLinks: GetLinks calls Add
// before launching each one, asyncHead calls Done when it finishes, and
// GetLinks blocks on Wait until all of them have completed.
var wg sync.WaitGroup

// GetLinks downloads the page at searchUrl through the SOCKS5 proxy at
// addr:port, collects every <a href> value accepted by validOnionUrl, prints
// how many were found, and then checks each link concurrently with asyncHead.
// It returns once every check has completed.
//
// timeout is the per-request time limit, in seconds, of the HTTP client shared
// by the initial fetch and all reachability checks.
//
// Failures are logged and end the call early instead of terminating the
// process: this function is exported through cgo, so exiting here would also
// kill the program that loaded the library.
//
//export GetLinks
func GetLinks(searchUrl string, addr string, port string, timeout int) {
	torProxyURL, err := url.Parse("socks5://" + addr + ":" + port)
	if err != nil {
		log.Print("Error parsing URL: ", err)
		return
	}
	client := &http.Client{
		Transport: &http.Transport{Proxy: http.ProxyURL(torProxyURL)},
		Timeout:   time.Duration(timeout) * time.Second,
	}
	resp, err := client.Get(searchUrl)
	if err != nil {
		log.Print("Error with GET request: ", err)
		return
	}
	defer resp.Body.Close()

	var foundURLs []string
	tokenizer := html.NewTokenizer(resp.Body)
scan:
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// Reported at end of input as well as on read errors; either
			// way there is nothing more to scan.
			break scan
		case html.StartTagToken, html.SelfClosingTagToken:
			token := tokenizer.Token()
			if token.Data != "a" {
				continue
			}
			for _, attr := range token.Attr {
				if attr.Key == "href" && validOnionUrl(attr.Val) {
					foundURLs = append(foundURLs, attr.Val)
				}
			}
		}
	}

	fmt.Printf("Number of URLs found: %v\n", len(foundURLs))
	for _, link := range foundURLs {
		// Skip anything that cannot be used as a request URI.
		if _, err := url.ParseRequestURI(link); err != nil {
			continue
		}
		wg.Add(1)
		go asyncHead(client, link)
	}
	wg.Wait()
}

// main is intentionally empty: this file's functionality is exposed through
// the cgo-exported GetLinks function rather than run from here.
// NOTE(review): presumably present only because the package is built as a C
// shared library (goBot.so) — confirm against the build command.
func main() {
}
18 changes: 18 additions & 0 deletions modules/go_modules/getLinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Command-line driver that calls the Go GetLinks function from goBot.so.

Usage: python getLinks.py <url>

The URL is fetched through the proxy at 127.0.0.1:9050 with a 15 second
timeout; the Go side prints which of the links it found are reachable.
"""
from ctypes import cdll, c_char_p, c_longlong, c_int, Structure
import sys

# Shared library built from getLinks.go; resolved relative to the current
# working directory.
goBot = cdll.LoadLibrary("./goBot.so")


class GoString(Structure):
    """ctypes mirror of the GoString struct from goBot.h.

    p points at the string bytes and n is the length in bytes.
    """
    _fields_ = [("p", c_char_p), ("n", c_longlong)]


# goBot.h declares the last parameter as GoInt (long long), not a C int, and
# the function returns void.
goBot.GetLinks.argtypes = [GoString, GoString, GoString, c_longlong]
goBot.GetLinks.restype = None

if len(sys.argv) < 2:
    sys.exit("usage: {} <url>".format(sys.argv[0]))

url = sys.argv[1].encode('utf-8')
addr = b"127.0.0.1"
port = b"9050"
goBot.GetLinks(GoString(url, len(url)),
               GoString(addr, len(addr)),
               GoString(port, len(port)),
               15)
60 changes: 60 additions & 0 deletions modules/go_modules/goBot.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* Created by "go tool cgo" - DO NOT EDIT. */

/* package command-line-arguments */

/* Start of preamble from import "C" comments. */




/* End of preamble from import "C" comments. */


/* Start of boilerplate cgo prologue. */
#line 1 "cgo-gcc-export-header-prolog"

/* Include guard for the C-side aliases of Go's basic types. */
#ifndef GO_CGO_PROLOGUE_H
#define GO_CGO_PROLOGUE_H

/* Fixed-width integer, floating-point and complex aliases. */
typedef signed char GoInt8;
typedef unsigned char GoUint8;
typedef short GoInt16;
typedef unsigned short GoUint16;
typedef int GoInt32;
typedef unsigned int GoUint32;
typedef long long GoInt64;
typedef unsigned long long GoUint64;
/* GoInt and GoUint are the 64-bit variants in this header. */
typedef GoInt64 GoInt;
typedef GoUint64 GoUint;
typedef __SIZE_TYPE__ GoUintptr;
typedef float GoFloat32;
typedef double GoFloat64;
typedef float _Complex GoComplex64;
typedef double _Complex GoComplex128;

/*
static assertion to make sure the file is being used on architecture
at least with matching size of GoInt.
The array size evaluates to -1, which is a compile error, unless
sizeof(void*) is 8 bytes.
*/
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];

/* GoString: pointer to the bytes (p) plus a length (n). */
typedef struct { const char *p; GoInt n; } GoString;
/* Maps and channels are exposed only as opaque pointers. */
typedef void *GoMap;
typedef void *GoChan;
/* GoInterface: two pointer-sized words (t, v). */
typedef struct { void *t; void *v; } GoInterface;
/* GoSlice: data pointer, length and capacity. */
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;

#endif

/* End of boilerplate cgo prologue. */

#ifdef __cplusplus
extern "C" {
#endif


/*
 * GetLinks, exported from getLinks.go. Parameters follow the Go signature:
 *   p0 - searchUrl: page to fetch and scan for links
 *   p1 - addr:      SOCKS5 proxy host
 *   p2 - port:      SOCKS5 proxy port
 *   p3 - timeout:   HTTP client timeout in seconds
 */
extern void GetLinks(GoString p0, GoString p1, GoString p2, GoInt p3);

#ifdef __cplusplus
}
#endif
Binary file added modules/go_modules/goBot.so
Binary file not shown.