Skip to content

Commit

Permalink
feat: v0.2.0 (#147)
Browse files Browse the repository at this point in the history
* feat: v0.2.0

Signed-off-by: Dwi Siswanto <[email protected]>

* docs: update flags

Signed-off-by: Dwi Siswanto <[email protected]>

---------

Signed-off-by: Dwi Siswanto <[email protected]>
  • Loading branch information
dwisiswant0 authored Oct 24, 2024
1 parent 570005a commit 6b64892
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 78 deletions.
54 changes: 36 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,26 +76,44 @@ Simply, galer can be run with:

### Flags

```bash
▶ galer -h
```

![galer](https://user-images.githubusercontent.com/25837540/100824601-0ee53b80-3489-11eb-878d-a58d1ec3489d.jpg)

This will display help for the tool. Here are all the switches it supports.

| **Flag** | **Description** |
|------------------- |----------------------------------------------------------------- |
| -u, --url | Target to fetch _(single target URL or list)_ |
| -e, --extension | Show only certain extensions _(comma-separated, e.g. js,php)_ |
| -c, --concurrency | Concurrency level _(default: 50)_ |
| --same-host | Same host only |
| --same-root | Same root (eTLD+1) only (takes precedence over --same-host) |
| -o, --output | Save fetched URLs output into file |
| -t, --timeout | Maximum time _(seconds)_ allowed for connection _(default: 60)_ |
| -s, --silent | Silent mode _(suppress errors)_ |
| -v, --verbose | Verbose mode: show error details unless silent mode is used |
| -h, --help | Display this help |
This will display help for the tool. Here are all the options it supports.

```console
$ galer -h

__ v0.2.0
__ _ _(_ ) __ _ __
/'_ '\/'_' )| | /'__'( '__)
( (_) ( (_| || |( ___| |
'\__ '\__,_(___'\____(_)
( )_) |
\___/' @dwisiswant0

A fast tool to fetch URLs from HTML attributes by crawl-in

Usage:
galer -u [URL|URLs.txt] -o [output.txt]

Options:
-u, --url <URL/FILE> Target to fetches (single target URL or list)
-e, --extension <EXT> Show only certain extensions (comma-separated, e.g. js,php)
-c, --concurrency <N> Concurrency level (default: 50)
-w, --wait <N> Wait N seconds before evaluate (default: 1)
-d, --depth <N> Max. depth for crawling (levels of links to follow)
--same-host Same host only
--same-root Same root (eTLD+1) only (takes precedence over --same-host)
-o, --output <FILE> Save fetched URLs output into file
-T, --template <string> Format for output template (e.g., "{{scheme}}://{{host}}{{path}}")
Valid variables are: "raw_url", "scheme", "user", "username",
"password", "host", "hostname", "port", "path", "raw_path",
"escaped_path", "raw_query", "fragment", "raw_fragment".
-t, --timeout <N> Max. time (seconds) allowed for connection (default: 60)
-s, --silent Silent mode (suppress an errors)
-v, --verbose Verbose mode show error details unless you weren't use silent
-h, --help Display its helps
```

### Examples

Expand Down
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module github.com/dwisiswant0/galer

go 1.22.0
go 1.23

toolchain go1.23.1

require (
Expand Down Expand Up @@ -29,5 +30,7 @@ require (
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.2 // indirect
golang.org/x/sys v0.26.0 // indirect
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 h1:1wqE9dj9NpSm04INVsJhhEUzhuDVjbcyKH91sVyPATw=
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
Expand Down
12 changes: 9 additions & 3 deletions internal/runner/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package runner

const (
author = "dwisiswant0"
version = "0.1.0"
version = "0.2.0"
banner = `
__ v` + version + `
__ _ _(_ ) __ _ __
Expand All @@ -21,11 +21,17 @@ Usage:
Options:
-u, --url <URL/FILE> Target to fetches (single target URL or list)
-e, --extension <EXT> Show only certain extensions (comma-separated, e.g. js,php)
-c, --concurrency <int> Concurrency level (default: 50)
-c, --concurrency <N> Concurrency level (default: 50)
-w, --wait <N> Wait N seconds before evaluate (default: 1)
-d, --depth <N> Max. depth for crawling (levels of links to follow)
--same-host Same host only
--same-root Same root (eTLD+1) only (takes precedence over --same-host)
-o, --output <FILE> Save fetched URLs output into file
-t, --timeout <int> Maximum time (seconds) allowed for connection (default: 60)
-T, --template <string> Format for output template (e.g., "{{scheme}}://{{host}}{{path}}")
Valid variables are: "raw_url", "scheme", "user", "username",
"password", "host", "hostname", "port", "path", "raw_path",
"escaped_path", "raw_query", "fragment", "raw_fragment".
-t, --timeout <N> Max. time (seconds) allowed for connection (default: 60)
-s, --silent Silent mode (suppress an errors)
-v, --verbose Verbose mode show error details unless you weren't use silent
-h, --help Display its helps
Expand Down
19 changes: 15 additions & 4 deletions internal/runner/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ import (
// Options defines the command-line options accepted by galer.
type Options struct {
Concurrency int
Timeout int
// Depth is the maximum crawl depth (levels of links to follow).
Depth int
URL string
Ext string
File *os.File
List *bufio.Scanner
Output string
SameHost bool
SameRoot bool
Silent bool
// Template is the output format template.
Template string
Timeout int
URL string
Verbose bool
List *bufio.Scanner
File *os.File
// Wait is the number of seconds to wait before evaluating.
Wait int
}

// Parse parses the user-supplied arguments
Expand All @@ -35,6 +37,12 @@ func Parse() *Options {
flag.IntVar(&opt.Concurrency, "concurrency", 50, "")
flag.IntVar(&opt.Concurrency, "c", 50, "")

flag.IntVar(&opt.Wait, "wait", 1, "")
flag.IntVar(&opt.Wait, "w", 1, "")

flag.IntVar(&opt.Depth, "depth", 1, "")
flag.IntVar(&opt.Depth, "d", 1, "")

flag.IntVar(&opt.Timeout, "timeout", 60, "")
flag.IntVar(&opt.Timeout, "t", 60, "")

Expand All @@ -47,6 +55,9 @@ func Parse() *Options {
flag.StringVar(&opt.Output, "output", "", "")
flag.StringVar(&opt.Output, "o", "", "")

flag.StringVar(&opt.Template, "template", "", "")
flag.StringVar(&opt.Template, "T", "", "")

flag.BoolVar(&opt.Silent, "silent", false, "")
flag.BoolVar(&opt.Silent, "s", false, "")

Expand Down
120 changes: 82 additions & 38 deletions internal/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,107 @@ package runner

import (
"fmt"
"io"
"os"

"github.com/dwisiswant0/galer/pkg/galer"
"github.com/remeh/sizedwaitgroup"
)

// New executes galer
func New(opt *Options) {
job := make(chan string)
con := opt.Concurrency
swg := sizedwaitgroup.New(con)
cfg = &galer.Config{
Timeout: opt.Timeout,
SameHost: opt.SameHost,
SameRoot: opt.SameRoot,
// Runner holds the state shared by a crawl session.
type Runner struct {
// opt is the parsed command-line options supplied to New.
opt *Options
// swg is sized by opt.Concurrency in New; Do uses it to wait for its workers.
swg sizedwaitgroup.SizedWaitGroup
// urls marks the URLs that run has already written out.
// NOTE(review): run reads and writes this map from the worker goroutines
// started by Do and no lock is visible in this view — confirm it is guarded.
urls map[string]bool
// galer is the base crawler configuration built in New from opt.
galer *galer.Config
}

// New initializes a [Runner] from the parsed options.
//
// It sizes the wait group with opt.Concurrency, allocates the map of seen
// URLs, and copies the scope, template, timeout and wait settings from opt
// into the galer configuration, using the package-level clog as its logger.
func New(opt *Options) *Runner {
return &Runner{
opt: opt,
swg: sizedwaitgroup.New(opt.Concurrency),
urls: make(map[string]bool),
galer: &galer.Config{
Logger: clog,
SameHost: opt.SameHost,
SameRoot: opt.SameRoot,
Template: opt.Template,
Timeout: opt.Timeout,
Wait: opt.Wait,
},
}
cfg = galer.New(cfg)
}

for i := 0; i < con; i++ {
swg.Add()
// Do runs the crawl.
//
// It starts opt.Concurrency worker goroutines that receive targets from an
// unbuffered channel, feeds every line scanned from opt.List into that
// channel, then closes it, waits for the workers, closes the galer
// configuration and, when one is set, the output file. Each target is
// crawled starting at depth 1.
//
// NOTE(review): every worker calls SetScope on the same r.galer value —
// confirm that is safe to do concurrently.
// NOTE(review): the scanner's Err is not checked after the input loop, so a
// read error on the target list would go unnoticed.
func (r *Runner) Do() {
jobs := make(chan string)

for i := 0; i < r.opt.Concurrency; i++ {
r.swg.Add()
go func() {
defer swg.Done()
for URL := range job {
run := opt.run(URL, cfg)
for _, u := range run {
if opt.Ext != "" {
if !opt.isOnExt(u) {
continue
}
}

fmt.Println(u)

if opt.File != nil {
fmt.Fprintf(opt.File, "%s\n", out)
}
}
defer r.swg.Done()
for job := range jobs {
r.galer.SetScope(job)
r.run(job, 1)
}
}()
}

for opt.List.Scan() {
u := opt.List.Text()
job <- u
for r.opt.List.Scan() {
u := r.opt.List.Text()
jobs <- u
}

close(jobs)
r.swg.Wait()
r.galer.Close()

if r.opt.File != nil {
r.opt.File.Close()
}
}

// run crawls URL and writes every URL not seen before to stdout and, when an
// output file is set, to that file as well. counter is the current depth;
// newly found URLs are followed recursively while counter stays within
// opt.Depth.
//
// NOTE(review): r.urls is read and written here without any visible lock,
// while Do calls run from several goroutines — confirm this is synchronized.
// NOTE(review): the extension filter only decides which URLs are followed;
// every new URL is printed before it is applied — confirm that matches the
// documented "show only certain extensions" behavior.
// NOTE(review): counter is incremented in the loop and the recursive call
// passes counter+1, so depth advances by two per level, and the loop crawls
// the same URL again on each iteration — confirm this is intended.
// NOTE(review): the value returned by galer.New is created on every call and
// is not closed in this function — verify nothing is leaked.
func (r *Runner) run(URL string, counter int) {
cfg := galer.New(r.galer)

// Mirror the output to the file only when one was opened.
var writer io.Writer = os.Stdout
if r.opt.File != nil {
writer = io.MultiWriter(os.Stdout, r.opt.File)
}

close(job)
swg.Wait()
_ = cfg.Close()
for counter <= r.opt.Depth {
crawl := r.crawl(URL, cfg)
if len(crawl) == 0 {
break
}
counter++

// batches collects the URLs seen for the first time in this pass.
var batches []string
for _, u := range crawl {
if !r.urls[u] {
fmt.Fprintf(writer, "%s\n", u)
batches = append(batches, u)
r.urls[u] = true
}
}

if opt.File != nil {
opt.File.Close()
for _, u := range batches {
if r.opt.Ext != "" {
if !r.opt.isOnExt(u) {
continue
}
}

if counter <= r.opt.Depth {
r.run(u, counter+1)
}
}
}
}

func (opt *Options) run(URL string, cfg *galer.Config) []string {
func (r *Runner) crawl(URL string, cfg *galer.Config) []string {
res, err := cfg.Crawl(URL)
if err != nil && !opt.Silent {
if err != nil && opt.Verbose {
clog.Error(err, "url", URL)

return []string{}
Expand Down
2 changes: 1 addition & 1 deletion internal/runner/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (opt *Options) validate() error {
opt.List = bufio.NewScanner(f)
}
} else {
return errors.New("No target inputs provided")
return errors.New("no target inputs provided")
}

if opt.Output != "" {
Expand Down
3 changes: 0 additions & 3 deletions internal/runner/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@ package runner

import (
"github.com/charmbracelet/log"
"github.com/dwisiswant0/galer/pkg/galer"
)

var (
out string
opt *Options
cfg *galer.Config
clog *log.Logger
)
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ import "github.com/dwisiswant0/galer/internal/runner"

// main parses the command-line options and runs the crawler.
func main() {
options := runner.Parse()
runner.New(options)
r := runner.New(options)
r.Do()
}
Loading

0 comments on commit 6b64892

Please sign in to comment.