forked from projectdiscovery/nuclei
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dedupe.go
113 lines (101 loc) · 2.84 KB
/
dedupe.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Package dedupe implements a deduplication layer for nuclei-generated
// issues.
//
// The layer can be persisted to leveldb-based storage for further use.
package dedupe
import (
	"crypto/sha1"
	"os"
	"reflect"
	"sort"
	"unsafe"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"

	"github.com/projectdiscovery/nuclei/v2/pkg/output"
	"github.com/projectdiscovery/nuclei/v2/pkg/types"
)
// Storage is a duplicate detecting storage for nuclei scan events.
//
// It wraps a leveldb database in which Index records one key per
// distinct event digest.
type Storage struct {
// temporary is the path of the temporary directory created by New when
// it is called with an empty dbPath; it is empty otherwise. Close removes
// this directory when the field is set.
temporary string
// storage is the leveldb database handle opened by New.
storage *leveldb.DB
}
// New creates a new duplicate detecting storage for nuclei scan events.
//
// dbPath is the directory of the leveldb database to open. When it is
// empty, a fresh temporary directory is created and used instead; that
// directory is remembered so Close can remove it.
//
// If opening the database fails with a corruption error, a recovery of
// the database is attempted before giving up. On any failure the
// temporary directory created by this call (if any) is removed, so an
// unsuccessful New leaves nothing behind on disk.
func New(dbPath string) (*Storage, error) {
	storage := &Storage{}

	if dbPath == "" {
		tmpDir, err := os.MkdirTemp("", "nuclei-report-*")
		if err != nil {
			return nil, err
		}
		dbPath = tmpDir
		storage.temporary = tmpDir
	}

	db, err := leveldb.OpenFile(dbPath, nil)
	if err != nil && errors.IsCorrupted(err) {
		// If the metadata is corrupted, try to recover
		db, err = leveldb.RecoverFile(dbPath, nil)
	}
	if err != nil {
		// Only directories created by this call are cleaned up; a
		// caller-supplied dbPath is never touched.
		if storage.temporary != "" {
			_ = os.RemoveAll(storage.temporary)
		}
		return nil, err
	}

	storage.storage = db
	return storage, nil
}
// Close shuts down the underlying leveldb database and, when the storage
// was backed by a temporary directory, deletes that directory.
//
// Both steps are best effort: their errors are discarded.
func (s *Storage) Close() {
	_ = s.storage.Close()

	if s.temporary == "" {
		return
	}
	_ = os.RemoveAll(s.temporary)
}
// Index indexes an item in storage and returns true if the item
// was unique.
//
// The item is identified by the SHA-1 digest of, in order: TemplateID,
// MatcherName, ExtractorName, Type, Host, Matched, every entry of
// ExtractedResults, and every Metadata key/value pair. Metadata pairs are
// hashed in sorted key order so that the digest does not depend on Go's
// randomized map iteration order; otherwise the same event could hash
// differently between calls and slip past deduplication.
//
// NOTE: the fields are concatenated without separators. This is kept
// as-is so that digests stay compatible with already persisted stores.
//
// When the lookup in storage fails, the error is returned together with
// true, so that an issue is never dropped as a duplicate because of a
// storage failure. When the item is new, the returned error is the result
// of recording its digest.
func (s *Storage) Index(result *output.ResultEvent) (bool, error) {
	hasher := sha1.New()

	// Writing an empty string adds nothing to the digest, so empty fields
	// need no special casing. Write errors are discarded as before.
	for _, field := range [...]string{
		result.TemplateID,
		result.MatcherName,
		result.ExtractorName,
		result.Type,
		result.Host,
		result.Matched,
	} {
		_, _ = hasher.Write(unsafeToBytes(field))
	}
	for _, extracted := range result.ExtractedResults {
		_, _ = hasher.Write(unsafeToBytes(extracted))
	}

	// Hash metadata in a stable order; ranging over the map directly would
	// make the digest vary from call to call.
	keys := make([]string, 0, len(result.Metadata))
	for k := range result.Metadata {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	for _, k := range keys {
		_, _ = hasher.Write(unsafeToBytes(k))
		_, _ = hasher.Write(unsafeToBytes(types.ToString(result.Metadata[k])))
	}

	hash := hasher.Sum(nil)

	exists, err := s.storage.Has(hash, nil)
	if err != nil {
		// if we have an error, return with it but mark it as true
		// since we don't want to lose an issue considering it a dupe.
		return true, err
	}
	if exists {
		return false, nil
	}
	return true, s.storage.Put(hash, nil, nil)
}
// unsafeToBytes converts a string to byte slice and does it with
// zero allocations.
//
// The returned slice shares its backing memory with data: its length and
// capacity both equal len(data). Because strings are immutable, callers
// must treat the result as read-only.
//
// The conversion copies the pointer and length from the string's header
// into the header of a real slice value. Reinterpreting the string itself
// as a slice would read one word past the end of the string header.
//
// Reference - https://stackoverflow.com/questions/59209493/how-to-use-unsafe-get-a-byte-slice-from-a-string-without-memory-copy
func unsafeToBytes(data string) []byte {
	var buf []byte
	// Both headers are views onto actual string/slice variables, which is
	// the only form in which the reflect header types may be used.
	src := (*reflect.StringHeader)(unsafe.Pointer(&data))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
	dst.Data = src.Data
	dst.Len = src.Len
	dst.Cap = src.Len
	return buf
}