Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ TruffleHog has a sub-command for each source of data that you may want to scan:
- jenkins
- elasticsearch
- stdin
- multi-scan

Each subcommand can have options that you can see with the `--help` flag provided to the subcommand:

Expand Down Expand Up @@ -481,6 +482,33 @@ For example, to scan a `git` repository, start with
trufflehog git https://github.com/trufflesecurity/trufflehog.git
```

## Configuration

TruffleHog supports defining [custom regex detectors](#regex-detector-alpha)
and multiple sources in a configuration file provided via the `--config` flag.
The regex detectors can be used with any subcommand, while the sources defined
in configuration are only for the `multi-scan` subcommand.

The configuration format for sources can be found on Truffle Security's
[source configuration documentation page](https://docs.trufflesecurity.com/scan-data-for-secrets).

Example GitHub source configuration and [options reference](https://docs.trufflesecurity.com/github#Fvm1I):

```yaml
sources:
- connection:
    '@type': type.googleapis.com/sources.GitHub
    repositories:
    - https://github.com/trufflesecurity/test_keys.git
    unauthenticated: {}
  name: example config scan
  type: SOURCE_TYPE_GITHUB
  verify: true
```

You may define multiple connections under the `sources` key (see above), and
TruffleHog will scan all of the sources concurrently.

## S3

The S3 source supports assuming IAM roles for scanning in addition to IAM users. This makes it easier for users to scan multiple AWS accounts without needing to rely on hardcoded credentials for each account.
Expand Down
19 changes: 16 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ var (
huggingfaceIncludeDiscussions = huggingfaceScan.Flag("include-discussions", "Include discussions in scan.").Bool()
huggingfaceIncludePrs = huggingfaceScan.Flag("include-prs", "Include pull requests in scan.").Bool()

scanScan = cli.Command("scan", "Find credentials in multiple sources defined in configuration.")
stdinInputScan = cli.Command("stdin", "Find credentials from stdin.")
multiScanScan = cli.Command("multi-scan", "Find credentials in multiple sources defined in configuration.")

analyzeCmd = analyzer.Command(cli)
usingTUI = false
Expand Down Expand Up @@ -539,6 +540,16 @@ func run(state overseer.State) {
engConf.VerificationResultCache = simple.NewCache[detectors.Result]()
}

// Check that there are no sources defined for subcommands other than
// 'multi-scan'. If there are, log a fatal error as it is ambiguous what
// the user is trying to do.
if cmd != multiScanScan.FullCommand() && len(conf.Sources) > 0 {
logFatal(
fmt.Errorf("ambiguous configuration"),
"sources should only be defined in configuration for the 'multi-scan' command",
)
}

if *compareDetectionStrategies {
if err := compareScans(ctx, cmd, engConf); err != nil {
logFatal(err, "error comparing detection strategies")
Expand Down Expand Up @@ -975,7 +986,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
} else {
refs = append(refs, ref)
}
case scanScan.FullCommand():
case multiScanScan.FullCommand():
if *configFilename == "" {
return scanMetrics, fmt.Errorf("missing required flag: --config")
}
Expand All @@ -986,8 +997,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
}
case stdinInputScan.FullCommand():
cfg := sources.StdinConfig{}
if ref, err = eng.ScanStdinInput(ctx, cfg); err != nil {
if ref, err := eng.ScanStdinInput(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan stdin input: %v", err)
} else {
refs = append(refs, ref)
}
default:
return scanMetrics, fmt.Errorf("invalid command: %s", cmd)
Expand Down
7 changes: 3 additions & 4 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

// Config holds user supplied configuration.
type Config struct {
Sources []sources.ConfigurableSource
Sources []sources.ConfiguredSource
Detectors []detectors.Detector
}

Expand Down Expand Up @@ -55,14 +55,13 @@ func NewYAML(input []byte) (*Config, error) {
}

// Convert to configured sources.
var sourceConfigs []sources.ConfigurableSource
var sourceConfigs []sources.ConfiguredSource
for _, pbSource := range inputYAML.Sources {
s, err := instantiateSourceFromType(pbSource.GetType())
if err != nil {
return nil, err
}
src := sources.NewConfigurableSource(s)
src.Configure(pbSource)
src := sources.NewConfiguredSource(s, pbSource)

sourceConfigs = append(sourceConfigs, src)
}
Expand Down
13 changes: 12 additions & 1 deletion pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ type Config struct {
// also serves as a multiplier for other worker types (e.g., detector workers, notifier workers)
Concurrency int

ConfiguredSources []sources.ConfigurableSource
ConfiguredSources []sources.ConfiguredSource
Decoders []decoders.Decoder
Detectors []detectors.Detector
DetectorVerificationOverrides map[config.DetectorID]bool
Expand Down Expand Up @@ -547,6 +547,17 @@ func (r *verificationOverlapTracker) increment() {

const ignoreTag = "trufflehog:ignore"

// AhoCorasickCoreKeywords returns the set of keywords that the engine's
// AhoCorasickCore is using. A new map is built on every call; its keys are
// the keys of AhoCorasickCore.KeywordsToDetectors().
func (e *Engine) AhoCorasickCoreKeywords() map[string]struct{} {
	// Only the keys matter here, so collapse the keyword-to-detectors mapping
	// into a set. The final size is known up front, so pass it as a size hint
	// to avoid growing the map while it is filled.
	keywordsToDetectors := e.AhoCorasickCore.KeywordsToDetectors()
	keywords := make(map[string]struct{}, len(keywordsToDetectors))
	for keyword := range keywordsToDetectors {
		keywords[keyword] = struct{}{}
	}
	return keywords
}

// HasFoundResults returns true if any results are found.
func (e *Engine) HasFoundResults() bool {
return atomic.LoadUint32(&e.numFoundResults) > 0
Expand Down
16 changes: 3 additions & 13 deletions pkg/engine/postman.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,12 @@ func (e *Engine) ScanPostman(ctx context.Context, c sources.PostmanConfig) (sour
sourceName := "trufflehog - postman"
sourceID, jobID, _ := e.sourceManager.GetIDs(ctx, sourceName, postman.SourceType)

postmanSource := &postman.Source{}
e.setPostmanKeywords(postmanSource)
postmanSource := &postman.Source{
DetectorKeywords: e.AhoCorasickCoreKeywords(),
}

if err := postmanSource.Init(ctx, sourceName, jobID, sourceID, true, &conn, c.Concurrency); err != nil {
return sources.JobProgressRef{}, err
}
return e.sourceManager.EnumerateAndScan(ctx, sourceName, postmanSource)
}

// setPostmanKeywords sets the keywords from the engine's AhoCorasickCore in
// the postman source by assigning them to source.DetectorKeywords.
func (e *Engine) setPostmanKeywords(source *postman.Source) {
	// Reuse the engine's keyword-set builder instead of duplicating the loop
	// that flattens KeywordsToDetectors into a set.
	source.DetectorKeywords = e.AhoCorasickCoreKeywords()
}
4 changes: 2 additions & 2 deletions pkg/engine/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
// sources and returns their job references. If there is an error during
// initialization or starting of the scan, an error is returned along with the
// references that successfully started up to that point.
func (e *Engine) ScanConfig(ctx context.Context, configuredSources ...sources.ConfigurableSource) ([]sources.JobProgressRef, error) {
func (e *Engine) ScanConfig(ctx context.Context, configuredSources ...sources.ConfiguredSource) ([]sources.JobProgressRef, error) {
var refs []sources.JobProgressRef
for _, configuredSource := range configuredSources {
sourceID, jobID, _ := e.sourceManager.GetIDs(ctx, configuredSource.Name, configuredSource.SourceType())
Expand All @@ -22,7 +22,7 @@ func (e *Engine) ScanConfig(ctx context.Context, configuredSources ...sources.Co
// Postman needs special initialization to set Keywords from
// the engine.
if postmanSource, ok := source.(*postman.Source); ok {
e.setPostmanKeywords(postmanSource)
postmanSource.DetectorKeywords = e.AhoCorasickCoreKeywords()
}

// Start the scan.
Expand Down
72 changes: 34 additions & 38 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type Source interface {
SourceID() SourceID
// JobID returns the initialized job ID used for tracking relationships in the DB.
JobID() JobID
// Init initializes the source.
// Init initializes the source. Calling this method more than once is undefined behavior.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

Init(aCtx context.Context, name string, jobId JobID, sourceId SourceID, verify bool, connection *anypb.Any, concurrency int) error
// Chunks emits data over a channel which is then decoded and scanned for secrets.
// By default, data is obtained indiscriminately. However, by providing one or more
Expand Down Expand Up @@ -105,56 +105,52 @@ type SourceUnitEnumerator interface {
Enumerate(ctx context.Context, reporter UnitReporter) error
}

// ConfigurableSource wraps a Source with most of its initialization values
// pre-configured and exposes a simplified Init() method. A ConfigurableSource
// can be configured multiple times but only initialized once.
type ConfigurableSource struct {
// Name is the source name; Configure sets it when the config provides one.
Name string
// source is the wrapped Source instance.
source Source
// initFunc performs the underlying initialization; it is set by Configure.
initFunc func(context.Context, SourceID, JobID) error
}

// NewConfigurableSource wraps an instantiated Source object.
func NewConfigurableSource(s Source) ConfigurableSource {
return ConfigurableSource{source: s}
// ConfiguredSource is a Source with most of its initialization values
// pre-configured from a [sourcespb.LocalSource] configuration struct. It
// exposes a simplified Init() method and can only be initialized once. This
// struct is not necessary for running sources, but it helps simplify gathering
// all of the necessary information to call the [Source.Init] method.
type ConfiguredSource struct {
// Name is the source name taken from the configuration.
Name string
// source is the wrapped Source; Init sets it to nil once it has been used.
source Source
// initParams holds the remaining arguments forwarded to [Source.Init].
initParams struct {
verify bool
conn *anypb.Any
concurrency int
}
}

// Configure registers the initialization arguments from the protobuf and sets
// the Name attribute if available in the config.
func (c *ConfigurableSource) Configure(config *sourcespb.LocalSource) {
// Use the configured name if it exists.
if name := config.GetName(); name != "" {
c.Name = name
}
c.initFunc = func(ctx context.Context, sourceID SourceID, jobID JobID) error {
return c.source.Init(
ctx,
config.GetName(),
jobID,
sourceID,
config.GetVerify(),
config.GetConnection(),
runtime.NumCPU(),
)
// NewConfiguredSource wraps an instantiated Source together with the values
// read from the provided protobuf configuration: the name, the verify flag and
// the connection. Concurrency is set to the number of CPUs.
func NewConfiguredSource(s Source, config *sourcespb.LocalSource) ConfiguredSource {
	configured := ConfiguredSource{
		Name:   config.GetName(),
		source: s,
	}
	// Fill in the nested init parameters field by field so the anonymous
	// struct type does not have to be spelled out a second time.
	configured.initParams.verify = config.GetVerify()
	configured.initParams.conn = config.GetConnection()
	configured.initParams.concurrency = runtime.NumCPU()
	return configured
}

// SourceType exposes the underlying source type.
func (c *ConfigurableSource) SourceType() sourcespb.SourceType {
func (c *ConfiguredSource) SourceType() sourcespb.SourceType {
return c.source.Type()
}

// Init returns the initialized Source. The ConfigurableSource is unusable after
// calling this method.
func (c *ConfigurableSource) Init(ctx context.Context, sourceID SourceID, jobID JobID) (Source, error) {
// Init returns the initialized Source. The ConfiguredSource is unusable after
// calling this method because initializing a [Source] more than once is undefined.
func (c *ConfiguredSource) Init(ctx context.Context, sourceID SourceID, jobID JobID) (Source, error) {
if c.source == nil {
return nil, errors.New("source already initialized")
}
if c.initFunc == nil {
return nil, errors.New("source not configured")
}
err := c.initFunc(ctx, sourceID, jobID)
src := c.source
err := src.Init(ctx, c.Name, jobID, sourceID, c.initParams.verify, c.initParams.conn, c.initParams.concurrency)
c.source = nil
return src, err
}
Expand Down
Loading