diff --git a/pkg/catalog/loader/loader.go b/pkg/catalog/loader/loader.go index b1887149d5..9dde530ca3 100644 --- a/pkg/catalog/loader/loader.go +++ b/pkg/catalog/loader/loader.go @@ -7,6 +7,7 @@ import ( "os" "sort" "strings" + "sync" "github.com/logrusorgru/aurora" "github.com/pkg/errors" @@ -88,6 +89,9 @@ type Store struct { logger *gologger.Logger + // parserCacheOnce returns the parser's template cache, resolved once on first call (nil if unavailable) + parserCacheOnce func() *templates.Cache + // NotFoundCallback is called for each not found template // This overrides error handling for not found templates NotFoundCallback func(template string) bool @@ -155,6 +159,18 @@ func New(cfg *Config) (*Store, error) { logger: cfg.Logger, } + store.parserCacheOnce = sync.OnceValue(func() *templates.Cache { + if cfg.ExecutorOptions == nil || cfg.ExecutorOptions.Parser == nil { + return nil + } + + if parser, ok := cfg.ExecutorOptions.Parser.(*templates.Parser); ok { + return parser.Cache() + } + + return nil + }) + // Do a check to see if we have URLs in templates flag, if so // we need to processs them separately and remove them from the initial list var templatesFinal []string @@ -310,11 +326,11 @@ func (store *Store) LoadTemplatesOnlyMetadata() error { store.logger.Warning().Msg(err.Error()) } } - parserItem, ok := store.config.ExecutorOptions.Parser.(*templates.Parser) - if !ok { + + templatesCache := store.parserCacheOnce() + if templatesCache == nil { return errors.New("invalid parser") } - templatesCache := parserItem.Cache() loadedTemplateIDs := mapsutil.NewSyncLockMap[string, struct{}]() @@ -386,6 +402,7 @@ func (store *Store) areTemplatesValid(filteredTemplatePaths map[string]struct{}) func (store *Store) areWorkflowOrTemplatesValid(filteredTemplatePaths map[string]struct{}, isWorkflow bool, load func(templatePath string, tagFilter *templates.TagFilter) (bool, error)) bool { areTemplatesValid := true + parsedCache := store.parserCacheOnce() for templatePath := range filteredTemplatePaths { if _, err := 
load(templatePath, store.tagFilter); err != nil { @@ -395,13 +412,26 @@ func (store *Store) areWorkflowOrTemplatesValid(filteredTemplatePaths map[string } } - template, err := templates.Parse(templatePath, store.preprocessor, store.config.ExecutorOptions) - if err != nil { - if isParsingError(store, "Error occurred parsing template %s: %s\n", templatePath, err) { - areTemplatesValid = false - continue + var template *templates.Template + var err error + + if parsedCache != nil { + if cachedTemplate, _, cacheErr := parsedCache.Has(templatePath); cacheErr == nil && cachedTemplate != nil { + template = cachedTemplate + } + } + + if template == nil { + template, err = templates.Parse(templatePath, store.preprocessor, store.config.ExecutorOptions) + if err != nil { + if isParsingError(store, "Error occurred parsing template %s: %s\n", templatePath, err) { + areTemplatesValid = false + continue + } } - } else if template == nil { + } + + if template == nil { // NOTE(dwisiswant0): possibly global matchers template. 
// This could definitely be handled better, for example by returning an // `ErrGlobalMatchersTemplate` during `templates.Parse` and checking it diff --git a/pkg/catalog/loader/loader_bench_test.go b/pkg/catalog/loader/loader_bench_test.go new file mode 100644 index 0000000000..079e928ad5 --- /dev/null +++ b/pkg/catalog/loader/loader_bench_test.go @@ -0,0 +1,43 @@ +package loader_test + +import ( + "testing" + + "github.com/projectdiscovery/gologger" + "github.com/projectdiscovery/nuclei/v3/pkg/catalog/config" + "github.com/projectdiscovery/nuclei/v3/pkg/catalog/disk" + "github.com/projectdiscovery/nuclei/v3/pkg/catalog/loader" + "github.com/projectdiscovery/nuclei/v3/pkg/loader/workflow" + "github.com/projectdiscovery/nuclei/v3/pkg/templates" + "github.com/projectdiscovery/nuclei/v3/pkg/testutils" +) + +func BenchmarkStoreValidateTemplates(b *testing.B) { + options := testutils.DefaultOptions.Copy() + options.Logger = &gologger.Logger{} + testutils.Init(options) + + catalog := disk.NewCatalog(config.DefaultConfig.TemplatesDirectory) + executerOpts := testutils.NewMockExecuterOptions(options, nil) + executerOpts.Parser = templates.NewParser() + + workflowLoader, err := workflow.NewLoader(executerOpts) + if err != nil { + b.Fatalf("could not create workflow loader: %s", err) + } + executerOpts.WorkflowLoader = workflowLoader + + loaderCfg := loader.NewConfig(options, catalog, executerOpts) + + store, err := loader.New(loaderCfg) + if err != nil { + b.Fatalf("could not create store: %s", err) + } + + b.ResetTimer() + b.ReportAllocs() + + for b.Loop() { + _ = store.ValidateTemplates() + } +} diff --git a/pkg/templates/compile.go b/pkg/templates/compile.go index a9730043ed..e6563b59e0 100644 --- a/pkg/templates/compile.go +++ b/pkg/templates/compile.go @@ -47,13 +47,138 @@ func init() { SignatureStats[Unsigned] = &atomic.Uint64{} } +// updateRequestOptions updates options for all request types in a template +func updateRequestOptions(template *Template) { + for i, r := 
range template.RequestsDNS { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsDNS[i] = &rCopy + } + for i, r := range template.RequestsHTTP { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsHTTP[i] = &rCopy + } + for i, r := range template.RequestsCode { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsCode[i] = &rCopy + } + for i, r := range template.RequestsFile { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsFile[i] = &rCopy + } + for i, r := range template.RequestsHeadless { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsHeadless[i] = &rCopy + } + for i, r := range template.RequestsNetwork { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsNetwork[i] = &rCopy + } + for i, r := range template.RequestsJavascript { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsJavascript[i] = &rCopy + } + for i, r := range template.RequestsSSL { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsSSL[i] = &rCopy + } + for i, r := range template.RequestsWHOIS { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsWHOIS[i] = &rCopy + } + for i, r := range template.RequestsWebsocket { + rCopy := *r + rCopy.UpdateOptions(template.Options) + template.RequestsWebsocket[i] = &rCopy + } +} + +// parseFromSource parses a template from source with caching support +func parseFromSource(filePath string, preprocessor Preprocessor, options *protocols.ExecutorOptions, parser *Parser) (*Template, error) { + var reader io.ReadCloser + if !options.DoNotCache { + _, raw, err := parser.parsedTemplatesCache.Has(filePath) + if err == nil && raw != nil { + reader = io.NopCloser(bytes.NewReader(raw)) + } + } + + var err error + if reader == nil { + reader, err = utils.ReaderFromPathOrURL(filePath, options.Catalog) + if err != nil { + return nil, err + } + } + + defer func() { + _ = 
reader.Close() + }() + + options = options.Copy() + options.TemplatePath = filePath + + template, err := ParseTemplateFromReader(reader, preprocessor, options) + if err != nil { + return nil, err + } + + if template.isGlobalMatchersEnabled() { + item := &globalmatchers.Item{ + TemplateID: template.ID, + TemplatePath: filePath, + TemplateInfo: template.Info, + } + + for _, request := range template.RequestsHTTP { + item.Operators = append(item.Operators, request.CompiledOperators) + } + + options.GlobalMatchers.AddOperator(item) + + return nil, nil + } + + // Compile the workflow request + if len(template.Workflows) > 0 { + compiled := &template.Workflow + + compileWorkflow(filePath, preprocessor, options, compiled, options.WorkflowLoader) + template.CompiledWorkflow = compiled + template.CompiledWorkflow.Options = options + } + + template.Path = filePath + if !options.DoNotCache { + parser.compiledTemplatesCache.Store(filePath, template, nil, err) + } + + return template, nil +} + +// getParser returns options.Parser as a *Parser, panicking if it is nil or not a *Parser +func getParser(options *protocols.ExecutorOptions) *Parser { + parser, ok := options.Parser.(*Parser) + if !ok || parser == nil { + panic("invalid parser") + } + + return parser +} + // Parse parses a yaml request template file // TODO make sure reading from the disk the template parsing happens once: see parsers.ParseTemplate vs templates.Parse func Parse(filePath string, preprocessor Preprocessor, options *protocols.ExecutorOptions) (*Template, error) { - parser, ok := options.Parser.(*Parser) - if !ok { - panic("not a parser") - } + parser := getParser(options) + if !options.DoNotCache { if value, _, _ := parser.compiledTemplatesCache.Has(filePath); value != nil { // Copy the template, apply new options, and recompile requests @@ -68,12 +193,14 @@ func Parse(filePath string, preprocessor Preprocessor, options *protocols.Execut if tplCopy.Options.Variables.Len() > 0 { newBase.Variables = tplCopy.Options.Variables } + if 
len(tplCopy.Options.Constants) > 0 { newBase.Constants = tplCopy.Options.Constants } - tplCopy.Options = newBase + tplCopy.Options = newBase tplCopy.Options.ApplyNewEngineOptions(options) + if tplCopy.CompiledWorkflow != nil { tplCopy.CompiledWorkflow.Options.ApplyNewEngineOptions(options) for _, w := range tplCopy.CompiledWorkflow.Workflows { @@ -83,69 +210,8 @@ func Parse(filePath string, preprocessor Preprocessor, options *protocols.Execut } } - // TODO: Reconsider whether to recompile requests. Compiling these is just as slow - // as not using a cache at all, but may be necessary. - - for i, r := range tplCopy.RequestsDNS { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsDNS[i] = &rCopy - } - for i, r := range tplCopy.RequestsHTTP { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsHTTP[i] = &rCopy - } - for i, r := range tplCopy.RequestsCode { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsCode[i] = &rCopy - } - for i, r := range tplCopy.RequestsFile { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsFile[i] = &rCopy - } - for i, r := range tplCopy.RequestsHeadless { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsHeadless[i] = &rCopy - } - for i, r := range tplCopy.RequestsNetwork { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsNetwork[i] = &rCopy - } - for i, r := range tplCopy.RequestsJavascript { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - //rCopy.Compile(tplCopy.Options) - tplCopy.RequestsJavascript[i] = &rCopy - } - for i, r := range tplCopy.RequestsSSL { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsSSL[i] = &rCopy - } - for i, r := 
range tplCopy.RequestsWHOIS { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsWHOIS[i] = &rCopy - } - for i, r := range tplCopy.RequestsWebsocket { - rCopy := *r - rCopy.UpdateOptions(tplCopy.Options) - // rCopy.Compile(tplCopy.Options) - tplCopy.RequestsWebsocket[i] = &rCopy - } + // Update options for all request types + updateRequestOptions(&tplCopy) template := &tplCopy if template.isGlobalMatchersEnabled() { @@ -154,12 +220,16 @@ func Parse(filePath string, preprocessor Preprocessor, options *protocols.Execut TemplatePath: filePath, TemplateInfo: template.Info, } + for _, request := range template.RequestsHTTP { item.Operators = append(item.Operators, request.CompiledOperators) } + options.GlobalMatchers.AddOperator(item) + return nil, nil } + // Compile the workflow request if len(template.Workflows) > 0 { compiled := &template.Workflow @@ -172,61 +242,12 @@ func Parse(filePath string, preprocessor Preprocessor, options *protocols.Execut // options.Logger.Error().Msgf("returning cached template %s after recompiling %d requests", tplCopy.Options.TemplateID, tplCopy.Requests()) return template, nil } - // else: fallthrough to re-parse template from scratch - } - } - var reader io.ReadCloser - if !options.DoNotCache { - _, raw, err := parser.parsedTemplatesCache.Has(filePath) - if err == nil && raw != nil { - reader = io.NopCloser(bytes.NewReader(raw)) - } - } - var err error - if reader == nil { - reader, err = utils.ReaderFromPathOrURL(filePath, options.Catalog) - if err != nil { - return nil, err - } - } - - defer func() { - _ = reader.Close() - }() - - // Make a copy of the options for this template - options = options.Copy() - options.TemplatePath = filePath - template, err := ParseTemplateFromReader(reader, preprocessor, options) - if err != nil { - return nil, err - } - if template.isGlobalMatchersEnabled() { - item := &globalmatchers.Item{ - TemplateID: template.ID, - TemplatePath: filePath, - 
TemplateInfo: template.Info, - } - for _, request := range template.RequestsHTTP { - item.Operators = append(item.Operators, request.CompiledOperators) + // else: fallthrough to re-parse template from scratch } - options.GlobalMatchers.AddOperator(item) - return nil, nil } - // Compile the workflow request - if len(template.Workflows) > 0 { - compiled := &template.Workflow - compileWorkflow(filePath, preprocessor, options, compiled, options.WorkflowLoader) - template.CompiledWorkflow = compiled - template.CompiledWorkflow.Options = options - } - template.Path = filePath - if !options.DoNotCache { - parser.compiledTemplatesCache.Store(filePath, template, nil, err) - } - return template, nil + return parseFromSource(filePath, preprocessor, options, parser) } // isGlobalMatchersEnabled checks if any of requests in the template diff --git a/pkg/templates/compile_bench_test.go b/pkg/templates/compile_bench_test.go new file mode 100644 index 0000000000..041dc92d85 --- /dev/null +++ b/pkg/templates/compile_bench_test.go @@ -0,0 +1,70 @@ +package templates_test + +import ( + "bytes" + "io" + "os" + "testing" + + "github.com/projectdiscovery/nuclei/v3/pkg/templates" +) + +func BenchmarkParse(b *testing.B) { + filePath := "tests/match-1.yaml" + + setup() + b.ResetTimer() + b.ReportAllocs() + + for b.Loop() { + _, err := templates.Parse(filePath, nil, executerOpts) + if err != nil { + b.Fatalf("could not parse template: %s", err) + } + } +} + +func BenchmarkParseTemplateFromReader(b *testing.B) { + filePath := "tests/match-1.yaml" + + file, err := os.Open(filePath) + if err != nil { + b.Fatalf("could not open template file: %s", err) + } + defer func() { + _ = file.Close() + }() + + content, err := io.ReadAll(file) + if err != nil { + b.Fatalf("could not read template file: %s", err) + } + + setup() + + // Prepare the options with template path set. 
+ // + // TODO(dwisiswant0): ParseTemplateFromReader should ideally work with just + // a reader, without requiring path information; that would make it usable + // for in-memory templates or templates from non-file sources. Today the + // function couples the parsing logic to file path info: although it should + // only care about the content it reads, it actually requires the template + // path to be set in the options. + // + // The current implementation fails with a confusing error about template + // format detection, "no template name field provided", rather than + // explicitly stating that a path is required. + opts := executerOpts.Copy() + opts.TemplatePath = filePath + + b.ResetTimer() + b.ReportAllocs() + + for b.Loop() { + reader := bytes.NewReader(content) + _, err := templates.ParseTemplateFromReader(reader, nil, opts) + if err != nil { + b.Fatalf("could not parse template from reader: %s", err) + } + } +} diff --git a/pkg/testutils/testutils.go b/pkg/testutils/testutils.go index 521654c452..49b79e6421 100644 --- a/pkg/testutils/testutils.go +++ b/pkg/testutils/testutils.go @@ -94,9 +94,6 @@ type TemplateInfo struct { func NewMockExecuterOptions(options *types.Options, info *TemplateInfo) *protocols.ExecutorOptions { progressImpl, _ := progress.NewStatsTicker(0, false, false, false, 0) executerOpts := &protocols.ExecutorOptions{ - TemplateID: info.ID, - TemplateInfo: info.Info, - TemplatePath: info.Path, Output: NewMockOutputWriter(options.OmitTemplate), Options: options, Progress: progressImpl, @@ -106,7 +103,15 @@ func NewMockExecuterOptions(options *types.Options, info *TemplateInfo) *protoco Catalog: disk.NewCatalog(config.DefaultConfig.TemplatesDirectory), RateLimiter: ratelimit.New(context.Background(), uint(options.RateLimit), time.Second), } + + if info != nil { + executerOpts.TemplateInfo = info.Info + executerOpts.TemplateID = info.ID + executerOpts.TemplatePath = info.Path + } + executerOpts.CreateTemplateCtxStore() + 
return executerOpts }