From 99627d42f380bd9230c79585ab313e41992e6ec3 Mon Sep 17 00:00:00 2001 From: Andreas Lappe Date: Fri, 18 Apr 2014 18:44:12 +0200 Subject: [PATCH 01/74] Remove the log parameter as web doesn't allow it --- pkg/logstash-web.upstart.ubuntu | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/logstash-web.upstart.ubuntu b/pkg/logstash-web.upstart.ubuntu index 028c4814c4c..465369fcae4 100644 --- a/pkg/logstash-web.upstart.ubuntu +++ b/pkg/logstash-web.upstart.ubuntu @@ -36,7 +36,6 @@ script HOME="${HOME:-$LS_HOME}" JAVA_OPTS="${LS_JAVA_OPTS}" - [ -n "${LS_LOG_FILE}" ] && LS_OPTS="${LSOPTS} -l ${LS_LOG_FILE}" # Reset filehandle limit ulimit -n ${LS_OPEN_FILES} cd "${LS_HOME}" From ae867bc64d5a8bd423174f97850a99b93b721871 Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Thu, 3 Jul 2014 11:53:35 +0400 Subject: [PATCH 02/74] [rabbitmq] March Hare 2.3.0+, Bunny 1.3.1+ --- logstash.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash.gemspec b/logstash.gemspec index 702c5129e76..c299a1ebc2e 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -75,9 +75,9 @@ Gem::Specification.new do |gem| end if RUBY_PLATFORM != 'java' - gem.add_runtime_dependency "bunny", ["~> 1.1.8"] #(MIT license) + gem.add_runtime_dependency "bunny", ["~> 1.3.1"] #(MIT license) else - gem.add_runtime_dependency "march_hare", ["~> 2.1.0"] #(MIT license) + gem.add_runtime_dependency "march_hare", ["~> 2.3.0"] #(MIT license) end if RUBY_VERSION >= '1.9.1' From cc2155919f052bff29ec223d5c4c8213ef1cecc4 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Mon, 14 Jul 2014 17:41:06 -0400 Subject: [PATCH 03/74] use bundler compatible ruby version string --- gembag.rb | 2 +- lib/logstash/environment.rb | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/gembag.rb b/gembag.rb index 86218f6c3c5..8fad3b43753 100644 --- a/gembag.rb +++ b/gembag.rb @@ -43,7 +43,7 @@ def install_gem(name, requirement, target) module Bundler module SharedHelpers def default_lockfile - ruby = "#{LogStash::Environment.ruby_engine}-#{LogStash::Environment.ruby_abi_version}" + ruby = "#{LogStash::Environment.ruby_engine}-#{LogStash::Environment.gem_ruby_version}" return Pathname.new("#{default_gemfile}.#{ruby}.lock") end end diff --git a/lib/logstash/environment.rb b/lib/logstash/environment.rb index 0d44126caa8..03399379c9c 100644 --- a/lib/logstash/environment.rb +++ b/lib/logstash/environment.rb @@ -29,7 +29,7 @@ def gem_target end def set_gem_paths! - gemdir = "#{gem_target}/#{ruby_engine}/#{ruby_abi_version}/" + gemdir = "#{gem_target}/#{ruby_engine}/#{gem_ruby_version}/" ENV["GEM_HOME"] = gemdir ENV["GEM_PATH"] = gemdir end @@ -39,6 +39,11 @@ def ruby_abi_version RUBY_VERSION[/(\d+\.\d+)(\.\d+)*/, 1] end + # @return [String] the ruby version string bundler uses to craft its gem path + def gem_ruby_version + RbConfig::CONFIG["ruby_version"] + end + # @return [String] jruby, ruby, rbx, ... 
def ruby_engine RUBY_ENGINE From 6e3d377bbaea923c76d862e7e357507a884dc6ae Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Fri, 18 Jul 2014 15:26:56 -0700 Subject: [PATCH 04/74] Evaluate dynamic fields in event when used with gsub Closes #1529 --- lib/logstash/filters/mutate.rb | 17 +++++++++--- spec/filters/mutate.rb | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb index 89075508c5c..50bcb949821 100644 --- a/lib/logstash/filters/mutate.rb +++ b/lib/logstash/filters/mutate.rb @@ -191,9 +191,10 @@ def register @logger.error("Invalid gsub configuration. gsub has to define 3 elements per config entry", :field => field, :needle => needle, :replacement => replacement) raise "Bad configuration, aborting." end + @gsub_parsed << { :field => field, - :needle => Regexp.new(needle), + :needle => (needle.index("%{").nil?? Regexp.new(needle): needle), :replacement => replacement } end @@ -303,7 +304,7 @@ def gsub(event) "skipping", :field => field, :value => v) v else - v.gsub(needle, replacement) + gsub_dynamic_fields(event, v, needle, replacement) end end else @@ -312,11 +313,21 @@ def gsub(event) "skipping", :field => field, :value => event[field]) next end - event[field] = event[field].gsub(needle, replacement) + event[field] = gsub_dynamic_fields(event, event[field], needle, replacement) end end # @gsub_parsed.each end # def gsub + private + def gsub_dynamic_fields(event, original, needle, replacement) + if needle.is_a? Regexp + original.gsub(needle, event.sprintf(replacement)) + else + # we need to replace any dynamic fields + original.gsub(Regexp.new(event.sprintf(needle)), event.sprintf(replacement)) + end + end + private def uppercase(event) @uppercase.each do |field| diff --git a/spec/filters/mutate.rb b/spec/filters/mutate.rb index e3a1fbf2284..1c6ee2eacdd 100644 --- a/spec/filters/mutate.rb +++ b/spec/filters/mutate.rb @@ -178,5 +178,53 @@ insist { subject["[foo][bar]"] }.is_a?(Fixnum) end end + + #LOGSTASH-1529 + describe "gsub on a String with dynamic fields (%{}) in pattern" do + config ' + filter { + mutate { + gsub => [ "unicorns", "of type %{unicorn_type}", "green" ] + } + }' + + sample("unicorns" => "Unicorns of type blue are common", "unicorn_type" => "blue") do + insist { subject["unicorns"] } == "Unicorns green are common" + end + end + + #LOGSTASH-1529 + describe "gsub on a String with dynamic fields (%{}) in pattern and replace" do + config ' + filter { + mutate { + gsub => [ "unicorns2", "of type %{unicorn_color}", "%{unicorn_color} and green" ] + } + }' + + sample("unicorns2" => "Unicorns of type blue are common", "unicorn_color" => "blue") do + insist { subject["unicorns2"] } == "Unicorns blue and green are common" + end + end + + #LOGSTASH-1529 + describe "gsub on a String array with dynamic fields in pattern" do + config ' + filter { + mutate { + gsub => [ "unicorns_array", "of type %{color}", "blue and green" ] + } + }' + + sample("unicorns_array" => [ + "Unicorns of type blue are found in Alaska", "Unicorns of type blue are extinct" ], + "color" => "blue" + ) do + insist { subject["unicorns_array"] } == [ + "Unicorns blue and green are found in Alaska", + "Unicorns blue and green are extinct" + ] + end + end end From e086e1c7eb1e80a3c1adfd391f6e87acc623acd0 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Mon, 21 Jul 2014 15:00:39 -0700 Subject: [PATCH 05/74] Add tests for issue to reproduce behavior Fix break_on_match issue while evaluating multiple patterns 
Closes #1547 Make better exception message Add more specs for inputs with arrays Closes #1547 --- lib/logstash/filters/grok.rb | 69 ++++++++++--------- spec/filters/grok.rb | 129 +++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 34 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 5bc50e254dc..08a67688b64 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -257,16 +257,14 @@ def register @match.each do |field, patterns| patterns = [patterns] if patterns.is_a?(String) - if !@patterns.include?(field) - @patterns[field] = Grok::Pile.new - #@patterns[field].logger = @logger - - add_patterns_from_files(@patternfiles, @patterns[field]) - end @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns) patterns.each do |pattern| @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) - @patterns[field].compile(pattern) + grok = Grok.new + grok.logger = @logger unless @logger.nil? + add_patterns_from_files(@patternfiles, grok) + grok.compile(pattern) + @patterns[field] << grok end end # @match.each end # def register @@ -279,8 +277,8 @@ def filter(event) done = false @logger.debug? and @logger.debug("Running grok filter", :event => event); - @patterns.each do |field, grok| - if match(grok, field, event) + @patterns.each do |field, groks| + if match(groks, field, event) matched = true break if @break_on_match end @@ -302,36 +300,38 @@ def filter(event) end # def filter private - def match(grok, field, event) + def match(groks, field, event) input = event[field] if input.is_a?(Array) - success = true + success = false input.each do |input| - grok, match = grok.match(input) - if match - match.each_capture do |capture, value| - handle(capture, value, event) - end - else - success = false - end + success |= match_against_groks(groks, input, event) end return success - #elsif input.is_a?(String) else - # Convert anything else to string (number, hash, etc) - grok, match = grok.match(input.to_s) - return false if !match - - match.each_capture do |capture, value| - handle(capture, value, event) - end - return true + return match_against_groks(groks, input, event) end rescue StandardError => e @logger.warn("Grok regexp threw exception", :exception => e.message) end + private + def match_against_groks(groks, input, event) + matched = false + groks.each do |grok| + # Convert anything else to string (number, hash, etc) + match = grok.match(input.to_s) + if match + match.each_capture do |capture, value| + handle(capture, value, event) + end + matched = true + break if @break_on_match + end + end + return matched + end + private def handle(capture, value, event) handler = @handlers[capture] ||= compile_capture_handler(capture) @@ -392,12 +392,13 @@ def compile_capture_handler(capture) end # def compile_capture_handler private - def add_patterns_from_files(paths, pile) - paths.each { |path| add_patterns_from_file(path, pile) } + def add_patterns_from_files(paths, grok) + paths.each do |path| + if !File.exists?(path) + raise "Grok pattern file does not exist: #{path}" + end + grok.add_patterns_from_file(path) + end end # def add_patterns_from_files - private - def add_patterns_from_file(path, pile) - pile.add_patterns_from_file(path) - end # def add_patterns_from_file end # class LogStash::Filters::Grok diff --git a/spec/filters/grok.rb b/spec/filters/grok.rb index ded857fc617..5d84105b690 100644 --- a/spec/filters/grok.rb +++ b/spec/filters/grok.rb @@ -500,4 +500,133 @@ 
insist { subject["foo"] }.is_a?(String) end end + + describe "break_on_match default should be true and first match should exit filter" do + config <<-CONFIG + filter { + grok { + match => { "message" => "%{INT:foo}" + "somefield" => "%{INT:bar}"} + } + } + CONFIG + + sample("message" => "hello world 123", "somefield" => "testme abc 999") do + insist { subject["foo"] } == "123" + insist { subject["bar"] }.nil? + end + end + + describe "break_on_match when set to false should try all patterns" do + config <<-CONFIG + filter { + grok { + match => { "message" => "%{INT:foo}" + "somefield" => "%{INT:bar}"} + break_on_match => false + } + } + CONFIG + + sample("message" => "hello world 123", "somefield" => "testme abc 999") do + insist { subject["foo"] } == "123" + insist { subject["bar"] } == "999" + end + end + + describe "LOGSTASH-1547 - break_on_match should work on fields with multiple patterns" do + config <<-CONFIG + filter { + grok { + match => { "message" => ["%{GREEDYDATA:name1}beard", "tree%{GREEDYDATA:name2}"] } + break_on_match => false + } + } + CONFIG + + sample "treebranch" do + insist { subject["name2"] } == "branch" + end + + sample "bushbeard" do + insist { subject["name1"] } == "bush" + end + + sample "treebeard" do + insist { subject["name1"] } == "tree" + insist { subject["name2"] } == "beard" + end + end + + describe "break_on_match default for array input with single grok pattern" do + config <<-CONFIG + filter { + grok { + match => { "message" => "%{INT:foo}"} + } + } + CONFIG + + # array input -- + sample("message" => ["hello world 123", "line 23"]) do + insist { subject["foo"] } == ["123", "23"] + insist { subject["tags"] }.nil? + end + + # array input, one of them matches + sample("message" => ["hello world 123", "abc"]) do + insist { subject["foo"] } == "123" + insist { subject["tags"] }.nil? + end + end + + describe "break_on_match = true (default) for array input with multiple grok pattern" do + config <<-CONFIG + filter { + grok { + match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] } + } + } + CONFIG + + # array input -- + sample("message" => ["hello world 123", "line 23"]) do + insist { subject["foo"] } == ["123", "23"] + insist { subject["bar"] }.nil? + insist { subject["tags"] }.nil? + end + + # array input, one of them matches + sample("message" => ["hello world", "line 23"]) do + insist { subject["bar"] } == "hello" + insist { subject["foo"] } == "23" + insist { subject["tags"] }.nil? + end + end + + describe "break_on_match = false for array input with multiple grok pattern" do + config <<-CONFIG + filter { + grok { + match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] } + break_on_match => false + } + } + CONFIG + + # array input -- + sample("message" => ["hello world 123", "line 23"]) do + insist { subject["foo"] } == ["123", "23"] + insist { subject["bar"] } == ["hello", "line"] + insist { subject["tags"] }.nil? + end + + # array input, one of them matches + sample("message" => ["hello world", "line 23"]) do + insist { subject["bar"] } == ["hello", "line"] + insist { subject["foo"] } == "23" + insist { subject["tags"] }.nil? 
+    end
+  end
+end

From c7fb7de5cd82b4322926cd4abebac11695bb54d3 Mon Sep 17 00:00:00 2001
From: Joseph Lawson
Date: Tue, 22 Jul 2014 16:53:22 -0400
Subject: [PATCH 07/74] call vendor-geoip instead of $(GEOIP) in prepare-tarball

Closes #1563
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 99b6c8c2ed1..6bb7d1dc7e4 100644
--- a/Makefile
+++ b/Makefile
@@ -355,7 +355,7 @@ show:
 .PHONY: prepare-tarball
 prepare-tarball tarball zip: WORKDIR=build/tarball/logstash-$(VERSION)
-prepare-tarball: vendor/kibana $(ELASTICSEARCH) $(JRUBY) $(GEOIP) $(TYPESDB) vendor-gems
+prepare-tarball: vendor/kibana $(ELASTICSEARCH) $(JRUBY) vendor-geoip $(TYPESDB) vendor-gems
 prepare-tarball: vendor/ua-parser/regexes.yaml
 prepare-tarball:
 	@echo "=> Preparing tarball"

From 5daa328987a724be8e9fd3b8211a7e718ea6d9d5 Mon Sep 17 00:00:00 2001
From: Jordan Sissel
Date: Thu, 24 Jul 2014 07:35:31 -0700
Subject: [PATCH 08/74] Update ES version docs

ES version 1.0 or greater is now supported by all protocols.

The Elasticsearch eng team has tests to ensure client nodes from all ES
versions >=1.0 work with all ES clusters version >=1.0
---
 lib/logstash/outputs/elasticsearch.rb | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb
index d42eb9eee4c..03c92c41823 100644
--- a/lib/logstash/outputs/elasticsearch.rb
+++ b/lib/logstash/outputs/elasticsearch.rb
@@ -11,8 +11,7 @@
 # need to use this output.
 #
 # *VERSION NOTE*: Your Elasticsearch cluster must be running Elasticsearch
-# %ELASTICSEARCH_VERSION%. If you use any other version of Elasticsearch,
-# you should set `protocol => http` in this plugin.
+# 1.0.0 or later.
 #
 # If you want to set other Elasticsearch options that are not exposed directly
 # as configuration options, there are two methods:
@@ -28,10 +27,6 @@
 #
 # ## Operational Notes
 #
-# Template management requires Elasticsearch version 0.90.7 or later. If you
-# are using a version older than this, please upgrade. You will receive
-# more benefits than just template management!
-#
 # If using the default `protocol` setting ("node"), your firewalls might need
 # to permit port 9300 in *both* directions (from Logstash to Elasticsearch, and
 # Elasticsearch to Logstash)

From 18c4ec2b355480e20b297605521fc8e30ec73197 Mon Sep 17 00:00:00 2001
From: Gaurav Arora
Date: Thu, 15 May 2014 15:03:49 +0530
Subject: [PATCH 09/74] correct documentation of some config elements

Closes #1380
---
 lib/logstash/filters/mutate.rb | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb
index 50bcb949821..9c579415d41 100644
--- a/lib/logstash/filters/mutate.rb
+++ b/lib/logstash/filters/mutate.rb
@@ -17,7 +17,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #     filter {
   #       mutate {
   #         # Renames the 'HOSTORIP' field to 'client_ip'
-  #         rename => [ "HOSTORIP", "client_ip" ]
+  #         rename => { "HOSTORIP" => "client_ip" }
   #       }
   #     }
   config :rename, :validate => :hash
@@ -43,7 +43,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #         replace => [ "message", "%{source_host}: My new message" ]
+  #         replace => { "message" => "%{source_host}: My new message" }
   #       }
   #     }
   config :replace, :validate => :hash
@@ -55,7 +55,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #         update => [ "sample", "My new message" ]
+  #         update => { "sample" => "My new message" }
   #       }
   #     }
   config :update, :validate => :hash
@@ -70,7 +70,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #         convert => [ "fieldname", "integer" ]
+  #         convert => { "fieldname" => "integer" }
   #       }
   #     }
   config :convert, :validate => :hash
@@ -129,7 +129,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #          split => ["fieldname", ","]
+  #          split => { "fieldname" => "," }
   #       }
   #     }
   config :split, :validate => :hash
@@ -140,7 +140,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #         join => ["fieldname", ","]
+  #         join => { "fieldname" => "," }
   #       }
   #     }
   config :join, :validate => :hash
@@ -166,7 +166,7 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   #
   #     filter {
   #       mutate {
-  #         merge => ["dest_field", "added_field"]
+  #         merge => { "dest_field" => "added_field" }
   #       }
   #     }
   config :merge, :validate => :hash

From e1485422e3c5aa03b031d846fe33e4ee4905686a Mon Sep 17 00:00:00 2001
From: Alejandro E Brito Monedero
Date: Tue, 13 May 2014 11:30:49 +0200
Subject: [PATCH 10/74] Update metrics.rb example

Update the example because the output doesn't have a message parameter;
instead it should use a codec. Also update the way logstash is executed.
--- lib/logstash/filters/metrics.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/logstash/filters/metrics.rb b/lib/logstash/filters/metrics.rb index 9d27b4b4b53..e15d43a60e0 100644 --- a/lib/logstash/filters/metrics.rb +++ b/lib/logstash/filters/metrics.rb @@ -74,14 +74,16 @@ # # only emit events with the 'metric' tag # if "metric" in [tags] { # stdout { -# message => "rate: %{events.rate_1m}" +# codec => line { +# format => "rate: %{events.rate_1m}" +# } # } # } # } # # Running the above: # -# % java -jar logstash.jar agent -f example.conf +# % ./logstash-1.4.1/bin/logstash agent -f example.conf # rate: 23721.983566819246 # rate: 24811.395722536377 # rate: 25875.892745934525 From 925ce925ec85e24a85f5b923e836b3e2a3b2be69 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Fri, 25 Jul 2014 17:38:13 -0700 Subject: [PATCH 11/74] Removed version info in LS command Closes #1362 --- lib/logstash/filters/metrics.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/metrics.rb b/lib/logstash/filters/metrics.rb index e15d43a60e0..8d60f7498a5 100644 --- a/lib/logstash/filters/metrics.rb +++ b/lib/logstash/filters/metrics.rb @@ -83,7 +83,7 @@ # # Running the above: # -# % ./logstash-1.4.1/bin/logstash agent -f example.conf +# % bin/logstash -f example.conf # rate: 23721.983566819246 # rate: 24811.395722536377 # rate: 25875.892745934525 From f8e00994fe5a57bff5b138b2a97a80c4d8c1a6e6 Mon Sep 17 00:00:00 2001 From: Brian DeFreitas Date: Fri, 21 Mar 2014 12:58:51 -0700 Subject: [PATCH 12/74] Add tilde to UNIXPATH Tilde '~' is a valid character in UNIXPATHs. Closes #1195 --- patterns/grok-patterns | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patterns/grok-patterns b/patterns/grok-patterns index 4850b44ebd0..37c70487a6a 100755 --- a/patterns/grok-patterns +++ b/patterns/grok-patterns @@ -31,7 +31,7 @@ HOSTPORT %{IPORHOST}:%{POSINT} # paths PATH (?:%{UNIXPATH}|%{WINPATH}) -UNIXPATH (?>/(?>[\w_%!$@:.,-]+|\\.)*)+ +UNIXPATH (?>/(?>[\w_%!$@:.,~-]+|\\.)*)+ TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? 
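For reference, a minimal sketch of how the updated UNIXPATH pattern can be
exercised with the jls-grok gem that logstash already depends on (the pattern
string is copied from the + line in the hunk above; the sample path and the
"path" field name are made-up illustrations, not part of the patch):

    require "grok-pure"   # provided by the jls-grok gem

    grok = Grok.new
    # Register the new UNIXPATH definition, then compile a pattern using it.
    grok.add_pattern("UNIXPATH", '(?>/(?>[\w_%!$@:.,~-]+|\\.)*)+')
    grok.compile("%{UNIXPATH:path}")

    match = grok.match("/home/user/~old-backups/syslog.1")
    match.each_capture { |name, value| puts "#{name} => #{value}" }
    # With '~' in the character class the capture covers the whole path;
    # the previous pattern would have stopped matching at "/home/user/".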
From 95abfdebe8f7271163b3a2b0909377f4c993716f Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Fri, 11 Apr 2014 07:31:27 +0200 Subject: [PATCH 13/74] reject invalid UNIX timestamp --- lib/logstash/filters/date.rb | 18 ++++++++++++------ spec/filters/date.rb | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index 937127a646b..4faf64c6be7 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -133,14 +133,20 @@ def setupMatcher(field, locale, value) end parser = lambda { |date| joda_parser.parseMillis(date) } when "UNIX" # unix epoch - joda_instant = org.joda.time.Instant.java_class.constructor(Java::long).method(:new_instance) - #parser = lambda { |date| joda_instant.call((date.to_f * 1000).to_i).to_java.toDateTime } - parser = lambda { |date| (date.to_f * 1000).to_i } + parser = lambda do |date| + if /\d+/ === date || date.is_a?(Numeric) + (date.to_f * 1000).to_i + else + raise "Invalid UNIX epoch value '#{date}'" + end + end when "UNIX_MS" # unix epoch in ms - joda_instant = org.joda.time.Instant.java_class.constructor(Java::long).method(:new_instance) parser = lambda do |date| - #return joda_instant.call(date.to_i).to_java.toDateTime - return date.to_i + if /\d+/ === date || date.is_a?(Numeric) + date.to_i + else + raise "Invalid UNIX epoch value '#{date}'" + end end when "TAI64N" # TAI64 with nanoseconds, -10000 accounts for leap seconds joda_instant = org.joda.time.Instant.java_class.constructor(Java::long).method(:new_instance) diff --git a/spec/filters/date.rb b/spec/filters/date.rb index 2419f4399ea..6b5633c4655 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -112,6 +112,12 @@ insist { subject["@timestamp"].time } == Time.iso8601(output).utc end end # times.each + + #Invalid value should not be evaluated to zero (String#to_i madness) + sample("mydate" => "%{bad_value}") do + insist { subject["mydate"] } == "%{bad_value}" + insist { subject["@timestamp"] } != Time.iso8601("1970-01-01T00:00:00.000Z").utc + end end describe "parsing microsecond-precise times with UNIX (#213)" do @@ -128,6 +134,18 @@ # Joda time only supports milliseconds :\ insist { subject.timestamp.time } == Time.iso8601("2012-10-16T12:15:44.123-07:00").utc end + + #Support float values + sample("mydate" => 1350414944.123456) do + insist { subject["mydate"] } == 1350414944.123456 + insist { subject.timestamp } == Time.iso8601("2012-10-16T12:15:44.123-07:00").utc + end + + #Invalid value should not be evaluated to zero (String#to_i madness) + sample("mydate" => "%{bad_value}") do + insist { subject["mydate"] } == "%{bad_value}" + insist { subject["@timestamp"] } != Time.iso8601("1970-01-01T00:00:00.000Z").utc + end end describe "parsing with UNIX_MS" do From 2e21df8e9ed2f66475455514fb46b2e3afc6c080 Mon Sep 17 00:00:00 2001 From: wiibaa Date: Sun, 11 May 2014 18:02:10 +0200 Subject: [PATCH 14/74] stronger regex and code-style change --- lib/logstash/filters/date.rb | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index 4faf64c6be7..030f909d6c4 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -134,19 +134,13 @@ def setupMatcher(field, locale, value) parser = lambda { |date| joda_parser.parseMillis(date) } when "UNIX" # unix epoch parser = lambda do |date| - if /\d+/ === date || date.is_a?(Numeric) - (date.to_f * 1000).to_i - else - raise "Invalid UNIX epoch value 
'#{date}'" - end + raise "Invalid UNIX epoch value '#{date}'" unless /^\d+(\.\d+)?$/ === date || date.is_a?(Numeric) + (date.to_f * 1000).to_i end when "UNIX_MS" # unix epoch in ms parser = lambda do |date| - if /\d+/ === date || date.is_a?(Numeric) - date.to_i - else - raise "Invalid UNIX epoch value '#{date}'" - end + raise "Invalid UNIX epoch value '#{date}'" unless /^\d+$/ === date || date.is_a?(Numeric) + date.to_i end when "TAI64N" # TAI64 with nanoseconds, -10000 accounts for leap seconds joda_instant = org.joda.time.Instant.java_class.constructor(Java::long).method(:new_instance) From 51949602f2090a61314951e25ff08c97aba5e726 Mon Sep 17 00:00:00 2001 From: wiibaa Date: Mon, 23 Jun 2014 14:08:22 +0200 Subject: [PATCH 15/74] fix failing test --- spec/filters/date.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/filters/date.rb b/spec/filters/date.rb index 6b5633c4655..20f336a15e0 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -138,7 +138,7 @@ #Support float values sample("mydate" => 1350414944.123456) do insist { subject["mydate"] } == 1350414944.123456 - insist { subject.timestamp } == Time.iso8601("2012-10-16T12:15:44.123-07:00").utc + insist { subject["@timestamp"].time } == Time.iso8601("2012-10-16T12:15:44.123-07:00").utc end #Invalid value should not be evaluated to zero (String#to_i madness) From 0f790649bc59a89a05cabd90ff339a829ce7137b Mon Sep 17 00:00:00 2001 From: wiibaa Date: Wed, 30 Jul 2014 07:58:25 +0200 Subject: [PATCH 16/74] use non-capturing group in float regex Closes #1253 --- lib/logstash/filters/date.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index 030f909d6c4..8b98529c1e9 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -134,7 +134,7 @@ def setupMatcher(field, locale, value) parser = lambda { |date| joda_parser.parseMillis(date) } when "UNIX" # unix epoch parser = lambda do |date| - raise "Invalid UNIX epoch value '#{date}'" unless /^\d+(\.\d+)?$/ === date || date.is_a?(Numeric) + raise "Invalid UNIX epoch value '#{date}'" unless /^\d+(?:\.\d+)?$/ === date || date.is_a?(Numeric) (date.to_f * 1000).to_i end when "UNIX_MS" # unix epoch in ms From 4c86e3342f4f099ce914e42852ebef17250de27e Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 8 Jul 2014 22:58:46 +0000 Subject: [PATCH 17/74] config grammar: Allow spaces in [field references][like this] Tests included. 
Note about grammar compilation: I had to use treetop 1.4.15 (1.5.3 generates ruby code that fails to parse, haven't spent any time debugging) Fixes #1513 --- lib/logstash/config/grammar.rb | 2 +- lib/logstash/config/grammar.treetop | 2 +- spec/conditionals/test.rb | 24 ++++++++++++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/logstash/config/grammar.rb b/lib/logstash/config/grammar.rb index 750c83f0102..1852c851a76 100644 --- a/lib/logstash/config/grammar.rb +++ b/lib/logstash/config/grammar.rb @@ -3453,7 +3453,7 @@ def _nt_selector_element if r1 s2, i2 = [], index loop do - if has_terminal?('\G[^\\], ]', true, index) + if has_terminal?('\G[^\\],]', true, index) r3 = true @index += 1 else diff --git a/lib/logstash/config/grammar.treetop b/lib/logstash/config/grammar.treetop index 42a62b21820..e46fc55307a 100644 --- a/lib/logstash/config/grammar.treetop +++ b/lib/logstash/config/grammar.treetop @@ -234,7 +234,7 @@ grammar LogStashConfig end rule selector_element - "[" [^\], ]+ "]" + "[" [^\],]+ "]" end diff --git a/spec/conditionals/test.rb b/spec/conditionals/test.rb index 8886032ac13..2e06c593750 100644 --- a/spec/conditionals/test.rb +++ b/spec/conditionals/test.rb @@ -1,6 +1,6 @@ require "test_utils" -module ConditionalFancines +module ConditionalFanciness def description return example.metadata[:example_group][:description_args][0] end @@ -23,7 +23,7 @@ def conditional(expression, &block) describe "conditionals" do extend LogStash::RSpec - extend ConditionalFancines + extend ConditionalFanciness describe "simple" do config <<-CONFIG @@ -320,4 +320,24 @@ def conditional(expression, &block) end end end + + describe "field references" do + conditional "[field with space]" do + sample("field with space" => "hurray") do + insist { subject["tags"].include?("success") } + end + end + + conditional "[field with space] == 'hurray'" do + sample("field with space" => "hurray") do + insist { subject["tags"].include?("success") } + end + end + + conditional "[nested field][reference with][some spaces] == 'hurray'" do + sample({"nested field" => { "reference with" => { "some spaces" => "hurray" } } }) do + insist { subject["tags"].include?("success") } + end + end + end end From 6b20d179aaff2da26cebc9a740e73989a112e200 Mon Sep 17 00:00:00 2001 From: Victor Garcia Date: Wed, 16 Jul 2014 12:10:24 -0700 Subject: [PATCH 18/74] Adding delaycompress to logrotate To avoid issues like: /etc/cron.daily/logrotate: gzip: stdin: file size changed while zipping This change is harmless and will prevent errors like this one --- pkg/logrotate.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/logrotate.conf b/pkg/logrotate.conf index 191a3a5630b..69977aeecc8 100644 --- a/pkg/logrotate.conf +++ b/pkg/logrotate.conf @@ -3,6 +3,7 @@ rotate 7 copytruncate compress + delaycompress missingok notifempty } From e9e3ad5a5037e3cff33c87056b7587562375e8cd Mon Sep 17 00:00:00 2001 From: yummylogs Date: Fri, 1 Aug 2014 12:20:03 -0700 Subject: [PATCH 19/74] Update multiline.rb --- lib/logstash/filters/multiline.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/logstash/filters/multiline.rb b/lib/logstash/filters/multiline.rb index 76fcff420fd..6a22f5e550b 100644 --- a/lib/logstash/filters/multiline.rb +++ b/lib/logstash/filters/multiline.rb @@ -10,6 +10,8 @@ # from files into a single event. For example - joining java exception and # stacktrace messages into a single event. # +# NOTE: This filter will not work with multiple worker threads "-w 2" on the logstash command line. 
+# # The config looks like this: # # filter { From bdd69065a438c7ab05a8631ede567147797ffedf Mon Sep 17 00:00:00 2001 From: Simon Mulser Date: Mon, 4 Aug 2014 00:13:17 +0200 Subject: [PATCH 20/74] add unique tag for grok parser in module input --- lib/logstash/inputs/syslog.rb | 1 + spec/inputs/syslog.rb | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/syslog.rb b/lib/logstash/inputs/syslog.rb index dddf08b7f80..6491f6892c9 100644 --- a/lib/logstash/inputs/syslog.rb +++ b/lib/logstash/inputs/syslog.rb @@ -56,6 +56,7 @@ def register @grok_filter = LogStash::Filters::Grok.new( "overwrite" => "message", "match" => { "message" => "<%{POSINT:priority}>%{SYSLOGLINE}" }, + "tag_on_failure" => ["_grokparsefailure_sysloginputplugin"], ) @date_filter = LogStash::Filters::Date.new( diff --git a/spec/inputs/syslog.rb b/spec/inputs/syslog.rb index 37fa3efcfed..58275bd0e6c 100644 --- a/spec/inputs/syslog.rb +++ b/spec/inputs/syslog.rb @@ -29,6 +29,8 @@ socket.close events = event_count.times.collect { queue.pop } + + insist { events.length } == event_count event_count.times do |i| insist { events[i]["priority"] } == 164 insist { events[i]["severity"] } == 4 @@ -36,5 +38,36 @@ end end end -end + describe "adds unique tag when grok parsing fails" do + port = 5511 + event_count = 10 + + config <<-CONFIG + input { + syslog { + type => "blah" + port => #{port} + } + } + CONFIG + + input do |pipeline, queue| + Thread.new { pipeline.run } + sleep 0.1 while !pipeline.ready? + + socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + event_count.times do |i| + socket.puts("message which causes the a grok parse failure") + end + socket.close + + events = event_count.times.collect { queue.pop } + + insist { events.length } == event_count + event_count.times do |i| + insist { events[i]["tags"] } == ["_grokparsefailure_sysloginputplugin"] + end + end + end +end From 409dc8c36622c649bfd8a1905cbfc0ead174f7c7 Mon Sep 17 00:00:00 2001 From: Avishai Ish-Shalom Date: Thu, 7 Aug 2014 08:38:51 +0300 Subject: [PATCH 21/74] LOGSTASH-2276 fixed if/else branch mixup in rabbitmq input --- lib/logstash/inputs/rabbitmq/bunny.rb | 4 ++-- lib/logstash/inputs/rabbitmq/march_hare.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/rabbitmq/bunny.rb b/lib/logstash/inputs/rabbitmq/bunny.rb index 4594143438a..c933f7a4af8 100644 --- a/lib/logstash/inputs/rabbitmq/bunny.rb +++ b/lib/logstash/inputs/rabbitmq/bunny.rb @@ -32,9 +32,9 @@ def register @settings[:verify_ssl] = @verify_ssl if @verify_ssl proto = if @ssl - "amqp" - else "amqps" + else + "amqp" end @connection_url = "#{proto}://#{@user}@#{@host}:#{@port}#{vhost}/#{@queue}" diff --git a/lib/logstash/inputs/rabbitmq/march_hare.rb b/lib/logstash/inputs/rabbitmq/march_hare.rb index 6a80d4de2af..d2f0f0bc59f 100644 --- a/lib/logstash/inputs/rabbitmq/march_hare.rb +++ b/lib/logstash/inputs/rabbitmq/march_hare.rb @@ -22,9 +22,9 @@ def register @settings[:tls] = @ssl if @ssl proto = if @ssl - "amqp" - else "amqps" + else + "amqp" end @connection_url = "#{proto}://#{@user}@#{@host}:#{@port}#{vhost}/#{@queue}" From da147dba1efa9127659e95d1702588e594eeb378 Mon Sep 17 00:00:00 2001 From: Chris Martin Date: Thu, 7 Aug 2014 03:03:08 -0400 Subject: [PATCH 22/74] fix typos: elasticearch -> elasticsearch --- docs/tutorials/getting-started-with-logstash.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/docs/tutorials/getting-started-with-logstash.asciidoc b/docs/tutorials/getting-started-with-logstash.asciidoc index 3bc39f7d9b9..26ddf29a1eb 100644 --- a/docs/tutorials/getting-started-with-logstash.asciidoc +++ b/docs/tutorials/getting-started-with-logstash.asciidoc @@ -67,7 +67,7 @@ goodnight moon So, by re-configuring the "stdout" output (adding a "codec"), we can change the output of Logstash. By adding inputs, outputs and filters to your configuration, it's possible to massage the log data in many ways, in order to maximize flexibility of the stored data when you are querying it. == Storing logs with Elasticsearch -Now, you're probably saying, "that's all fine and dandy, but typing all my logs into Logstash isn't really an option, and merely seeing them spit to STDOUT isn't very useful." Good point. First, let's set up Elasticsearch to store the messages we send into Logstash. If you don't have Elasticearch already installed, you can http://www.elasticsearch.org/download/[download the RPM or DEB package], or install manually by downloading the current release tarball, by issuing the following four commands: +Now, you're probably saying, "that's all fine and dandy, but typing all my logs into Logstash isn't really an option, and merely seeing them spit to STDOUT isn't very useful." Good point. First, let's set up Elasticsearch to store the messages we send into Logstash. If you don't have Elasticsearch already installed, you can http://www.elasticsearch.org/download/[download the RPM or DEB package], or install manually by downloading the current release tarball, by issuing the following four commands: ---- curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-%ELASTICSEARCH_VERSION%.tar.gz tar zxvf elasticsearch-%ELASTICSEARCH_VERSION%.tar.gz @@ -120,7 +120,7 @@ which should return something like this: Congratulations! You've successfully stashed logs in Elasticsearch via Logstash. === Elasticsearch Plugins (an aside) -Another very useful tool for querying your Logstash data (and Elasticsearch in general) is the Elasticearch-kopf plugin. Here is more information on http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-plugins.html[Elasticsearch plugins]. To install elasticsearch-kopf, simply issue the following command in your Elasticsearch directory (the same one in which you ran Elasticsearch earlier): +Another very useful tool for querying your Logstash data (and Elasticsearch in general) is the Elasticsearch-kopf plugin. Here is more information on http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-plugins.html[Elasticsearch plugins]. 
To install elasticsearch-kopf, simply issue the following command in your Elasticsearch directory (the same one in which you ran Elasticsearch earlier): ---- bin/plugin -install lmenezes/elasticsearch-kopf ---- From cf9844dcbd3285e845d729e3731f59d16c70c3bd Mon Sep 17 00:00:00 2001 From: Kyle House Date: Mon, 11 Aug 2014 16:19:20 -0500 Subject: [PATCH 23/74] Add sprintf ability to room_id field in hipchat output --- lib/logstash/outputs/hipchat.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/hipchat.rb b/lib/logstash/outputs/hipchat.rb index 56198588999..00617b24d7d 100644 --- a/lib/logstash/outputs/hipchat.rb +++ b/lib/logstash/outputs/hipchat.rb @@ -45,7 +45,7 @@ def receive(event) return unless output?(event) hipchat_data = Hash.new - hipchat_data['room_id'] = @room_id + hipchat_data['room_id'] = event.sprintf(@room_id) hipchat_data['from'] = @from hipchat_data['color'] = @color hipchat_data['notify'] = @trigger_notify ? "1" : "0" From adfbc34229ef48c7744cd91ea6126c4df722438d Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Tue, 19 Aug 2014 12:38:07 +0000 Subject: [PATCH 24/74] Update Gemfile for package testing --- tools/Gemfile.beaker | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/Gemfile.beaker b/tools/Gemfile.beaker index 33b02d3f0d0..bcc56637eeb 100644 --- a/tools/Gemfile.beaker +++ b/tools/Gemfile.beaker @@ -1,11 +1,12 @@ source 'https://rubygems.org' -gem 'beaker', :git => 'https://github.com/electrical/beaker.git', :branch => 'docker_test' +gem 'beaker' gem 'beaker-rspec' gem 'pry' -gem 'docker-api' +gem 'docker-api', '~> 1.13.0' gem 'rubysl-securerandom' gem 'rspec_junit_formatter' +gem 'rspec', '~> 2.14.0' case RUBY_VERSION when '1.8.7' From dbe6b33d100e240a9a5cc09510e81048189186ef Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 21 Aug 2014 14:51:21 +0000 Subject: [PATCH 25/74] Update version 1.5.0.dev --- lib/logstash/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/version.rb b/lib/logstash/version.rb index 4094d0f02d5..1fbd17b732e 100644 --- a/lib/logstash/version.rb +++ b/lib/logstash/version.rb @@ -1,6 +1,6 @@ # encoding: utf-8 # The version of logstash. -LOGSTASH_VERSION = "1.4.1" +LOGSTASH_VERSION = "1.5.0.dev" # Note to authors: this should not include dashes because 'gem' barfs if # you include a dash in the version string. From 637694557cd70c30602ee70f0583a23b8d38db8f Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 22 Aug 2014 23:26:42 +0000 Subject: [PATCH 26/74] Update march_hare and bunny to latest --- logstash.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash.gemspec b/logstash.gemspec index c299a1ebc2e..d9e658ea761 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -75,9 +75,9 @@ Gem::Specification.new do |gem| end if RUBY_PLATFORM != 'java' - gem.add_runtime_dependency "bunny", ["~> 1.3.1"] #(MIT license) + gem.add_runtime_dependency "bunny", ["~> 1.4.0"] #(MIT license) else - gem.add_runtime_dependency "march_hare", ["~> 2.3.0"] #(MIT license) + gem.add_runtime_dependency "march_hare", ["~> 2.5.1"] #(MIT license) end if RUBY_VERSION >= '1.9.1' From abe6bdbbd83684ace49507257b9f67dfe188a973 Mon Sep 17 00:00:00 2001 From: Marc Fournier Date: Tue, 26 Aug 2014 11:59:26 +0200 Subject: [PATCH 27/74] LOGSTASH-1886: intercept exception raised by march_hare Since ae867bc64, the march_hare version we use raises an error when the network link is broken while the socket is idle. 
This case must be handled by logstash to initiate the reconnection process. --- lib/logstash/outputs/rabbitmq/march_hare.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/rabbitmq/march_hare.rb b/lib/logstash/outputs/rabbitmq/march_hare.rb index cdee3cf4bd9..f7bdd9304db 100644 --- a/lib/logstash/outputs/rabbitmq/march_hare.rb +++ b/lib/logstash/outputs/rabbitmq/march_hare.rb @@ -44,7 +44,7 @@ def publish_serialized(message) else @logger.warn("Tried to send a message, but not connected to RabbitMQ.") end - rescue MarchHare::Exception, com.rabbitmq.client.AlreadyClosedException => e + rescue MarchHare::Exception, IOError, com.rabbitmq.client.AlreadyClosedException => e @connected.set(false) n = 10 From a764ed956ef18cc4829fdf06057211351bdf25bf Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Tue, 26 Aug 2014 14:36:27 -0700 Subject: [PATCH 28/74] Added more tests without the need for live syslog input Closes #1593 --- lib/logstash/inputs/syslog.rb | 4 ++-- spec/inputs/syslog.rb | 25 ++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/lib/logstash/inputs/syslog.rb b/lib/logstash/inputs/syslog.rb index 6491f6892c9..8a4351e5907 100644 --- a/lib/logstash/inputs/syslog.rb +++ b/lib/logstash/inputs/syslog.rb @@ -56,7 +56,7 @@ def register @grok_filter = LogStash::Filters::Grok.new( "overwrite" => "message", "match" => { "message" => "<%{POSINT:priority}>%{SYSLOGLINE}" }, - "tag_on_failure" => ["_grokparsefailure_sysloginputplugin"], + "tag_on_failure" => ["_grokparsefailure_sysloginput"], ) @date_filter = LogStash::Filters::Date.new( @@ -198,7 +198,7 @@ def close_tcp def syslog_relay(event) @grok_filter.filter(event) - if event["tags"].nil? || !event["tags"].include?("_grokparsefailure") + if event["tags"].nil? 
|| !event["tags"].include?(@grok_filter.tag_on_failure) # Per RFC3164, priority = (facility * 8) + severity # = (facility << 3) & (severity) priority = event["priority"].to_i rescue 13 diff --git a/spec/inputs/syslog.rb b/spec/inputs/syslog.rb index 58275bd0e6c..ef40a4aac37 100644 --- a/spec/inputs/syslog.rb +++ b/spec/inputs/syslog.rb @@ -1,11 +1,13 @@ # coding: utf-8 require "test_utils" require "socket" +require "logstash/inputs/syslog" +require "logstash/event" -describe "inputs/syslog", :socket => true do +describe "inputs/syslog" do extend LogStash::RSpec - describe "properly handles priority, severity and facilities" do + it "should properly handle priority, severity and facilities", :socket => true do port = 5511 event_count = 10 @@ -39,7 +41,7 @@ end end - describe "adds unique tag when grok parsing fails" do + it "should add unique tag when grok parsing fails with live syslog input", :socket => true do port = 5511 event_count = 10 @@ -70,4 +72,21 @@ end end end + + it "should add unique tag when grok parsing fails" do + input = LogStash::Inputs::Syslog.new({}) + input.register + + # event which is not syslog should have a new tag + event = LogStash::Event.new({ "message" => "hello world, this is not syslog RFC3164" }) + input.syslog_relay(event) + insist { event["tags"] } == ["_grokparsefailure_sysloginput"] + + syslog_event = LogStash::Event.new({ "message" => "<164>Oct 26 15:19:25 1.2.3.4 %ASA-4-106023: Deny udp src DRAC:10.1.2.3/43434" }) + input.syslog_relay(syslog_event) + insist { syslog_event["priority"] } == 164 + insist { syslog_event["severity"] } == 4 + insist { syslog_event["tags"] } == nil + end + end From 8351fbd401c928590f0da092fee8478950f681f1 Mon Sep 17 00:00:00 2001 From: wiibaa Date: Fri, 22 Aug 2014 13:23:55 +0200 Subject: [PATCH 29/74] filter/mutate raise configError in register Closes #1656 --- lib/logstash/filters/mutate.rb | 13 ++++++------- spec/filters/mutate.rb | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb index 9c579415d41..ffee7e09c1c 100644 --- a/lib/logstash/filters/mutate.rb +++ b/lib/logstash/filters/mutate.rb @@ -177,19 +177,18 @@ def register # TODO(sissel): Validate conversion requests if provided. @convert.nil? or @convert.each do |field, type| if !valid_conversions.include?(type) - @logger.error("Invalid conversion type", - "type" => type, "expected one of" => valid_types) - # TODO(sissel): It's 2011, man, let's actually make like.. a proper - # 'configuration broken' exception - raise "Bad configuration, aborting." + raise LogStash::ConfigurationError, I18n.t("logstash.agent.configuration.invalid_plugin_register", + :plugin => "filter", :type => "mutate", + :error => "Invalid conversion type '#{type}', expected one of '#{valid_conversions.join(',')}'") end end # @convert.each @gsub_parsed = [] @gsub.nil? or @gsub.each_slice(3) do |field, needle, replacement| if [field, needle, replacement].any? {|n| n.nil?} - @logger.error("Invalid gsub configuration. gsub has to define 3 elements per config entry", :field => field, :needle => needle, :replacement => replacement) - raise "Bad configuration, aborting." + raise LogStash::ConfigurationError, I18n.t("logstash.agent.configuration.invalid_plugin_register", + :plugin => "filter", :type => "mutate", + :error => "Invalid gsub configuration #{[field, needle, replacement]}. 
gsub requires 3 non-nil elements per config entry") end @gsub_parsed << { diff --git a/spec/filters/mutate.rb b/spec/filters/mutate.rb index 1c6ee2eacdd..ea9a318eb6b 100644 --- a/spec/filters/mutate.rb +++ b/spec/filters/mutate.rb @@ -6,6 +6,35 @@ describe LogStash::Filters::Mutate do extend LogStash::RSpec + context "config validation" do + describe "invalid convert type should raise a configuration error" do + config <<-CONFIG + filter { + mutate { + convert => [ "message", "int"] //should be integer + } + } + CONFIG + + sample "not_really_important" do + insist {subject}.raises LogStash::ConfigurationError + end + end + describe "invalid gsub triad should raise a configuration error" do + config <<-CONFIG + filter { + mutate { + gsub => [ "message", "toreplace"] + } + } + CONFIG + + sample "not_really_important" do + insist {subject}.raises LogStash::ConfigurationError + end + end + end + describe "basics" do config <<-CONFIG filter { From d7bb4df9bfe8e7617124e97e4a964ad0f243b1f8 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Mon, 11 Aug 2014 11:47:43 -0700 Subject: [PATCH 30/74] More fixes to use new grok lib --- lib/logstash/filters/grok.rb | 79 +++++++++--------------------------- 1 file changed, 19 insertions(+), 60 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 08a67688b64..c59cd2d7f4a 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -263,7 +263,7 @@ def register grok = Grok.new grok.logger = @logger unless @logger.nil? add_patterns_from_files(@patternfiles, grok) - grok.compile(pattern) + grok.compile(pattern, @named_captures_only) @patterns[field] << grok end end # @match.each @@ -320,76 +320,35 @@ def match_against_groks(groks, input, event) matched = false groks.each do |grok| # Convert anything else to string (number, hash, etc) - match = grok.match(input.to_s) - if match - match.each_capture do |capture, value| - handle(capture, value, event) - end + matched = grok.match_and_capture(input.to_s) do |field, value| matched = true - break if @break_on_match + handle(field, value, event) end + break if matched and @break_on_match end return matched end private - def handle(capture, value, event) - handler = @handlers[capture] ||= compile_capture_handler(capture) - return handler.call(value, event) - end - - private - def compile_capture_handler(capture) - # SYNTAX:SEMANTIC:TYPE - syntax, semantic, coerce = capture.split(":") - - # each_capture do |fullname, value| - # capture_handlers[fullname].call(value, event) - # end - - code = [] - code << "# for capture #{capture}" - code << "lambda do |value, event|" - #code << " p :value => value, :event => event" - if semantic.nil? - if @named_captures_only - # Abort early if we are only keeping named (semantic) captures - # and this capture has no semantic name. - code << " return" - else - field = syntax - end - else - field = semantic - end - code << " return if value.nil? || value.empty?" unless @keep_empty_captures - if coerce - case coerce - when "int"; code << " value = value.to_i" - when "float"; code << " value = value.to_f" - end - end + def handle(field, value, event) + return if (value.nil? || (value.is_a?(String) && value.empty?)) unless + @keep_empty_captures - code << " # field: #{field}" if @overwrite.include?(field) - code << " event[field] = value" + event[field] = value else - code << " v = event[field]" - code << " if v.nil?" 
- code << " event[field] = value" - code << " elsif v.is_a?(Array)" - code << " event[field] << value" - code << " elsif v.is_a?(String)" - # Promote to array since we aren't overwriting. - code << " event[field] = [v, value]" - code << " end" + v = event[field] + if v.nil? + event[field] = value + elsif v.is_a?(Array) + event[field] << value + elsif v.is_a?(String) + #puts v, value + # Promote to array since we aren't overwriting. + event[field] = [v, value] + end end - code << " return" - code << "end" - - #puts code - return eval(code.join("\n"), binding, "") - end # def compile_capture_handler + end private def add_patterns_from_files(paths, grok) From 7b9aba22615a7c011b4458f14ad9ec4a1d980984 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Fri, 22 Aug 2014 12:57:23 -0700 Subject: [PATCH 31/74] Use updated grok version --- logstash.gemspec | 2 +- tools/Gemfile.jruby-1.9.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/logstash.gemspec b/logstash.gemspec index d9e658ea761..e47f41f7e73 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -41,7 +41,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "gelfd", ["0.2.0"] #(Apache 2.0 license) gem.add_runtime_dependency "gelf", ["1.3.2"] #(MIT license) gem.add_runtime_dependency "gmetric", ["0.1.3"] #(MIT license) - gem.add_runtime_dependency "jls-grok", ["0.10.12"] #(BSD license) + gem.add_runtime_dependency "jls-grok", ["0.11.0"] #(BSD license) gem.add_runtime_dependency "mail" #(MIT license) gem.add_runtime_dependency "metriks" #(MIT license) gem.add_runtime_dependency "redis" #(MIT license) diff --git a/tools/Gemfile.jruby-1.9.lock b/tools/Gemfile.jruby-1.9.lock index f05fb0e2800..3b82d07a247 100644 --- a/tools/Gemfile.jruby-1.9.lock +++ b/tools/Gemfile.jruby-1.9.lock @@ -68,7 +68,7 @@ GEM http_parser.rb (0.5.3-java) i18n (0.6.9) insist (1.0.0) - jls-grok (0.10.12) + jls-grok (0.11.0) cabin (>= 0.6.0) jls-lumberjack (0.0.20) jrjackson (0.2.7) @@ -199,7 +199,7 @@ DEPENDENCIES gmetric (= 0.1.3) i18n (>= 0.6.6) insist (= 1.0.0) - jls-grok (= 0.10.12) + jls-grok (= 0.11.0) jls-lumberjack (>= 0.0.20) jrjackson jruby-httpclient From 0aaf8c68742bafa78f83492920902648e651c763 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Fri, 22 Aug 2014 13:01:10 -0700 Subject: [PATCH 32/74] Clean up code Closes #1657 --- lib/logstash/filters/grok.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index c59cd2d7f4a..9ee1506b05f 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -331,8 +331,7 @@ def match_against_groks(groks, input, event) private def handle(field, value, event) - return if (value.nil? || (value.is_a?(String) && value.empty?)) unless - @keep_empty_captures + return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures if @overwrite.include?(field) event[field] = value @@ -343,7 +342,6 @@ def handle(field, value, event) elsif v.is_a?(Array) event[field] << value elsif v.is_a?(String) - #puts v, value # Promote to array since we aren't overwriting. 
event[field] = [v, value] end From 18049e138d04565f280941159cf988e3572ee18c Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Mon, 11 Nov 2013 08:20:23 +0100 Subject: [PATCH 33/74] Support almost-ISO8601 patterns in date filter ISO8601 config --- lib/logstash/filters/date.rb | 47 +++++++++++++++++++++++------------- spec/filters/date.rb | 11 +++++++++ 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index 8b98529c1e9..d5bb7a59c31 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -123,31 +123,42 @@ def register def setupMatcher(field, locale, value) value.each do |format| + parsers = [] case format when "ISO8601" - joda_parser = org.joda.time.format.ISODateTimeFormat.dateTimeParser + iso_parser = org.joda.time.format.ISODateTimeFormat.dateTimeParser + if @timezone + iso_parser = iso_parser.withZone(org.joda.time.DateTimeZone.forID(@timezone)) + else + iso_parser = iso_parser.withOffsetParsed + end + parsers << lambda { |date| iso_parser.parseDateTime(date) } + #Fall back solution of almost ISO8601 date-time + almostISOparsers = [ + org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSZ").getParser(), + org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").getParser() + ].to_java(org.joda.time.format.DateTimeParser) + joda_parser = org.joda.time.format.DateTimeFormatterBuilder.new.append( nil, almostISOparsers ).toFormatter() if @timezone joda_parser = joda_parser.withZone(org.joda.time.DateTimeZone.forID(@timezone)) else joda_parser = joda_parser.withOffsetParsed end - parser = lambda { |date| joda_parser.parseMillis(date) } + parsers << lambda { |date| joda_parser.parseMillis(date) } when "UNIX" # unix epoch - parser = lambda do |date| + parsers << lambda do |date| raise "Invalid UNIX epoch value '#{date}'" unless /^\d+(?:\.\d+)?$/ === date || date.is_a?(Numeric) (date.to_f * 1000).to_i end when "UNIX_MS" # unix epoch in ms - parser = lambda do |date| + parsers << lambda do |date| raise "Invalid UNIX epoch value '#{date}'" unless /^\d+$/ === date || date.is_a?(Numeric) date.to_i end when "TAI64N" # TAI64 with nanoseconds, -10000 accounts for leap seconds - joda_instant = org.joda.time.Instant.java_class.constructor(Java::long).method(:new_instance) - parser = lambda do |date| + parsers << lambda do |date| # Skip leading "@" if it is present (common in tai64n times) date = date[1..-1] if date[0, 1] == "@" - #return joda_instant.call((date[1..15].hex * 1000 - 10000)+(date[16..23].hex/1000000)).to_java.toDateTime return (date[1..15].hex * 1000 - 10000)+(date[16..23].hex/1000000) end else @@ -160,13 +171,13 @@ def setupMatcher(field, locale, value) if (locale != nil) joda_parser = joda_parser.withLocale(locale) end - parser = lambda { |date| joda_parser.parseMillis(date) } + parsers << lambda { |date| joda_parser.parseMillis(date) } end @logger.debug("Adding type with date config", :type => @type, :field => field, :format => format) @parsers[field] << { - :parser => parser, + :parser => parsers, :format => format } end @@ -192,14 +203,16 @@ def filter(event) success = false last_exception = RuntimeError.new "Unknown" fieldparsers.each do |parserconfig| - parser = parserconfig[:parser] - begin - epochmillis = parser.call(value) - success = true - break # success - rescue StandardError, JavaException => e - last_exception = e - end + parserconfig[:parser].each do |parser| + begin + epochmillis = parser.call(value) + success = true + break # success + rescue 
StandardError, JavaException => e + last_exception = e + end + end # parserconfig[:parser].each + break if success end # fieldparsers.each raise last_exception unless success diff --git a/spec/filters/date.rb b/spec/filters/date.rb index 20f336a15e0..cc5f9653fb3 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -27,6 +27,7 @@ date { match => [ "mydate", "ISO8601" ] locale => "en" + timezone => "UTC" } } CONFIG @@ -48,6 +49,16 @@ "2001-09-05T16:36:36.123+0700" => "2001-09-05T09:36:36.123Z", "2001-11-06T20:45:45.123-0000" => "2001-11-06T20:45:45.123Z", "2001-12-07T23:54:54.123Z" => "2001-12-07T23:54:54.123Z", + + #Almost ISO8601 support, with timezone + + "2001-11-06 20:45:45.123-0000" => "2001-11-06T20:45:45.123Z", + "2001-12-07 23:54:54.123Z" => "2001-12-07T23:54:54.123Z", + + #Almost ISO8601 support, without timezone + + "2001-11-06 20:45:45.123" => "2001-11-06T20:45:45.123Z", + } times.each do |input, output| From 3096aeca10d99151ac7c82a98ed28ae10770b2c5 Mon Sep 17 00:00:00 2001 From: wiibaa Date: Fri, 1 Aug 2014 06:30:40 +0200 Subject: [PATCH 34/74] fix rebase Closes #769 --- lib/logstash/filters/date.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index d5bb7a59c31..68e38f6f29e 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -132,7 +132,7 @@ def setupMatcher(field, locale, value) else iso_parser = iso_parser.withOffsetParsed end - parsers << lambda { |date| iso_parser.parseDateTime(date) } + parsers << lambda { |date| iso_parser.parseMillis(date) } #Fall back solution of almost ISO8601 date-time almostISOparsers = [ org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSZ").getParser(), From f5b4f0da793b038de279f2daefb1e37ec7b7bda9 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Fri, 25 Jul 2014 17:38:03 +0000 Subject: [PATCH 35/74] Add Plugin manager to Logstash The plugin manager allows users to install, uninstall, update and list plugins for logstash --- lib/logstash/namespace.rb | 1 + lib/logstash/pluginmanager.rb | 7 +++ lib/logstash/pluginmanager/install.rb | 58 +++++++++++++++++++++ lib/logstash/pluginmanager/list.rb | 38 ++++++++++++++ lib/logstash/pluginmanager/main.rb | 17 ++++++ lib/logstash/pluginmanager/uninstall.rb | 30 +++++++++++ lib/logstash/pluginmanager/update.rb | 69 +++++++++++++++++++++++++ lib/logstash/pluginmanager/util.rb | 51 ++++++++++++++++++ lib/logstash/runner.rb | 17 ++++++ logstash.gemspec | 4 ++ 10 files changed, 292 insertions(+) create mode 100644 lib/logstash/pluginmanager.rb create mode 100644 lib/logstash/pluginmanager/install.rb create mode 100644 lib/logstash/pluginmanager/list.rb create mode 100644 lib/logstash/pluginmanager/main.rb create mode 100644 lib/logstash/pluginmanager/uninstall.rb create mode 100644 lib/logstash/pluginmanager/update.rb create mode 100644 lib/logstash/pluginmanager/util.rb diff --git a/lib/logstash/namespace.rb b/lib/logstash/namespace.rb index 3ff393377a5..93f426b0fd7 100644 --- a/lib/logstash/namespace.rb +++ b/lib/logstash/namespace.rb @@ -11,6 +11,7 @@ module File; end module Web; end module Util; end module PluginMixins; end + module PluginManager; end SHUTDOWN = :shutdown end # module LogStash diff --git a/lib/logstash/pluginmanager.rb b/lib/logstash/pluginmanager.rb new file mode 100644 index 00000000000..fb365f20db4 --- /dev/null +++ b/lib/logstash/pluginmanager.rb @@ -0,0 +1,7 @@ +require "logstash/namespace" + +module LogStash::PluginManager + +require 
'logstash/pluginmanager/main'
+
+end # class Logstash::PluginManager
diff --git a/lib/logstash/pluginmanager/install.rb b/lib/logstash/pluginmanager/install.rb
new file mode 100644
index 00000000000..5d9277c6e8a
--- /dev/null
+++ b/lib/logstash/pluginmanager/install.rb
@@ -0,0 +1,58 @@
+require 'clamp'
+require 'logstash/namespace'
+require 'logstash/pluginmanager'
+require 'logstash/pluginmanager/util'
+require 'rubygems/installer'
+require 'rubygems/uninstaller'
+require 'jar-dependencies'
+require 'jar_install_post_install_hook'
+
+class LogStash::PluginManager::Install < Clamp::Command
+
+  parameter "PLUGIN", "plugin name or file"
+
+  option "--version", "VERSION", "version of the plugin to install", :default => ">= 0"
+
+  option "--proxy", "PROXY", "Use HTTP proxy for remote operations"
+
+  def execute
+
+    ::Gem.configuration.verbose = false
+    ::Gem.configuration[:http_proxy] = proxy
+
+    puts ("validating #{plugin} #{version}")
+
+    unless gem_path = (plugin =~ /\.gem$/ && File.file?(plugin)) ? plugin : LogStash::PluginManager::Util.download_gem(plugin, version)
+      $stderr.puts ("Plugin does not exist '#{plugin}'. Aborting")
+      exit(99)
+    end
+
+    unless gem_meta = LogStash::PluginManager::Util.logstash_plugin?(gem_path)
+      $stderr.puts ("Invalid logstash plugin gem '#{plugin}'. Aborting...")
+      exit(99)
+    end
+
+    puts ("valid logstash plugin. Continuing...")
+
+    if LogStash::PluginManager::Util.installed?(gem_meta.name)
+
+      current = Gem::Specification.find_by_name(gem_meta.name)
+      if Gem::Version.new(current.version) > Gem::Version.new(gem_meta.version)
+        unless LogStash::PluginManager::Util.ask_yesno("Do you wish to downgrade this plugin?")
+          $stderr.puts("Aborting installation")
+          exit(99)
+        end
+      end
+
+      puts ("removing existing plugin before installation")
+      ::Gem.done_installing_hooks.clear
+      ::Gem::Uninstaller.new(gem_meta.name, {}).uninstall
+    end
+
+    ::Gem.configuration.verbose = false
+    specs, _ = ::Gem.install(plugin, version)
+    puts ("Successfully installed '#{specs.name}' with version '#{specs.version}'")
+
+  end
+
+end # class Logstash::PluginManager
diff --git a/lib/logstash/pluginmanager/list.rb b/lib/logstash/pluginmanager/list.rb
new file mode 100644
index 00000000000..54081761b59
--- /dev/null
+++ b/lib/logstash/pluginmanager/list.rb
@@ -0,0 +1,38 @@
+require 'clamp'
+require 'logstash/namespace'
+require 'logstash/pluginmanager'
+require 'logstash/pluginmanager/util'
+require 'rubygems/spec_fetcher'
+
+class LogStash::PluginManager::List < Clamp::Command
+
+  parameter "[PLUGIN]", "Plugin name to search for, leave empty for all plugins"
+
+  option "--group", "NAME", "Show all plugins from a certain group. Can be one of 'output', 'input', 'codec', 'filter'"
+
+  def execute
+
+    if group
+      unless ['input', 'output', 'filter', 'codec'].include?(group)
+        signal_usage_error "Group name not valid"
+      end
+      plugin_name = nil
+    else
+      plugin_name = plugin
+    end
+
+    Gem.configuration.verbose = false
+
+    # If we are listing a group make sure we check all gems
+    specs = LogStash::PluginManager::Util.matching_specs(plugin_name) \
+      .select{|spec| LogStash::PluginManager::Util.logstash_plugin?(spec) } \
+      .select{|spec| group ? group == spec.metadata['logstash_group'] : true}
+    if specs.empty?
+      $stderr.puts ("No plugins found.")
+      exit(99)
+    end
+    specs.each {|spec| puts ("#{spec.name} (#{spec.version})") }
+
+  end
+
+end # class Logstash::PluginManager
diff --git a/lib/logstash/pluginmanager/main.rb b/lib/logstash/pluginmanager/main.rb
new file mode 100644
index 00000000000..e66d562d285
--- /dev/null
+++ b/lib/logstash/pluginmanager/main.rb
@@ -0,0 +1,17 @@
+require "logstash/namespace"
+require "logstash/errors"
+require 'clamp'
+require 'logstash/pluginmanager/install'
+require 'logstash/pluginmanager/uninstall'
+require 'logstash/pluginmanager/list'
+require 'logstash/pluginmanager/update'
+require 'logstash/pluginmanager/util'
+
+class LogStash::PluginManager::Main < Clamp::Command
+
+  subcommand "install", "Install a plugin", LogStash::PluginManager::Install
+  subcommand "uninstall", "Uninstall a plugin", LogStash::PluginManager::Uninstall
+  subcommand "update", "Update a plugin", LogStash::PluginManager::Update
+  subcommand "list", "List all installed plugins", LogStash::PluginManager::List
+
+end # class Logstash::PluginManager::Main
diff --git a/lib/logstash/pluginmanager/uninstall.rb b/lib/logstash/pluginmanager/uninstall.rb
new file mode 100644
index 00000000000..d139f5f5cd9
--- /dev/null
+++ b/lib/logstash/pluginmanager/uninstall.rb
@@ -0,0 +1,30 @@
+require "logstash/namespace"
+require "logstash/logging"
+require "logstash/errors"
+require 'clamp'
+require 'logstash/pluginmanager'
+require 'logstash/pluginmanager/util'
+require 'rubygems/uninstaller'
+
+class LogStash::PluginManager::Uninstall < Clamp::Command
+
+  parameter "PLUGIN", "plugin name"
+
+  public
+  def execute
+
+    ::Gem.configuration.verbose = false
+
+    puts ("Validating removal of #{plugin}.")
+
+    unless gem_data = LogStash::PluginManager::Util.logstash_plugin?(plugin)
+      $stderr.puts ("Trying to remove a non-logstash plugin. Aborting")
+      exit(99)
+    end
+
+    puts ("Uninstalling plugin '#{plugin}' with version '#{gem_data.version}'.")
+    ::Gem::Uninstaller.new(plugin, {}).uninstall
+
+  end
+
+end # class Logstash::PluginManager
diff --git a/lib/logstash/pluginmanager/update.rb b/lib/logstash/pluginmanager/update.rb
new file mode 100644
index 00000000000..bf08bb65e56
--- /dev/null
+++ b/lib/logstash/pluginmanager/update.rb
@@ -0,0 +1,69 @@
+require 'clamp'
+require 'logstash/namespace'
+require 'logstash/pluginmanager'
+require 'logstash/pluginmanager/util'
+require 'rubygems/installer'
+require 'rubygems/uninstaller'
+require 'jar-dependencies'
+require 'jar_install_post_install_hook'
+
+class LogStash::PluginManager::Update < Clamp::Command
+
+  parameter "[PLUGIN]", "Plugin name"
+
+  option "--version", "VERSION", "version of the plugin to install", :default => ">= 0"
+
+  option "--proxy", "PROXY", "Use HTTP proxy for remote operations"
+
+  def execute
+
+    ::Gem.configuration.verbose = false
+    ::Gem.configuration[:http_proxy] = proxy
+
+    if plugin.nil?
+      puts ("Updating all plugins")
+    else
+      puts ("Updating #{plugin} plugin")
+    end
+
+    specs = LogStash::PluginManager::Util.matching_specs(plugin).select{|spec| LogStash::PluginManager::Util.logstash_plugin?(spec) }
+    if specs.empty?
+      $stderr.puts ("No plugins found to update or trying to update a non-logstash plugin.")
+      exit(99)
+    end
+    specs.each { |spec| update_gem(spec, version) }
+
+  end
+
+
+  def update_gem(spec, version)
+
+    unless gem_path = LogStash::PluginManager::Util.download_gem(spec.name, version)
+      $stderr.puts ("Plugin '#{spec.name}' does not exist remotely.
Skipping.") + return nil + end + + unless gem_meta = LogStash::PluginManager::Util.logstash_plugin?(gem_path) + $stderr.puts ("Invalid logstash plugin gem. skipping.") + return nil + end + + unless Gem::Version.new(gem_meta.version) > Gem::Version.new(spec.version) + puts ("No newer version available for #{spec.name}. skipping.") + return nil + end + + puts ("Updating #{spec.name} from version #{spec.version} to #{gem_meta.version}") + + if LogStash::PluginManager::Util.installed?(spec.name) + ::Gem.done_installing_hooks.clear + ::Gem::Uninstaller.new(gem_meta.name, {}).uninstall + end + + ::Gem.configuration.verbose = false + ::Gem.install(spec.name, version) + puts ("Update successful") + + end + +end # class Logstash::PluginManager diff --git a/lib/logstash/pluginmanager/util.rb b/lib/logstash/pluginmanager/util.rb new file mode 100644 index 00000000000..7138258b268 --- /dev/null +++ b/lib/logstash/pluginmanager/util.rb @@ -0,0 +1,51 @@ +class LogStash::PluginManager::Util + + def self.logstash_plugin?(gem) + + gem_data = case + when gem.is_a?(Gem::Specification); gem + when (gem =~ /\.gem$/ and File.file?(gem)); Gem::Package.new(gem).spec + else Gem::Specification.find_by_name(gem) + end + + gem_data.metadata['logstash_plugin'] == "true" ? gem_data : false + end + + def self.download_gem(gem_name, gem_version = '') + + gem_version ||= Gem::Requirement.default + + dep = ::Gem::Dependency.new(gem_name, gem_version) + specs_and_sources, errors = ::Gem::SpecFetcher.fetcher.spec_for_dependency dep + if specs_and_sources.empty? + return false + end + spec, source = specs_and_sources.max_by { |s,| s.version } + path = source.download( spec, java.lang.System.getProperty("java.io.tmpdir")) + path + end + + def self.installed?(name) + Gem::Specification.any? { |x| x.name == name } + end + + def self.matching_specs(name) + req = Gem::Requirement.default + re = name ? 
/#{name}/i : //
+    specs = Gem::Specification.find_all{|spec| spec.name =~ re && req =~ spec.version}
+    specs.inject({}){|result, spec| result[spec.name_tuple] = spec; result}.values
+  end
+
+  def self.ask_yesno(prompt)
+    while true
+      $stderr.puts ("#{prompt} [y/n]: ")
+      case $stdin.getc.downcase
+      when 'y', 'j' # already downcased; j for Germans (Ja)
+        return true
+      when 'n' # no
+        break
+      end
+    end
+  end
+
+end
diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb
index 22ae31636a0..4d83edf4dc4 100644
--- a/lib/logstash/runner.rb
+++ b/lib/logstash/runner.rb
@@ -152,6 +152,23 @@ def run(args)
         require "pry"
         return binding.pry
       end,
+      "plugin" => lambda do
+        require 'logstash/pluginmanager'
+        plugin_manager = LogStash::PluginManager::Main.new($0)
+        begin
+          plugin_manager.parse(args)
+        rescue Clamp::HelpWanted => e
+          show_help(e.command)
+        end
+
+        begin
+          plugin_manager.execute
+        rescue Clamp::HelpWanted => e
+          show_help(e.command)
+        end
+
+        return []
+      end,
       "agent" => lambda do
         require "logstash/agent"
         # Hack up a runner
diff --git a/logstash.gemspec b/logstash.gemspec
index e47f41f7e73..1319c7ca351 100644
--- a/logstash.gemspec
+++ b/logstash.gemspec
@@ -61,6 +61,10 @@ Gem::Specification.new do |gem|
   gem.add_runtime_dependency "edn" #(MIT license)
   gem.add_runtime_dependency "elasticsearch" #(Apache 2.0 license)
 
+  # Plugin manager dependencies
+  gem.add_runtime_dependency "jar-dependencies" #(MIT license)
+  gem.add_runtime_dependency "ruby-maven" #(EPL license)
+
   if RUBY_PLATFORM == 'java'
     gem.platform = RUBY_PLATFORM
     gem.add_runtime_dependency "jruby-httpclient" #(Apache 2.0 license)
From 3efce866c991cd38ae79616f03fcfa671d71eb25 Mon Sep 17 00:00:00 2001
From: wiibaa
Date: Tue, 26 Aug 2014 09:08:49 +0200
Subject: [PATCH 36/74] filter/date fix locale config

Closes #1664
---
 lib/logstash/filters/date.rb | 30 +++++++++++-----------
 spec/filters/date.rb         | 48 ++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb
index 68e38f6f29e..0465633374d 100644
--- a/lib/logstash/filters/date.rb
+++ b/lib/logstash/filters/date.rb
@@ -38,11 +38,12 @@ class LogStash::Filters::Date < LogStash::Filters::Base
   # For example, `America/Los_Angeles` or `Europe/France` are valid IDs.
   config :timezone, :validate => :string
 
-  # Specify a locale to be used for date parsing. If this is not specified, the
-  # platform default will be used.
+  # Specify a locale to be used for date parsing using either IETF-BCP47 or POSIX language tag.
+  # Simple examples are `en`,`en-US` for BCP47 or `en_US` for POSIX.
+  # If not specified, the platform default will be used.
   #
-  # The locale is mostly necessary to be set for parsing month names and
-  # weekday names.
+  # The locale is mostly necessary to be set for parsing month names (pattern with MMM) and
+  # weekday names (pattern with EEE).
   #
   config :locale, :validate => :string
 
@@ -98,16 +99,6 @@ def initialize(config = {})
     @parsers = Hash.new { |h,k| h[k] = [] }
   end # def initialize
 
-  private
-  def parseLocale(localeString)
-    return nil if localeString == nil
-    matches = localeString.match(/(?<lang>.+?)(?:_(?<country>.+?))?(?:_(?<variant>.+))?/)
-    lang = matches['lang'] == nil ? "" : matches['lang'].strip()
-    country = matches['country'] == nil ? "" : matches['country'].strip()
-    variant = matches['variant'] == nil ? "" : matches['variant'].strip()
-    return lang.length > 0 ?
java.util.Locale.new(lang, country, variant) : nil - end - public def register require "java" @@ -116,8 +107,15 @@ def register :plugin => "filter", :type => "date", :error => "The match setting should contains first a field name and at least one date format, current value is #{@match}") end - # TODO(sissel): Need a way of capturing regexp configs better. - locale = parseLocale(@config["locale"][0]) if @config["locale"] != nil and @config["locale"][0] != nil + + locale = nil + if @locale + if @locale.include? '_' + @logger.warn("Date filter now use BCP47 format for locale, replacing underscore with dash") + @locale.gsub!('_','-') + end + locale = java.util.Locale.forLanguageTag(@locale) + end setupMatcher(@config["match"].shift, locale, @config["match"] ) end diff --git a/spec/filters/date.rb b/spec/filters/date.rb index cc5f9653fb3..e9942e35649 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -356,4 +356,52 @@ insist { subject["@timestamp"].year } == Time.now.year end end + + describe "Supporting locale only" do + config <<-CONFIG + filter { + date { + match => [ "message", "dd MMMM yyyy" ] + locale => "fr" + timezone => "UTC" + } + } + CONFIG + + sample "14 juillet 1789" do + insist { subject["@timestamp"].time } == Time.iso8601("1789-07-14T00:00:00.000Z").utc + end + end + + describe "Supporting locale+country in BCP47" do + config <<-CONFIG + filter { + date { + match => [ "message", "dd MMMM yyyy" ] + locale => "fr-FR" + timezone => "UTC" + } + } + CONFIG + + sample "14 juillet 1789" do + insist { subject["@timestamp"].time } == Time.iso8601("1789-07-14T00:00:00.000Z").utc + end + end + + describe "Supporting locale+country in POSIX (internally replace '_' by '-')" do + config <<-CONFIG + filter { + date { + match => [ "message", "dd MMMM yyyy" ] + locale => "fr_FR" + timezone => "UTC" + } + } + CONFIG + + sample "14 juillet 1789" do + insist { subject["@timestamp"].time } == Time.iso8601("1789-07-14T00:00:00.000Z").utc + end + end end From 56dbbff8a8254ff04ed95e9b79b76f1b8b41fa01 Mon Sep 17 00:00:00 2001 From: wickeddoc Date: Tue, 2 Sep 2014 12:07:19 +0200 Subject: [PATCH 37/74] String has to be surrounded by quotes --- docs/tutorials/getting-started-with-logstash.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/getting-started-with-logstash.asciidoc b/docs/tutorials/getting-started-with-logstash.asciidoc index 26ddf29a1eb..38c330d75a0 100644 --- a/docs/tutorials/getting-started-with-logstash.asciidoc +++ b/docs/tutorials/getting-started-with-logstash.asciidoc @@ -261,7 +261,7 @@ Now, let's configure something actually *useful*... apache2 access log files! 
We input { file { path => "/tmp/access_log" - start_position => beginning + start_position => "beginning" } } From 2efad5bffe7ff417f9e0980a984902a1a5c444fb Mon Sep 17 00:00:00 2001 From: Fredrik Gustafsson Date: Wed, 27 Aug 2014 10:10:17 +0200 Subject: [PATCH 38/74] prevent overwrite of event host field Closes #1668 --- lib/logstash/inputs/stdin.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/stdin.rb b/lib/logstash/inputs/stdin.rb index d065e2b09ee..f09df42358f 100644 --- a/lib/logstash/inputs/stdin.rb +++ b/lib/logstash/inputs/stdin.rb @@ -27,7 +27,7 @@ def run(queue) data = $stdin.sysread(16384) @codec.decode(data) do |event| decorate(event) - event["host"] = @host + event["host"] = @host if !event.include?("host") queue << event end rescue EOFError, LogStash::ShutdownSignal From 73e52b24654488a56f9ed1d1bb64269b56b94879 Mon Sep 17 00:00:00 2001 From: Paul Fletcher-Hill Date: Fri, 8 Aug 2014 14:18:00 -0400 Subject: [PATCH 39/74] Dynamic include_keys and exclude_keys for kv filter Closes #1618 --- lib/logstash/filters/kv.rb | 13 +++++++++---- spec/filters/kv.rb | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/lib/logstash/filters/kv.rb b/lib/logstash/filters/kv.rb index 4535df04527..0786233c643 100644 --- a/lib/logstash/filters/kv.rb +++ b/lib/logstash/filters/kv.rb @@ -206,13 +206,18 @@ def parse(text, event, kv_keys) if !event =~ /[@field_split]/ return kv_keys end + + # Interpret dynamic keys for @include_keys and @exclude_keys + include_keys = @include_keys.map{|key| event.sprintf(key)} + exclude_keys = @exclude_keys.map{|key| event.sprintf(key)} + text.scan(@scan_re) do |key, v1, v2, v3| value = v1 || v2 || v3 key = @trimkey.nil? ? key : key.gsub(@trimkey_re, "") - - # Bail out as per the values of @include_keys and @exclude_keys - next if not @include_keys.empty? and not @include_keys.include?(key) - next if @exclude_keys.include?(key) + + # Bail out as per the values of include_keys and exclude_keys + next if not include_keys.empty? 
and not include_keys.include?(key) + next if exclude_keys.include?(key) key = event.sprintf(@prefix) + key diff --git a/spec/filters/kv.rb b/spec/filters/kv.rb index 1da201a7b95..39f72fa9788 100644 --- a/spec/filters/kv.rb +++ b/spec/filters/kv.rb @@ -346,6 +346,38 @@ insist { subject["__doublequoted"] } == "hello world" end end + + describe "test include_keys with dynamic key" do + config <<-CONFIG + filter { + kv { + source => "data" + include_keys => [ "%{key}"] + } + } + CONFIG + + sample({"data" => "foo=bar baz=fizz", "key" => "foo"}) do + insist { subject["foo"] } == "bar" + insist { subject["baz"] } == nil + end + end + + describe "test exclude_keys with dynamic key" do + config <<-CONFIG + filter { + kv { + source => "data" + exclude_keys => [ "%{key}"] + } + } + CONFIG + + sample({"data" => "foo=bar baz=fizz", "key" => "foo"}) do + insist { subject["foo"] } == nil + insist { subject["baz"] } == "fizz" + end + end describe "test include_keys and exclude_keys" do config <<-CONFIG From c327896b623fae0de036572491c0b3c4d9b42758 Mon Sep 17 00:00:00 2001 From: John Pariseau Date: Mon, 25 Aug 2014 10:36:14 -0400 Subject: [PATCH 40/74] Change credentials default to [], use empty array closes /elasticsearch/logstash/#1619 --- lib/logstash/inputs/s3.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/s3.rb b/lib/logstash/inputs/s3.rb index 4f30937367b..0b24eee2957 100644 --- a/lib/logstash/inputs/s3.rb +++ b/lib/logstash/inputs/s3.rb @@ -21,8 +21,8 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base # Credentials can be specified: # - As an ["id","secret"] array # - As a path to a file containing AWS_ACCESS_KEY_ID=... and AWS_SECRET_ACCESS_KEY=... - # - In the environment (variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY) - config :credentials, :validate => :array, :default => nil + # - In the environment if not set (variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY) + config :credentials, :validate => :array, :default => [] # The name of the S3 bucket. config :bucket, :validate => :string, :required => true @@ -72,7 +72,10 @@ def register @access_key_id = ENV['AWS_ACCESS_KEY_ID'] @secret_access_key = ENV['AWS_SECRET_ACCESS_KEY'] elsif @credentials.is_a? Array - if @credentials.length ==1 + if @credentials.length == 0 + @access_key_id = ENV['AWS_ACCESS_KEY_ID'] + @secret_access_key = ENV['AWS_SECRET_ACCESS_KEY'] + elsif @credentials.length == 1 File.open(@credentials[0]) { |f| f.each do |line| unless (/^\#/.match(line)) if(/\s*=\s*/.match(line)) From 06010a1764866e0549a710829032369ca4a9707e Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Wed, 3 Sep 2014 13:52:38 -0700 Subject: [PATCH 41/74] Removed nil check Closes #1661 --- lib/logstash/inputs/s3.rb | 44 ++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/lib/logstash/inputs/s3.rb b/lib/logstash/inputs/s3.rb index 0b24eee2957..403aaebd349 100644 --- a/lib/logstash/inputs/s3.rb +++ b/lib/logstash/inputs/s3.rb @@ -21,7 +21,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base # Credentials can be specified: # - As an ["id","secret"] array # - As a path to a file containing AWS_ACCESS_KEY_ID=... and AWS_SECRET_ACCESS_KEY=... - # - In the environment if not set (variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY) + # - In the environment, if not set (using variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY) config :credentials, :validate => :array, :default => [] # The name of the S3 bucket. 
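
For reference, the file-based credentials form parsed in the register hunk
below is a plain KEY=value file. A minimal sketch, with a hypothetical path
and dummy values:

    # /etc/logstash/aws.credentials (hypothetical path, dummy keys)
    AWS_ACCESS_KEY_ID=AKIAEXAMPLE
    AWS_SECRET_ACCESS_KEY=examplesecretnotreal

    input {
      s3 {
        bucket      => "my-logs"                          # placeholder
        credentials => ["/etc/logstash/aws.credentials"]  # or ["id", "secret"]
      }
    }
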
@@ -68,36 +68,32 @@ def register @logger.info("Registering s3 input", :bucket => @bucket, :region_endpoint => @region_endpoint) - if @credentials.nil? + if @credentials.length == 0 @access_key_id = ENV['AWS_ACCESS_KEY_ID'] @secret_access_key = ENV['AWS_SECRET_ACCESS_KEY'] - elsif @credentials.is_a? Array - if @credentials.length == 0 - @access_key_id = ENV['AWS_ACCESS_KEY_ID'] - @secret_access_key = ENV['AWS_SECRET_ACCESS_KEY'] - elsif @credentials.length == 1 - File.open(@credentials[0]) { |f| f.each do |line| - unless (/^\#/.match(line)) - if(/\s*=\s*/.match(line)) - param, value = line.split('=', 2) - param = param.chomp().strip() - value = value.chomp().strip() - if param.eql?('AWS_ACCESS_KEY_ID') - @access_key_id = value - elsif param.eql?('AWS_SECRET_ACCESS_KEY') - @secret_access_key = value - end + elsif @credentials.length == 1 + File.open(@credentials[0]) { |f| f.each do |line| + unless (/^\#/.match(line)) + if(/\s*=\s*/.match(line)) + param, value = line.split('=', 2) + param = param.chomp().strip() + value = value.chomp().strip() + if param.eql?('AWS_ACCESS_KEY_ID') + @access_key_id = value + elsif param.eql?('AWS_SECRET_ACCESS_KEY') + @secret_access_key = value end end end - } - elsif @credentials.length == 2 - @access_key_id = @credentials[0] - @secret_access_key = @credentials[1] - else - raise ArgumentError.new('Credentials must be of the form "/path/to/file" or ["id", "secret"]') end + } + elsif @credentials.length == 2 + @access_key_id = @credentials[0] + @secret_access_key = @credentials[1] + else + raise ArgumentError.new('Credentials must be of the form "/path/to/file" or ["id", "secret"]') end + if @access_key_id.nil? or @secret_access_key.nil? raise ArgumentError.new('Missing AWS credentials') end From 4683e51e4b046acea20a5baf24e341ecb64fd7ff Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Mon, 14 Jul 2014 14:56:10 -0700 Subject: [PATCH 42/74] Merge @joekiller's Kafka plugin to logstash-core Add documentation and tests Use scala version 2.9.2 and Kafka 0.8.1.1 Closes #1472 --- Makefile | 26 +++++- lib/logstash/inputs/kafka.rb | 143 ++++++++++++++++++++++++++++++ lib/logstash/outputs/kafka.rb | 159 ++++++++++++++++++++++++++++++++++ logstash.gemspec | 1 + spec/inputs/kafka.rb | 55 ++++++++++++ spec/outputs/kafka.rb | 39 +++++++++ tools/Gemfile.jruby-1.9.lock | 2 + 7 files changed, 422 insertions(+), 3 deletions(-) create mode 100644 lib/logstash/inputs/kafka.rb create mode 100644 lib/logstash/outputs/kafka.rb create mode 100644 spec/inputs/kafka.rb create mode 100644 spec/outputs/kafka.rb diff --git a/Makefile b/Makefile index 6bb7d1dc7e4..600f7eaebf9 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,12 @@ GEOIP_ASN=vendor/geoip/GeoIPASNum.dat GEOIP_ASN_URL=http://logstash.objects.dreamhost.com/maxmind/GeoIPASNum-2014-02-12.dat.gz KIBANA_URL=https://download.elasticsearch.org/kibana/kibana/kibana-3.0.1.tar.gz PLUGIN_FILES=$(shell find lib -type f| egrep '^lib/logstash/(inputs|outputs|filters|codecs)/[^/]+$$' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') +SCALA_VERSION?=2.9.2 + +KAFKA_VERSION?=0.8.1.1 +KAFKA_URL=https://archive.apache.org/dist/kafka +KAFKA=vendor/jar/kafka_$(SCALA_VERSION)-$(KAFKA_VERSION) + QUIET=@ ifeq (@,$(QUIET)) QUIET_OUTPUT=> /dev/null 2>&1 @@ -149,6 +155,11 @@ vendor/jar/graphtastic-rmiclient.jar: | wget-or-curl vendor/jar @echo "=> Fetching graphtastic rmi client jar" $(QUIET)$(DOWNLOAD_COMMAND) $@ http://cloud.github.com/downloads/NickPadilla/GraphTastic/graphtastic-rmiclient.jar +vendor/jar/kafka_$(SCALA_VERSION)-$(KAFKA_VERSION).tgz: 
| wget-or-curl vendor/jar + @echo "=> Fetching kafka $(SCALA_VERSION)-$(KAFKA_VERSION)" + $(QUIET)$(DOWNLOAD_COMMAND) $@ $(KAFKA_URL)/$(KAFKA_VERSION)/kafka_$(SCALA_VERSION)-$(KAFKA_VERSION).tgz + + .PHONY: vendor-elasticsearch vendor-elasticsearch: $(ELASTICSEARCH) $(ELASTICSEARCH): $(ELASTICSEARCH).tar.gz | vendor/jar @@ -183,6 +194,15 @@ $(TYPESDB): | vendor/collectd $(QUIET)tar zxf $@.tar.gz -O "collectd-$(COLLECTD_VERSION)/src/types.db" > $@ $(QUIET)rm $@.tar.gz +.PHONY: vendor-kafka +vendor-kafka: $(KAFKA) +$(KAFKA): $(KAFKA).tgz | vendor/jar + @echo "=> Pulling the jars out of $<" + $(QUIET)tar -C $(shell dirname $@) -xf $< $(TAR_OPTS) \ + 'kafka_$(SCALA_VERSION)-$(KAFKA_VERSION)/libs/*.jar' + $(QUIET)tar -C $(shell dirname $@) -xf $< $(TAR_OPTS) \ + 'kafka_$(SCALA_VERSION)-$(KAFKA_VERSION)/*.jar' + # Always run vendor/bundle .PHONY: fix-bundler fix-bundler: @@ -219,7 +239,7 @@ vendor/ua-parser/regexes.yaml: | vendor/ua-parser/ .PHONY: test test: QUIET_OUTPUT= -test: | $(JRUBY) vendor-elasticsearch vendor-geoip vendor-collectd vendor-gems +test: | $(JRUBY) vendor-elasticsearch vendor-geoip vendor-collectd vendor-kafka vendor-gems $(SPEC_ENV) bin/logstash rspec $(SPEC_OPTS) --order rand --fail-fast $(TESTS) .PHONY: reporting-test @@ -355,12 +375,12 @@ show: .PHONY: prepare-tarball prepare-tarball tarball zip: WORKDIR=build/tarball/logstash-$(VERSION) -prepare-tarball: vendor/kibana $(ELASTICSEARCH) $(JRUBY) vendor-geoip $(TYPESDB) vendor-gems +prepare-tarball: vendor/kibana $(ELASTICSEARCH) $(JRUBY) vendor-geoip $(TYPESDB) $(KAFKA) vendor-gems prepare-tarball: vendor/ua-parser/regexes.yaml prepare-tarball: @echo "=> Preparing tarball" $(QUIET)$(MAKE) $(WORKDIR) - $(QUIET)rsync -a --relative bin lib spec locales patterns vendor/bundle/jruby vendor/geoip vendor/jar vendor/kibana vendor/ua-parser vendor/collectd LICENSE README.md --exclude 'vendor/bundle/jruby/1.9/cache' --exclude 'vendor/bundle/jruby/1.9/gems/*/doc' --exclude 'vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION).tar.gz' $(WORKDIR) + $(QUIET)rsync -a --relative bin lib spec locales patterns vendor/bundle/jruby vendor/geoip vendor/jar vendor/kibana vendor/ua-parser vendor/collectd LICENSE README.md --exclude 'vendor/bundle/jruby/1.9/cache' --exclude 'vendor/bundle/jruby/1.9/gems/*/doc' --exclude 'vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION).tar.gz' --exclude 'vendor/jar/kafka_$(SCALA_VERSION)-$(KAFKA_VERSION).tgz' $(WORKDIR) $(QUIET)sed -i -e 's/^LOGSTASH_VERSION = .*/LOGSTASH_VERSION = "$(VERSION)"/' $(WORKDIR)/lib/logstash/version.rb $(QUIET)sed -i -e 's/%JRUBY_VERSION%/$(JRUBY_VERSION)/' $(WORKDIR)/bin/logstash.bat diff --git a/lib/logstash/inputs/kafka.rb b/lib/logstash/inputs/kafka.rb new file mode 100644 index 00000000000..277fce3668e --- /dev/null +++ b/lib/logstash/inputs/kafka.rb @@ -0,0 +1,143 @@ +require 'logstash/namespace' +require 'logstash/inputs/base' + +# This input will read events from a Kafka topic. It uses the high level consumer API provided +# by Kafka to read messages from the broker. It also maintains the state of what has been +# consumed using Zookeeper. The default input codec is json +# +# The only required configuration is the topic name. By default it will connect to a Zookeeper +# running on localhost. 
All the broker information is read from Zookeeper state
+#
+# Ideally you should have as many threads as the number of partitions for a perfect balance --
+# more threads than partitions means that some threads will be idle
+#
+# For more information see http://kafka.apache.org/documentation.html#theconsumer
+#
+# Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
+#
+class LogStash::Inputs::Kafka < LogStash::Inputs::Base
+  config_name 'kafka'
+  milestone 1
+
+  default :codec, 'json'
+
+  # Specifies the ZooKeeper connection string in the form hostname:port where host and port are
+  # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form
+  # hostname1:port1,hostname2:port2,hostname3:port3.
+  config :zk_connect, :validate => :string, :default => 'localhost:2181'
+  # A string that uniquely identifies the group of consumer processes to which this consumer
+  # belongs. By setting the same group id multiple processes indicate that they are all part of
+  # the same consumer group.
+  config :group_id, :validate => :string, :default => 'logstash'
+  # The topic to consume messages from
+  config :topic_id, :validate => :string, :required => true
+  # Specify whether to jump to beginning of the queue when there is no initial offset in
+  # ZooKeeper, or if an offset is out of range. If this is false, messages are consumed
+  # from the latest offset
+  config :reset_beginning, :validate => :boolean, :default => false
+  # Number of threads to read from the partitions. Ideally you should have as many threads as the
+  # number of partitions for a perfect balance. More threads than partitions means that some
+  # threads will be idle. Fewer threads means a single thread could be consuming from more than
+  # one partition
+  config :consumer_threads, :validate => :number, :default => 1
+  # Internal Logstash queue size used to hold events in memory after it has been read from Kafka
+  config :queue_size, :validate => :number, :default => 20
+  # When a new consumer joins a consumer group the set of consumers attempt to "rebalance" the
+  # load to assign partitions to each consumer. If the set of consumers changes while this
+  # assignment is taking place the rebalance will fail and retry. This setting controls the
+  # maximum number of attempts before giving up.
+  config :rebalance_max_retries, :validate => :number, :default => 4
+  # Backoff time between retries during rebalance.
+  config :rebalance_backoff_ms, :validate => :number, :default => 2000
+  # Throw a timeout exception to the consumer if no message is available for consumption after
+  # the specified interval
+  config :consumer_timeout_ms, :validate => :number, :default => -1
+  # Option to restart the consumer loop on error
+  config :consumer_restart_on_error, :validate => :boolean, :default => true
+  # Time in millis to wait for consumer to restart after an error
+  config :consumer_restart_sleep_ms, :validate => :number, :default => 0
+  config :decorate_events, :validate => :boolean, :default => true
+  # A unique id for the consumer; generated automatically if not set.
+  config :consumer_id, :validate => :string, :default => nil
+  # The number of bytes of messages to attempt to fetch for each topic-partition in each fetch
+  # request. These bytes will be read into memory for each partition, so this helps control
+  # the memory used by the consumer.
The fetch request size must be at least as large as the + # maximum message size the server allows or else it is possible for the producer to send + # messages larger than the consumer can fetch. + config :fetch_message_max_bytes, :validate => :number, :default => 1048576 + + public + def register + jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*/libs/*.jar") + Dir[jarpath].each do |jar| + require jar + end + require 'jruby-kafka' + options = { + :zk_connect => @zk_connect, + :group_id => @group_id, + :topic_id => @topic_id, + :rebalance_max_retries => @rebalance_max_retries, + :rebalance_backoff_ms => @rebalance_backoff_ms, + :consumer_timeout_ms => @consumer_timeout_ms, + :consumer_restart_on_error => @consumer_restart_on_error, + :consumer_restart_sleep_ms => @consumer_restart_sleep_ms, + :consumer_id => @consumer_id, + :fetch_message_max_bytes => @fetch_message_max_bytes + } + if @reset_beginning == true + options[:reset_beginning] = 'from-beginning' + end # if :reset_beginning + @kafka_client_queue = SizedQueue.new(@queue_size) + @consumer_group = Kafka::Group.new(options) + @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect) + end # def register + + public + def run(logstash_queue) + java_import 'kafka.common.ConsumerRebalanceFailedException' + @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect) + begin + @consumer_group.run(@consumer_threads,@kafka_client_queue) + begin + while true + event = @kafka_client_queue.pop + queue_event("#{event}",logstash_queue) + end + rescue LogStash::ShutdownSignal + @logger.info('Kafka got shutdown signal') + @consumer_group.shutdown() + end + until @kafka_client_queue.empty? + queue_event("#{@kafka_client_queue.pop}",logstash_queue) + end + @logger.info('Done running kafka input') + rescue => e + @logger.warn('kafka client threw exception, restarting', + :exception => e) + if @consumer_group.running? + @consumer_group.shutdown() + end + sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000) + retry + end + finished + end # def run + + private + def queue_event(msg, output_queue) + begin + @codec.decode(msg) do |event| + decorate(event) + if @decorate_events + event['kafka'] = {'msg_size' => msg.bytesize, 'topic' => @topic_id, 'consumer_group' => @group_id} + end + output_queue << event + end # @codec.decode + rescue => e # parse or event creation error + @logger.error("Failed to create event", :message => msg, :exception => e, + :backtrace => e.backtrace); + end # begin + end # def queue_event + +end #class LogStash::Inputs::Kafka diff --git a/lib/logstash/outputs/kafka.rb b/lib/logstash/outputs/kafka.rb new file mode 100644 index 00000000000..dac8c8ba57d --- /dev/null +++ b/lib/logstash/outputs/kafka.rb @@ -0,0 +1,159 @@ +require 'logstash/namespace' +require 'logstash/outputs/base' + +# Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on +# the broker. +# +# The only required configuration is the topic name. The default codec is json, +# so events will be persisted on the broker in json format. If you select a codec of plain, +# Logstash will encode your messages with not only the message but also with a timestamp and +# hostname. 
If you do not want anything but your message passing through, you should make the output +# configuration something like: +# output { +# kafka { +# codec => plain { +# format => "%{message}" +# } +# } +# } +# For more information see http://kafka.apache.org/documentation.html#theproducer +# +# Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs +class LogStash::Outputs::Kafka < LogStash::Outputs::Base + config_name 'kafka' + milestone 1 + + default :codec, 'json' + # This is for bootstrapping and the producer will only use it for getting metadata (topics, + # partitions and replicas). The socket connections for sending the actual data will be + # established based on the broker information returned in the metadata. The format is + # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a + # subset of brokers. + config :broker_list, :validate => :string, :default => 'localhost:9092' + # The topic to produce the messages to + config :topic_id, :validate => :string, :required => true + # This parameter allows you to specify the compression codec for all data generated by this + # producer. Valid values are "none", "gzip" and "snappy". + config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none' + # This parameter allows you to set whether compression should be turned on for particular + # topics. If the compression codec is anything other than NoCompressionCodec, + # enable compression only for specified topics if any. If the list of compressed topics is + # empty, then enable the specified compression codec for all topics. If the compression codec + # is NoCompressionCodec, compression is disabled for all topics + config :compressed_topics, :validate => :string, :default => '' + # This value controls when a produce request is considered completed. Specifically, + # how many other brokers must have committed the data to their log and acknowledged this to the + # leader. For more info, see -- http://kafka.apache.org/documentation.html#producerconfigs + config :request_required_acks, :validate => [-1,0,1], :default => 0 + # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[] + config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder' + # The partitioner class for partitioning messages amongst partitions in the topic. The default + # partitioner is based on the hash of the key. If the key is null, + # the message is sent to a random partition in the broker. + # NOTE: topic_metadata_refresh_interval_ms controls how long the producer will distribute to a + # partition in the topic. This defaults to 10 mins, so the producer will continue to write to a + # single partition for 10 mins before it switches + config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner' + # The amount of time the broker will wait trying to meet the request.required.acks requirement + # before sending back an error to the client. + config :request_timeout_ms, :validate => :number, :default => 10000 + # This parameter specifies whether the messages are sent asynchronously in a background thread. + # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By + # setting the producer to async we allow batching together of requests (which is great for + # throughput) but open the possibility of a failure of the client machine dropping unsent data. 
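
To make the sync/async trade-off described above concrete, a hedged example of
an output tuned for throughput via the producer_type option defined just below
(broker and topic names are placeholders, not part of this patch):

    output {
      kafka {
        broker_list   => "broker1:9092"  # placeholder
        topic_id      => "events"        # placeholder
        producer_type => "async"         # batch for throughput; unsent data can be lost on a crash
      }
    }
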
+ config :producer_type, :validate => %w( sync async ), :default => 'sync' + # The serializer class for keys (defaults to the same as for messages if nothing is given) + config :key_serializer_class, :validate => :string, :default => nil + # This property will cause the producer to automatically retry a failed send request. This + # property specifies the number of retries when such failures occur. Note that setting a + # non-zero value here can lead to duplicates in the case of network errors that cause a message + # to be sent but the acknowledgement to be lost. + config :message_send_max_retries, :validate => :number, :default => 3 + # Before each retry, the producer refreshes the metadata of relevant topics to see if a new + # leader has been elected. Since leader election takes a bit of time, + # this property specifies the amount of time that the producer waits before refreshing the + # metadata. + config :retry_backoff_ms, :validate => :number, :default => 100 + # The producer generally refreshes the topic metadata from brokers when there is a failure + # (partition missing, leader not available...). It will also poll regularly (default: every + # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on + # failure. If you set this to zero, the metadata will get refreshed after each message sent + # (not recommended). Important note: the refresh happen only AFTER the message is sent, + # so if the producer never sends a message the metadata is never refreshed + config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000 + # Maximum time to buffer data when using async mode. For example a setting of 100 will try to + # batch together 100ms of messages to send at once. This will improve throughput but adds + # message delivery latency due to the buffering. + config :queue_buffering_max_ms, :validate => :number, :default => 5000 + # The maximum number of unsent messages that can be queued up the producer when using async + # mode before either the producer must be blocked or data must be dropped. + config :queue_buffering_max_messages, :validate => :number, :default => 10000 + # The amount of time to block before dropping messages when running in async mode and the + # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued + # immediately or dropped if the queue is full (the producer send call will never block). If set + # to -1 the producer will block indefinitely and never willingly drop a send. + config :queue_enqueue_timeout_ms, :validate => :number, :default => -1 + # The number of messages to send in one batch when using async mode. The producer will wait + # until either this number of messages are ready to send or queue.buffer.max.ms is reached. + config :batch_num_messages, :validate => :number, :default => 200 + # Socket write buffer size + config :send_buffer_bytes, :validate => :number, :default => 100 * 1024 + # The client id is a user-specified string sent in each request to help trace calls. It should + # logically identify the application making the request. 
+ config :client_id, :validate => :string, :default => "" + + public + def register + jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*/libs/*.jar") + Dir[jarpath].each do |jar| + require jar + end + require 'jruby-kafka' + options = { + :topic_id => @topic_id, + :broker_list => @broker_list, + :compression_codec => @compression_codec, + :compressed_topics => @compressed_topics, + :request_required_acks => @request_required_acks, + :serializer_class => @serializer_class, + :partitioner_class => @partitioner_class, + :request_timeout_ms => @request_timeout_ms, + :producer_type => @producer_type, + :key_serializer_class => @key_serializer_class, + :message_send_max_retries => @message_send_max_retries, + :retry_backoff_ms => @retry_backoff_ms, + :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms, + :queue_buffering_max_ms => @queue_buffering_max_ms, + :queue_buffering_max_messages => @queue_buffering_max_messages, + :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms, + :batch_num_messages => @batch_num_messages, + :send_buffer_bytes => @send_buffer_bytes, + :client_id => @client_id + } + @producer = Kafka::Producer.new(options) + @producer.connect() + + @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list) + + @codec.on_event do |event| + begin + @producer.sendMsg(@topic_id,nil,event) + rescue LogStash::ShutdownSignal + @logger.info('Kafka producer got shutdown signal') + rescue => e + @logger.warn('kafka producer threw exception, restarting', + :exception => e) + end + end + end # def register + + def receive(event) + return unless output?(event) + if event == LogStash::SHUTDOWN + finished + return + end + @codec.encode(event) + end + +end #class LogStash::Outputs::Kafka diff --git a/logstash.gemspec b/logstash.gemspec index 1319c7ca351..2de6afb5f11 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -72,6 +72,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jruby-openssl", "0.8.7" #(CPL/GPL/LGPL license) gem.add_runtime_dependency "msgpack-jruby" #(Apache 2.0 license) gem.add_runtime_dependency "jrjackson" #(Apache 2.0 license) + gem.add_runtime_dependency "jruby-kafka", [">=0.1.0"] #(Apache 2.0 license) else gem.add_runtime_dependency "excon" #(MIT license) gem.add_runtime_dependency "msgpack" #(Apache 2.0 license) diff --git a/spec/inputs/kafka.rb b/spec/inputs/kafka.rb new file mode 100644 index 00000000000..36a6389bd93 --- /dev/null +++ b/spec/inputs/kafka.rb @@ -0,0 +1,55 @@ +# encoding: utf-8 + +require 'rspec' +require 'insist' +require 'logstash/namespace' +require 'logstash/inputs/kafka' +require 'logstash/errors' + +describe LogStash::Inputs::Kafka do + extend LogStash::RSpec + + let (:kafka_config) {{"topic_id" => "test"}} + + it 'should populate kafka config with default values' do + kafka = LogStash::Inputs::Kafka.new(kafka_config) + insist {kafka.zk_connect} == "localhost:2181" + insist {kafka.topic_id} == "test" + insist {kafka.group_id} == "logstash" + insist {kafka.reset_beginning} == false + end + + it "should register and load kafka jars without errors" do + kafka = LogStash::Inputs::Kafka.new(kafka_config) + kafka.register + end + + it "should retrieve event from kafka" do + # Extend class to control behavior + class LogStash::Inputs::TestKafka < LogStash::Inputs::Kafka + milestone 1 + private + def queue_event(msg, output_queue) + super(msg, output_queue) + # need to raise exception here to stop the infinite loop + raise LogStash::ShutdownSignal + end + end 
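+      # The subclass above raises ShutdownSignal after handling one message so
+      # the otherwise endless consume loop terminates; Kafka::Group#run is then
+      # redefined below to push a single canned message, meaning this spec needs
+      # no live broker or ZooKeeper.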
+ + kafka = LogStash::Inputs::TestKafka.new(kafka_config) + kafka.register + + class Kafka::Group + public + def run(a_numThreads, a_queue) + a_queue << "Kafka message" + end + end + + logstash_queue = Queue.new + kafka.run logstash_queue + e = logstash_queue.pop + insist { e["message"] } == "Kafka message" + insist { e["kafka"] } == {"msg_size"=>13, "topic"=>"test", "consumer_group"=>"logstash"} + end +end diff --git a/spec/outputs/kafka.rb b/spec/outputs/kafka.rb new file mode 100644 index 00000000000..0a87b974c54 --- /dev/null +++ b/spec/outputs/kafka.rb @@ -0,0 +1,39 @@ +# encoding: utf-8 + +require 'rspec' +require 'insist' +require 'logstash/namespace' +require "logstash/timestamp" +require 'logstash/outputs/kafka' + +describe LogStash::Outputs::Kafka do + + let (:kafka_config) {{"topic_id" => "test"}} + + it 'should populate kafka config with default values' do + kafka = LogStash::Outputs::Kafka.new(kafka_config) + insist {kafka.broker_list} == "localhost:9092" + insist {kafka.topic_id} == "test" + insist {kafka.compression_codec} == "none" + insist {kafka.serializer_class} == "kafka.serializer.StringEncoder" + insist {kafka.partitioner_class} == "kafka.producer.DefaultPartitioner" + insist {kafka.producer_type} == "sync" + end + + it "should register and load kafka jars without errors" do + kafka = LogStash::Outputs::Kafka.new(kafka_config) + kafka.register + end + + it "should send logstash event to kafka broker" do + timestamp = LogStash::Timestamp.now + expect_any_instance_of(Kafka::Producer) + .to receive(:sendMsg) + .with("test", nil, "{\"message\":\"hello world\",\"host\":\"test\",\"@timestamp\":\"#{timestamp}\",\"@version\":\"1\"}") + e = LogStash::Event.new({"message" => "hello world", "host" => "test", "@timestamp" => timestamp}) + kafka = LogStash::Outputs::Kafka.new(kafka_config) + kafka.register + kafka.receive(e) + end + +end diff --git a/tools/Gemfile.jruby-1.9.lock b/tools/Gemfile.jruby-1.9.lock index 3b82d07a247..1eb0966d178 100644 --- a/tools/Gemfile.jruby-1.9.lock +++ b/tools/Gemfile.jruby-1.9.lock @@ -73,6 +73,7 @@ GEM jls-lumberjack (0.0.20) jrjackson (0.2.7) jruby-httpclient (1.1.1-java) + jruby-kafka (0.1.0) jruby-openssl (0.8.7) bouncy-castle-java (>= 1.5.0147) json (1.8.1-java) @@ -203,6 +204,7 @@ DEPENDENCIES jls-lumberjack (>= 0.0.20) jrjackson jruby-httpclient + jruby-kafka (>= 0.1.0) jruby-openssl (= 0.8.7) kramdown mail From 39976a3c5c941011c8ca3124abbf78d3b06824b4 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 4 Sep 2014 15:03:38 +0100 Subject: [PATCH 43/74] Prevents event.sprintf from converting floats to sci notation This allows sprintf to correctly print floats up to 15 decimal places, which should be enough for most. Without this, very big or very small floats will be converted to scientific notation. 
closes #1670 --- lib/logstash/event.rb | 12 ++++- lib/logstash/outputs/statsd.rb | 6 +-- spec/outputs/statsd.rb | 86 ++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 spec/outputs/statsd.rb diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index f9806a09c7a..369e9829428 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -40,6 +40,11 @@ class DeprecatedMethod < StandardError; end TIMESTAMP_FAILURE_TAG = "_timestampparsefailure" TIMESTAMP_FAILURE_FIELD = "_@timestamp" + # Floats outside of these upper and lower bounds are forcibly converted + # to scientific notation by Float#to_s + MIN_FLOAT_BEFORE_SCI_NOT = 0.0001 + MAX_FLOAT_BEFORE_SCI_NOT = 1000000000000000.0 + public def initialize(data = {}) @logger = Cabin::Channel.get(LogStash) @@ -184,7 +189,12 @@ def remove(fieldref) # is an array (or hash?) should be. Join by comma? Something else? public def sprintf(format) - format = format.to_s + if format.is_a?(Float) and + (format < MIN_FLOAT_BEFORE_SCI_NOT or format >= MAX_FLOAT_BEFORE_SCI_NOT) then + format = ("%.15f" % format).sub(/0*$/,"") + else + format = format.to_s + end if format.index("%").nil? return format end diff --git a/lib/logstash/outputs/statsd.rb b/lib/logstash/outputs/statsd.rb index aaf3c556fb3..755ec43cf1c 100644 --- a/lib/logstash/outputs/statsd.rb +++ b/lib/logstash/outputs/statsd.rb @@ -95,11 +95,11 @@ def receive(event) end @count.each do |metric, val| @client.count(build_stat(event.sprintf(metric), sender), - event.sprintf(val).to_f, @sample_rate) + event.sprintf(val), @sample_rate) end @timing.each do |metric, val| @client.timing(build_stat(event.sprintf(metric), sender), - event.sprintf(val).to_f, @sample_rate) + event.sprintf(val), @sample_rate) end @set.each do |metric, val| @client.set(build_stat(event.sprintf(metric), sender), @@ -107,7 +107,7 @@ def receive(event) end @gauge.each do |metric, val| @client.gauge(build_stat(event.sprintf(metric), sender), - event.sprintf(val).to_f, @sample_rate) + event.sprintf(val), @sample_rate) end end # def receive diff --git a/spec/outputs/statsd.rb b/spec/outputs/statsd.rb new file mode 100644 index 00000000000..5bb0942600e --- /dev/null +++ b/spec/outputs/statsd.rb @@ -0,0 +1,86 @@ +require "test_utils" +require "logstash/outputs/statsd" +require "mocha/api" +require "socket" + +describe LogStash::Outputs::Statsd do + extend LogStash::RSpec + port = 4399 + udp_server = UDPSocket.new + udp_server.bind("127.0.0.1", port) + + describe "send metric to statsd" do + config <<-CONFIG + input { + generator { + message => "valid" + count => 1 + } + } + + output { + statsd { + host => "localhost" + sender => "spec" + port => #{port} + count => [ "test.valid", "0.1" ] + } + } + CONFIG + + agent do + metric, *data = udp_server.recvfrom(100) + insist { metric } == "logstash.spec.test.valid:0.1|c" + end + end + + describe "output a very small float" do + config <<-CONFIG + input { + generator { + message => "valid" + count => 1 + } + } + + output { + statsd { + host => "localhost" + sender => "spec" + port => #{port} + count => [ "test.valid", 0.000001 ] + } + } + CONFIG + + agent do + metric, *data = udp_server.recvfrom(100) + insist { metric } == "logstash.spec.test.valid:0.000001|c" + end + end + + describe "output a very big float" do + config <<-CONFIG + input { + generator { + message => "valid" + count => 1 + } + } + + output { + statsd { + host => "localhost" + sender => "spec" + port => #{port} + count => [ "test.valid", 9999999999999.01 ] + } + } + 
CONFIG + + agent do + metric, *data = udp_server.recvfrom(100) + insist { metric } == "logstash.spec.test.valid:9999999999999.01|c" + end + end +end From cff7c37603c7ddabd732856bc6b92e61c9dc3ae9 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Thu, 4 Sep 2014 11:52:50 -0700 Subject: [PATCH 44/74] Dont add kafka metadata by default Closes #1691 --- lib/logstash/inputs/kafka.rb | 3 ++- spec/inputs/kafka.rb | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/kafka.rb b/lib/logstash/inputs/kafka.rb index 277fce3668e..14b9e1b960c 100644 --- a/lib/logstash/inputs/kafka.rb +++ b/lib/logstash/inputs/kafka.rb @@ -56,7 +56,8 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base config :consumer_restart_on_error, :validate => :boolean, :default => true # Time in millis to wait for consumer to restart after an error config :consumer_restart_sleep_ms, :validate => :number, :default => 0 - config :decorate_events, :validate => :boolean, :default => true + # Option to add Kafka metadata like topic, message size to the event + config :decorate_events, :validate => :boolean, :default => false # A unique id for the consumer; generated automatically if not set. config :consumer_id, :validate => :string, :default => nil # The number of byes of messages to attempt to fetch for each topic-partition in each fetch diff --git a/spec/inputs/kafka.rb b/spec/inputs/kafka.rb index 36a6389bd93..6950a80f871 100644 --- a/spec/inputs/kafka.rb +++ b/spec/inputs/kafka.rb @@ -50,6 +50,8 @@ def run(a_numThreads, a_queue) kafka.run logstash_queue e = logstash_queue.pop insist { e["message"] } == "Kafka message" - insist { e["kafka"] } == {"msg_size"=>13, "topic"=>"test", "consumer_group"=>"logstash"} + # no metadata by default + insist { e["kafka"] } == nil end + end From 4211522de1cf996891c6b93f93ba1ab4e408d587 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Sat, 12 Apr 2014 13:50:40 -0700 Subject: [PATCH 45/74] Add recurse method for doing breadth-first traversal of the AST This will be used by the filter flush compiler Add generation of a flush lambda for each filter. This allows filters to flush and have any generated events proceed downward through the config as you would expect, respecting any branches or future plugins. Die on IOError which occurs when reading from a closed STDIN Make filter_flusher invoke the new (and correct, I hope!) way to flush. - On shutdown, we will also flush all filters. - The flusher thread will terminate if we are shutting down. Clarify the comment Fix comment generation in the code to avoid newlines. Add 'max_age' setting to multiline for flushing. This setting chooses how long (in seconds) an event is considered to be fresh before it will be automatically flushed. This is useful for: * slow log sources to get the 'last event' flushed, * transaction-id-style events that have no obvious "end" event and also are mixed among other-id events in the same stream. Also: - Make filters have no teardown by default. - Remove 'enable_flush' since it is not needed anymore; flush is always enabled. 
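
A sketch of how the new 'max_age' setting is meant to be used (the values
here are illustrative, not defaults):

    filter {
      multiline {
        pattern => "^\s"
        what => "previous"
        max_age => 10  # seconds an event may sit pending before it is flushed
      }
    }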
refactor flush

new spool filter and specs, mainly for testing flushing

turn off unrelated test error for now

fix the flush logic, fix the compiled flush code to not include the output section

synchronize cross-thread access to @pending

refactor for performance and readability

synchronize cross-thread access to @spool

unused code

input:udp removed bogus ShutdownSignal handling, morphed loop do into while true, cosmetic reformat

use transient events and not exceptions for in-flow pipeline signaling

inline flushing into filterworker

removed now unnecessary flushing thread

safety fix

conditionals bug for new events generated by filters & specs

spec for issue #793

performance tweaks

simplify filter handling of events and new_events

this removes unnecessary duplication when treating the original event as a
special case (different from new_events generated by a filter). Also, since
@filter_func only outputs non-cancelled events, some checks were also removed.

Move multiple filter specs to a filter_chains file

append events generated by a filter using unshift instead of insert

closes #793, closes #1429, closes #1431, closes #1548
---
 lib/logstash/config/config_ast.rb | 202 ++++++++++++++++++++++--------
 lib/logstash/event.rb             |   8 ++
 lib/logstash/filters/base.rb      |  23 ++--
 lib/logstash/filters/multiline.rb | 193 ++++++++++++++++------------
 lib/logstash/filters/split.rb     |   4 +-
 lib/logstash/filters/spool.rb     |  32 +++++
 lib/logstash/filterworker.rb      | 122 ------------------
 lib/logstash/inputs/stdin.rb      |   2 +-
 lib/logstash/inputs/udp.rb        |  43 +++----
 lib/logstash/pipeline.rb          |  97 +++++++-------
 spec/conditionals/test.rb         |  40 +++++-
 spec/filters/clone.rb             |   2 +
 spec/filters/filter_chains.rb     | 122 ++++++++++++++++++
 spec/filters/multiline.rb         |   5 +-
 spec/filters/split.rb             |   1 +
 spec/filters/spool.rb             |  89 +++++++++++++
 spec/test_utils.rb                |  16 +--
 17 files changed, 648 insertions(+), 353 deletions(-)
 create mode 100644 lib/logstash/filters/spool.rb
 delete mode 100644 lib/logstash/filterworker.rb
 create mode 100644 spec/filters/filter_chains.rb
 create mode 100644 spec/filters/spool.rb

diff --git a/lib/logstash/config/config_ast.rb b/lib/logstash/config/config_ast.rb
index f5e6eaf9af7..a88bf5f5915 100644
--- a/lib/logstash/config/config_ast.rb
+++ b/lib/logstash/config/config_ast.rb
@@ -6,6 +6,15 @@ def compile
     return elements.collect(&:compile).reject(&:empty?).join("")
   end
 
+  # Traverse the syntax tree recursively.
+  # The order should respect the order of the configuration file as it is read
+  # and written by humans (and the order in which it is parsed).
+  def recurse(e, depth=0, &block)
+    r = block.call(e, depth)
+    e.elements.each { |e| recurse(e, depth + 1, &block) } if r && e.elements
+    nil
+  end
+
   def recursive_inject(results=[], &block)
     if !elements.nil?
elements.each do |element| @@ -39,29 +48,34 @@ def recursive_select_parent(results=[], klass) end end -module LogStash; module Config; module AST +module LogStash; module Config; module AST class Node < Treetop::Runtime::SyntaxNode; end class Config < Node def compile - # TODO(sissel): Move this into config/config_ast.rb code = [] - code << "@inputs = []" - code << "@filters = []" - code << "@outputs = []" + + code << <<-CODE + @inputs = [] + @filters = [] + @outputs = [] + @periodic_flushers = [] + @shutdown_flushers = [] + CODE + sections = recursive_select(LogStash::Config::AST::PluginSection) sections.each do |s| code << s.compile_initializer end # start inputs - #code << "class << self" definitions = [] - + ["filter", "output"].each do |type| - #definitions << "def #{type}(event)" + # defines @filter_func and @output_func + definitions << "@#{type}_func = lambda do |event, &block|" if type == "filter" - definitions << " extra_events = []" + definitions << " events = [event]" end definitions << " @logger.debug? && @logger.debug(\"#{type} received\", :event => event.to_hash)" @@ -70,13 +84,12 @@ def compile end if type == "filter" - definitions << " extra_events.each(&block)" + definitions << " events.flatten.each{|e| block.call(e) }" end definitions << "end" end code += definitions.join("\n").split("\n", -1).collect { |l| " #{l}" } - #code << "end" return code.join("\n") end end @@ -84,14 +97,52 @@ def compile class Comment < Node; end class Whitespace < Node; end class PluginSection < Node + # Global plugin numbering for the janky instance variable naming we use + # like @filter__1 @@i = 0 + # Generate ruby code to initialize all the plugins. def compile_initializer generate_variables code = [] - @variables.collect do |plugin, name| - code << "#{name} = #{plugin.compile_initializer}" - code << "@#{plugin.plugin_type}s << #{name}" + @variables.each do |plugin, name| + + + code << <<-CODE + #{name} = #{plugin.compile_initializer} + @#{plugin.plugin_type}s << #{name} + CODE + + # The flush method for this filter. + if plugin.plugin_type == "filter" + + code << <<-CODE + #{name}_flush = lambda do |options, &block| + @logger.debug? && @logger.debug(\"Flushing\", :plugin => #{name}) + + flushed_events = #{name}.flush(options) + + return if flushed_events.nil? || flushed_events.empty? + + flushed_events.each do |event| + @logger.debug? && @logger.debug(\"Flushing\", :plugin => #{name}, :event => event) + + events = [event] + #{plugin.compile_starting_here.gsub(/^/, " ")} + + block.call(event) + events.flatten.each{|e| block.call(e) if e != event} + end + + end + + if #{name}.respond_to?(:flush) + @periodic_flushers << #{name}_flush if #{name}.periodic_flush + @shutdown_flushers << #{name}_flush + end + CODE + + end end return code.join("\n") end @@ -151,38 +202,69 @@ def compile_initializer def compile case plugin_type - when "input" - return "start_input(#{variable_name})" - when "filter" - # This is some pretty stupid code, honestly. 
- # I'd prefer much if it were put into the Pipeline itself - # and this should simply compile to - # #{variable_name}.filter(event) - return [ - "newevents = []", - "extra_events.each do |event|", - " #{variable_name}.filter(event) do |newevent|", - " newevents << newevent", - " end", - "end", - "extra_events += newevents", - - "#{variable_name}.filter(event) do |newevent|", - " extra_events << newevent", - "end", - "if event.cancelled?", - " extra_events.each(&block)", - " return", - "end", - ].map { |l| "#{l}\n" }.join("") - when "output" - return "#{variable_name}.handle(event)\n" - when "codec" - settings = attributes.recursive_select(Attribute).collect(&:compile).reject(&:empty?) - attributes_code = "LogStash::Util.hash_merge_many(#{settings.map { |c| "{ #{c} }" }.join(", ")})" - return "plugin(#{plugin_type.inspect}, #{plugin_name.inspect}, #{attributes_code})" + when "input" + return "start_input(#{variable_name})" + when "filter" + return <<-CODE + events = events.flat_map do |event| + next [] if event.cancelled? + + new_events = [] + #{variable_name}.filter(event){|new_event| new_events << new_event} + event.cancelled? ? new_events : new_events.unshift(event) + end + CODE + when "output" + return "#{variable_name}.handle(event)\n" + when "codec" + settings = attributes.recursive_select(Attribute).collect(&:compile).reject(&:empty?) + attributes_code = "LogStash::Util.hash_merge_many(#{settings.map { |c| "{ #{c} }" }.join(", ")})" + return "plugin(#{plugin_type.inspect}, #{plugin_name.inspect}, #{attributes_code})" end end + + def compile_starting_here + return unless plugin_type == "filter" # only filter supported. + + expressions = [ + LogStash::Config::AST::Branch, + LogStash::Config::AST::Plugin + ] + code = [] + + # Find the branch we are in, if any (the 'if' statement, etc) + self_branch = recursive_select_parent(LogStash::Config::AST::BranchEntry).first + + # Find any siblings to our branch so we can skip them later. For example, + # if we are in an 'else if' we want to skip any sibling 'else if' or + # 'else' blocks. + branch_siblings = [] + if self_branch + branch_siblings = recursive_select_parent(LogStash::Config::AST::Branch).first \ + .recursive_select(LogStash::Config::AST::BranchEntry) \ + .reject { |b| b == self_branch } + end + + #ast = recursive_select_parent(LogStash::Config::AST::PluginSection).first + ast = recursive_select_parent(LogStash::Config::AST::Config).first + + found = false + recurse(ast) do |element, depth| + next false if element.is_a?(LogStash::Config::AST::PluginSection) && element.plugin_type.text_value != "filter" + if element == self + found = true + next false + end + if found && expressions.include?(element.class) + code << element.compile + next false + end + next false if branch_siblings.include?(element) + next true + end + + return code.collect { |l| "#{l}\n" }.join("") + end # def compile_starting_here end class Name < Node @@ -200,7 +282,7 @@ class Value < RValue; end module Unicode def self.wrap(text) - return "(" + text.inspect + ".force_encoding(\"UTF-8\")" + ")" + return "(" + text.inspect + ".force_encoding(Encoding::UTF_8)" + ")" end end @@ -245,24 +327,40 @@ class BranchOrPlugin < Node; end class Branch < Node def compile - return super + "end\n" + + # this construct is non obvious. we need to loop through each event and apply the conditional. 
+ # each branch of a conditional will contain a construct (a filter for example) that also loops through + # the events variable so we have to initialize it to [event] for the branch code. + # at the end, events is returned to handle the case where no branch match and no branch code is executed + # so we must make sure to return the current event. + + return <<-CODE + events = events.flat_map do |event| + events = [event] + #{super} + end + events + end + CODE end end - class If < Node + + class BranchEntry < Node; end + class If < BranchEntry def compile children = recursive_inject { |e| e.is_a?(Branch) || e.is_a?(Plugin) } - return "if #{condition.compile}\n" \ + return "if #{condition.compile} # if #{condition.text_value}\n" \ << children.collect(&:compile).map { |s| s.split("\n", -1).map { |l| " " + l }.join("\n") }.join("") << "\n" end end - class Elsif < Node + class Elsif < BranchEntry def compile children = recursive_inject { |e| e.is_a?(Branch) || e.is_a?(Plugin) } - return "elsif #{condition.compile}\n" \ + return "elsif #{condition.compile} # else if #{condition.text_value}\n" \ << children.collect(&:compile).map { |s| s.split("\n", -1).map { |l| " " + l }.join("\n") }.join("") << "\n" end end - class Else < Node + class Else < BranchEntry def compile children = recursive_inject { |e| e.is_a?(Branch) || e.is_a?(Plugin) } return "else\n" \ @@ -325,7 +423,7 @@ def compile end end - module ComparisonOperator + module ComparisonOperator def compile return " #{text_value} " end diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index 369e9829428..5c3dd35d899 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -8,6 +8,14 @@ require "logstash/timestamp" require "logstash/json" +# transcient pipeline events for normal in-flow signaling as opposed to +# flow altering exceptions. for now having base classes is adequate and +# in the future it might be necessary to refactor using like a BaseEvent +# class to have a common interface for all pileline events to support +# eventual queueing persistence for example, TBD. +class LogStash::ShutdownEvent; end +class LogStash::FlushEvent; end + # the logstash event object. # # An event is simply a tuple of (timestamp, data). diff --git a/lib/logstash/filters/base.rb b/lib/logstash/filters/base.rb index 652c31b8412..d35af57b725 100644 --- a/lib/logstash/filters/base.rb +++ b/lib/logstash/filters/base.rb @@ -59,7 +59,7 @@ class LogStash::Filters::Base < LogStash::Plugin # } # # # You can also remove multiple tags at once: - # + # # filter { # %PLUGIN% { # remove_tag => [ "foo_%{somefield}", "sad_unwanted_tag"] @@ -68,7 +68,7 @@ class LogStash::Filters::Base < LogStash::Plugin # # If the event has field "somefield" == "hello" this filter, on success, # would remove the tag "foo_hello" if it is present. The second example - # would remove a sad, unwanted tag as well. + # would remove a sad, unwanted tag as well. config :remove_tag, :validate => :array, :default => [] # If this filter is successful, add any arbitrary fields to this event. @@ -85,7 +85,7 @@ class LogStash::Filters::Base < LogStash::Plugin # # filter { # %PLUGIN% { - # add_field => { + # add_field => { # "foo_%{somefield}" => "Hello world, from %{host}" # "new_field" => "new_static_value" # } @@ -95,7 +95,7 @@ class LogStash::Filters::Base < LogStash::Plugin # If the event has field "somefield" == "hello" this filter, on success, # would add field "foo_hello" if it is present, with the # value above and the %{host} piece replaced with that value from the - # event. 
The second example would also add a hardcoded field. + # event. The second example would also add a hardcoded field. config :add_field, :validate => :hash, :default => {} # If this filter is successful, remove arbitrary fields from this event. @@ -117,10 +117,14 @@ class LogStash::Filters::Base < LogStash::Plugin # } # # If the event has field "somefield" == "hello" this filter, on success, - # would remove the field with name "foo_hello" if it is present. The second + # would remove the field with name "foo_hello" if it is present. The second # example would remove an additional, non-dynamic field. config :remove_field, :validate => :array, :default => [] + # Call the filter flush method at regular interval. + # Optional. + config :periodic_flush, :validate => :boolean, :default => false + RESERVED = ["type", "tags", "exclude_tags", "include_fields", "exclude_fields", "add_tag", "remove_tag", "add_field", "remove_field", "include_any", "exclude_any"] public @@ -169,11 +173,11 @@ def filter_matched(event) :field => field, :value => value) end end - + @remove_field.each do |field| field = event.sprintf(field) @logger.debug? and @logger.debug("filters/#{self.class.name}: removing field", - :field => field) + :field => field) event.remove(field) end @@ -223,4 +227,9 @@ def filter?(event) return true end + + public + def teardown + # Nothing to do by default. + end end # class LogStash::Filters::Base diff --git a/lib/logstash/filters/multiline.rb b/lib/logstash/filters/multiline.rb index 6a22f5e550b..a4d0b6298b0 100644 --- a/lib/logstash/filters/multiline.rb +++ b/lib/logstash/filters/multiline.rb @@ -100,21 +100,31 @@ class LogStash::Filters::Multiline < LogStash::Filters::Base # NUMBER \d+ config :patterns_dir, :validate => :array, :default => [] - # for debugging & testing purposes, do not use in production. allows periodic flushing of pending events - config :enable_flush, :validate => :boolean, :default => false + # The maximum age an event can be (in seconds) before it is automatically + # flushed. + config :max_age, :validate => :number, :default => 5 + + # Call the filter flush method at regular interval. + # Optional. + config :periodic_flush, :validate => :boolean, :default => true + # Detect if we are running from a jarfile, pick the right path. @@patterns_path = Set.new @@patterns_path += [LogStash::Environment.pattern_path("*")] + MULTILINE_TAG = "multiline" + public def initialize(config = {}) super + # this filter cannot be parallelized because message order + # cannot be garanteed across threads, line #2 could be processed + # before line #1 @threadsafe = false - # This filter needs to keep state. 
- @types = Hash.new { |h,k| h[k] = [] } + # this filter needs to keep state @pending = Hash.new end # def initialize @@ -138,6 +148,16 @@ def register @grok.compile(@pattern) + case @what + when "previous" + class << self; alias_method :multiline_filter!, :previous_filter!; end + when "next" + class << self; alias_method :multiline_filter!, :next_filter!; end + else + # we should never get here since @what is validated at config + raise(ArgumentError, "Unknown multiline 'what' value") + end # case @what + @logger.debug("Registered multiline plugin", :type => @type, :config => @config) end # def register @@ -145,95 +165,112 @@ def register def filter(event) return unless filter?(event) - if event["message"].is_a?(Array) - match = @grok.match(event["message"].first) - else - match = @grok.match(event["message"]) - end - key = event.sprintf(@stream_identity) - pending = @pending[key] - - @logger.debug("Multiline", :pattern => @pattern, :message => event["message"], - :match => match, :negate => @negate) + match = event["message"].is_a?(Array) ? @grok.match(event["message"].first) : @grok.match(event["message"]) + match = (match and !@negate) || (!match and @negate) # add negate option - # Add negate option - match = (match and !@negate) || (!match and @negate) + @logger.debug? && @logger.debug("Multiline", :pattern => @pattern, :message => event["message"], :match => match, :negate => @negate) - case @what - when "previous" - if match - event.tag "multiline" - # previous previous line is part of this event. - # append it to the event and cancel it - if pending - pending.append(event) - else - @pending[key] = event - end - event.cancel - else - # this line is not part of the previous event - # if we have a pending event, it's done, send it. - # put the current event into pending - if pending - tmp = event.to_hash - event.overwrite(pending) - @pending[key] = LogStash::Event.new(tmp) - else - @pending[key] = event - event.cancel - end # if/else pending - end # if/else match - when "next" - if match - event.tag "multiline" - # this line is part of a multiline event, the next - # line will be part, too, put it into pending. - if pending - pending.append(event) - else - @pending[key] = event - end - event.cancel - else - # if we have something in pending, join it with this message - # and send it. otherwise, this is a new message and not part of - # multiline, send it. - if pending - pending.append(event) - event.overwrite(pending) - @pending.delete(key) - end - end # if/else match - else - # TODO(sissel): Make this part of the 'register' method. - @logger.warn("Unknown multiline 'what' value.", :what => @what) - end # case @what + multiline_filter!(event, match) - if !event.cancelled? + unless event.cancelled? collapse_event!(event) filter_matched(event) if match end end # def filter - # Flush any pending messages. This is generally used for unit testing only. - # - # Note: flush is disabled now; it is preferable to use the multiline codec. 
+ # flush any pending messages + # called at regular interval without options and at pipeline shutdown with the :final => true option + # @param options [Hash] + # @option options [Boolean] :final => true to signal a final shutdown flush + # @return [Array] list of flushed events public - def flush - return [] unless @enable_flush + def flush(options = {}) + expired = nil + + # note that thread safety concerns are not necessary here because the multiline filter + # is not thread safe thus cannot be run in multiple folterworker threads and flushing + # is called by the same thread - events = [] - @pending.each do |key, value| - value.uncancel - events << collapse_event!(value) + # select all expired events from the @pending hash into a new expired hash + # if :final flush then select all events + expired = @pending.inject({}) do |r, (key, event)| + age = Time.now - Array(event["@timestamp"]).first.time + r[key] = event if (age >= @max_age) || options[:final] + r end - @pending.clear - return events + + # delete expired items from @pending hash + expired.each{|key, event| @pending.delete(key)} + + # return list of uncancelled and collapsed expired events + expired.map{|key, event| event.uncancel; collapse_event!(event)} end # def flush + public + def teardown + # nothing to do + end + private + def previous_filter!(event, match) + key = event.sprintf(@stream_identity) + + pending = @pending[key] + + if match + event.tag(MULTILINE_TAG) + # previous previous line is part of this event. + # append it to the event and cancel it + if pending + pending.append(event) + else + @pending[key] = event + end + event.cancel + else + # this line is not part of the previous event + # if we have a pending event, it's done, send it. + # put the current event into pending + if pending + tmp = event.to_hash + event.overwrite(pending) + @pending[key] = LogStash::Event.new(tmp) + else + @pending[key] = event + event.cancel + end + end # if match + end + + def next_filter!(event, match) + key = event.sprintf(@stream_identity) + + # protect @pending for race condition between the flush thread and the worker thread + pending = @pending[key] + + if match + event.tag(MULTILINE_TAG) + # this line is part of a multiline event, the next + # line will be part, too, put it into pending. + if pending + pending.append(event) + else + @pending[key] = event + end + event.cancel + else + # if we have something in pending, join it with this message + # and send it. otherwise, this is a new message and not part of + # multiline, send it. + if pending + pending.append(event) + event.overwrite(pending) + @pending.delete(key) + end + end # if match + end + def collapse_event!(event) event["message"] = event["message"].join("\n") if event["message"].is_a?(Array) event.timestamp = event.timestamp.first if event.timestamp.is_a?(Array) diff --git a/lib/logstash/filters/split.rb b/lib/logstash/filters/split.rb index 3524f0d6fa6..77ce1651255 100644 --- a/lib/logstash/filters/split.rb +++ b/lib/logstash/filters/split.rb @@ -8,7 +8,7 @@ # which emits one event for the whole output of a command and splitting that # output by newline - making each line an event. # -# The end result of each split is a complete copy of the event +# The end result of each split is a complete copy of the event # with only the current split section of the given field changed. 
class LogStash::Filters::Split < LogStash::Filters::Base @@ -31,8 +31,6 @@ def register def filter(event) return unless filter?(event) - events = [] - original_value = event[@field] # If for some reason the field is an array of values, take the first only. diff --git a/lib/logstash/filters/spool.rb b/lib/logstash/filters/spool.rb new file mode 100644 index 00000000000..e9c399aadc9 --- /dev/null +++ b/lib/logstash/filters/spool.rb @@ -0,0 +1,32 @@ +# encoding: utf-8 +require "logstash/filters/base" +require "logstash/namespace" +require "thread" + +# spool filter. this is used generally for internal/dev testing. +class LogStash::Filters::Spool < LogStash::Filters::Base + config_name "spool" + milestone 1 + + def register + @spool = [] + @spool_lock = Mutex.new # to synchronize between the flush & worker threads + end # def register + + def filter(event) + return unless filter?(event) + + filter_matched(event) + event.cancel + @spool_lock.synchronize {@spool << event} + end # def filter + + def flush(options = {}) + @spool_lock.synchronize do + flushed = @spool.map{|event| event.uncancel; event} + @spool = [] + flushed + end + end + +end # class LogStash::Filters::NOOP diff --git a/lib/logstash/filterworker.rb b/lib/logstash/filterworker.rb deleted file mode 100644 index 59a886c3c83..00000000000 --- a/lib/logstash/filterworker.rb +++ /dev/null @@ -1,122 +0,0 @@ -# encoding: utf-8 -require "logstash/namespace" -require "logstash/logging" -require "logstash/plugin" -require "logstash/config/mixin" -require "stud/interval" - -# TODO(sissel): Should this really be a 'plugin' ? -class LogStash::FilterWorker < LogStash::Plugin - include Stud - attr_accessor :logger - attr_accessor :filters - attr_reader :after_filter - - Exceptions = [Exception] - Exceptions << java.lang.Exception if RUBY_ENGINE == "jruby" - - def initialize(filters, input_queue, output_queue) - @filters = filters - @input_queue = input_queue - @output_queue = output_queue - @shutdown_requested = false - end # def initialize - - #This block is called after each filter is done on an event. - #The filtered event and filter class name is passed to the block. - #This could be used to add metrics in the future? - def after_filter(&block) - @after_filter = block - end - - def run - # TODO(sissel): Run a flusher thread for each plugin requesting flushes - # > It seems reasonable that you could want a multiline filter to flush - # after 5 seconds, but want a metrics filter to flush every 10 or 60. - - # Set up the periodic flusher thread. - @flusher = Thread.new { interval(5) { flusher } } - - while !@shutdown_requested && event = @input_queue.pop - if event == LogStash::SHUTDOWN - finished - @input_queue << LogStash::SHUTDOWN # for the next filter thread - return - end - - filter(event) - end # while @input_queue.pop - finished - end # def run - - def flusher - events = [] - @filters.each do |filter| - - # Filter any events generated so far in this flush. - events.each do |event| - # TODO(sissel): watchdog on flush filtration? - unless event.cancelled? - filter.filter(event) - @after_filter.call(event,filter) unless @after_filter.nil? - end - end - - # TODO(sissel): watchdog on flushes? - if filter.respond_to?(:flush) - flushed = filter.flush - events += flushed if !flushed.nil? && flushed.any? - end - end - - events.each do |event| - @logger.debug? and @logger.debug("Pushing flushed events", :event => event) - @output_queue.push(event) unless event.cancelled? 
- end - end # def flusher - - def teardown - @shutdown_requested = true - end - - def filter(original_event) - # Make an 'events' array that filters can push onto if they - # need to generate additional events based on the current event. - # The 'split' filter does this, for example. - events = [original_event] - - events.each do |event| - @filters.each do |filter| - # Filter can emit multiple events, like the 'split' event, so - # give the input queue to dump generated events into. - - # TODO(sissel): This may require some refactoring later, I am not sure - # this is the best approach. The goal is to allow filters to modify - # the current event, but if necessary, create new events based on - # this event. - begin - update_watchdog(:event => event, :filter => filter) - filter.execute(event) do |newevent| - events << newevent - end - rescue *Exceptions => e - @logger.warn("Exception during filter", :event => event, - :exception => $!, :backtrace => e.backtrace, - :filter => filter) - ensure - clear_watchdog - end - if event.cancelled? - @logger.debug? and @logger.debug("Event cancelled", :event => event, - :filter => filter.class) - break - end - @after_filter.call(event,filter) unless @after_filter.nil? - end # @filters.each - - @logger.debug? and @logger.debug("Event finished filtering", :event => event, - :thread => Thread.current[:name]) - @output_queue.push(event) unless event.cancelled? - end # events.each - end # def filter -end # class LogStash::FilterWorker diff --git a/lib/logstash/inputs/stdin.rb b/lib/logstash/inputs/stdin.rb index f09df42358f..210a2300f53 100644 --- a/lib/logstash/inputs/stdin.rb +++ b/lib/logstash/inputs/stdin.rb @@ -30,7 +30,7 @@ def run(queue) event["host"] = @host if !event.include?("host") queue << event end - rescue EOFError, LogStash::ShutdownSignal + rescue IOError, EOFError, LogStash::ShutdownSignal # stdin closed or a requested shutdown break end diff --git a/lib/logstash/inputs/udp.rb b/lib/logstash/inputs/udp.rb index f4224655a36..ba5148c43d4 100644 --- a/lib/logstash/inputs/udp.rb +++ b/lib/logstash/inputs/udp.rb @@ -5,7 +5,7 @@ require "socket" # Read messages as events over the network via udp. The only required -# configuration item is `port`, which specifies the udp port logstash +# configuration item is `port`, which specifies the udp port logstash # will listen on for event streams. # class LogStash::Inputs::Udp < LogStash::Inputs::Base @@ -23,10 +23,10 @@ class LogStash::Inputs::Udp < LogStash::Inputs::Base # The maximum packet size to read from the network config :buffer_size, :validate => :number, :default => 8192 - + # Number of threads processing packets config :workers, :validate => :number, :default => 2 - + # This is the number of unprocessed UDP packets you can hold in memory # before packets will start dropping. 
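   # (backed by a Ruby SizedQueue: when it fills, the listener thread blocks
   # on push and further datagrams overflow the kernel's socket buffer)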
config :queue_size, :validate => :number, :default => 2000 @@ -44,7 +44,7 @@ def register public def run(output_queue) - @output_queue = output_queue + @output_queue = output_queue begin # udp server udp_listener(output_queue) @@ -68,17 +68,17 @@ def udp_listener(output_queue) @udp = UDPSocket.new(Socket::AF_INET) @udp.bind(@host, @port) - @input_to_worker = SizedQueue.new(@queue_size) + @input_to_worker = SizedQueue.new(@queue_size) - @input_workers = @workers.times do |i| - @logger.debug("Starting UDP worker thread", :worker => i) - Thread.new { inputworker(i) } - end - - loop do - #collect datagram message and add to queue + @input_workers = @workers.times do |i| + @logger.debug("Starting UDP worker thread", :worker => i) + Thread.new { inputworker(i) } + end + + while true + #collect datagram message and add to queue payload, client = @udp.recvfrom(@buffer_size) - @input_to_worker.push([payload,client]) + @input_to_worker.push([payload, client]) end ensure if @udp @@ -86,29 +86,24 @@ def udp_listener(output_queue) @udp.close_write rescue nil end end # def udp_listener - + def inputworker(number) LogStash::Util::set_thread_name(" e @logger.error("Exception in inputworker", "exception" => e, "backtrace" => e.backtrace) end end # def inputworker - + public def teardown @udp.close if @udp && !@udp.closed? diff --git a/lib/logstash/pipeline.rb b/lib/logstash/pipeline.rb index 8ed9c7b5a52..8811e1e5dc4 100644 --- a/lib/logstash/pipeline.rb +++ b/lib/logstash/pipeline.rb @@ -1,14 +1,18 @@ # encoding: utf-8 -require "logstash/config/file" +require "thread" # +require "stud/interval" require "logstash/namespace" -require "thread" # stdlib +require "logstash/errors" +require "logstash/event" +require "logstash/config/file" require "logstash/filters/base" require "logstash/inputs/base" require "logstash/outputs/base" -require "logstash/errors" -require "stud/interval" # gem stud class LogStash::Pipeline + + FLUSH_EVENT = LogStash::FlushEvent.new + def initialize(configstr) @logger = Cabin::Channel.get(LogStash) grammar = LogStashConfigParser.new @@ -69,6 +73,7 @@ def filters? def run @started = true @input_threads = [] + start_inputs start_filters if filters? start_outputs @@ -78,11 +83,12 @@ def run @logger.info("Pipeline started") wait_inputs - # In theory there's nothing to do to filters to tell them to shutdown? if filters? shutdown_filters wait_filters + flush_filters_to_output!(:final => true) end + shutdown_outputs wait_outputs @@ -103,7 +109,8 @@ def wait_inputs end def shutdown_filters - @input_to_filter.push(LogStash::ShutdownSignal) + @flusher_lock.synchronize { @flusher_thread.kill } + @input_to_filter.push(LogStash::ShutdownEvent.new) end def wait_filters @@ -112,7 +119,7 @@ def wait_filters def shutdown_outputs # nothing, filters will do this - @filter_to_output.push(LogStash::ShutdownSignal) + @filter_to_output.push(LogStash::ShutdownEvent.new) end def wait_outputs @@ -143,11 +150,12 @@ def start_filters Thread.new { filterworker } end - # Set up the periodic flusher thread. 
- @flusher_thread = Thread.new { Stud.interval(5) { filter_flusher } } + @flusher_lock = Mutex.new + @flusher_thread = Thread.new { Stud.interval(5) { @flusher_lock.synchronize { @input_to_filter.push(FLUSH_EVENT) } } } end def start_outputs + @outputs.each(&:register) @output_threads = [ Thread.new { outputworker } ] @@ -189,24 +197,23 @@ def filterworker begin while true event = @input_to_filter.pop - if event == LogStash::ShutdownSignal + + case event + when LogStash::Event + # use events array to guarantee ordering of origin vs created events + # where created events are emitted by filters like split or metrics + events = [] + filter(event) { |newevent| events << newevent } + events.each { |event| @filter_to_output.push(event) } + when LogStash::FlushEvent + # handle filter flushing here so that non threadsafe filters (thus only running one filterworker) + # don't have to deal with thread safety implementing the flush method + @flusher_lock.synchronize { flush_filters_to_output! } + when LogStash::ShutdownEvent + # pass it down to any other filterworker and stop this worker @input_to_filter.push(event) break end - - - # TODO(sissel): we can avoid the extra array creation here - # if we don't guarantee ordering of origin vs created events. - # - origin event is one that comes in naturally to the filter worker. - # - created events are emitted by filters like split or metrics - events = [event] - filter(event) do |newevent| - events << newevent - end - events.each do |event| - next if event.cancelled? - @filter_to_output.push(event) - end end rescue => e @logger.error("Exception in filterworker", "exception" => e, "backtrace" => e.backtrace) @@ -217,11 +224,10 @@ def filterworker def outputworker LogStash::Util::set_thread_name(">output") - @outputs.each(&:register) @outputs.each(&:worker_setup) while true event = @filter_to_output.pop - break if event == LogStash::ShutdownSignal + break if event.is_a?(LogStash::ShutdownEvent) output(event) end # while true @outputs.each(&:teardown) @@ -248,7 +254,7 @@ def shutdown end end - # No need to send the ShutdownSignal to the filters/outputs nor to wait for + # No need to send the ShutdownEvent to the filters/outputs nor to wait for # the inputs to finish, because in the #run method we wait for that anyway. end # def shutdown @@ -266,28 +272,27 @@ def output(event) @output_func.call(event) end - def filter_flusher - events = [] - @filters.each do |filter| + # perform filters flush and yeild flushed event to the passed block + # @param options [Hash] + # @option options [Boolean] :final => true to signal a final shutdown flush + def flush_filters(options = {}, &block) + flushers = options[:final] ? @shutdown_flushers : @periodic_flushers - # Filter any events generated so far in this flush. - events.each do |event| - # TODO(sissel): watchdog on flush filtration? - unless event.cancelled? - filter.filter(event) - end - end + flushers.each do |flusher| + flusher.call(options, &block) + end + end - # TODO(sissel): watchdog on flushes? - if filter.respond_to?(:flush) - flushed = filter.flush - events += flushed if !flushed.nil? && flushed.any? + # perform filters flush into the output queue + # @param options [Hash] + # @option options [Boolean] :final => true to signal a final shutdown flush + def flush_filters_to_output!(options = {}) + flush_filters(options) do |event| + unless event.cancelled? + @logger.debug? and @logger.debug("Pushing flushed events", :event => event) + @filter_to_output.push(event) end end + end # flush_filters_to_output! 
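+
+  # note: flush_filters_to_output! has two callers: the filterworker, when a
+  # FLUSH_EVENT arrives (periodic flush, serialized via @flusher_lock), and
+  # #run with :final => true for the last flush at pipeline shutdown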
- events.each do |event| - @logger.debug? and @logger.debug("Pushing flushed events", :event => event) - @filter_to_output.push(event) unless event.cancelled? - end - end # def filter_flusher end # class Pipeline diff --git a/spec/conditionals/test.rb b/spec/conditionals/test.rb index 2e06c593750..7407ecd008c 100644 --- a/spec/conditionals/test.rb +++ b/spec/conditionals/test.rb @@ -160,7 +160,7 @@ def conditional(expression, &block) if "foo" not in "baz" { mutate { add_tag => "baz" } } if "foo" not in "foo" { mutate { add_tag => "foo" } } if !("foo" not in "foo") { mutate { add_tag => "notfoo" } } - if "foo" not in [somelist] { mutate { add_tag => "notsomelist" } } + if "foo" not in [somelist] { mutate { add_tag => "notsomelist" } } if "one" not in [somelist] { mutate { add_tag => "somelist" } } if "foo" not in [alsomissing] { mutate { add_tag => "no string in missing field" } } } @@ -183,12 +183,12 @@ def conditional(expression, &block) conditional "[message] == 'sample'" do sample("sample") { insist { subject["tags"] }.include?("success") } sample("different") { insist { subject["tags"] }.include?("failure") } - end + end conditional "[message] != 'sample'" do sample("sample") { insist { subject["tags"] }.include?("failure") } sample("different") { insist { subject["tags"] }.include?("success") } - end + end conditional "[message] < 'sample'" do sample("apple") { insist { subject["tags"] }.include?("success") } @@ -230,12 +230,12 @@ def conditional(expression, &block) conditional "!([message] == 'sample')" do sample("sample") { reject { subject["tags"] }.include?("success") } sample("different") { reject { subject["tags"] }.include?("failure") } - end + end conditional "!([message] != 'sample')" do sample("sample") { reject { subject["tags"] }.include?("failure") } sample("different") { reject { subject["tags"] }.include?("success") } - end + end conditional "!([message] < 'sample')" do sample("apple") { reject { subject["tags"] }.include?("success") } @@ -340,4 +340,34 @@ def conditional(expression, &block) end end end + + describe "new events from root" do + config <<-CONFIG + filter { + if [type] == "original" { + clone { + clones => ["clone"] + } + } + if [type] == "original" { + mutate { add_field => { "cond1" => "true" } } + } else { + mutate { add_field => { "cond2" => "true" } } + } + } + CONFIG + + sample({"type" => "original"}) do + insist { subject }.is_a?(Array) + insist { subject.length } == 2 + + insist { subject[0]["type"] } == "original" + insist { subject[0]["cond1"] } == "true" + insist { subject[0]["cond2"] } == nil + + insist { subject[1]["type"] } == "clone" + # insist { subject[1]["cond1"] } == nil + # insist { subject[1]["cond2"] } == "true" + end + end end diff --git a/spec/filters/clone.rb b/spec/filters/clone.rb index fd73f178fac..900539f3a76 100644 --- a/spec/filters/clone.rb +++ b/spec/filters/clone.rb @@ -80,4 +80,6 @@ insist { subject[1]["number"] } == 5 end end + + end diff --git a/spec/filters/filter_chains.rb b/spec/filters/filter_chains.rb new file mode 100644 index 00000000000..edff7979bc6 --- /dev/null +++ b/spec/filters/filter_chains.rb @@ -0,0 +1,122 @@ +# encoding: utf-8 + +require "test_utils" +require "logstash/filters/split" +require "logstash/filters/clone" + +describe LogStash::Filters do + extend LogStash::RSpec + + describe "chain split with mutate filter" do + config <<-CONFIG + filter { + split { } + mutate { replace => [ "message", "test" ] } + } + CONFIG + + sample "hello\nbird" do + insist { subject.length } == 2 + insist { 
subject[0]["message"] } == "test" + insist { subject[1]["message"] } == "test" + end + end + + + describe "new events bug #793" do + config <<-CONFIG + filter { + split { terminator => "," } + mutate { rename => { "message" => "fancypants" } } + } + CONFIG + + sample "hello,world" do + insist { subject.length } == 2 + insist { subject[0]["fancypants"] } == "hello" + insist { subject[1]["fancypants"] } == "world" + end + end + + describe "split then multiple mutate" do + config <<-CONFIG + filter { + split { } + mutate { replace => [ "message", "test" ] } + mutate { replace => [ "message", "test2" ] } + mutate { replace => [ "message", "test3" ] } + mutate { replace => [ "message", "test4" ] } + } + CONFIG + + sample "big\nbird" do + insist { subject.length } == 2 + insist { subject[0]["message"] } == "test4" + insist { subject[1]["message"] } == "test4" + end + end + + describe "split then clone" do + config <<-CONFIG + filter { + split { } + clone { clones => ['clone1', 'clone2'] } + } + CONFIG + + sample "big\nbird" do + insist { subject.length } == 6 + + insist { subject[0]["message"] } == "big" + insist { subject[0]["type"] } == nil + + insist { subject[1]["message"] } == "big" + insist { subject[1]["type"] } == "clone1" + + insist { subject[2]["message"] } == "big" + insist { subject[2]["type"] } == "clone2" + + insist { subject[3]["message"] } == "bird" + insist { subject[3]["type"] } == nil + + insist { subject[4]["message"] } == "bird" + insist { subject[4]["type"] } == "clone1" + + insist { subject[5]["message"] } == "bird" + insist { subject[5]["type"] } == "clone2" + end + end + + describe "clone with conditionals, see bug #1548" do + type "original" + config <<-CONFIG + filter { + clone { + clones => ["clone"] + } + if [type] == "clone" { + mutate { add_field => { "clone" => "true" } } + } else { + mutate { add_field => { "original" => "true" } } + } + } + CONFIG + + sample("message" => "hello world") do + insist { subject }.is_a? 
Array + # subject.each{|event| puts(event.inspect + "\n")} + insist { subject.length } == 2 + + insist { subject.first["type"] } == nil + insist { subject.first["original"] } == "true" + insist { subject.first["clone"]} == nil + insist { subject.first["message"] } == "hello world" + + insist { subject.last["type"]} == "clone" + insist { subject.last["original"] } == nil + insist { subject.last["clone"]} == "true" + insist { subject.last["message"] } == "hello world" + end + end + +end diff --git a/spec/filters/multiline.rb b/spec/filters/multiline.rb index e5be67f31d0..dcb8bb74873 100644 --- a/spec/filters/multiline.rb +++ b/spec/filters/multiline.rb @@ -11,7 +11,7 @@ config <<-CONFIG filter { multiline { - enable_flush => true + periodic_flush => false pattern => "^\\s" what => previous } @@ -30,7 +30,6 @@ config <<-CONFIG filter { multiline { - enable_flush => true pattern => "^%{NUMBER} %{TIME}" negate => true what => previous @@ -47,7 +46,6 @@ config <<-CONFIG filter { multiline { - enable_flush => true pattern => "^\\s" what => previous } @@ -98,7 +96,6 @@ add_tag => "dummy" } multiline { - enable_flush => true add_tag => [ "nope" ] remove_tag => "dummy" add_field => [ "dummy2", "value" ] diff --git a/spec/filters/split.rb b/spec/filters/split.rb index f258dc38952..7e95c80894c 100644 --- a/spec/filters/split.rb +++ b/spec/filters/split.rb @@ -57,4 +57,5 @@ insist { subject[2]["custom"] } == "sesame street" end end + end diff --git a/spec/filters/spool.rb b/spec/filters/spool.rb new file mode 100644 index 00000000000..0758aaee518 --- /dev/null +++ b/spec/filters/spool.rb @@ -0,0 +1,89 @@ +require "test_utils" +require "logstash/filters/spool" + +#NOOP filter is perfect for testing Filters::Base features with minimal overhead +describe LogStash::Filters::Spool do + extend LogStash::RSpec + + # spool test are really flush tests. spool does nothing more than waiting for flush to be called. 
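+  # (Spool#filter cancels and queues every event; Spool#flush uncancels and
+  # releases the batch, so samples only see their events after a flush)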
+ + describe "flush one event" do + config <<-CONFIG + filter { + spool { } + } + CONFIG + + sample "foo" do + insist { subject["message"] } == "foo" + end + end + + describe "spooling multiple events" do + config <<-CONFIG + filter { + spool { } + } + CONFIG + + sample ["foo", "bar"] do + insist { subject[0]["message"] } == "foo" + insist { subject[1]["message"] } == "bar" + end + end + + describe "spooling events through conditionals" do + config <<-CONFIG + filter { + spool { } + if [message] == "foo" { + mutate { add_field => { "cond1" => "true" } } + } else { + mutate { add_field => { "cond2" => "true" } } + } + mutate { add_field => { "last" => "true" } } + } + CONFIG + + sample ["foo", "bar"] do + insist { subject[0]["message"] } == "foo" + insist { subject[0]["cond1"] } == "true" + insist { subject[0]["cond2"] } == nil + insist { subject[0]["last"] } == "true" + + insist { subject[1]["message"] } == "bar" + insist { subject[1]["cond1"] } == nil + insist { subject[1]["cond2"] } == "true" + insist { subject[1]["last"] } == "true" + end + end + + describe "spooling eventS with conditionals" do + config <<-CONFIG + filter { + mutate { add_field => { "first" => "true" } } + if [message] == "foo" { + spool { } + } else { + mutate { add_field => { "cond2" => "true" } } + } + mutate { add_field => { "last" => "true" } } + } + CONFIG + + sample ["foo", "bar"] do + # here received events will be reversed since the spooled one will be flushed last, at shutdown + + insist { subject[0]["message"] } == "bar" + insist { subject[0]["first"] } == "true" + insist { subject[0]["cond2"] } == "true" + insist { subject[0]["last"] } == "true" + + insist { subject[1]["message"] } == "foo" + insist { subject[1]["first"] } == "true" + insist { subject[1]["cond2"] } == nil + insist { subject[1]["last"] } == "true" + end + end + +end diff --git a/spec/test_utils.rb b/spec/test_utils.rb index f890552ef34..8c98f4a911c 100644 --- a/spec/test_utils.rb +++ b/spec/test_utils.rb @@ -88,23 +88,17 @@ def sample(sample_event, &block) let(:results) do results = [] - count = 0 pipeline.instance_eval { @filters.each(&:register) } event.each do |e| - extra = [] - pipeline.filter(e) do |new_event| - extra << new_event - end - results << e if !e.cancelled? - results += extra.reject(&:cancelled?) + pipeline.filter(e) {|new_event| results << new_event } end - pipeline.instance_eval {@filters.each {|f| results += f.flush if f.respond_to?(:flush)}} + pipeline.flush_filters(:final => true) do |e| + results << e unless e.cancelled? + end - # TODO(sissel): pipeline flush needs to be implemented. - # results += pipeline.flush - next results + results end subject { results.length > 1 ? 
results: results.first } From bf953f0f90f8d90eb8db3d5839d7139d0cf05b1a Mon Sep 17 00:00:00 2001 From: Tray Torrance Date: Mon, 4 Aug 2014 11:14:11 -0700 Subject: [PATCH 46/74] Fixes finterprint filter handling of timestamp field Fixes #1572 --- lib/logstash/filters/fingerprint.rb | 6 ++--- spec/filters/fingerprint.rb | 35 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/fingerprint.rb b/lib/logstash/filters/fingerprint.rb index ce39d3c69e5..2bb211a660a 100644 --- a/lib/logstash/filters/fingerprint.rb +++ b/lib/logstash/filters/fingerprint.rb @@ -90,15 +90,15 @@ def anonymize_ipv4_network(ip_string) def anonymize_openssl(data) digest = encryption_algorithm() # in JRuby 1.7.11 outputs as ASCII-8BIT - OpenSSL::HMAC.hexdigest(digest, @key, data).force_encoding(Encoding::UTF_8) + OpenSSL::HMAC.hexdigest(digest, @key, data.to_s).force_encoding(Encoding::UTF_8) end def anonymize_murmur3(value) case value when Fixnum MurmurHash3::V32.int_hash(value) - when String - MurmurHash3::V32.str_hash(value) + else + MurmurHash3::V32.str_hash(value.to_s) end end diff --git a/spec/filters/fingerprint.rb b/spec/filters/fingerprint.rb index 22fe49543f6..d8973441f70 100644 --- a/spec/filters/fingerprint.rb +++ b/spec/filters/fingerprint.rb @@ -164,4 +164,39 @@ end end + context 'Timestamps' do + epoch_time = Time.at(0).gmtime + + describe 'OpenSSL Fingerprinting' do + config <<-CONFIG + filter { + fingerprint { + source => ['@timestamp'] + key => '0123' + method => 'SHA1' + } + } + CONFIG + + sample("@timestamp" => epoch_time) do + insist { subject["fingerprint"] } == '1d5379ec92d86a67cfc642d55aa050ca312d3b9a' + end + end + + describe 'MURMUR3 Fingerprinting' do + config <<-CONFIG + filter { + fingerprint { + source => ['@timestamp'] + method => 'MURMUR3' + } + } + CONFIG + + sample("@timestamp" => epoch_time) do + insist { subject["fingerprint"] } == 743372282 + end + end + end + end From d65706ded40cbf91644f0f93451e5b24e3a5d3c5 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Wed, 10 Sep 2014 10:00:00 +0000 Subject: [PATCH 47/74] Improvements to plugin manager - Use better installer method to avoid documentation installation With the previous method it would install the documentation which didn't exist. - Create Fake gemspec info for logstash to help dependency management for plugins Previously we would create a real gem and install it to aid in dependency management for plugins and logstash Now we create a fake gemspec on the fly with the real version of logstash avoiding having to build it every time. 
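
In outline the trick is just this (a sketch; the real version below also
fills in authors, description and the other gemspec fields):

    require 'logstash/version' # defines LOGSTASH_VERSION

    spec = Gem::Specification.new do |gem|
      gem.name    = "logstash"
      gem.version = LOGSTASH_VERSION
    end
    # RubyGems now treats logstash as installed, so plugin gems that
    # declare a dependency on it resolve without building a real gem
    Gem::Specification.add_spec(spec)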
--- lib/logstash/pluginmanager/install.rb | 9 +++++++-- lib/logstash/pluginmanager/update.rb | 9 +++++++-- lib/logstash/pluginmanager/util.rb | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/lib/logstash/pluginmanager/install.rb b/lib/logstash/pluginmanager/install.rb index 5d9277c6e8a..97c49cc1480 100644 --- a/lib/logstash/pluginmanager/install.rb +++ b/lib/logstash/pluginmanager/install.rb @@ -2,7 +2,7 @@ require 'logstash/namespace' require 'logstash/pluginmanager' require 'logstash/pluginmanager/util' -require 'rubygems/installer' +require 'rubygems/dependency_installer' require 'rubygems/uninstaller' require 'jar-dependencies' require 'jar_install_post_install_hook' @@ -16,6 +16,7 @@ class LogStash::PluginManager::Install < Clamp::Command option "--proxy", "PROXY", "Use HTTP proxy for remote operations" def execute + LogStash::PluginManager::Util.load_logstash_gemspec ::Gem.configuration.verbose = false ::Gem.configuration[:http_proxy] = proxy @@ -50,7 +51,11 @@ def execute end ::Gem.configuration.verbose = false - specs, _ = ::Gem.install(plugin, version) + options = {} + options[:document] = [] + inst = Gem::DependencyInstaller.new(options) + inst.install plugin, version + specs, _ = inst.installed_gems puts ("Successfully installed '#{specs.name}' with version '#{specs.version}'") end diff --git a/lib/logstash/pluginmanager/update.rb b/lib/logstash/pluginmanager/update.rb index bf08bb65e56..51e1d5d35ab 100644 --- a/lib/logstash/pluginmanager/update.rb +++ b/lib/logstash/pluginmanager/update.rb @@ -2,7 +2,7 @@ require 'logstash/namespace' require 'logstash/pluginmanager' require 'logstash/pluginmanager/util' -require 'rubygems/installer' +require 'rubygems/dependency_installer' require 'rubygems/uninstaller' require 'jar-dependencies' require 'jar_install_post_install_hook' @@ -17,6 +17,7 @@ class LogStash::PluginManager::Update < Clamp::Command def execute + LogStash::PluginManager::Util.load_logstash_gemspec ::Gem.configuration.verbose = false ::Gem.configuration[:http_proxy] = proxy @@ -61,7 +62,11 @@ def update_gem(spec, version) end ::Gem.configuration.verbose = false - ::Gem.install(spec.name, version) + options = {} + options[:document] = [] + inst = Gem::DependencyInstaller.new(options) + inst.install spec.name, gem_meta.version + specs, _ = inst.installed_gems puts ("Update successful") end diff --git a/lib/logstash/pluginmanager/util.rb b/lib/logstash/pluginmanager/util.rb index 7138258b268..c04b995cfad 100644 --- a/lib/logstash/pluginmanager/util.rb +++ b/lib/logstash/pluginmanager/util.rb @@ -1,3 +1,5 @@ +require 'logstash/version' + class LogStash::PluginManager::Util def self.logstash_plugin?(gem) @@ -48,4 +50,20 @@ def self.ask_yesno(prompt) end end + def self.load_logstash_gemspec + logstash_spec = Gem::Specification.new do |gem| + gem.authors = ["Jordan Sissel", "Pete Fritchman"] + gem.email = ["jls@semicomplete.com", "petef@databits.net"] + gem.description = %q{scalable log and event management (search, archive, pipeline)} + gem.summary = %q{logstash - log and event management} + gem.homepage = "http://logstash.net/" + gem.license = "Apache License (2.0)" + + gem.name = "logstash" + gem.version = LOGSTASH_VERSION + end + + Gem::Specification.add_spec logstash_spec + end + end From 9398cd65a16ccad5f63e1b9c686a6c468f76e65b Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Wed, 10 Sep 2014 13:43:57 +0000 Subject: [PATCH 48/74] Move logstash gemspec loading into environment module Previously the loading of the logstash 
gemspec was limited to the plugin manager After more tests we need the gemspec to be loaded for any action with the plugins Moving it to the environment module makes it available for more generic loading Make sure plugin manager uses new function for loading gemspec --- lib/logstash/environment.rb | 18 ++++++++++++++++++ lib/logstash/pluginmanager/install.rb | 3 ++- lib/logstash/pluginmanager/update.rb | 2 +- lib/logstash/pluginmanager/util.rb | 17 ----------------- lib/logstash/runner.rb | 1 + 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/logstash/environment.rb b/lib/logstash/environment.rb index 03399379c9c..16006da2833 100644 --- a/lib/logstash/environment.rb +++ b/lib/logstash/environment.rb @@ -1,4 +1,5 @@ require "logstash/errors" +require 'logstash/version' module LogStash module Environment @@ -68,5 +69,22 @@ def pattern_path(path) def locales_path(path) return ::File.join(LOGSTASH_HOME, "locales", path) end + + def load_logstash_gemspec! + logstash_spec = Gem::Specification.new do |gem| + gem.authors = ["Jordan Sissel", "Pete Fritchman"] + gem.email = ["jls@semicomplete.com", "petef@databits.net"] + gem.description = %q{scalable log and event management (search, archive, pipeline)} + gem.summary = %q{logstash - log and event management} + gem.homepage = "http://logstash.net/" + gem.license = "Apache License (2.0)" + + gem.name = "logstash" + gem.version = LOGSTASH_VERSION + end + + Gem::Specification.add_spec logstash_spec + end + end end diff --git a/lib/logstash/pluginmanager/install.rb b/lib/logstash/pluginmanager/install.rb index 97c49cc1480..d2ace41a24e 100644 --- a/lib/logstash/pluginmanager/install.rb +++ b/lib/logstash/pluginmanager/install.rb @@ -1,5 +1,6 @@ require 'clamp' require 'logstash/namespace' +require 'logstash/environment' require 'logstash/pluginmanager' require 'logstash/pluginmanager/util' require 'rubygems/dependency_installer' @@ -16,7 +17,7 @@ class LogStash::PluginManager::Install < Clamp::Command option "--proxy", "PROXY", "Use HTTP proxy for remote operations" def execute - LogStash::PluginManager::Util.load_logstash_gemspec + LogStash::Environment.load_logstash_gemspec! ::Gem.configuration.verbose = false ::Gem.configuration[:http_proxy] = proxy diff --git a/lib/logstash/pluginmanager/update.rb b/lib/logstash/pluginmanager/update.rb index 51e1d5d35ab..e2ebbb6b35f 100644 --- a/lib/logstash/pluginmanager/update.rb +++ b/lib/logstash/pluginmanager/update.rb @@ -17,7 +17,7 @@ class LogStash::PluginManager::Update < Clamp::Command def execute - LogStash::PluginManager::Util.load_logstash_gemspec + LogStash::Environment.load_logstash_gemspec! 
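+    # (registered before any other Gem API call so that plugin dependency
+    # resolution can see the running logstash version)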
::Gem.configuration.verbose = false ::Gem.configuration[:http_proxy] = proxy diff --git a/lib/logstash/pluginmanager/util.rb b/lib/logstash/pluginmanager/util.rb index c04b995cfad..ce6cab38058 100644 --- a/lib/logstash/pluginmanager/util.rb +++ b/lib/logstash/pluginmanager/util.rb @@ -1,4 +1,3 @@ -require 'logstash/version' class LogStash::PluginManager::Util @@ -50,20 +49,4 @@ def self.ask_yesno(prompt) end end - def self.load_logstash_gemspec - logstash_spec = Gem::Specification.new do |gem| - gem.authors = ["Jordan Sissel", "Pete Fritchman"] - gem.email = ["jls@semicomplete.com", "petef@databits.net"] - gem.description = %q{scalable log and event management (search, archive, pipeline)} - gem.summary = %q{logstash - log and event management} - gem.homepage = "http://logstash.net/" - gem.license = "Apache License (2.0)" - - gem.name = "logstash" - gem.version = LOGSTASH_VERSION - end - - Gem::Specification.add_spec logstash_spec - end - end diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index 4d83edf4dc4..82862748d9a 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -6,6 +6,7 @@ require "logstash/environment" LogStash::Environment.set_gem_paths! +LogStash::Environment.load_logstash_gemspec! Thread.abort_on_exception = true if ENV["PROFILE_BAD_LOG_CALLS"] || $DEBUGLIST.include?("log") From 9c16d78a661c51f874a5bbc3e4ad90c59633b60e Mon Sep 17 00:00:00 2001 From: Joseph Lawson Date: Mon, 15 Sep 2014 22:34:37 -0400 Subject: [PATCH 49/74] this should fix logstash #1738 Closes #1738 --- lib/logstash/outputs/kafka.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/logstash/outputs/kafka.rb b/lib/logstash/outputs/kafka.rb index dac8c8ba57d..ae050c9280b 100644 --- a/lib/logstash/outputs/kafka.rb +++ b/lib/logstash/outputs/kafka.rb @@ -110,7 +110,6 @@ def register end require 'jruby-kafka' options = { - :topic_id => @topic_id, :broker_list => @broker_list, :compression_codec => @compression_codec, :compressed_topics => @compressed_topics, From 01e9f375914e1156d7ba8750236556ce52f91996 Mon Sep 17 00:00:00 2001 From: wiibaa Date: Tue, 23 Sep 2014 07:21:05 +0200 Subject: [PATCH 50/74] test and fix for LOGSTASH-2288 --- lib/logstash/config/config_ast.rb | 5 +---- spec/conditionals/test.rb | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/lib/logstash/config/config_ast.rb b/lib/logstash/config/config_ast.rb index a88bf5f5915..3a31c04a3fb 100644 --- a/lib/logstash/config/config_ast.rb +++ b/lib/logstash/config/config_ast.rb @@ -74,10 +74,7 @@ def compile # defines @filter_func and @output_func definitions << "@#{type}_func = lambda do |event, &block|" - if type == "filter" - definitions << " events = [event]" - end - + definitions << " events = [event]" definitions << " @logger.debug? 
&& @logger.debug(\"#{type} received\", :event => event.to_hash)" sections.select { |s| s.plugin_type.text_value == type }.each do |s| definitions << s.compile.split("\n", -1).map { |e| " #{e}" } diff --git a/spec/conditionals/test.rb b/spec/conditionals/test.rb index 7407ecd008c..95166e63be5 100644 --- a/spec/conditionals/test.rb +++ b/spec/conditionals/test.rb @@ -21,7 +21,32 @@ def conditional(expression, &block) end end -describe "conditionals" do +describe "conditionals in output" do + extend LogStash::RSpec + extend ConditionalFanciness + + describe "simple" do + config <<-CONFIG + input { + generator { + message => '{"foo":{"bar"},"baz": "quux"}' + count => 1 + } + } + output { + if [foo] == "bar" { + stdout { } + } + } + CONFIG + + agent do + #LOGSTASH-2288, should not fail raising an exception + end + end +end + +describe "conditionals in filter" do extend LogStash::RSpec extend ConditionalFanciness From b0091b242c8cf29904335464a0ee41518d6fd592 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 18 Sep 2014 21:07:04 +0000 Subject: [PATCH 51/74] Remove the ability to run multiple commands Addresses #1747. This removes the argument list iteration and spawning of multiple tasks. It's still possible to specify additional arguments but now they're ignored. PR: #1752 --- lib/logstash/runner.rb | 54 +++++++++--------------------------------- spec/runner_spec.rb | 12 ++++------ 2 files changed, 16 insertions(+), 50 deletions(-) diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index 82862748d9a..aa179946db5 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -69,10 +69,6 @@ def wait class LogStash::Runner include LogStash::Program - def initialize - @runners = [] - end - def main(args) require "logstash/util" require "stud/trap" @@ -89,23 +85,11 @@ def main(args) Stud::untrap("INT", @startup_interruption_trap) - args = [nil] if args.empty? - - while args != nil && !args.empty? - args = run(args) - end - - status = [] - @runners.each do |r| - #$stderr.puts "Waiting on #{r.wait.inspect}" - status << r.wait - end - - # Avoid running test/unit's at_exit crap - if status.empty? || status.first.nil? + if args.empty? then exit(0) else - exit(status.first) + task = run(args) + exit(task.wait) end end # def self.main @@ -118,14 +102,12 @@ def run(args) if args.include?("--verbose") agent_args << "--verbose" end - LogStash::Agent.run($0, agent_args) - return [] + return LogStash::Agent.run($0, agent_args) end, "web" => lambda do # Give them kibana. require "logstash/kibana" kibana = LogStash::Kibana::Runner.new - @runners << kibana return kibana.run(args) end, "rspec" => lambda do @@ -136,18 +118,11 @@ def run(args) require "test_utils" all_specs = Dir.glob(File.join(spec_path, "/**/*.rb")) rspec = LogStash::RSpecsRunner.new(args.empty?
all_specs : args) - rspec.run - @runners << rspec - return [] + return rspec.run end, "irb" => lambda do require "irb" - IRB.start(__FILE__) - return [] - end, - "ruby" => lambda do - require(args[0]) - return [] + return IRB.start(__FILE__) end, "pry" => lambda do require "pry" @@ -158,17 +133,11 @@ def run(args) plugin_manager = LogStash::PluginManager::Main.new($0) begin plugin_manager.parse(args) + return plugin_manager.execute rescue Clamp::HelpWanted => e show_help(e.command) + return 0 end - - begin - plugin_manager.execute - rescue Clamp::HelpWanted => e - show_help(e.command) - end - - return [] end, "agent" => lambda do require "logstash/agent" @@ -178,21 +147,20 @@ def run(args) agent.parse(args) rescue Clamp::HelpWanted => e show_help(e.command) - return [] + return 0 rescue Clamp::UsageError => e # If 'too many arguments' then give the arguments to # the next command. Otherwise it's a real error. raise if e.message != "too many arguments" remaining = agent.remaining_arguments end - @runners << Stud::Task.new { agent.execute } - return remaining + return agent.execute end } # commands if commands.include?(command) - args = commands[command].call + return Stud::Task.new { commands[command].call } else if command.nil? $stderr.puts "No command given" diff --git a/spec/runner_spec.rb b/spec/runner_spec.rb index 5250747f354..a379f3d49a5 100644 --- a/spec/runner_spec.rb +++ b/spec/runner_spec.rb @@ -19,24 +19,22 @@ def run(args); end it "should run agent help" do expect(subject).to receive(:show_help).once.and_return(nil) args = ["agent", "-h"] - expect(subject.run(args)).to eq([]) + expect(subject.run(args).wait).to eq(0) end it "should run agent help and not run following commands" do expect(subject).to receive(:show_help).once.and_return(nil) args = ["agent", "-h", "web"] - expect(subject.run(args)).to eq([]) + expect(subject.run(args).wait).to eq(0) end - it "should run agent and web" do + it "should not run agent and web" do expect(Stud::Task).to receive(:new).once args = ["agent", "-e", "", "web"] args = subject.run(args) - expect(args).to eq(["web"]) - - expect(LogStash::Kibana::Runner).to receive(:new).once.and_return(NullRunner.new) - args = subject.run(args) expect(args).to eq(nil) + + expect(LogStash::Kibana::Runner).to_not receive(:new) end end end From a536eefad2251f690cab0ba545cedf4d16a21365 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Fri, 19 Sep 2014 09:14:29 +0000 Subject: [PATCH 52/74] spec the absence and wrong commands PR: #1752 --- lib/logstash/runner.rb | 33 ++++++++++++++------------------- spec/runner_spec.rb | 14 ++++++++++++++ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index aa179946db5..6bd2fc87949 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -85,12 +85,8 @@ def main(args) Stud::untrap("INT", @startup_interruption_trap) - if args.empty? then - exit(0) - else - task = run(args) - exit(task.wait) - end + task = run(args) + exit(task.wait) end # def self.main def run(args) @@ -170,21 +166,20 @@ def run(args) $stderr.puts "No such command #{command.inspect}" end end - $stderr.puts "Usage: logstash [command args]" - $stderr.puts "Run a command with the --help flag to see the arguments." - $stderr.puts "For example: logstash agent --help" - $stderr.puts - # hardcode the available commands to reduce confusion.
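The new contract reads well as a toy program. A sketch assuming the stud gem; the command table and exit codes here are invented: run(args) always hands back a single Stud::Task, and main simply waits on it for the process exit status.

    require "stud/task"

    COMMANDS = {
      "version" => lambda { puts "logstash 1.x"; 0 },  # invented command body
    }

    def run(args)
      handler = COMMANDS[args.first]
      # Unknown commands also come back as a task; its value is the exit code.
      Stud::Task.new { handler ? handler.call : 1 }
    end

    exit(run(["version"]).wait)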
- $stderr.puts "Available commands:" - $stderr.puts " agent - runs the logstash agent" - $stderr.puts " version - emits version info about this logstash" - $stderr.puts " web - runs the logstash web ui (called Kibana)" - $stderr.puts " rspec - runs tests" + $stderr.puts %q[ +Usage: logstash [command args] +Run a command with the --help flag to see the arguments. +For example: logstash agent --help + +Available commands: + agent - runs the logstash agent + version - emits version info about this logstash + web - runs the logstash web ui (called Kibana) + rspec - runs tests + ] #$stderr.puts commands.keys.map { |s| " #{s}" }.join("\n") - exit 1 + return Stud::Task.new { 1 } end - - return args end # def run # @return true if this file is the main file being run and not via rspec diff --git a/spec/runner_spec.rb b/spec/runner_spec.rb index a379f3d49a5..01c7587f63e 100644 --- a/spec/runner_spec.rb +++ b/spec/runner_spec.rb @@ -22,6 +22,20 @@ def run(args); end expect(subject.run(args).wait).to eq(0) end + it "should show help with no arguments" do + expect($stderr).to receive(:puts).once.and_return("No command given") + expect($stderr).to receive(:puts).once + args = [] + expect(subject.run(args).wait).to eq(1) + end + + it "should show help for unknown commands" do + expect($stderr).to receive(:puts).once.and_return("No such command welp") + expect($stderr).to receive(:puts).once + args = ["welp"] + expect(subject.run(args).wait).to eq(1) + end + it "should run agent help and not run following commands" do expect(subject).to receive(:show_help).once.and_return(nil) args = ["agent", "-h", "web"] From bf11118709cc9bcc7c099c270ad58931afa51a74 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Wed, 18 Jun 2014 17:16:45 +0000 Subject: [PATCH 53/74] stringify all symbols in twitter hash Fixes #1450 --- lib/logstash/inputs/twitter.rb | 3 ++- lib/logstash/util.rb | 12 ++++++++++++ spec/util_spec.rb | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 spec/util_spec.rb diff --git a/lib/logstash/inputs/twitter.rb b/lib/logstash/inputs/twitter.rb index 333b21a5661..75ba6884d42 100644 --- a/lib/logstash/inputs/twitter.rb +++ b/lib/logstash/inputs/twitter.rb @@ -2,6 +2,7 @@ require "logstash/inputs/base" require "logstash/namespace" require "logstash/timestamp" +require "logstash/util" # Read events from the twitter streaming api. class LogStash::Inputs::Twitter < LogStash::Inputs::Base @@ -67,7 +68,7 @@ def run(queue) @client.filter(:track => @keywords.join(",")) do |tweet| @logger.info? && @logger.info("Got tweet", :user => tweet.user.screen_name, :text => tweet.text) if @full_tweet - event = LogStash::Event.new(tweet.to_hash) + event = LogStash::Event.new(LogStash::Util.stringify_symbols(tweet.to_hash)) event.timestamp = LogStash::Timestamp.new(tweet.created_at) else event = LogStash::Event.new( diff --git a/lib/logstash/util.rb b/lib/logstash/util.rb index 1ce6cd00ed6..f0cbc956490 100644 --- a/lib/logstash/util.rb +++ b/lib/logstash/util.rb @@ -136,5 +136,17 @@ def self.normalize(o) def self.normalize(o); o; end end + def self.stringify_symbols(o) + case o + when Hash + o.inject({}){|r, (k, v)| r[k.is_a?(Symbol) ? 
k.to_s : k] = stringify_symbols(v); r} + when Array + o.map{|i| stringify_symbols(i)} + when Symbol + o.to_s + else + o + end + end end # module LogStash::Util diff --git a/spec/util_spec.rb b/spec/util_spec.rb new file mode 100644 index 00000000000..aeff9bdb469 --- /dev/null +++ b/spec/util_spec.rb @@ -0,0 +1,33 @@ +require "logstash/util" + + +describe LogStash::Util do + + context "stringify_symbols" do + it "should convert hash symbol keys to strings" do + expect(LogStash::Util.stringify_symbols({:a => 1, "b" => 2})).to eq({"a" => 1, "b" => 2}) + end + + it "should keep non symbolic hash keys as is" do + expect(LogStash::Util.stringify_symbols({1 => 1, 2.0 => 2})).to eq({1 => 1, 2.0 => 2}) + end + + it "should convert inner hash keys to strings" do + expect(LogStash::Util.stringify_symbols({:a => 1, "b" => {:c => 3}})).to eq({"a" => 1, "b" => {"c" => 3}}) + expect(LogStash::Util.stringify_symbols([:a, 1, "b", {:c => 3}])).to eq(["a", 1, "b", {"c" => 3}]) + end + + it "should convert hash symbol values to strings" do + expect(LogStash::Util.stringify_symbols({:a => :a, "b" => :b})).to eq({"a" => "a", "b" => "b"}) + end + + it "should convert array symbol values to strings" do + expect(LogStash::Util.stringify_symbols([1, :a])).to eq([1, "a"]) + end + + it "should convert inner array symbol values to strings" do + expect(LogStash::Util.stringify_symbols({:a => [1, :b]})).to eq({"a" => [1, "b"]}) + expect(LogStash::Util.stringify_symbols([:a, [1, :b]])).to eq(["a", [1, "b"]]) + end + end +end From 10ac156f3e6aa6bdbbab68150a87d6a14c97f33d Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Wed, 18 Jun 2014 18:10:14 +0000 Subject: [PATCH 54/74] add robustness to tweets stream handling Fixes #1450 --- lib/logstash/inputs/twitter.rb | 58 +++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/lib/logstash/inputs/twitter.rb b/lib/logstash/inputs/twitter.rb index 75ba6884d42..b746c9d1b8a 100644 --- a/lib/logstash/inputs/twitter.rb +++ b/lib/logstash/inputs/twitter.rb @@ -65,27 +65,41 @@ def register public def run(queue) @logger.info("Starting twitter tracking", :keywords => @keywords) - @client.filter(:track => @keywords.join(",")) do |tweet| - @logger.info? && @logger.info("Got tweet", :user => tweet.user.screen_name, :text => tweet.text) - if @full_tweet - event = LogStash::Event.new(LogStash::Util.stringify_symbols(tweet.to_hash)) - event.timestamp = LogStash::Timestamp.new(tweet.created_at) - else - event = LogStash::Event.new( - LogStash::Event::TIMESTAMP => LogStash::Timestamp.new(tweet.created_at), - "message" => tweet.full_text, - "user" => tweet.user.screen_name, - "client" => tweet.source, - "retweeted" => tweet.retweeted?, - "source" => "http://twitter.com/#{tweet.user.screen_name}/status/#{tweet.id}" - ) - end - decorate(event) - event["in-reply-to"] = tweet.in_reply_to_status_id if tweet.reply? - unless tweet.urls.empty? - event["urls"] = tweet.urls.map(&:expanded_url).map(&:to_s) - end - queue << event - end # client.filter + begin + @client.filter(:track => @keywords.join(",")) do |tweet| + if tweet.is_a?(Twitter::Tweet) + @logger.debug?
&& @logger.debug("Got tweet", :user => tweet.user.screen_name, :text => tweet.text) + if @full_tweet + event = LogStash::Event.new(LogStash::Util.stringify_symbols(tweet.to_hash)) + event.timestamp = LogStash::Timestamp.new(tweet.created_at) + else + event = LogStash::Event.new( + LogStash::Event::TIMESTAMP => LogStash::Timestamp.new(tweet.created_at), + "message" => tweet.full_text, + "user" => tweet.user.screen_name, + "client" => tweet.source, + "retweeted" => tweet.retweeted?, + "source" => "http://twitter.com/#{tweet.user.screen_name}/status/#{tweet.id}" + ) + event["in-reply-to"] = tweet.in_reply_to_status_id if tweet.reply? + unless tweet.urls.empty? + event["urls"] = tweet.urls.map(&:expanded_url).map(&:to_s) + end + end + + decorate(event) + queue << event + end + end # client.filter + rescue LogStash::ShutdownSignal + return + rescue Twitter::Error::TooManyRequests => e + @logger.warn("Twitter too many requests error, sleeping for #{e.rate_limit.reset_in}s") + sleep(e.rate_limit.reset_in) + retry + rescue => e + @logger.warn("Twitter client error", :message => e.message, :exception => e, :backtrace => e.backtrace) + retry + end end # def run end # class LogStash::Inputs::Twitter From a78f6bafd1c392352672aa0cd36bd444efe99bf5 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Wed, 18 Jun 2014 20:49:47 +0000 Subject: [PATCH 55/74] monkey patch twitter gem to trap json parsing exceptions and use our own json parser Fixes #1450 --- lib/logstash/inputs/twitter.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/logstash/inputs/twitter.rb b/lib/logstash/inputs/twitter.rb index b746c9d1b8a..64ce5ba66ff 100644 --- a/lib/logstash/inputs/twitter.rb +++ b/lib/logstash/inputs/twitter.rb @@ -3,6 +3,7 @@ require "logstash/namespace" require "logstash/timestamp" require "logstash/util" +require "logstash/json" # Read events from the twitter streaming api. class LogStash::Inputs::Twitter < LogStash::Inputs::Base @@ -54,6 +55,24 @@ class LogStash::Inputs::Twitter < LogStash::Inputs::Base public def register require "twitter" + + # monkey patch twitter gem to ignore json parsing error. + # at the same time, use our own json parser + # this has been tested with a specific gem version, raise if not the same + raise("Invalid Twitter gem") unless Twitter::Version.to_s == "5.0.0.rc.1" + Twitter::Streaming::Response.module_eval do + def on_body(data) + @tokenizer.extract(data).each do |line| + next if line.empty? 
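The idea of this monkey patch can be shown standalone with the stdlib json gem (the gem's tokenizer and LogStash::Json are replaced by plain stand-ins here, so this is a sketch of the pattern rather than the shipped code): parse each line with symbolized keys and drop frames that fail to parse, instead of letting one bad frame kill the stream.

    require "json"

    def each_json_frame(lines)
      lines.each do |line|
        next if line.strip.empty?
        begin
          yield JSON.parse(line, :symbolize_names => true)
        rescue JSON::ParserError
          # silently skip malformed frames, as the patch does
        end
      end
    end

    each_json_frame(['{"id":1}', "", "not-json"]) { |frame| p frame } # prints {:id=>1} only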
+ begin + @block.call(LogStash::Json.load(line, :symbolize_keys => true)) + rescue LogStash::Json::ParserError + # silently ignore json parsing errors + end + end + end + end + @client = Twitter::Streaming::Client.new do |c| c.consumer_key = @consumer_key c.consumer_secret = @consumer_secret.value From d2f6118124f1e37c1c6ece43f84deca4a8ac682f Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Wed, 18 Jun 2014 20:50:24 +0000 Subject: [PATCH 56/74] support :symbolize_keys options for json parsing Fixes #1450 --- lib/logstash/json.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/logstash/json.rb b/lib/logstash/json.rb index d7fce4397a6..5079de759a0 100644 --- a/lib/logstash/json.rb +++ b/lib/logstash/json.rb @@ -17,7 +17,7 @@ class GeneratorError < LogStash::Error; end ### MRI - def mri_load(data) + def mri_load(data, options = {}) Oj.load(data) rescue Oj::ParseError => e raise LogStash::Json::ParserError.new(e.message) @@ -31,8 +31,8 @@ def mri_dump(o) ### JRuby - def jruby_load(data) - JrJackson::Raw.parse_raw(data) + def jruby_load(data, options = {}) + options[:symbolize_keys] ? JrJackson::Raw.parse_sym(data) : JrJackson::Raw.parse_raw(data) rescue JrJackson::ParseError => e raise LogStash::Json::ParserError.new(e.message) end From e4ac936f0c5f7bef48adcf36fc969cc46562663f Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Tue, 23 Sep 2014 19:36:48 +0000 Subject: [PATCH 57/74] honor common to_json method signature Fixes #1772 --- lib/logstash/event.rb | 3 ++- lib/logstash/timestamp.rb | 3 ++- spec/event.rb | 23 +++++++++++++++++++++++ spec/timestamp.rb | 7 +++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index 5c3dd35d899..70f68ac9bb2 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -135,7 +135,8 @@ def fields end public - def to_json + def to_json(*args) + # ignore arguments to respect accepted to_json method signature LogStash::Json.dump(@data) end # def to_json diff --git a/lib/logstash/timestamp.rb b/lib/logstash/timestamp.rb index a96dfb02c1d..fc8d793911c 100644 --- a/lib/logstash/timestamp.rb +++ b/lib/logstash/timestamp.rb @@ -78,7 +78,8 @@ def utc end alias_method :gmtime, :utc - def to_json + def to_json(*args) + # ignore arguments to respect accepted to_json method signature LogStash::Json.dump(@time.iso8601(ISO8601_PRECISION)) end alias_method :inspect, :to_json diff --git a/spec/event.rb b/spec/event.rb index e885c4f6e79..06d85c7609c 100644 --- a/spec/event.rb +++ b/spec/event.rb @@ -294,4 +294,27 @@ insist{event[LogStash::Event::TIMESTAMP_FAILURE_FIELD]} == "foo" end end + + context "to_json" do + it "should support to_json" do + new_event = LogStash::Event.new( + "@timestamp" => Time.iso8601("2014-09-23T19:26:15.832Z"), + "message" => "foo bar", + ) + json = new_event.to_json + + insist { json } == "{\"@timestamp\":\"2014-09-23T19:26:15.832Z\",\"message\":\"foo bar\",\"@version\":\"1\"}" + end + + it "should support to_json and ignore arguments" do + new_event = LogStash::Event.new( + "@timestamp" => Time.iso8601("2014-09-23T19:26:15.832Z"), + "message" => "foo bar", + ) + json = new_event.to_json(:foo => 1, :bar => "baz") + + insist { json } == "{\"@timestamp\":\"2014-09-23T19:26:15.832Z\",\"message\":\"foo bar\",\"@version\":\"1\"}" + end + end + end diff --git a/spec/timestamp.rb b/spec/timestamp.rb index f6e6a0ceeed..1ebbab634b3 100644 --- a/spec/timestamp.rb +++ b/spec/timestamp.rb @@ -32,4 +32,11 @@ expect(LogStash::Timestamp.coerce(:foobar)).to 
be_nil end + it "should support to_json" do + expect(LogStash::Timestamp.parse_iso8601("2014-09-23T00:00:00-0800").to_json).to eq("\"2014-09-23T08:00:00.000Z\"") + end + + it "should support to_json and ignore arguments" do + expect(LogStash::Timestamp.parse_iso8601("2014-09-23T00:00:00-0800").to_json(:some => 1, :arguments => "test")).to eq("\"2014-09-23T08:00:00.000Z\"") + end end From 8c133c3e4542e656c52c47bbd77835e059680c2b Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Tue, 23 Sep 2014 19:39:37 +0000 Subject: [PATCH 58/74] add ruby filter spec Fixes #1772 --- spec/filters/ruby.rb | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 spec/filters/ruby.rb diff --git a/spec/filters/ruby.rb b/spec/filters/ruby.rb new file mode 100644 index 00000000000..6bb4b7b7c74 --- /dev/null +++ b/spec/filters/ruby.rb @@ -0,0 +1,56 @@ +require "test_utils" +require "logstash/filters/ruby" +require "logstash/filters/date" + +describe LogStash::Filters::Ruby do + extend LogStash::RSpec + + describe "generate pretty json on event.to_hash" do + # this obviously tests the Ruby filter but also makes sure + # the fix for issue #1771 is correct and that to_json is + # compatible with the json gem convention. + + config <<-CONFIG + filter { + date { + match => [ "mydate", "ISO8601" ] + locale => "en" + timezone => "UTC" + } + ruby { + init => "require 'json'" + code => "event['pretty'] = JSON.pretty_generate(event.to_hash)" + } + } + CONFIG + + sample("message" => "hello world", "mydate" => "2014-09-23T00:00:00-0800") do + # json is rendered in pretty json since the JSON.pretty_generate created json from the event hash + insist { subject["pretty"] } == "{\n \"message\": \"hello world\",\n \"mydate\": \"2014-09-23T00:00:00-0800\",\n \"@version\": \"1\",\n \"@timestamp\": \"2014-09-23T08:00:00.000Z\"\n}" + end + end + + describe "generate compact json with JSON.pretty_generate on event" do + # this obviously tests the Ruby filter but asserts that using the json gem directly + # on event will correctly call the to_json method but will use the logstash json + # generation and thus will not work with pretty_generate. + config <<-CONFIG + filter { + date { + match => [ "mydate", "ISO8601" ] + locale => "en" + timezone => "UTC" + } + ruby { + init => "require 'json'" + code => "event['pretty'] = JSON.pretty_generate(event)" + } + } + CONFIG + + sample("message" => "hello world", "mydate" => "2014-09-23T00:00:00-0800") do + # if this eventually breaks because we removed the custom to_json and/or added pretty support to JrJackson then all is good :) + insist { subject["pretty"] } == "{\"message\":\"hello world\",\"mydate\":\"2014-09-23T00:00:00-0800\",\"@version\":\"1\",\"@timestamp\":\"2014-09-23T08:00:00.000Z\"}" + end + end +end From 19c6caa6d7f6cc8cd42c6aeeab9ceafdec30991f Mon Sep 17 00:00:00 2001 From: James Turnbull Date: Sat, 13 Sep 2014 02:48:27 +0000 Subject: [PATCH 59/74] Updated link in syslog documentation Fixes #1724 --- lib/logstash/inputs/syslog.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/syslog.rb b/lib/logstash/inputs/syslog.rb index 8a4351e5907..b194fdad790 100644 --- a/lib/logstash/inputs/syslog.rb +++ b/lib/logstash/inputs/syslog.rb @@ -18,9 +18,9 @@ # RFC3164 style or ISO8601. Otherwise the rest of RFC3164 must be obeyed. # If you do not use RFC3164, do not use this input. # -# For more information see (the RFC3164 page)[http://www.ietf.org/rfc/rfc3164.txt].
+# For more information see [the RFC3164 page](http://www.ietf.org/rfc/rfc3164.txt). # -# Note: this input will start listeners on both TCP and UDP. +# Note: This input will start listeners on both TCP and UDP. class LogStash::Inputs::Syslog < LogStash::Inputs::Base config_name "syslog" milestone 1 From 26a27f2e2fd426fd185e4bb98dbcf4f81d4a2c84 Mon Sep 17 00:00:00 2001 From: Kurt Hurtado Date: Tue, 23 Sep 2014 01:10:48 +0000 Subject: [PATCH 60/74] specify ctrl-d to halt pipeline Fixes #1767 --- docs/tutorials/getting-started-with-logstash.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/getting-started-with-logstash.asciidoc b/docs/tutorials/getting-started-with-logstash.asciidoc index 38c330d75a0..f64aa5c917d 100644 --- a/docs/tutorials/getting-started-with-logstash.asciidoc +++ b/docs/tutorials/getting-started-with-logstash.asciidoc @@ -48,7 +48,7 @@ hello world OK, that's interesting... We ran Logstash with an input called "stdin", and an output named "stdout", and Logstash basically echoed back whatever we typed in some sort of structured format. Note that specifying the *-e* command line flag allows Logstash to accept a configuration directly from the command line. This is especially useful for quickly testing configurations without having to edit a file between iterations. -Let's try a slightly fancier example. First, you should exit Logstash by issuing a 'CTRL-C' command in the shell in which it is running. Now run Logstash again with the following command: +Let's try a slightly fancier example. First, you should exit Logstash by issuing a 'CTRL-D' command in the shell in which it is running. Now run Logstash again with the following command: ---- bin/logstash -e 'input { stdin { } } output { stdout { codec => rubydebug } }' ---- From ac45c438dc7d4d3f690fa36b1c69eb16bbbfb744 Mon Sep 17 00:00:00 2001 From: Kurt Hurtado Date: Tue, 23 Sep 2014 01:15:28 +0000 Subject: [PATCH 61/74] CTRL-C Enter is OK too Fixes #1767 --- docs/tutorials/getting-started-with-logstash.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/getting-started-with-logstash.asciidoc b/docs/tutorials/getting-started-with-logstash.asciidoc index f64aa5c917d..69e00e1bd3f 100644 --- a/docs/tutorials/getting-started-with-logstash.asciidoc +++ b/docs/tutorials/getting-started-with-logstash.asciidoc @@ -48,7 +48,7 @@ hello world OK, that's interesting... We ran Logstash with an input called "stdin", and an output named "stdout", and Logstash basically echoed back whatever we typed in some sort of structured format. Note that specifying the *-e* command line flag allows Logstash to accept a configuration directly from the command line. This is especially useful for quickly testing configurations without having to edit a file between iterations. -Let's try a slightly fancier example. First, you should exit Logstash by issuing a 'CTRL-D' command in the shell in which it is running. Now run Logstash again with the following command: +Let's try a slightly fancier example. First, you should exit Logstash by issuing a 'CTRL-D' command (or 'CTRL-C Enter') in the shell in which it is running. 
Now run Logstash again with the following command: ---- bin/logstash -e 'input { stdin { } } output { stdout { codec => rubydebug } }' ---- From 96c5e32f1fb47e246bc4a620d8b1d80d4f83c98d Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Sat, 27 Sep 2014 23:59:50 +0200 Subject: [PATCH 62/74] Fix add_tag behaviour in dns filter The filter should modify the event's fields and tags if and only if all resolves/reverses succeed. So we clone the event, modify the new copy and return it if all operations succeed. Otherwise the original event is not modified. For performance reasons we could reverse the clone logic: clone the event, modify the original event and, in case of failure, return the backup. Note: this changes the dns filter behaviour towards add_tag Fixes #1795 --- lib/logstash/filters/dns.rb | 12 ++++++-- spec/filters/dns.rb | 60 ++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/lib/logstash/filters/dns.rb b/lib/logstash/filters/dns.rb index 46bb0755574..f23b8191f80 100644 --- a/lib/logstash/filters/dns.rb +++ b/lib/logstash/filters/dns.rb @@ -70,11 +70,14 @@ def register def filter(event) return unless filter?(event) + new_event = event.clone + if @resolve begin status = Timeout::timeout(@timeout) { - resolve(event) + resolve(new_event) } + return if status.nil? rescue Timeout::Error @logger.debug("DNS: resolve action timed out") return @@ -84,15 +87,18 @@ def filter(event) if @reverse begin status = Timeout::timeout(@timeout) { - reverse(event) + reverse(new_event) } + return if status.nil? rescue Timeout::Error @logger.debug("DNS: reverse action timed out") return end end - filter_matched(event) + filter_matched(new_event) + yield new_event + event.cancel end private diff --git a/spec/filters/dns.rb b/spec/filters/dns.rb index 80dae9885ad..ddcd30f14cc 100644 --- a/spec/filters/dns.rb +++ b/spec/filters/dns.rb @@ -73,14 +73,72 @@ config <<-CONFIG filter { dns { - resolve => "host" + resolve => ["host"] action => "replace" + add_tag => ["success"] } } CONFIG sample("host" => "carrera.databits.net") do insist { subject["host"] } == "199.192.228.250" + insist { subject["tags"] } == ["success"] end end + + describe "dns fail resolve lookup, don't add tag" do + config <<-CONFIG + filter { + dns { + resolve => ["host1", "host2"] + action => "replace" + add_tag => ["success"] + } + } + CONFIG + + sample("host1" => "carrera.databits.net", "host2" => "nonexistanthostname###.net") do + insist { subject["tags"] }.nil? + insist { subject["host1"] } == "carrera.databits.net" + insist { subject["host2"] } == "nonexistanthostname###.net" + end + end + + describe "dns resolves lookups, adds tag" do + config <<-CONFIG + filter { + dns { + resolve => ["host1", "host2"] + action => "replace" + add_tag => ["success"] + } + } + CONFIG + + sample("host1" => "carrera.databits.net", "host2" => "carrera.databits.net") do + insist { subject["tags"] } == ["success"] + end + end + + describe "dns resolves and reverses, fails last, no tag" do + config <<-CONFIG + filter { + dns { + resolve => ["host1"] + reverse => ["ip1", "ip2"] + action => "replace" + add_tag => ["success"] + } + } + CONFIG + + sample("host1" => "carrera.databits.net", + "ip1" => "127.0.0.1", + "ip2" => "128.0.0.1") do + insist { subject["tags"] }.nil?
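The clone-and-cancel flow reduces to a self-contained toy. The Event struct and the always-successful lookup below are stand-ins, not logstash APIs: mutate a copy, and only when every step succeeds emit the copy and cancel the original, so a failed lookup leaves the event exactly as it arrived.

    Event = Struct.new(:fields, :cancelled) do
      def cancel; self.cancelled = true; end
    end

    def filter(event)
      copy = Event.new(event.fields.dup, false)
      copy.fields["ip"] = "199.192.228.250"  # pretend every lookup succeeded
      return unless copy.fields["ip"]        # on failure: bail, original untouched
      yield copy                             # emit the mutated copy...
      event.cancel                           # ...and retire the original
    end

    original = Event.new({ "host" => "carrera.databits.net" }, false)
    filter(original) { |e| p e.fields }
    p original.cancelled # => true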
+ insist { subject["host1"] } == "carrera.databits.net" + insist { subject["ip1"] } == "127.0.0.1" + insist { subject["ip2"] } == "128.0.0.1" end end From 7395512761e512ac6599068f218437b14d6c118a Mon Sep 17 00:00:00 2001 From: wiibaa Date: Fri, 26 Sep 2014 07:18:15 +0200 Subject: [PATCH 63/74] filter/xml fix for LOGSTASH-2246: extract non-ascii content with xpath Fixes #1803 --- lib/logstash/filters/xml.rb | 2 +- spec/filters/xml.rb | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/xml.rb b/lib/logstash/filters/xml.rb index 80c7bfea46c..2702631c839 100644 --- a/lib/logstash/filters/xml.rb +++ b/lib/logstash/filters/xml.rb @@ -115,7 +115,7 @@ def filter(event) unless value.nil? matched = true event[xpath_dest] ||= [] - event[xpath_dest] << value.to_s + event[xpath_dest] << value.to_str end end # XPath.each end # @xpath.each diff --git a/spec/filters/xml.rb b/spec/filters/xml.rb index 44583348aca..bbfff94eee6 100644 --- a/spec/filters/xml.rb +++ b/spec/filters/xml.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require "test_utils" require "logstash/filters/xml" @@ -154,4 +155,22 @@ end end + describe "parse correctly non ascii content with xpath" do + config <<-CONFIG + filter { + xml { + source => "xmldata" + target => "data" + xpath => [ "/foo/key/text()", "xpath_field" ] + } + } + CONFIG + + # Single value + sample("xmldata" => 'Français') do + insist { subject["tags"] }.nil? + insist { subject["xpath_field"]} == ["Français"] + end + end + end From f7d7a7532abd2c45ceebcb476d43f8504ab6d3df Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Mon, 29 Sep 2014 23:57:25 +0000 Subject: [PATCH 64/74] Set the string encoding when parsing XML Improves upon #1790 Fixes #1803 --- lib/logstash/filters/xml.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/filters/xml.rb b/lib/logstash/filters/xml.rb index 2702631c839..4fcf770e52b 100644 --- a/lib/logstash/filters/xml.rb +++ b/lib/logstash/filters/xml.rb @@ -91,7 +91,7 @@ def filter(event) if @xpath begin - doc = Nokogiri::XML(value) + doc = Nokogiri::XML(value, nil, value.encoding.to_s) rescue => e event.tag("_xmlparsefailure") @logger.warn("Trouble parsing xml", :source => @source, :value => value, @@ -115,7 +115,7 @@ def filter(event) unless value.nil? 
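Both XML changes hinge on one Nokogiri detail, sketched standalone here (assumes the nokogiri gem; the sample document is invented): passing the source string's encoding through as the parser's document encoding keeps non-ASCII xpath results intact.

    # encoding: utf-8
    require "nokogiri"

    xml = "<foo><key>Français</key></foo>"

    # Third argument is the document encoding, mirroring the patched call.
    doc = Nokogiri::XML(xml, nil, xml.encoding.to_s)
    puts doc.xpath("/foo/key/text()").first.to_s # => "Français"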
matched = true event[xpath_dest] ||= [] - event[xpath_dest] << value.to_str + event[xpath_dest] << value.to_s end end # XPath.each end # @xpath.each From 51b3bf46c4eb7a275707c69719c0bfaf3215ec93 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Tue, 16 Sep 2014 14:05:07 +0000 Subject: [PATCH 65/74] [RSPEC] Refactoring step 1 - Move helper functions in own modules and extend Rspec - Refactor files into correct naming and paths - Modify files to use new spec_helper and helpers - Pin rspec to 2.14.x Fixes #1758 --- lib/logstash/runner.rb | 3 +- logstash.gemspec | 2 +- spec/codecs/{collectd.rb => collectd_spec.rb} | 0 .../{edn_lines.rb => edn_lines_spec.rb} | 0 spec/codecs/{edn.rb => edn_spec.rb} | 0 spec/codecs/{graphite.rb => graphite_spec.rb} | 0 .../{json_lines.rb => json_lines_spec.rb} | 0 spec/codecs/{json.rb => json_spec.rb} | 0 .../{json_spooler.rb => json_spooler_spec.rb} | 0 spec/codecs/{line.rb => line_spec.rb} | 0 spec/codecs/{msgpack.rb => msgpack_spec.rb} | 0 .../{multiline.rb => multiline_spec.rb} | 0 ...ogstashjson.rb => oldlogstashjson_spec.rb} | 0 spec/codecs/{plain.rb => plain_spec.rb} | 0 spec/codecs/{spool.rb => spool_spec.rb} | 0 .../test.rb => core/conditionals_spec.rb} | 3 +- spec/{config.rb => core/config_spec.rb} | 0 spec/{event.rb => core/event_spec.rb} | 0 spec/{ => core}/runner_spec.rb | 0 spec/{timestamp.rb => core/timestamp_spec.rb} | 0 spec/{web.rb => core/web_spec.rb} | 0 .../{anonymize.rb => anonymize_spec.rb} | 3 +- .../filters/{checksum.rb => checksum_spec.rb} | 3 +- spec/filters/{clone.rb => clone_spec.rb} | 3 +- spec/filters/{csv.rb => csv_spec.rb} | 3 +- spec/filters/{date.rb => date_spec.rb} | 3 +- spec/filters/{dns.rb => dns_spec.rb} | 4 +- spec/filters/{drop.rb => drop_spec.rb} | 3 +- .../{fingerprint.rb => fingerprint_spec.rb} | 4 +- spec/filters/{geoip.rb => geoip_spec.rb} | 4 +- .../grok-patterns/{java.rb => java_spec.rb} | 3 +- spec/filters/{grok.rb => grok_spec.rb} | 22 ++- spec/filters/{json.rb => json_spec.rb} | 3 +- spec/filters/{kv.rb => kv_spec.rb} | 3 +- spec/filters/{metrics.rb => metrics_spec.rb} | 1 + .../{multiline.rb => multiline_spec.rb} | 4 +- spec/filters/{mutate.rb => mutate_spec.rb} | 3 +- spec/filters/{noop.rb => noop_spec.rb} | 3 +- spec/filters/{split.rb => split_spec.rb} | 4 +- spec/filters/{spool.rb => spool_spec.rb} | 3 +- .../filters/{throttle.rb => throttle_spec.rb} | 3 +- .../{urldecode.rb => urldecode_spec.rb} | 3 +- .../{useragent.rb => useragent_spec.rb} | 5 +- spec/filters/{xml.rb => xml_spec.rb} | 3 +- spec/inputs/{base.rb => base_spec.rb} | 2 +- spec/inputs/{collectd.rb => collectd_spec.rb} | 4 +- ...elasticsearch.rb => elasticsearch_spec.rb} | 4 +- spec/inputs/{file.rb => file_spec.rb} | 4 +- spec/inputs/{gelf.rb => gelf_spec.rb} | 4 +- .../{generator.rb => generator_spec.rb} | 4 +- spec/inputs/{imap.rb => imap_spec.rb} | 0 spec/inputs/{kafka.rb => kafka_spec.rb} | 2 +- spec/inputs/{log4j.rb => log4j_spec.rb} | 0 spec/inputs/{pipe.rb => pipe_spec.rb} | 4 +- spec/inputs/{redis.rb => redis_spec.rb} | 4 +- spec/inputs/{stdin.rb => stdin_spec.rb} | 2 +- spec/inputs/{syslog.rb => syslog_spec.rb} | 4 +- spec/inputs/{tcp.rb => tcp_spec.rb} | 10 +- spec/logstash_helpers.rb | 77 ++++++++++ spec/outputs/cloudwatch_spec.rb | 18 +++ spec/outputs/{csv.rb => csv_spec.rb} | 4 +- ...rch_http.rb => elasticsearch_http_spec.rb} | 4 +- ...h_river.rb => elasticsearch_river_spec.rb} | 0 ...elasticsearch.rb => elasticsearch_spec.rb} | 4 +- spec/outputs/{email.rb => email_spec.rb} | 4 +- spec/outputs/{file.rb => file_spec.rb} | 4 +- 
.../outputs/{graphite.rb => graphite_spec.rb} | 4 +- spec/outputs/{kafka.rb => kafka_spec.rb} | 0 spec/outputs/{redis.rb => redis_spec.rb} | 4 +- spec/outputs/{statsd.rb => statsd_spec.rb} | 4 +- .../date.rb} | 0 spec/{ => performance}/speed.rb | 0 spec/spec_helper.rb | 48 ++++++ spec/support/LOGSTASH-733.rb | 21 --- spec/support/LOGSTASH-820.rb | 25 --- .../{akamai-grok.rb => akamai-grok_spec.rb} | 5 +- .../{date-http.rb => date-http_spec.rb} | 3 +- spec/support/{pull375.rb => pull375_spec.rb} | 4 +- spec/test_utils.rb | 143 ------------------ spec/util/accessors_spec.rb | 2 +- spec/util/charset_spec.rb | 5 +- .../environment_spec.rb} | 0 spec/util/fieldeval_spec.rb | 2 +- spec/{jar.rb => util/jar_spec.rb} | 0 spec/{json.rb => util/json_spec.rb} | 8 +- 85 files changed, 244 insertions(+), 298 deletions(-) rename spec/codecs/{collectd.rb => collectd_spec.rb} (100%) rename spec/codecs/{edn_lines.rb => edn_lines_spec.rb} (100%) rename spec/codecs/{edn.rb => edn_spec.rb} (100%) rename spec/codecs/{graphite.rb => graphite_spec.rb} (100%) rename spec/codecs/{json_lines.rb => json_lines_spec.rb} (100%) rename spec/codecs/{json.rb => json_spec.rb} (100%) rename spec/codecs/{json_spooler.rb => json_spooler_spec.rb} (100%) rename spec/codecs/{line.rb => line_spec.rb} (100%) rename spec/codecs/{msgpack.rb => msgpack_spec.rb} (100%) rename spec/codecs/{multiline.rb => multiline_spec.rb} (100%) rename spec/codecs/{oldlogstashjson.rb => oldlogstashjson_spec.rb} (100%) rename spec/codecs/{plain.rb => plain_spec.rb} (100%) rename spec/codecs/{spool.rb => spool_spec.rb} (100%) rename spec/{conditionals/test.rb => core/conditionals_spec.rb} (99%) rename spec/{config.rb => core/config_spec.rb} (100%) rename spec/{event.rb => core/event_spec.rb} (100%) rename spec/{ => core}/runner_spec.rb (100%) rename spec/{timestamp.rb => core/timestamp_spec.rb} (100%) rename spec/{web.rb => core/web_spec.rb} (100%) rename spec/filters/{anonymize.rb => anonymize_spec.rb} (99%) rename spec/filters/{checksum.rb => checksum_spec.rb} (95%) rename spec/filters/{clone.rb => clone_spec.rb} (98%) rename spec/filters/{csv.rb => csv_spec.rb} (98%) rename spec/filters/{date.rb => date_spec.rb} (99%) rename spec/filters/{dns.rb => dns_spec.rb} (99%) rename spec/filters/{drop.rb => drop_spec.rb} (84%) rename spec/filters/{fingerprint.rb => fingerprint_spec.rb} (99%) rename spec/filters/{geoip.rb => geoip_spec.rb} (98%) rename spec/filters/grok-patterns/{java.rb => java_spec.rb} (98%) rename spec/filters/{grok.rb => grok_spec.rb} (95%) rename spec/filters/{json.rb => json_spec.rb} (98%) rename spec/filters/{kv.rb => kv_spec.rb} (99%) rename spec/filters/{metrics.rb => metrics_spec.rb} (99%) rename spec/filters/{multiline.rb => multiline_spec.rb} (98%) rename spec/filters/{mutate.rb => mutate_spec.rb} (99%) rename spec/filters/{noop.rb => noop_spec.rb} (99%) rename spec/filters/{split.rb => split_spec.rb} (96%) rename spec/filters/{spool.rb => spool_spec.rb} (98%) rename spec/filters/{throttle.rb => throttle_spec.rb} (98%) rename spec/filters/{urldecode.rb => urldecode_spec.rb} (96%) rename spec/filters/{useragent.rb => useragent_spec.rb} (94%) rename spec/filters/{xml.rb => xml_spec.rb} (98%) rename spec/inputs/{base.rb => base_spec.rb} (95%) rename spec/inputs/{collectd.rb => collectd_spec.rb} (99%) rename spec/inputs/{elasticsearch.rb => elasticsearch_spec.rb} (98%) rename spec/inputs/{file.rb => file_spec.rb} (98%) rename spec/inputs/{gelf.rb => gelf_spec.rb} (95%) rename spec/inputs/{generator.rb => generator_spec.rb} (97%) rename 
spec/inputs/{imap.rb => imap_spec.rb} (100%) rename spec/inputs/{kafka.rb => kafka_spec.rb} (98%) rename spec/inputs/{log4j.rb => log4j_spec.rb} (100%) rename spec/inputs/{pipe.rb => pipe_spec.rb} (96%) rename spec/inputs/{redis.rb => redis_spec.rb} (96%) rename spec/inputs/{stdin.rb => stdin_spec.rb} (97%) rename spec/inputs/{syslog.rb => syslog_spec.rb} (98%) rename spec/inputs/{tcp.rb => tcp_spec.rb} (98%) create mode 100644 spec/logstash_helpers.rb create mode 100644 spec/outputs/cloudwatch_spec.rb rename spec/outputs/{csv.rb => csv_spec.rb} (99%) rename spec/outputs/{elasticsearch_http.rb => elasticsearch_http_spec.rb} (99%) rename spec/outputs/{elasticsearch_river.rb => elasticsearch_river_spec.rb} (100%) rename spec/outputs/{elasticsearch.rb => elasticsearch_spec.rb} (99%) rename spec/outputs/{email.rb => email_spec.rb} (99%) rename spec/outputs/{file.rb => file_spec.rb} (97%) rename spec/outputs/{graphite.rb => graphite_spec.rb} (99%) rename spec/outputs/{kafka.rb => kafka_spec.rb} (100%) rename spec/outputs/{redis.rb => redis_spec.rb} (98%) rename spec/outputs/{statsd.rb => statsd_spec.rb} (97%) rename spec/{filters/date_performance.rb => performance/date.rb} (100%) rename spec/{ => performance}/speed.rb (100%) create mode 100644 spec/spec_helper.rb delete mode 100644 spec/support/LOGSTASH-733.rb delete mode 100644 spec/support/LOGSTASH-820.rb rename spec/support/{akamai-grok.rb => akamai-grok_spec.rb} (96%) rename spec/support/{date-http.rb => date-http_spec.rb} (88%) rename spec/support/{pull375.rb => pull375_spec.rb} (91%) delete mode 100644 spec/test_utils.rb rename spec/{environment.rb => util/environment_spec.rb} (100%) rename spec/{jar.rb => util/jar_spec.rb} (100%) rename spec/{json.rb => util/json_spec.rb} (91%) diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index 6bd2fc87949..8aacfced759 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -111,8 +111,7 @@ def run(args) require "rspec" spec_path = File.expand_path(File.join(File.dirname(__FILE__), "/../../spec")) $LOAD_PATH << spec_path - require "test_utils" - all_specs = Dir.glob(File.join(spec_path, "/**/*.rb")) + all_specs = Dir.glob(File.join(spec_path, "/**/*_spec.rb")) rspec = LogStash::RSpecsRunner.new(args.empty? ? 
all_specs : args) return rspec.run end, diff --git a/logstash.gemspec b/logstash.gemspec index 2de6afb5f11..a95bfc6e8f9 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -102,7 +102,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "spoon" #(Apache 2.0 license) gem.add_runtime_dependency "mocha" #(MIT license) gem.add_runtime_dependency "shoulda" #(MIT license) - gem.add_runtime_dependency "rspec" #(MIT license) + gem.add_runtime_dependency "rspec", "~> 2.14.0" #(MIT license) gem.add_runtime_dependency "insist", "1.0.0" #(Apache 2.0 license) gem.add_runtime_dependency "rumbster" # For faking smtp in email tests (Apache 2.0 license) diff --git a/spec/codecs/collectd.rb b/spec/codecs/collectd_spec.rb similarity index 100% rename from spec/codecs/collectd.rb rename to spec/codecs/collectd_spec.rb diff --git a/spec/codecs/edn_lines.rb b/spec/codecs/edn_lines_spec.rb similarity index 100% rename from spec/codecs/edn_lines.rb rename to spec/codecs/edn_lines_spec.rb diff --git a/spec/codecs/edn.rb b/spec/codecs/edn_spec.rb similarity index 100% rename from spec/codecs/edn.rb rename to spec/codecs/edn_spec.rb diff --git a/spec/codecs/graphite.rb b/spec/codecs/graphite_spec.rb similarity index 100% rename from spec/codecs/graphite.rb rename to spec/codecs/graphite_spec.rb diff --git a/spec/codecs/json_lines.rb b/spec/codecs/json_lines_spec.rb similarity index 100% rename from spec/codecs/json_lines.rb rename to spec/codecs/json_lines_spec.rb diff --git a/spec/codecs/json.rb b/spec/codecs/json_spec.rb similarity index 100% rename from spec/codecs/json.rb rename to spec/codecs/json_spec.rb diff --git a/spec/codecs/json_spooler.rb b/spec/codecs/json_spooler_spec.rb similarity index 100% rename from spec/codecs/json_spooler.rb rename to spec/codecs/json_spooler_spec.rb diff --git a/spec/codecs/line.rb b/spec/codecs/line_spec.rb similarity index 100% rename from spec/codecs/line.rb rename to spec/codecs/line_spec.rb diff --git a/spec/codecs/msgpack.rb b/spec/codecs/msgpack_spec.rb similarity index 100% rename from spec/codecs/msgpack.rb rename to spec/codecs/msgpack_spec.rb diff --git a/spec/codecs/multiline.rb b/spec/codecs/multiline_spec.rb similarity index 100% rename from spec/codecs/multiline.rb rename to spec/codecs/multiline_spec.rb diff --git a/spec/codecs/oldlogstashjson.rb b/spec/codecs/oldlogstashjson_spec.rb similarity index 100% rename from spec/codecs/oldlogstashjson.rb rename to spec/codecs/oldlogstashjson_spec.rb diff --git a/spec/codecs/plain.rb b/spec/codecs/plain_spec.rb similarity index 100% rename from spec/codecs/plain.rb rename to spec/codecs/plain_spec.rb diff --git a/spec/codecs/spool.rb b/spec/codecs/spool_spec.rb similarity index 100% rename from spec/codecs/spool.rb rename to spec/codecs/spool_spec.rb diff --git a/spec/conditionals/test.rb b/spec/core/conditionals_spec.rb similarity index 99% rename from spec/conditionals/test.rb rename to spec/core/conditionals_spec.rb index 95166e63be5..0c7fceb8201 100644 --- a/spec/conditionals/test.rb +++ b/spec/core/conditionals_spec.rb @@ -1,4 +1,4 @@ -require "test_utils" +require "spec_helper" module ConditionalFanciness def description @@ -47,7 +47,6 @@ def conditional(expression, &block) end describe "conditionals in filter" do - extend LogStash::RSpec extend ConditionalFanciness describe "simple" do diff --git a/spec/config.rb b/spec/core/config_spec.rb similarity index 100% rename from spec/config.rb rename to spec/core/config_spec.rb diff --git a/spec/event.rb b/spec/core/event_spec.rb similarity index 100% 
rename from spec/event.rb rename to spec/core/event_spec.rb diff --git a/spec/runner_spec.rb b/spec/core/runner_spec.rb similarity index 100% rename from spec/runner_spec.rb rename to spec/core/runner_spec.rb diff --git a/spec/timestamp.rb b/spec/core/timestamp_spec.rb similarity index 100% rename from spec/timestamp.rb rename to spec/core/timestamp_spec.rb diff --git a/spec/web.rb b/spec/core/web_spec.rb similarity index 100% rename from spec/web.rb rename to spec/core/web_spec.rb diff --git a/spec/filters/anonymize.rb b/spec/filters/anonymize_spec.rb similarity index 99% rename from spec/filters/anonymize.rb rename to spec/filters/anonymize_spec.rb index 43f571b8284..dae90e620bc 100644 --- a/spec/filters/anonymize.rb +++ b/spec/filters/anonymize_spec.rb @@ -1,10 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/anonymize" describe LogStash::Filters::Anonymize do - extend LogStash::RSpec describe "anonymize ipaddress with IPV4_NETWORK algorithm" do # The logstash config goes here. diff --git a/spec/filters/checksum.rb b/spec/filters/checksum_spec.rb similarity index 95% rename from spec/filters/checksum.rb rename to spec/filters/checksum_spec.rb index bb8f2eae887..a79c70e8b97 100644 --- a/spec/filters/checksum.rb +++ b/spec/filters/checksum_spec.rb @@ -1,11 +1,10 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/checksum" require 'openssl' describe LogStash::Filters::Checksum do - extend LogStash::RSpec LogStash::Filters::Checksum::ALGORITHMS.each do |alg| describe "#{alg} checksum with single field" do diff --git a/spec/filters/clone.rb b/spec/filters/clone_spec.rb similarity index 98% rename from spec/filters/clone.rb rename to spec/filters/clone_spec.rb index 900539f3a76..df4edeff24e 100644 --- a/spec/filters/clone.rb +++ b/spec/filters/clone_spec.rb @@ -1,8 +1,7 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/clone" describe LogStash::Filters::Clone do - extend LogStash::RSpec describe "all defaults" do type "original" diff --git a/spec/filters/csv.rb b/spec/filters/csv_spec.rb similarity index 98% rename from spec/filters/csv.rb rename to spec/filters/csv_spec.rb index ad44a4bcc24..f9d7d3a00c2 100644 --- a/spec/filters/csv.rb +++ b/spec/filters/csv_spec.rb @@ -1,10 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/csv" describe LogStash::Filters::CSV do - extend LogStash::RSpec describe "all defaults" do # The logstash config goes here. 
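The pattern behind these near-identical spec hunks: instead of every file pulling in test_utils and calling extend LogStash::RSpec, a single spec_helper can extend all example groups once. A compressed sketch of that mechanism (module body abbreviated; the full helper is the LogStashHelper module added later in this patch):

    require "rspec"

    module LogStashHelper
      # DSL methods such as config/type/sample live here.
      def config(configstr)
        let(:config) { configstr }
      end
    end

    RSpec.configure do |c|
      c.extend(LogStashHelper) # every describe block now gets the DSL for free
    end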
diff --git a/spec/filters/date.rb b/spec/filters/date_spec.rb similarity index 99% rename from spec/filters/date.rb rename to spec/filters/date_spec.rb index e9942e35649..39936fe581f 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date_spec.rb @@ -1,9 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/date" puts "Skipping date performance tests because this ruby is not jruby" if RUBY_ENGINE != "jruby" RUBY_ENGINE == "jruby" and describe LogStash::Filters::Date do - extend LogStash::RSpec describe "giving an invalid match config, raise a configuration error" do config <<-CONFIG diff --git a/spec/filters/dns.rb b/spec/filters/dns_spec.rb similarity index 99% rename from spec/filters/dns.rb rename to spec/filters/dns_spec.rb index ddcd30f14cc..cc5a3a4d914 100644 --- a/spec/filters/dns.rb +++ b/spec/filters/dns_spec.rb @@ -1,11 +1,9 @@ # encoding: utf-8 - -require "test_utils" +require "spec_helper" require "logstash/filters/dns" require "resolv" describe LogStash::Filters::DNS do - extend LogStash::RSpec before(:all) do begin diff --git a/spec/filters/drop.rb b/spec/filters/drop_spec.rb similarity index 84% rename from spec/filters/drop.rb rename to spec/filters/drop_spec.rb index 94fa78b8c77..8d8fcb65628 100644 --- a/spec/filters/drop.rb +++ b/spec/filters/drop_spec.rb @@ -1,8 +1,7 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/drop" describe LogStash::Filters::Drop do - extend LogStash::RSpec describe "drop the event" do config <<-CONFIG diff --git a/spec/filters/fingerprint.rb b/spec/filters/fingerprint_spec.rb similarity index 99% rename from spec/filters/fingerprint.rb rename to spec/filters/fingerprint_spec.rb index d8973441f70..346ed84f621 100644 --- a/spec/filters/fingerprint.rb +++ b/spec/filters/fingerprint_spec.rb @@ -1,10 +1,8 @@ # encoding: utf-8 - -require "test_utils" +require "spec_helper" require "logstash/filters/fingerprint" describe LogStash::Filters::Fingerprint do - extend LogStash::RSpec describe "fingerprint ipaddress with IPV4_NETWORK method" do config <<-CONFIG diff --git a/spec/filters/geoip.rb b/spec/filters/geoip_spec.rb similarity index 98% rename from spec/filters/geoip.rb rename to spec/filters/geoip_spec.rb index b1864de6e3e..1ecb56b6470 100644 --- a/spec/filters/geoip.rb +++ b/spec/filters/geoip_spec.rb @@ -1,8 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/geoip" describe LogStash::Filters::GeoIP do - extend LogStash::RSpec + describe "defaults" do config <<-CONFIG filter { diff --git a/spec/filters/grok-patterns/java.rb b/spec/filters/grok-patterns/java_spec.rb similarity index 98% rename from spec/filters/grok-patterns/java.rb rename to spec/filters/grok-patterns/java_spec.rb index ce0eb3b64fc..7423cec2415 100644 --- a/spec/filters/grok-patterns/java.rb +++ b/spec/filters/grok-patterns/java_spec.rb @@ -1,5 +1,5 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" # Test suite for the grok patterns defined in patterns/java # For each pattern: @@ -7,7 +7,6 @@ # - a sample is considered invalid i.e. 
"should NOT match" where message != result # describe "java grok pattern" do - extend LogStash::RSpec describe "JAVACLASS" do config <<-CONFIG diff --git a/spec/filters/grok.rb b/spec/filters/grok_spec.rb similarity index 95% rename from spec/filters/grok.rb rename to spec/filters/grok_spec.rb index 5d84105b690..ac192a66c20 100644 --- a/spec/filters/grok.rb +++ b/spec/filters/grok_spec.rb @@ -1,10 +1,8 @@ # encoding: utf-8 - -require "test_utils" +require "spec_helper" require "logstash/filters/grok" describe LogStash::Filters::Grok do - extend LogStash::RSpec describe "simple syslog line" do # The logstash config goes here. @@ -629,4 +627,22 @@ end end + describe "grok with unicode" do + config <<-CONFIG + filter { + grok { + #pattern => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" + pattern => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) %{GREEDYDATA:syslog_message}" + } + } + CONFIG + + sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do + insist { subject["tags"] }.nil? + insist { subject["syslog_pri"] } == "22" + insist { subject["syslog_program"] } == "postfix/policy-spf" + end + end + + end diff --git a/spec/filters/json.rb b/spec/filters/json_spec.rb similarity index 98% rename from spec/filters/json.rb rename to spec/filters/json_spec.rb index b571b9b4023..1a8536aeb20 100644 --- a/spec/filters/json.rb +++ b/spec/filters/json_spec.rb @@ -1,9 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/json" require "logstash/timestamp" describe LogStash::Filters::Json do - extend LogStash::RSpec describe "parse message into the event" do config <<-CONFIG diff --git a/spec/filters/kv.rb b/spec/filters/kv_spec.rb similarity index 99% rename from spec/filters/kv.rb rename to spec/filters/kv_spec.rb index 39f72fa9788..d907a9e80d4 100644 --- a/spec/filters/kv.rb +++ b/spec/filters/kv_spec.rb @@ -1,8 +1,7 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/kv" describe LogStash::Filters::KV do - extend LogStash::RSpec describe "defaults" do # The logstash config goes here. 
diff --git a/spec/filters/metrics.rb b/spec/filters/metrics_spec.rb similarity index 99% rename from spec/filters/metrics.rb rename to spec/filters/metrics_spec.rb index b34af869c19..921c7fe9968 100644 --- a/spec/filters/metrics.rb +++ b/spec/filters/metrics_spec.rb @@ -1,3 +1,4 @@ +require "spec_helper" require "logstash/filters/metrics" describe LogStash::Filters::Metrics do diff --git a/spec/filters/multiline.rb b/spec/filters/multiline_spec.rb similarity index 98% rename from spec/filters/multiline.rb rename to spec/filters/multiline_spec.rb index dcb8bb74873..a9e1b18307d 100644 --- a/spec/filters/multiline.rb +++ b/spec/filters/multiline_spec.rb @@ -1,12 +1,10 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/multiline" describe LogStash::Filters::Multiline do - extend LogStash::RSpec - describe "simple multiline" do config <<-CONFIG filter { diff --git a/spec/filters/mutate.rb b/spec/filters/mutate_spec.rb similarity index 99% rename from spec/filters/mutate.rb rename to spec/filters/mutate_spec.rb index ea9a318eb6b..38afe392a77 100644 --- a/spec/filters/mutate.rb +++ b/spec/filters/mutate_spec.rb @@ -1,10 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/mutate" describe LogStash::Filters::Mutate do - extend LogStash::RSpec context "config validation" do describe "invalid convert type should raise a configuration error" do diff --git a/spec/filters/noop.rb b/spec/filters/noop_spec.rb similarity index 99% rename from spec/filters/noop.rb rename to spec/filters/noop_spec.rb index 810d2fbab12..e2cb6ecb428 100644 --- a/spec/filters/noop.rb +++ b/spec/filters/noop_spec.rb @@ -1,9 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/noop" #NOOP filter is perfect for testing Filters::Base features with minimal overhead describe LogStash::Filters::NOOP do - extend LogStash::RSpec describe "adding multiple value to one field" do config <<-CONFIG diff --git a/spec/filters/split.rb b/spec/filters/split_spec.rb similarity index 96% rename from spec/filters/split.rb rename to spec/filters/split_spec.rb index 7e95c80894c..05fbbd7f076 100644 --- a/spec/filters/split.rb +++ b/spec/filters/split_spec.rb @@ -1,10 +1,8 @@ # encoding: utf-8 - -require "test_utils" +require "spec_helper" require "logstash/filters/split" describe LogStash::Filters::Split do - extend LogStash::RSpec describe "all defaults" do config <<-CONFIG diff --git a/spec/filters/spool.rb b/spec/filters/spool_spec.rb similarity index 98% rename from spec/filters/spool.rb rename to spec/filters/spool_spec.rb index 0758aaee518..b68c3483cd8 100644 --- a/spec/filters/spool.rb +++ b/spec/filters/spool_spec.rb @@ -1,9 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/spool" #NOOP filter is perfect for testing Filters::Base features with minimal overhead describe LogStash::Filters::Spool do - extend LogStash::RSpec # spool test are really flush tests. spool does nothing more than waiting for flush to be called. 
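The wholesale rename to *_spec.rb pairs with the runner change earlier in this patch: discovery now keys on the filename suffix, so support files under spec/ are no longer executed as examples. Roughly:

    spec_path = File.expand_path("spec")
    # Helpers such as spec/logstash_helpers.rb no longer match this glob.
    all_specs = Dir.glob(File.join(spec_path, "**", "*_spec.rb"))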
diff --git a/spec/filters/throttle.rb b/spec/filters/throttle_spec.rb similarity index 98% rename from spec/filters/throttle.rb rename to spec/filters/throttle_spec.rb index 5cf77adb749..746fe0663cc 100644 --- a/spec/filters/throttle.rb +++ b/spec/filters/throttle_spec.rb @@ -1,8 +1,7 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/throttle" describe LogStash::Filters::Throttle do - extend LogStash::RSpec describe "no before_count" do config <<-CONFIG diff --git a/spec/filters/urldecode.rb b/spec/filters/urldecode_spec.rb similarity index 96% rename from spec/filters/urldecode.rb rename to spec/filters/urldecode_spec.rb index 4fe3a4f5297..4b8225d7a0c 100644 --- a/spec/filters/urldecode.rb +++ b/spec/filters/urldecode_spec.rb @@ -1,10 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/urldecode" describe LogStash::Filters::Urldecode do - extend LogStash::RSpec describe "urldecode of correct urlencoded data" do # The logstash config goes here. diff --git a/spec/filters/useragent.rb b/spec/filters/useragent_spec.rb similarity index 94% rename from spec/filters/useragent.rb rename to spec/filters/useragent_spec.rb index d7d83e0e418..cd1c6c1f4e6 100644 --- a/spec/filters/useragent.rb +++ b/spec/filters/useragent_spec.rb @@ -1,10 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/useragent" describe LogStash::Filters::UserAgent do - extend LogStash::RSpec describe "defaults" do config <<-CONFIG @@ -25,7 +24,7 @@ end end - describe "" do + describe "Without target field" do config <<-CONFIG filter { useragent { diff --git a/spec/filters/xml.rb b/spec/filters/xml_spec.rb similarity index 98% rename from spec/filters/xml.rb rename to spec/filters/xml_spec.rb index bbfff94eee6..591f2a16759 100644 --- a/spec/filters/xml.rb +++ b/spec/filters/xml_spec.rb @@ -1,9 +1,8 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/xml" describe LogStash::Filters::Xml do - extend LogStash::RSpec describe "parse standard xml (Deprecated checks)" do config <<-CONFIG diff --git a/spec/inputs/base.rb b/spec/inputs/base_spec.rb similarity index 95% rename from spec/inputs/base.rb rename to spec/inputs/base_spec.rb index bfb546343e0..4aec45401bb 100644 --- a/spec/inputs/base.rb +++ b/spec/inputs/base_spec.rb @@ -1,5 +1,5 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "LogStash::Inputs::Base#fix_streaming_codecs" do it "should carry the charset setting along when switching" do diff --git a/spec/inputs/collectd.rb b/spec/inputs/collectd_spec.rb similarity index 99% rename from spec/inputs/collectd.rb rename to spec/inputs/collectd_spec.rb index 939db0967df..7f3093a8d44 100644 --- a/spec/inputs/collectd.rb +++ b/spec/inputs/collectd_spec.rb @@ -1,9 +1,9 @@ -require "test_utils" +require "spec_helper" require "socket" require "tempfile" describe "inputs/collectd", :socket => true do - extend LogStash::RSpec + udp_sock = UDPSocket.new(Socket::AF_INET) describe "parses a normal packet" do diff --git a/spec/inputs/elasticsearch.rb b/spec/inputs/elasticsearch_spec.rb similarity index 98% rename from spec/inputs/elasticsearch.rb rename to spec/inputs/elasticsearch_spec.rb index d695a2ed871..4eb0f456346 100644 --- a/spec/inputs/elasticsearch.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1,8 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/inputs/elasticsearch" describe "inputs/elasticsearch" do - extend LogStash::RSpec + 
search_response = <<-RESPONSE { diff --git a/spec/inputs/file.rb b/spec/inputs/file_spec.rb similarity index 98% rename from spec/inputs/file.rb rename to spec/inputs/file_spec.rb index 9126719db99..462ade1c454 100644 --- a/spec/inputs/file.rb +++ b/spec/inputs/file_spec.rb @@ -1,10 +1,10 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "tempfile" describe "inputs/file" do - extend LogStash::RSpec + describe "starts at the end of an existing file" do tmp_file = Tempfile.new('logstash-spec-input-file') diff --git a/spec/inputs/gelf.rb b/spec/inputs/gelf_spec.rb similarity index 95% rename from spec/inputs/gelf.rb rename to spec/inputs/gelf_spec.rb index 458b34a647b..32f3aaca99c 100644 --- a/spec/inputs/gelf.rb +++ b/spec/inputs/gelf_spec.rb @@ -1,8 +1,8 @@ -require "test_utils" +require "spec_helper" require "gelf" describe "inputs/gelf" do - extend LogStash::RSpec + describe "reads chunked gelf messages " do port = 12209 diff --git a/spec/inputs/generator.rb b/spec/inputs/generator_spec.rb similarity index 97% rename from spec/inputs/generator.rb rename to spec/inputs/generator_spec.rb index b21ffaeb77f..0df2c0dae26 100644 --- a/spec/inputs/generator.rb +++ b/spec/inputs/generator_spec.rb @@ -1,7 +1,7 @@ -require "test_utils" +require "spec_helper" describe "inputs/generator" do - extend LogStash::RSpec + context "performance", :performance => true do event_count = 100000 + rand(50000) diff --git a/spec/inputs/imap.rb b/spec/inputs/imap_spec.rb similarity index 100% rename from spec/inputs/imap.rb rename to spec/inputs/imap_spec.rb diff --git a/spec/inputs/kafka.rb b/spec/inputs/kafka_spec.rb similarity index 98% rename from spec/inputs/kafka.rb rename to spec/inputs/kafka_spec.rb index 6950a80f871..7aba00ccfbd 100644 --- a/spec/inputs/kafka.rb +++ b/spec/inputs/kafka_spec.rb @@ -7,7 +7,7 @@ require 'logstash/errors' describe LogStash::Inputs::Kafka do - extend LogStash::RSpec + let (:kafka_config) {{"topic_id" => "test"}} diff --git a/spec/inputs/log4j.rb b/spec/inputs/log4j_spec.rb similarity index 100% rename from spec/inputs/log4j.rb rename to spec/inputs/log4j_spec.rb diff --git a/spec/inputs/pipe.rb b/spec/inputs/pipe_spec.rb similarity index 96% rename from spec/inputs/pipe.rb rename to spec/inputs/pipe_spec.rb index 067937b4a75..e78a3dcceb5 100644 --- a/spec/inputs/pipe.rb +++ b/spec/inputs/pipe_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "tempfile" describe "inputs/pipe" do - extend LogStash::RSpec + describe "echo" do event_count = 1 diff --git a/spec/inputs/redis.rb b/spec/inputs/redis_spec.rb similarity index 96% rename from spec/inputs/redis.rb rename to spec/inputs/redis_spec.rb index c7f6b513fbc..04eceaf1348 100644 --- a/spec/inputs/redis.rb +++ b/spec/inputs/redis_spec.rb @@ -1,4 +1,4 @@ -require "test_utils" +require "spec_helper" require "redis" def populate(key, event_count) @@ -23,7 +23,7 @@ def process(pipeline, queue, event_count) end # process describe "inputs/redis", :redis => true do - extend LogStash::RSpec + describe "read events from a list" do key = 10.times.collect { rand(10).to_s }.join("") diff --git a/spec/inputs/stdin.rb b/spec/inputs/stdin_spec.rb similarity index 97% rename from spec/inputs/stdin.rb rename to spec/inputs/stdin_spec.rb index a3efea1e5e4..8da76aad9d6 100644 --- a/spec/inputs/stdin.rb +++ b/spec/inputs/stdin_spec.rb @@ -1,5 +1,5 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "socket" require "logstash/inputs/stdin" diff --git 
a/spec/inputs/syslog.rb b/spec/inputs/syslog_spec.rb similarity index 98% rename from spec/inputs/syslog.rb rename to spec/inputs/syslog_spec.rb index ef40a4aac37..48bb7552c37 100644 --- a/spec/inputs/syslog.rb +++ b/spec/inputs/syslog_spec.rb @@ -1,11 +1,11 @@ # coding: utf-8 -require "test_utils" +require "spec_helper" require "socket" require "logstash/inputs/syslog" require "logstash/event" describe "inputs/syslog" do - extend LogStash::RSpec + it "should properly handle priority, severity and facilities", :socket => true do port = 5511 diff --git a/spec/inputs/tcp.rb b/spec/inputs/tcp_spec.rb similarity index 98% rename from spec/inputs/tcp.rb rename to spec/inputs/tcp_spec.rb index cbcccef404a..48004064b19 100644 --- a/spec/inputs/tcp.rb +++ b/spec/inputs/tcp_spec.rb @@ -1,12 +1,12 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "socket" require "timeout" require "logstash/json" require "logstash/inputs/tcp" describe LogStash::Inputs::Tcp do - extend LogStash::RSpec + context "codec (PR #1372)" do it "switches from plain to line" do @@ -205,7 +205,7 @@ describe "one message per connection" do event_count = 10 - port = 5515 + port = 5516 config <<-CONFIG input { tcp { @@ -224,7 +224,7 @@ socket.flush socket.close end - + # wait till all events have been processed Timeout.timeout(1) {sleep 0.1 while queue.size < event_count} @@ -239,7 +239,7 @@ describe "connection threads are cleaned up when connection is closed" do event_count = 10 - port = 5515 + port = 5517 config <<-CONFIG input { tcp { diff --git a/spec/logstash_helpers.rb b/spec/logstash_helpers.rb new file mode 100644 index 00000000000..40e6af09c50 --- /dev/null +++ b/spec/logstash_helpers.rb @@ -0,0 +1,77 @@ +require "logstash/agent" +require "logstash/pipeline" +require "logstash/event" + +module LogStashHelper + + def config(configstr) + let(:config) { configstr } + end # def config + + def type(default_type) + let(:default_type) { default_type } + end + + def tags(*tags) + let(:default_tags) { tags } + puts "Setting default tags: #{@default_tags}" + end + + def sample(sample_event, &block) + name = sample_event.is_a?(String) ? sample_event : LogStash::Json.dump(sample_event) + name = name[0..50] + "..." if name.length > 50 + + describe "\"#{name}\"" do + let(:pipeline) { LogStash::Pipeline.new(config) } + let(:event) do + sample_event = [sample_event] unless sample_event.is_a?(Array) + next sample_event.collect do |e| + e = { "message" => e } if e.is_a?(String) + next LogStash::Event.new(e) + end + end + + let(:results) do + results = [] + pipeline.instance_eval { @filters.each(&:register) } + + event.each do |e| + pipeline.filter(e) {|new_event| results << new_event } + end + + pipeline.flush_filters(:final => true) do |e| + results << e unless e.cancelled? + end + + results + end + + subject { results.length > 1 ? 
results: results.first } + + it("when processed", &block) + end + end # def sample + + def input(&block) + it "inputs" do + pipeline = LogStash::Pipeline.new(config) + queue = Queue.new + pipeline.instance_eval do + @output_func = lambda { |event| queue << event } + end + block.call(pipeline, queue) + pipeline.shutdown + end + end # def input + + def agent(&block) + + it("agent(#{caller[0].gsub(/ .*/, "")}) runs") do + pipeline = LogStash::Pipeline.new(config) + pipeline.run + block.call + end + end # def agent + +end # module LogStashHelper + diff --git a/spec/outputs/cloudwatch_spec.rb b/spec/outputs/cloudwatch_spec.rb new file mode 100644 index 00000000000..047d9968c5e --- /dev/null +++ b/spec/outputs/cloudwatch_spec.rb @@ -0,0 +1,18 @@ +require "spec_helper" +require "logstash/plugin" +require "logstash/json" + +describe "outputs/cloudwatch" do + + + output = LogStash::Plugin.lookup("output", "cloudwatch").new + + it "should register" do + expect {output.register}.to_not raise_error + end + + it "should respond correctly to a receive call" do + event = LogStash::Event.new + expect { output.receive(event) }.to_not raise_error + end +end diff --git a/spec/outputs/csv.rb b/spec/outputs/csv_spec.rb similarity index 99% rename from spec/outputs/csv.rb rename to spec/outputs/csv_spec.rb index 29fa719c89a..5430cb3235b 100644 --- a/spec/outputs/csv.rb +++ b/spec/outputs/csv_spec.rb @@ -1,10 +1,10 @@ require "csv" require "tempfile" -require "test_utils" +require "spec_helper" require "logstash/outputs/csv" describe LogStash::Outputs::CSV do - extend LogStash::RSpec + describe "Write a single field to a csv file" do tmpfile = Tempfile.new('logstash-spec-output-csv') diff --git a/spec/outputs/elasticsearch_http.rb b/spec/outputs/elasticsearch_http_spec.rb similarity index 99% rename from spec/outputs/elasticsearch_http.rb rename to spec/outputs/elasticsearch_http_spec.rb index f668b37191d..a89526216a7 100644 --- a/spec/outputs/elasticsearch_http.rb +++ b/spec/outputs/elasticsearch_http_spec.rb @@ -1,8 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/json" describe "outputs/elasticsearch_http", :elasticsearch => true do - extend LogStash::RSpec + describe "ship lots of events w/ default index_type" do # Generate a random index name diff --git a/spec/outputs/elasticsearch_river.rb b/spec/outputs/elasticsearch_river_spec.rb similarity index 100% rename from spec/outputs/elasticsearch_river.rb rename to spec/outputs/elasticsearch_river_spec.rb diff --git a/spec/outputs/elasticsearch.rb b/spec/outputs/elasticsearch_spec.rb similarity index 99% rename from spec/outputs/elasticsearch.rb rename to spec/outputs/elasticsearch_spec.rb index 836c9ef56e1..ce896ec6cbb 100644 --- a/spec/outputs/elasticsearch.rb +++ b/spec/outputs/elasticsearch_spec.rb @@ -1,10 +1,10 @@ -require "test_utils" +require "spec_helper" require "ftw" require "logstash/plugin" require "logstash/json" describe "outputs/elasticsearch" do - extend LogStash::RSpec + it "should register" do output = LogStash::Plugin.lookup("output", "elasticsearch").new("embedded" => "false", "protocol" => "transport", "manage_template" => "false") diff --git a/spec/outputs/email.rb b/spec/outputs/email_spec.rb similarity index 99% rename from spec/outputs/email.rb rename to spec/outputs/email_spec.rb index fdcb6c3e94d..38a8b50ea00 100644 --- a/spec/outputs/email.rb +++ b/spec/outputs/email_spec.rb @@ -1,9 +1,9 @@ -require "test_utils" +require "spec_helper" require "rumbster" require "message_observers" describe "outputs/email", :broken =>
true do - extend LogStash::RSpec + @@port=2525 let (:rumbster) { Rumbster.new(@@port) } diff --git a/spec/outputs/file.rb b/spec/outputs/file_spec.rb similarity index 97% rename from spec/outputs/file.rb rename to spec/outputs/file_spec.rb index a49366bd4e3..aef261acc2e 100644 --- a/spec/outputs/file.rb +++ b/spec/outputs/file_spec.rb @@ -1,10 +1,10 @@ -require "test_utils" +require "spec_helper" require "logstash/outputs/file" require "logstash/json" require "tempfile" describe LogStash::Outputs::File do - extend LogStash::RSpec + describe "ship lots of events to a file" do event_count = 10000 + rand(500) diff --git a/spec/outputs/graphite.rb b/spec/outputs/graphite_spec.rb similarity index 99% rename from spec/outputs/graphite.rb rename to spec/outputs/graphite_spec.rb index 2b5fe1ac428..d8bfe240e66 100644 --- a/spec/outputs/graphite.rb +++ b/spec/outputs/graphite_spec.rb @@ -1,9 +1,9 @@ -require "test_utils" +require "spec_helper" require "logstash/outputs/graphite" require "mocha/api" describe LogStash::Outputs::Graphite, :socket => true do - extend LogStash::RSpec + describe "defaults should include all metrics" do port = 4939 diff --git a/spec/outputs/kafka.rb b/spec/outputs/kafka_spec.rb similarity index 100% rename from spec/outputs/kafka.rb rename to spec/outputs/kafka_spec.rb diff --git a/spec/outputs/redis.rb b/spec/outputs/redis_spec.rb similarity index 98% rename from spec/outputs/redis.rb rename to spec/outputs/redis_spec.rb index 3c4dbeb04d0..70cc362b1bc 100644 --- a/spec/outputs/redis.rb +++ b/spec/outputs/redis_spec.rb @@ -1,10 +1,10 @@ -require "test_utils" +require "spec_helper" require "logstash/outputs/redis" require "logstash/json" require "redis" describe LogStash::Outputs::Redis, :redis => true do - extend LogStash::RSpec + describe "ship lots of events to a list" do key = 10.times.collect { rand(10).to_s }.join("") diff --git a/spec/outputs/statsd.rb b/spec/outputs/statsd_spec.rb similarity index 97% rename from spec/outputs/statsd.rb rename to spec/outputs/statsd_spec.rb index 5bb0942600e..3aa95e0d8a8 100644 --- a/spec/outputs/statsd.rb +++ b/spec/outputs/statsd_spec.rb @@ -1,10 +1,10 @@ -require "test_utils" +require "spec_helper" require "logstash/outputs/statsd" require "mocha/api" require "socket" describe LogStash::Outputs::Statsd do - extend LogStash::RSpec + port = 4399 udp_server = UDPSocket.new udp_server.bind("127.0.0.1", port) diff --git a/spec/filters/date_performance.rb b/spec/performance/date.rb similarity index 100% rename from spec/filters/date_performance.rb rename to spec/performance/date.rb diff --git a/spec/speed.rb b/spec/performance/speed.rb similarity index 100% rename from spec/speed.rb rename to spec/performance/speed.rb diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000000..90e22b906cf --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,48 @@ +require "logstash/logging" +require 'logstash_helpers' +require "insist" + +if ENV['COVERAGE'] + require 'simplecov' + require 'coveralls' + + SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ + SimpleCov::Formatter::HTMLFormatter, + Coveralls::SimpleCov::Formatter + ] + SimpleCov.start do + add_filter 'spec/' + add_filter 'vendor/' + end +end + +$TESTING = true +if RUBY_VERSION < "1.9.2" + $stderr.puts "Ruby 1.9.2 or later is required. 
(You are running: " + RUBY_VERSION + ")" + raise LoadError +end + +$logger = LogStash::Logger.new(STDOUT) +if ENV["TEST_DEBUG"] + $logger.level = :debug +else + $logger.level = :error +end + +puts("Using Accessor#strict_set for specs") +# monkey patch LogStash::Event to use strict_set in tests +# ugly, I know, but this avoids adding conditionals in performance critical section +class LogStash::Event + def []=(str, value) + if str == TIMESTAMP && !value.is_a?(LogStash::Timestamp) + raise TypeError, "The field '@timestamp' must be a LogStash::Timestamp, not a #{value.class} (#{value})" + end + @accessors.strict_set(str, value) + end # def []= +end + +RSpec.configure do |config| + config.extend LogStashHelper + config.filter_run_excluding :redis => true, :socket => true, :performance => true, :elasticsearch => true, :broken => true, :export_cypher => true +end + diff --git a/spec/support/LOGSTASH-733.rb b/spec/support/LOGSTASH-733.rb deleted file mode 100644 index 62a9b7dce81..00000000000 --- a/spec/support/LOGSTASH-733.rb +++ /dev/null @@ -1,21 +0,0 @@ -# This spec covers the question here: -# https://logstash.jira.com/browse/LOGSTASH-733 - -require "test_utils" - -describe "LOGSTASH-733" do - extend LogStash::RSpec - describe "pipe-delimited fields" do - config <<-CONFIG - filter { - kv { field_split => "|" } - } - CONFIG - - sample "field1=test|field2=another test|field3=test3" do - insist { subject["field1"] } == "test" - insist { subject["field2"] } == "another test" - insist { subject["field3"] } == "test3" - end - end -end diff --git a/spec/support/LOGSTASH-820.rb b/spec/support/LOGSTASH-820.rb deleted file mode 100644 index 251b1f0e5bc..00000000000 --- a/spec/support/LOGSTASH-820.rb +++ /dev/null @@ -1,25 +0,0 @@ -# encoding: utf-8 -# This spec covers the question here: -# https://logstash.jira.com/browse/LOGSTASH-820 - -require "test_utils" - -describe "LOGSTASH-820" do - extend LogStash::RSpec - describe "grok with unicode" do - config <<-CONFIG - filter { - grok { - #pattern => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" - pattern => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) %{GREEDYDATA:syslog_message}" - } - } - CONFIG - - sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do - insist { subject["tags"] }.nil? - insist { subject["syslog_pri"] } == "22" - insist { subject["syslog_program"] } == "postfix/policy-spf" - end - end -end diff --git a/spec/support/akamai-grok.rb b/spec/support/akamai-grok_spec.rb similarity index 96% rename from spec/support/akamai-grok.rb rename to spec/support/akamai-grok_spec.rb index f889d5c9e87..cb678a2dee6 100644 --- a/spec/support/akamai-grok.rb +++ b/spec/support/akamai-grok_spec.rb @@ -1,7 +1,6 @@ -require "test_utils" +require "spec_helper" -describe "..."
do - extend LogStash::RSpec +describe "Akamai Grok pattern" do config <<-'CONFIG' filter { diff --git a/spec/support/date-http.rb b/spec/support/date-http_spec.rb similarity index 88% rename from spec/support/date-http.rb rename to spec/support/date-http_spec.rb index a6ac07966fc..fb25f48b45b 100644 --- a/spec/support/date-http.rb +++ b/spec/support/date-http_spec.rb @@ -1,7 +1,6 @@ -require "test_utils" +require "spec_helper" describe "http dates", :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec config <<-'CONFIG' filter { diff --git a/spec/support/pull375.rb b/spec/support/pull375_spec.rb similarity index 91% rename from spec/support/pull375.rb rename to spec/support/pull375_spec.rb index 77295fc61be..b0851ff8eb5 100644 --- a/spec/support/pull375.rb +++ b/spec/support/pull375_spec.rb @@ -3,10 +3,10 @@ # This spec covers the question here: # https://github.com/logstash/logstash/pull/375 -require "test_utils" +require "spec_helper" describe "pull #375" do - extend LogStash::RSpec + describe "kv after grok" do config <<-CONFIG filter { diff --git a/spec/test_utils.rb b/spec/test_utils.rb deleted file mode 100644 index 8c98f4a911c..00000000000 --- a/spec/test_utils.rb +++ /dev/null @@ -1,143 +0,0 @@ -# encoding: utf-8 - -require "logstash/json" -require "logstash/timestamp" - -if ENV['COVERAGE'] - require 'simplecov' - require 'coveralls' - - SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ - SimpleCov::Formatter::HTMLFormatter, - Coveralls::SimpleCov::Formatter - ] - SimpleCov.start do - add_filter 'spec/' - add_filter 'vendor/' - end -end -require "insist" -require "logstash/agent" -require "logstash/pipeline" -require "logstash/event" -require "logstash/logging" -require "insist" -require "stud/try" - -$TESTING = true -if RUBY_VERSION < "1.9.2" - $stderr.puts "Ruby 1.9.2 or later is required. (You are running: " + RUBY_VERSION + ")" - $stderr.puts "Options for fixing this: " - $stderr.puts " * If doing 'ruby bin/logstash ...' add --1.9 flag to 'ruby'" - $stderr.puts " * If doing 'java -jar ... ' add -Djruby.compat.version=RUBY1_9 to java flags" - raise LoadError -end - -$logger = LogStash::Logger.new(STDOUT) -if ENV["TEST_DEBUG"] - $logger.level = :debug -else - $logger.level = :error -end - -puts("Using Accessor#strict_set for specs") -# mokey path LogStash::Event to use strict_set in tests -# ugly, I know, but this avoids adding conditionals in performance critical section -class LogStash::Event - def []=(str, value) - if str == TIMESTAMP && !value.is_a?(LogStash::Timestamp) - raise TypeError, "The field '@timestamp' must be a LogStash::Timestamp, not a #{value.class} (#{value})" - end - @accessors.strict_set(str, value) - end # def []= -end - -RSpec.configure do |config| - config.filter_run_excluding :redis => true, :socket => true, :performance => true, :elasticsearch => true, :broken => true, :export_cypher => true -end - -module LogStash - module RSpec - def config(configstr) - let(:config) { configstr } - end # def config - - def type(default_type) - let(:default_type) { default_type } - end - - def tags(*tags) - let(:default_tags) { tags } - puts "Setting default tags: #{@default_tags}" - end - - def sample(sample_event, &block) - name = sample_event.is_a?(String) ? sample_event : LogStash::Json.dump(sample_event) - name = name[0..50] + "..." 
if name.length > 50 - - describe "\"#{name}\"" do - extend LogStash::RSpec - let(:pipeline) { LogStash::Pipeline.new(config) } - let(:event) do - sample_event = [sample_event] unless sample_event.is_a?(Array) - next sample_event.collect do |e| - e = { "message" => e } if e.is_a?(String) - next LogStash::Event.new(e) - end - end - - let(:results) do - results = [] - pipeline.instance_eval { @filters.each(&:register) } - - event.each do |e| - pipeline.filter(e) {|new_event| results << new_event } - end - - pipeline.flush_filters(:final => true) do |e| - results << e unless e.cancelled? - end - - results - end - - subject { results.length > 1 ? results: results.first } - - it("when processed", &block) - end - end # def sample - - def input(&block) - it "inputs" do - pipeline = LogStash::Pipeline.new(config) - queue = Queue.new - pipeline.instance_eval do - @output_func = lambda { |event| queue << event } - end - block.call(pipeline, queue) - pipeline.shutdown - end - end # def input - - def agent(&block) - require "logstash/pipeline" - - it("agent(#{caller[0].gsub(/ .*/, "")}) runs") do - pipeline = LogStash::Pipeline.new(config) - pipeline.run - block.call - end - end # def agent - - end # module RSpec -end # module LogStash - -class Shiftback - def initialize(&block) - @block = block - end - - def <<(event) - @block.call(event) - end -end # class Shiftback diff --git a/spec/util/accessors_spec.rb b/spec/util/accessors_spec.rb index e86e25aed37..ca6ea831c14 100644 --- a/spec/util/accessors_spec.rb +++ b/spec/util/accessors_spec.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/util/accessors" describe LogStash::Util::Accessors, :if => true do diff --git a/spec/util/charset_spec.rb b/spec/util/charset_spec.rb index f741b0ce2cb..8514e1a5755 100644 --- a/spec/util/charset_spec.rb +++ b/spec/util/charset_spec.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/util/charset" describe LogStash::Util::Charset do @@ -29,7 +29,8 @@ ["foo \xED\xB9\x81\xC3", "bar \xAD"].each do |data| insist { data.encoding.name } == "UTF-8" insist { data.valid_encoding? 
} == false - logger.should_receive(:warn).twice + expect(logger).to receive(:warn).exactly(2).times +#logger.should_receive(:warn).twice insist { subject.convert(data) } == data.inspect[1..-2] insist { subject.convert(data).encoding.name } == "UTF-8" end diff --git a/spec/environment.rb b/spec/util/environment_spec.rb similarity index 100% rename from spec/environment.rb rename to spec/util/environment_spec.rb diff --git a/spec/util/fieldeval_spec.rb b/spec/util/fieldeval_spec.rb index 4cf10597ba2..963ad3925f5 100644 --- a/spec/util/fieldeval_spec.rb +++ b/spec/util/fieldeval_spec.rb @@ -1,4 +1,4 @@ -require "test_utils" +require "spec_helper" require "logstash/util/fieldreference" describe LogStash::Util::FieldReference, :if => true do diff --git a/spec/jar.rb b/spec/util/jar_spec.rb similarity index 100% rename from spec/jar.rb rename to spec/util/jar_spec.rb diff --git a/spec/json.rb b/spec/util/json_spec.rb similarity index 91% rename from spec/json.rb rename to spec/util/json_spec.rb index 147b6196d38..f7bd42a08e3 100644 --- a/spec/json.rb +++ b/spec/util/json_spec.rb @@ -42,20 +42,22 @@ context "jruby serialize" do it "should respond to dump and serialize object" do - expect(JrJackson::Json).to receive(:dump).with(string).and_call_original +expect(JrJackson::Json).to receive(:dump).with(string).and_call_original expect(LogStash::Json.dump(string)).to eql(json_string) end it "should call JrJackson::Raw.generate for Hash" do - expect(JrJackson::Raw).to receive(:generate).with(hash).and_call_original +#expect(JrJackson::Raw).to receive(:generate).with(hash).and_call_original expect(LogStash::Json.dump(hash)).to eql(json_hash) end it "should call JrJackson::Raw.generate for Array" do - expect(JrJackson::Raw).to receive(:generate).with(array).and_call_original +#expect(JrJackson::Raw).to receive(:generate).with(array).and_call_original expect(LogStash::Json.dump(array)).to eql(json_array) end + end + else ### MRI specific From 17e282c3a713df2907907ffff9d311c4cf79c3da Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sun, 21 Sep 2014 13:34:43 +0000 Subject: [PATCH 66/74] Update gitignore file Fixes #1758 --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 72e92c59021..d8e74989adf 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ etc/jira-output.conf coverage/* .VERSION.mk .idea/* +spec/reports +rspec.xml From 46a6e78aea929949a1123d1f812e5b2597567148 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sun, 21 Sep 2014 16:30:01 +0000 Subject: [PATCH 67/74] Add missing tests Fixes #1758 --- spec/examples/{fail2ban.rb => fail2ban_spec.rb} | 4 ++-- spec/examples/{graphite-input.rb => graphite-input_spec.rb} | 4 ++-- .../{mysql-slow-query.rb => mysql-slow-query_spec.rb} | 4 ++-- .../{parse-apache-logs.rb => parse-apache-logs_spec.rb} | 4 ++-- .../{parse-haproxy-logs.rb => parse-haproxy-logs_spec.rb} | 4 ++-- spec/examples/{syslog.rb => syslog_spec.rb} | 4 ++-- spec/filters/grok/{timeouts.rb => timeout1_spec.rb} | 2 +- spec/filters/grok/{timeout2.rb => timeout2_spec.rb} | 4 ++-- 8 files changed, 15 insertions(+), 15 deletions(-) rename spec/examples/{fail2ban.rb => fail2ban_spec.rb} (94%) rename spec/examples/{graphite-input.rb => graphite-input_spec.rb} (95%) rename spec/examples/{mysql-slow-query.rb => mysql-slow-query_spec.rb} (97%) rename spec/examples/{parse-apache-logs.rb => parse-apache-logs_spec.rb} (98%) rename spec/examples/{parse-haproxy-logs.rb => parse-haproxy-logs_spec.rb} (99%) rename spec/examples/{syslog.rb => 
syslog_spec.rb} (97%) rename spec/filters/grok/{timeouts.rb => timeout1_spec.rb} (99%) rename spec/filters/grok/{timeout2.rb => timeout2_spec.rb} (98%) diff --git a/spec/examples/fail2ban.rb b/spec/examples/fail2ban_spec.rb similarity index 94% rename from spec/examples/fail2ban.rb rename to spec/examples/fail2ban_spec.rb index edb0baa9693..ee352d2004d 100644 --- a/spec/examples/fail2ban.rb +++ b/spec/examples/fail2ban_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "fail2ban logs", :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec + # The logstash config goes here. # At this time, only filters are supported. diff --git a/spec/examples/graphite-input.rb b/spec/examples/graphite-input_spec.rb similarity index 95% rename from spec/examples/graphite-input.rb rename to spec/examples/graphite-input_spec.rb index b1f4e96f571..579b214d88d 100644 --- a/spec/examples/graphite-input.rb +++ b/spec/examples/graphite-input_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "receive graphite input", :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec + # The logstash config goes here. # At this time, only filters are supported. diff --git a/spec/examples/mysql-slow-query.rb b/spec/examples/mysql-slow-query_spec.rb similarity index 97% rename from spec/examples/mysql-slow-query.rb rename to spec/examples/mysql-slow-query_spec.rb index da019e0b616..bce57b96458 100644 --- a/spec/examples/mysql-slow-query.rb +++ b/spec/examples/mysql-slow-query_spec.rb @@ -1,8 +1,8 @@ -require "test_utils" +require "spec_helper" # Skip until we convert this to use multiline codec describe "parse mysql slow query log", :if => false do - extend LogStash::RSpec + config <<-'CONFIG' filter { diff --git a/spec/examples/parse-apache-logs.rb b/spec/examples/parse-apache-logs_spec.rb similarity index 98% rename from spec/examples/parse-apache-logs.rb rename to spec/examples/parse-apache-logs_spec.rb index 4407a95f29c..7a403f66857 100644 --- a/spec/examples/parse-apache-logs.rb +++ b/spec/examples/parse-apache-logs_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "apache common log format", :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec + # The logstash config goes here. # At this time, only filters are supported. diff --git a/spec/examples/parse-haproxy-logs.rb b/spec/examples/parse-haproxy-logs_spec.rb similarity index 99% rename from spec/examples/parse-haproxy-logs.rb rename to spec/examples/parse-haproxy-logs_spec.rb index 90f272c7a54..8c8abcfc044 100644 --- a/spec/examples/parse-haproxy-logs.rb +++ b/spec/examples/parse-haproxy-logs_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "haproxy httplog format" do - extend LogStash::RSpec + # The logstash config goes here. # At this time, only filters are supported. 
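These example specs all run under the same spec_helper, whose RSpec.configure block excludes groups tagged :redis, :socket, :performance, :elasticsearch, or :broken by default, and several guard themselves with :if => RUBY_ENGINE == "jruby". A sketch of how such a guarded, tagged group is written and selected — the rspec invocation and field names are illustrative, not taken from this patch:

    require "spec_helper"

    # Excluded by default via config.filter_run_excluding; running
    # `rspec --tag performance` should lift the exclusion for this tag.
    describe "an expensive example", :performance => true, :if => RUBY_ENGINE == "jruby" do
      config <<-CONFIG
        filter {
          mutate { add_field => { "checked" => "yes" } }
        }
      CONFIG

      sample("message" => "hi") do
        insist { subject["checked"] } == "yes"
      end
    end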
diff --git a/spec/examples/syslog.rb b/spec/examples/syslog_spec.rb similarity index 97% rename from spec/examples/syslog.rb rename to spec/examples/syslog_spec.rb index fe617cf59df..1559a125cb2 100644 --- a/spec/examples/syslog.rb +++ b/spec/examples/syslog_spec.rb @@ -1,9 +1,9 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" describe "parse syslog", :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec + config <<-'CONFIG' filter { diff --git a/spec/filters/grok/timeouts.rb b/spec/filters/grok/timeout1_spec.rb similarity index 99% rename from spec/filters/grok/timeouts.rb rename to spec/filters/grok/timeout1_spec.rb index 3039cb76f21..3d165856184 100644 --- a/spec/filters/grok/timeouts.rb +++ b/spec/filters/grok/timeout1_spec.rb @@ -1,4 +1,4 @@ -require "test_utils" +require "spec_helper" require "grok-pure" require "timeout" diff --git a/spec/filters/grok/timeout2.rb b/spec/filters/grok/timeout2_spec.rb similarity index 98% rename from spec/filters/grok/timeout2.rb rename to spec/filters/grok/timeout2_spec.rb index 89c3a0cc712..11332c9c269 100644 --- a/spec/filters/grok/timeout2.rb +++ b/spec/filters/grok/timeout2_spec.rb @@ -1,9 +1,9 @@ -require "test_utils" +require "spec_helper" require "grok-pure" require "timeout" describe "grok known timeout failures" do - extend LogStash::RSpec + describe "user reported timeout" do config <<-'CONFIG' From 58f6bad61fca08b2d9b62d1feabf6db7696bc187 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sun, 21 Sep 2014 16:35:19 +0000 Subject: [PATCH 68/74] Another missing one Fixes #1758 --- spec/filters/{filter_chains.rb => filter_chains_spec.rb} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename spec/filters/{filter_chains.rb => filter_chains_spec.rb} (98%) diff --git a/spec/filters/filter_chains.rb b/spec/filters/filter_chains_spec.rb similarity index 98% rename from spec/filters/filter_chains.rb rename to spec/filters/filter_chains_spec.rb index edff7979bc6..7c956de50ce 100644 --- a/spec/filters/filter_chains.rb +++ b/spec/filters/filter_chains_spec.rb @@ -1,11 +1,11 @@ # encoding: utf-8 -require "test_utils" +require "spec_helper" require "logstash/filters/split" require "logstash/filters/clone" describe LogStash::Filters do - extend LogStash::RSpec + describe "chain split with mutate filter" do config <<-CONFIG From ef233087015bf54188b0b56790196072a10b65f1 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sun, 21 Sep 2014 18:25:12 +0000 Subject: [PATCH 69/74] Fix performance tests - date speed test only runs when performance is enabled Fixes #1758 --- spec/performance/date.rb | 8 ++++---- spec/performance/speed.rb | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spec/performance/date.rb b/spec/performance/date.rb index 0c30b59dd84..47d3b0402c5 100644 --- a/spec/performance/date.rb +++ b/spec/performance/date.rb @@ -1,11 +1,11 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/date" puts "Skipping date tests because this ruby is not jruby" if RUBY_ENGINE != "jruby" -describe LogStash::Filters::Date, :if => RUBY_ENGINE == "jruby" do - extend LogStash::RSpec +describe LogStash::Filters::Date, :if => RUBY_ENGINE == "jruby", :performance => true do + - describe "speed test of date parsing", :performance => true do + describe "speed test of date parsing" do it "should be fast" do event_count = 100000 min_rate = 4000 diff --git a/spec/performance/speed.rb b/spec/performance/speed.rb index d8f0a9e5e18..d2f66dd8da8 100644 --- a/spec/performance/speed.rb +++
b/spec/performance/speed.rb @@ -1,7 +1,7 @@ -require "test_utils" +require "spec_helper" describe "speed tests", :performance => true do - extend LogStash::RSpec + count = 1000000 config <<-CONFIG From 42fa9eab3b10193f757f64e24093694c612ecf90 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Mon, 22 Sep 2014 10:38:40 +0000 Subject: [PATCH 70/74] [TESTING] Fix tcp input spec test Fixes #1758 --- spec/inputs/tcp_spec.rb | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spec/inputs/tcp_spec.rb b/spec/inputs/tcp_spec.rb index 48004064b19..bbec11f010d 100644 --- a/spec/inputs/tcp_spec.rb +++ b/spec/inputs/tcp_spec.rb @@ -4,6 +4,7 @@ require "timeout" require "logstash/json" require "logstash/inputs/tcp" +require 'stud/try' describe LogStash::Inputs::Tcp do @@ -40,7 +41,7 @@ Thread.new { pipeline.run } sleep 0.1 while !pipeline.ready? - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } event_count.times do |i| # unicode smiley for testing unicode support! socket.puts("#{i} ☹") @@ -57,7 +58,7 @@ end # input end - describe "read events with plain codec and ISO-8859-1 charset", :socket => true do + describe "read events with plain codec and ISO-8859-1 charset" do port = 5513 charset = "ISO-8859-1" config <<-CONFIG @@ -73,7 +74,7 @@ Thread.new { pipeline.run } sleep 0.1 while !pipeline.ready? - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } text = "\xA3" # the £ symbol in ISO-8859-1 aka Latin-1 text.force_encoding("ISO-8859-1") socket.puts(text) @@ -92,7 +93,7 @@ end # input end - describe "read events with json codec", :socket => true do + describe "read events with json codec" do port = 5514 config <<-CONFIG input { @@ -114,7 +115,7 @@ "host" => "example host" } - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } socket.puts(LogStash::Json.dump(data)) socket.close @@ -132,7 +133,7 @@ end # input end - describe "read events with json codec (testing 'host' handling)", :socket => true do + describe "read events with json codec (testing 'host' handling)" do port = 5514 config <<-CONFIG input { @@ -151,7 +152,7 @@ "hello" => "world" } - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } socket.puts(LogStash::Json.dump(data)) socket.close @@ -164,7 +165,7 @@ end # input end - describe "read events with json_lines codec", :socket => true do + describe "read events with json_lines codec" do port = 5515 config <<-CONFIG input { @@ -186,7 +187,7 @@ "idx" => 0 } - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } (1..5).each do |idx| data["idx"] = idx socket.puts(LogStash::Json.dump(data) + "\n") @@ -219,7 +220,7 @@ sleep 0.1 while !pipeline.ready? 
event_count.times do |i| - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } socket.puts("#{i}") socket.flush socket.close @@ -256,7 +257,7 @@ insist { inputs.size } == 1 sockets = event_count.times.map do |i| - socket = Stud.try(5.times) { TCPSocket.new("127.0.0.1", port) } + socket = Stud::try(5.times) { TCPSocket.new("127.0.0.1", port) } socket.puts("#{i}") socket.flush socket From 6daeb2268501bc2556749ffd7dd032cc8fafd1d1 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Mon, 22 Sep 2014 11:46:13 +0000 Subject: [PATCH 71/74] [TESTING] Fix Kafka output test When doing randomized testing, one of the tests failed because kafka was not initialized yet Fixes #1758 --- spec/outputs/kafka_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/outputs/kafka_spec.rb b/spec/outputs/kafka_spec.rb index 0a87b974c54..28c36028500 100644 --- a/spec/outputs/kafka_spec.rb +++ b/spec/outputs/kafka_spec.rb @@ -26,13 +26,13 @@ end it "should send logstash event to kafka broker" do + kafka = LogStash::Outputs::Kafka.new(kafka_config) + kafka.register timestamp = LogStash::Timestamp.now expect_any_instance_of(Kafka::Producer) .to receive(:sendMsg) .with("test", nil, "{\"message\":\"hello world\",\"host\":\"test\",\"@timestamp\":\"#{timestamp}\",\"@version\":\"1\"}") e = LogStash::Event.new({"message" => "hello world", "host" => "test", "@timestamp" => timestamp}) - kafka = LogStash::Outputs::Kafka.new(kafka_config) - kafka.register kafka.receive(e) end From 79c4802e45656ec3f2aaebf18aa39807c32e5ed2 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Mon, 22 Sep 2014 19:20:19 +0000 Subject: [PATCH 72/74] Minor fixes after review Fixes #1758 --- spec/core/conditionals_spec.rb | 1 - spec/logstash_helpers.rb | 2 +- spec/util/json_spec.rb | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/spec/core/conditionals_spec.rb b/spec/core/conditionals_spec.rb index 0c7fceb8201..2bb46b2825f 100644 --- a/spec/core/conditionals_spec.rb +++ b/spec/core/conditionals_spec.rb @@ -22,7 +22,6 @@ def conditional(expression, &block) end describe "conditionals in output" do - extend LogStash::RSpec extend ConditionalFanciness describe "simple" do diff --git a/spec/logstash_helpers.rb b/spec/logstash_helpers.rb index 40e6af09c50..0439661e059 100644 --- a/spec/logstash_helpers.rb +++ b/spec/logstash_helpers.rb @@ -14,7 +14,7 @@ def type(default_type) def tags(*tags) let(:default_tags) { tags } - puts "Setting default tags: #{@default_tags}" + puts "Setting default tags: #{tags}" end def sample(sample_event, &block) diff --git a/spec/util/json_spec.rb b/spec/util/json_spec.rb index f7bd42a08e3..a745f91a1e8 100644 --- a/spec/util/json_spec.rb +++ b/spec/util/json_spec.rb @@ -42,17 +42,17 @@ context "jruby serialize" do it "should respond to dump and serialize object" do -expect(JrJackson::Json).to receive(:dump).with(string).and_call_original + expect(JrJackson::Json).to receive(:dump).with(string).and_call_original expect(LogStash::Json.dump(string)).to eql(json_string) end it "should call JrJackson::Raw.generate for Hash" do -#expect(JrJackson::Raw).to receive(:generate).with(hash).and_call_original + expect(JrJackson::Raw).to receive(:generate).with(hash).and_call_original expect(LogStash::Json.dump(hash)).to eql(json_hash) end it "should call JrJackson::Raw.generate for Array" do -#expect(JrJackson::Raw).to receive(:generate).with(array).and_call_original + expect(JrJackson::Raw).to 
receive(:generate).with(array).and_call_original expect(LogStash::Json.dump(array)).to eql(json_array) end From cf2242170011fbf2d264ae87660192754697671a Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Tue, 30 Sep 2014 10:00:34 +0000 Subject: [PATCH 73/74] [TESTING] Convert ruby filter spec after rebase Fixes #1758 --- spec/filters/{ruby.rb => ruby_spec.rb} | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename spec/filters/{ruby.rb => ruby_spec.rb} (97%) diff --git a/spec/filters/ruby.rb b/spec/filters/ruby_spec.rb similarity index 97% rename from spec/filters/ruby.rb rename to spec/filters/ruby_spec.rb index 6bb4b7b7c74..b1c83f19e0a 100644 --- a/spec/filters/ruby.rb +++ b/spec/filters/ruby_spec.rb @@ -1,9 +1,8 @@ -require "test_utils" +require "spec_helper" require "logstash/filters/ruby" require "logstash/filters/date" describe LogStash::Filters::Ruby do - extend LogStash::RSpec describe "generate pretty json on event.to_hash" do # this obviously tests the Ruby filter but also makes sure From 284ec9f0493430e393567f61aa008a7c28b75f0f Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Mon, 29 Sep 2014 13:01:05 -0700 Subject: [PATCH 74/74] remove kafka source since it moved to a separate plugin Closes #1801 --- lib/logstash/inputs/kafka.rb | 144 ------------------------------- lib/logstash/outputs/kafka.rb | 158 ---------------------------------- spec/inputs/kafka_spec.rb | 57 ------------ spec/outputs/kafka_spec.rb | 39 --------- 4 files changed, 398 deletions(-) delete mode 100644 lib/logstash/inputs/kafka.rb delete mode 100644 lib/logstash/outputs/kafka.rb delete mode 100644 spec/inputs/kafka_spec.rb delete mode 100644 spec/outputs/kafka_spec.rb diff --git a/lib/logstash/inputs/kafka.rb b/lib/logstash/inputs/kafka.rb deleted file mode 100644 index 14b9e1b960c..00000000000 --- a/lib/logstash/inputs/kafka.rb +++ /dev/null @@ -1,144 +0,0 @@ -require 'logstash/namespace' -require 'logstash/inputs/base' - -# This input will read events from a Kafka topic. It uses the high level consumer API provided -# by Kafka to read messages from the broker. It also maintains the state of what has been -# consumed using Zookeeper. The default input codec is json -# -# The only required configuration is the topic name. By default it will connect to a Zookeeper -# running on localhost. All the broker information is read from Zookeeper state -# -# Ideally you should have as many threads as the number of partitions for a perfect balance -- -# more threads than partitions means that some threads will be idle -# -# For more information see http://kafka.apache.org/documentation.html#theconsumer -# -# Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs -# -class LogStash::Inputs::Kafka < LogStash::Inputs::Base - config_name 'kafka' - milestone 1 - - default :codec, 'json' - - # Specifies the ZooKeeper connection string in the form hostname:port where host and port are - # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form - # hostname1:port1,hostname2:port2,hostname3:port3. - config :zk_connect, :validate => :string, :default => 'localhost:2181' - # A string that uniquely identifies the group of consumer processes to which this consumer - # belongs. By setting the same group id multiple processes indicate that they are all part of - # the same consumer group.
- config :group_id, :validate => :string, :default => 'logstash' - # The topic to consume messages from - config :topic_id, :validate => :string, :required => true - # Specify whether to jump to beginning of the queue when there is no initial offset in - # ZooKeeper, or if an offset is out of range. If this is false, messages are consumed - # from the latest offset - config :reset_beginning, :validate => :boolean, :default => false - # Number of threads to read from the partitions. Ideally you should have as many threads as the - # number of partitions for a perfect balance. More threads than partitions means that some - # threads will be idle. Less threads means a single thread could be consuming from more than - # one partition - config :consumer_threads, :validate => :number, :default => 1 - # Internal Logstash queue size used to hold events in memory after it has been read from Kafka - config :queue_size, :validate => :number, :default => 20 - # When a new consumer joins a consumer group the set of consumers attempt to "rebalance" the - # load to assign partitions to each consumer. If the set of consumers changes while this - # assignment is taking place the rebalance will fail and retry. This setting controls the - # maximum number of attempts before giving up. - config :rebalance_max_retries, :validate => :number, :default => 4 - # Backoff time between retries during rebalance. - config :rebalance_backoff_ms, :validate => :number, :default => 2000 - # Throw a timeout exception to the consumer if no message is available for consumption after - # the specified interval - config :consumer_timeout_ms, :validate => :number, :default => -1 - # Option to restart the consumer loop on error - config :consumer_restart_on_error, :validate => :boolean, :default => true - # Time in millis to wait for consumer to restart after an error - config :consumer_restart_sleep_ms, :validate => :number, :default => 0 - # Option to add Kafka metadata like topic, message size to the event - config :decorate_events, :validate => :boolean, :default => false - # A unique id for the consumer; generated automatically if not set. - config :consumer_id, :validate => :string, :default => nil - # The number of byes of messages to attempt to fetch for each topic-partition in each fetch - # request. These bytes will be read into memory for each partition, so this helps control - # the memory used by the consumer. The fetch request size must be at least as large as the - # maximum message size the server allows or else it is possible for the producer to send - # messages larger than the consumer can fetch. 
- config :fetch_message_max_bytes, :validate => :number, :default => 1048576 - - public - def register - jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*/libs/*.jar") - Dir[jarpath].each do |jar| - require jar - end - require 'jruby-kafka' - options = { - :zk_connect => @zk_connect, - :group_id => @group_id, - :topic_id => @topic_id, - :rebalance_max_retries => @rebalance_max_retries, - :rebalance_backoff_ms => @rebalance_backoff_ms, - :consumer_timeout_ms => @consumer_timeout_ms, - :consumer_restart_on_error => @consumer_restart_on_error, - :consumer_restart_sleep_ms => @consumer_restart_sleep_ms, - :consumer_id => @consumer_id, - :fetch_message_max_bytes => @fetch_message_max_bytes - } - if @reset_beginning == true - options[:reset_beginning] = 'from-beginning' - end # if :reset_beginning - @kafka_client_queue = SizedQueue.new(@queue_size) - @consumer_group = Kafka::Group.new(options) - @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect) - end # def register - - public - def run(logstash_queue) - java_import 'kafka.common.ConsumerRebalanceFailedException' - @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect) - begin - @consumer_group.run(@consumer_threads,@kafka_client_queue) - begin - while true - event = @kafka_client_queue.pop - queue_event("#{event}",logstash_queue) - end - rescue LogStash::ShutdownSignal - @logger.info('Kafka got shutdown signal') - @consumer_group.shutdown() - end - until @kafka_client_queue.empty? - queue_event("#{@kafka_client_queue.pop}",logstash_queue) - end - @logger.info('Done running kafka input') - rescue => e - @logger.warn('kafka client threw exception, restarting', - :exception => e) - if @consumer_group.running? - @consumer_group.shutdown() - end - sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000) - retry - end - finished - end # def run - - private - def queue_event(msg, output_queue) - begin - @codec.decode(msg) do |event| - decorate(event) - if @decorate_events - event['kafka'] = {'msg_size' => msg.bytesize, 'topic' => @topic_id, 'consumer_group' => @group_id} - end - output_queue << event - end # @codec.decode - rescue => e # parse or event creation error - @logger.error("Failed to create event", :message => msg, :exception => e, - :backtrace => e.backtrace); - end # begin - end # def queue_event - -end #class LogStash::Inputs::Kafka diff --git a/lib/logstash/outputs/kafka.rb b/lib/logstash/outputs/kafka.rb deleted file mode 100644 index ae050c9280b..00000000000 --- a/lib/logstash/outputs/kafka.rb +++ /dev/null @@ -1,158 +0,0 @@ -require 'logstash/namespace' -require 'logstash/outputs/base' - -# Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on -# the broker. -# -# The only required configuration is the topic name. The default codec is json, -# so events will be persisted on the broker in json format. If you select a codec of plain, -# Logstash will encode your messages with not only the message but also with a timestamp and -# hostname. 
If you do not want anything but your message passing through, you should make the output -# configuration something like: -# output { -# kafka { -# codec => plain { -# format => "%{message}" -# } -# } -# } -# For more information see http://kafka.apache.org/documentation.html#theproducer -# -# Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs -class LogStash::Outputs::Kafka < LogStash::Outputs::Base - config_name 'kafka' - milestone 1 - - default :codec, 'json' - # This is for bootstrapping and the producer will only use it for getting metadata (topics, - # partitions and replicas). The socket connections for sending the actual data will be - # established based on the broker information returned in the metadata. The format is - # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a - # subset of brokers. - config :broker_list, :validate => :string, :default => 'localhost:9092' - # The topic to produce the messages to - config :topic_id, :validate => :string, :required => true - # This parameter allows you to specify the compression codec for all data generated by this - # producer. Valid values are "none", "gzip" and "snappy". - config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none' - # This parameter allows you to set whether compression should be turned on for particular - # topics. If the compression codec is anything other than NoCompressionCodec, - # enable compression only for specified topics if any. If the list of compressed topics is - # empty, then enable the specified compression codec for all topics. If the compression codec - # is NoCompressionCodec, compression is disabled for all topics - config :compressed_topics, :validate => :string, :default => '' - # This value controls when a produce request is considered completed. Specifically, - # how many other brokers must have committed the data to their log and acknowledged this to the - # leader. For more info, see -- http://kafka.apache.org/documentation.html#producerconfigs - config :request_required_acks, :validate => [-1,0,1], :default => 0 - # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[] - config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder' - # The partitioner class for partitioning messages amongst partitions in the topic. The default - # partitioner is based on the hash of the key. If the key is null, - # the message is sent to a random partition in the broker. - # NOTE: topic_metadata_refresh_interval_ms controls how long the producer will distribute to a - # partition in the topic. This defaults to 10 mins, so the producer will continue to write to a - # single partition for 10 mins before it switches - config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner' - # The amount of time the broker will wait trying to meet the request.required.acks requirement - # before sending back an error to the client. - config :request_timeout_ms, :validate => :number, :default => 10000 - # This parameter specifies whether the messages are sent asynchronously in a background thread. - # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By - # setting the producer to async we allow batching together of requests (which is great for - # throughput) but open the possibility of a failure of the client machine dropping unsent data. 
- config :producer_type, :validate => %w( sync async ), :default => 'sync' - # The serializer class for keys (defaults to the same as for messages if nothing is given) - config :key_serializer_class, :validate => :string, :default => nil - # This property will cause the producer to automatically retry a failed send request. This - # property specifies the number of retries when such failures occur. Note that setting a - # non-zero value here can lead to duplicates in the case of network errors that cause a message - # to be sent but the acknowledgement to be lost. - config :message_send_max_retries, :validate => :number, :default => 3 - # Before each retry, the producer refreshes the metadata of relevant topics to see if a new - # leader has been elected. Since leader election takes a bit of time, - # this property specifies the amount of time that the producer waits before refreshing the - # metadata. - config :retry_backoff_ms, :validate => :number, :default => 100 - # The producer generally refreshes the topic metadata from brokers when there is a failure - # (partition missing, leader not available...). It will also poll regularly (default: every - # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on - # failure. If you set this to zero, the metadata will get refreshed after each message sent - # (not recommended). Important note: the refresh happen only AFTER the message is sent, - # so if the producer never sends a message the metadata is never refreshed - config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000 - # Maximum time to buffer data when using async mode. For example a setting of 100 will try to - # batch together 100ms of messages to send at once. This will improve throughput but adds - # message delivery latency due to the buffering. - config :queue_buffering_max_ms, :validate => :number, :default => 5000 - # The maximum number of unsent messages that can be queued up the producer when using async - # mode before either the producer must be blocked or data must be dropped. - config :queue_buffering_max_messages, :validate => :number, :default => 10000 - # The amount of time to block before dropping messages when running in async mode and the - # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued - # immediately or dropped if the queue is full (the producer send call will never block). If set - # to -1 the producer will block indefinitely and never willingly drop a send. - config :queue_enqueue_timeout_ms, :validate => :number, :default => -1 - # The number of messages to send in one batch when using async mode. The producer will wait - # until either this number of messages are ready to send or queue.buffer.max.ms is reached. - config :batch_num_messages, :validate => :number, :default => 200 - # Socket write buffer size - config :send_buffer_bytes, :validate => :number, :default => 100 * 1024 - # The client id is a user-specified string sent in each request to help trace calls. It should - # logically identify the application making the request. 
- config :client_id, :validate => :string, :default => "" - - public - def register - jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*/libs/*.jar") - Dir[jarpath].each do |jar| - require jar - end - require 'jruby-kafka' - options = { - :broker_list => @broker_list, - :compression_codec => @compression_codec, - :compressed_topics => @compressed_topics, - :request_required_acks => @request_required_acks, - :serializer_class => @serializer_class, - :partitioner_class => @partitioner_class, - :request_timeout_ms => @request_timeout_ms, - :producer_type => @producer_type, - :key_serializer_class => @key_serializer_class, - :message_send_max_retries => @message_send_max_retries, - :retry_backoff_ms => @retry_backoff_ms, - :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms, - :queue_buffering_max_ms => @queue_buffering_max_ms, - :queue_buffering_max_messages => @queue_buffering_max_messages, - :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms, - :batch_num_messages => @batch_num_messages, - :send_buffer_bytes => @send_buffer_bytes, - :client_id => @client_id - } - @producer = Kafka::Producer.new(options) - @producer.connect() - - @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list) - - @codec.on_event do |event| - begin - @producer.sendMsg(@topic_id,nil,event) - rescue LogStash::ShutdownSignal - @logger.info('Kafka producer got shutdown signal') - rescue => e - @logger.warn('kafka producer threw exception, restarting', - :exception => e) - end - end - end # def register - - def receive(event) - return unless output?(event) - if event == LogStash::SHUTDOWN - finished - return - end - @codec.encode(event) - end - -end #class LogStash::Outputs::Kafka diff --git a/spec/inputs/kafka_spec.rb b/spec/inputs/kafka_spec.rb deleted file mode 100644 index 7aba00ccfbd..00000000000 --- a/spec/inputs/kafka_spec.rb +++ /dev/null @@ -1,57 +0,0 @@ -# encoding: utf-8 - -require 'rspec' -require 'insist' -require 'logstash/namespace' -require 'logstash/inputs/kafka' -require 'logstash/errors' - -describe LogStash::Inputs::Kafka do - - - let (:kafka_config) {{"topic_id" => "test"}} - - it 'should populate kafka config with default values' do - kafka = LogStash::Inputs::Kafka.new(kafka_config) - insist {kafka.zk_connect} == "localhost:2181" - insist {kafka.topic_id} == "test" - insist {kafka.group_id} == "logstash" - insist {kafka.reset_beginning} == false - end - - it "should register and load kafka jars without errors" do - kafka = LogStash::Inputs::Kafka.new(kafka_config) - kafka.register - end - - it "should retrieve event from kafka" do - # Extend class to control behavior - class LogStash::Inputs::TestKafka < LogStash::Inputs::Kafka - milestone 1 - private - def queue_event(msg, output_queue) - super(msg, output_queue) - # need to raise exception here to stop the infinite loop - raise LogStash::ShutdownSignal - end - end - - kafka = LogStash::Inputs::TestKafka.new(kafka_config) - kafka.register - - class Kafka::Group - public - def run(a_numThreads, a_queue) - a_queue << "Kafka message" - end - end - - logstash_queue = Queue.new - kafka.run logstash_queue - e = logstash_queue.pop - insist { e["message"] } == "Kafka message" - # no metadata by default - insist { e["kafka"] } == nil - end - -end diff --git a/spec/outputs/kafka_spec.rb b/spec/outputs/kafka_spec.rb deleted file mode 100644 index 28c36028500..00000000000 --- a/spec/outputs/kafka_spec.rb +++ /dev/null @@ -1,39 +0,0 @@ -# encoding: utf-8 - -require 'rspec' 
-require 'insist' -require 'logstash/namespace' -require "logstash/timestamp" -require 'logstash/outputs/kafka' - -describe LogStash::Outputs::Kafka do - - let (:kafka_config) {{"topic_id" => "test"}} - - it 'should populate kafka config with default values' do - kafka = LogStash::Outputs::Kafka.new(kafka_config) - insist {kafka.broker_list} == "localhost:9092" - insist {kafka.topic_id} == "test" - insist {kafka.compression_codec} == "none" - insist {kafka.serializer_class} == "kafka.serializer.StringEncoder" - insist {kafka.partitioner_class} == "kafka.producer.DefaultPartitioner" - insist {kafka.producer_type} == "sync" - end - - it "should register and load kafka jars without errors" do - kafka = LogStash::Outputs::Kafka.new(kafka_config) - kafka.register - end - - it "should send logstash event to kafka broker" do - kafka = LogStash::Outputs::Kafka.new(kafka_config) - kafka.register - timestamp = LogStash::Timestamp.now - expect_any_instance_of(Kafka::Producer) - .to receive(:sendMsg) - .with("test", nil, "{\"message\":\"hello world\",\"host\":\"test\",\"@timestamp\":\"#{timestamp}\",\"@version\":\"1\"}") - e = LogStash::Event.new({"message" => "hello world", "host" => "test", "@timestamp" => timestamp}) - kafka.receive(e) - end - -end
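The deleted specs double as a usage record for the plugins that moved out of the tree. A sketch of driving the output the way the removed spec did — assuming the standalone plugin gem keeps the same class name and options:

    require "logstash/outputs/kafka"   # now shipped by the separate plugin
    require "logstash/event"

    # Defaults documented in the deleted lib/logstash/outputs/kafka.rb:
    # broker_list "localhost:9092", producer_type "sync", json codec.
    kafka = LogStash::Outputs::Kafka.new("topic_id" => "test")
    kafka.register    # loads the bundled Kafka jars and connects the producer
    kafka.receive(LogStash::Event.new("message" => "hello world"))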