From 8136d386bb3e54f684a68e591831164bc934886d Mon Sep 17 00:00:00 2001 From: Hiroshi Hatake Date: Tue, 4 Aug 2020 17:05:37 +0900 Subject: [PATCH 1/2] buffer: Enable optimization for metadata comparison on all platforms Signed-off-by: Hiroshi Hatake Co-authored-by: Ashie Takuro --- lib/fluent/plugin/buffer.rb | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/lib/fluent/plugin/buffer.rb b/lib/fluent/plugin/buffer.rb index 5b9380aef8..5382ebd346 100644 --- a/lib/fluent/plugin/buffer.rb +++ b/lib/fluent/plugin/buffer.rb @@ -143,33 +143,16 @@ def <=>(o) end end - # timekey should be unixtime as usual. - # So, unixtime should be bigger than 2^30 - 1 (= 1073741823) nowadays. - # We should check object_id stability to use object_id as optimization for comparing operations. - # e.g.) - # irb> Time.parse("2020/07/31 18:30:00+09:00").to_i - # => 1596187800 - # irb> Time.parse("2020/07/31 18:30:00+09:00").to_i > 2**30 -1 - # => true - def self.enable_optimize? - a1 = 2**30 - 1 - a2 = 2**30 - 1 - b1 = 2**62 - 1 - b2 = 2**62 - 1 - (a1.object_id == a2.object_id) && (b1.object_id == b2.object_id) - end - # This is an optimization code. Current Struct's implementation is comparing all data. # https://github.com/ruby/ruby/blob/0623e2b7cc621b1733a760b72af246b06c30cf96/struct.c#L1200-L1203 # Actually this overhead is very small but this class is generated *per chunk* (and used in hash object). # This means that this class is one of the most called object in Fluentd. # See https://github.com/fluent/fluentd/pull/2560 - # But, this optimization has a side effect on Windows and 32bit environment(s) due to differing object_id. - # This difference causes flood of buffer files. - # So, this optimization should be enabled on `enable_optimize?` as true platforms. + # Note that Integer#hash returns stable value. + # So, we can use this feature to enable this optimization in **all platform**. def hash - timekey.object_id - end if enable_optimize? 
+ timekey.hash + end end # for tests From f78c8738668de0ebaac72545a2e427a8287f98e4 Mon Sep 17 00:00:00 2001 From: Hiroshi Hatake Date: Wed, 5 Aug 2020 09:47:15 +0900 Subject: [PATCH 2/2] Remove additional comments These lines are not needed to explain what the code below does. Signed-off-by: Hiroshi Hatake --- lib/fluent/plugin/buffer.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/fluent/plugin/buffer.rb b/lib/fluent/plugin/buffer.rb index 5382ebd346..488a47973e 100644 --- a/lib/fluent/plugin/buffer.rb +++ b/lib/fluent/plugin/buffer.rb @@ -148,8 +148,6 @@ def <=>(o) # Actually this overhead is very small but this class is generated *per chunk* (and used in hash object). # This means that this class is one of the most called object in Fluentd. # See https://github.com/fluent/fluentd/pull/2560 - # Note that Integer#hash returns stable value. - # So, we can use this feature to enable this optimization in **all platform**. def hash timekey.hash end