Make stage_size & stage computation thread safe #2734

Merged (2 commits, Jan 6, 2020)
54 changes: 35 additions & 19 deletions lib/fluent/plugin/buffer.rb
@@ -270,10 +270,10 @@ def write(metadata_and_data, format: nil, size: nil, enqueue: false)

log.on_trace { log.trace "writing events into buffer", instance: self.object_id, metadata_size: metadata_and_data.size }

staged_bytesize = 0
operated_chunks = []
unstaged_chunks = {} # metadata => [chunk, chunk, ...]
chunks_to_enqueue = []
staged_bytesizes_by_chunk = {}

begin
# sort metadata to get lock of chunks in same order with other threads
@@ -283,7 +283,13 @@ def write(metadata_and_data, format: nil, size: nil, enqueue: false)
chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads
operated_chunks << chunk
if chunk.staged?
staged_bytesize += adding_bytesize
#
# https://github.com/fluent/fluentd/issues/2712
# write_once is supposed to write to a chunk only once,
# but this block **may** run multiple times from write_step_by_step and a previous write may be rolled back.
# So we should count the stage_size only for the last successful write.
#
staged_bytesizes_by_chunk[chunk] = adding_bytesize
Member:
I think the following situation can happen. What do you think?

  1. staged_bytesizes_by_chunk stores chunk1
  2. a rollback happens in write_step_by_step (ShouldRetry is raised)
  3. another thread enqueues chunk1 before this thread reaches
    chunk = get_next_chunk.call
  4. a new chunk (chunk2) is created in
    synchronize{ @stage[metadata] ||= generate_chunk(metadata).staged! }
  5. staged_bytesizes_by_chunk stores chunk2 (chunk1 still remains in staged_bytesizes_by_chunk)
Author:
@ganmacs Step (3) can't happen, because enqueue_chunk can't get the lock and waits until the lock is released by the thread that is writing to the chunk.

The lock is acquired here: https://github.com/fluent/fluentd/blob/master/lib/fluent/plugin/buffer.rb#L279
and released here: https://github.com/fluent/fluentd/blob/master/lib/fluent/plugin/buffer.rb#L319

So enqueue_chunk can only happen after the commit is done. Hope this helps.

Member:
Makes sense, you're right. The other thread can get the chunk but can't enter chunk.synchronize do.
elsif chunk.unstaged?
unstaged_chunks[metadata] ||= []
unstaged_chunks[metadata] << chunk
@@ -330,27 +336,37 @@ def write(metadata_and_data, format: nil, size: nil, enqueue: false)

# All locks about chunks are released.

synchronize do
# At here, staged chunks may be enqueued by other threads.
@stage_size += staged_bytesize

chunks_to_enqueue.each do |c|
if c.staged? && (enqueue || chunk_size_full?(c))
m = c.metadata
enqueue_chunk(m)
if unstaged_chunks[m]
u = unstaged_chunks[m].pop
#
# Now update the stage, stage_size with proper locking
# FIX FOR stage_size miscomputation - https://github.com/fluent/fluentd/issues/2712
#
staged_bytesizes_by_chunk.each do |chunk, bytesize|
chunk.synchronize do
synchronize { @stage_size += bytesize }
log.on_trace { log.trace { "chunk #{chunk.path} size_added: #{bytesize} new_size: #{chunk.bytesize}" } }
end
end

chunks_to_enqueue.each do |c|
if c.staged? && (enqueue || chunk_size_full?(c))
m = c.metadata
enqueue_chunk(m)
if unstaged_chunks[m]
u = unstaged_chunks[m].pop
u.synchronize do
if u.unstaged? && !chunk_size_full?(u)
@stage[m] = u.staged!
@stage_size += u.bytesize
synchronize {
@stage[m] = u.staged!
@stage_size += u.bytesize
}
end
end
elsif c.unstaged?
enqueue_unstaged_chunk(c)
else
# previously staged chunk is already enqueued, closed or purged.
# no problem.
end
elsif c.unstaged?
enqueue_unstaged_chunk(c)
else
# previously staged chunk is already enqueued, closed or purged.
# no problem.
end
end
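The locking argument from the review thread can be sketched as follows. This is a simplified model, not fluentd's buffer: a bare class including MonitorMixin stands in for a chunk, and the two threads stand in for the writer (which holds chunk.mon_enter until after commit) and an enqueuing thread (whose chunk.synchronize must wait).

```ruby
require 'monitor'

# Hypothetical minimal chunk: fluentd's chunks mix in MonitorMixin,
# which is all this sketch needs.
class Chunk
  include MonitorMixin
end

chunk = Chunk.new
events = []

chunk.mon_enter            # writer takes the lock, as in write_once
enqueuer = Thread.new do
  chunk.synchronize do     # blocks until the writer releases the monitor
    events << :enqueued
  end
end

sleep 0.1                  # let the enqueuer reach synchronize and block
events << :committed       # writer commits while still holding the lock
chunk.mon_exit             # release the lock only after the commit
enqueuer.join

p events  # => [:committed, :enqueued]
```

The enqueuer can only ever observe the chunk after the commit, which is why step (3) in the scenario above cannot occur.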
