
Commit 3613b32

Merge pull request #1356 from fluent/introduce-socket-plugin-helper
Introduce socket plugin helper
2 parents (66fb153 + c627ef5), commit 3613b32

File tree: 11 files changed (+564 / -382 lines)

lib/fluent/plugin/out_forward.rb (+195 / -173 lines)

(Large diffs are not rendered by default.)

lib/fluent/plugin/output.rb (+71 / -46 lines)
@@ -40,6 +40,8 @@ class Output < Base

 CHUNKING_FIELD_WARN_NUM = 4

+PROCESS_CLOCK_ID = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
+
 config_param :time_as_integer, :bool, default: false

 # `<buffer>` and `<secondary>` sections are available only when '#format' and '#write' are implemented
@@ -138,7 +140,7 @@ def prefer_delayed_commit
 end

 # Internal states
-FlushThreadState = Struct.new(:thread, :next_time)
+FlushThreadState = Struct.new(:thread, :next_clock)
 DequeuedChunkInfo = Struct.new(:chunk_id, :time, :timeout) do
 def expired?
 time + timeout < Time.now
@@ -898,9 +900,9 @@ def commit_write(chunk_id, delayed: @delayed_commit, secondary: false)
 @retry_mutex.synchronize do
 if @retry # success to flush chunks in retries
 if secondary
-log.warn "retry succeeded by secondary.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(chunk_id)
+log.warn "retry succeeded by secondary.", chunk_id: dump_unique_id_hex(chunk_id)
 else
-log.warn "retry succeeded.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(chunk_id)
+log.warn "retry succeeded.", chunk_id: dump_unique_id_hex(chunk_id)
 end
 @retry = nil
 end
@@ -918,6 +920,8 @@ def rollback_write(chunk_id)
 # in many cases, false can be just ignored
 if @buffer.takeback_chunk(chunk_id)
 @counters_monitor.synchronize{ @rollback_count += 1 }
+primary = @as_secondary ? @primary_instance : self
+primary.update_retry_state(chunk_id, @as_secondary)
 true
 else
 false
@@ -930,7 +934,9 @@ def try_rollback_write
 info = @dequeued_chunks.shift
 if @buffer.takeback_chunk(info.chunk_id)
 @counters_monitor.synchronize{ @rollback_count += 1 }
-log.warn "failed to flush the buffer chunk, timeout to commit.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(info.chunk_id), flushed_at: info.time
+log.warn "failed to flush the buffer chunk, timeout to commit.", chunk_id: dump_unique_id_hex(info.chunk_id), flushed_at: info.time
+primary = @as_secondary ? @primary_instance : self
+primary.update_retry_state(info.chunk_id, @as_secondary)
 end
 end
 end
@@ -943,7 +949,9 @@ def try_rollback_all
 info = @dequeued_chunks.shift
 if @buffer.takeback_chunk(info.chunk_id)
 @counters_monitor.synchronize{ @rollback_count += 1 }
-log.info "delayed commit for buffer chunks was cancelled in shutdown", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(info.chunk_id)
+log.info "delayed commit for buffer chunks was cancelled in shutdown", chunk_id: dump_unique_id_hex(info.chunk_id)
+primary = @as_secondary ? @primary_instance : self
+primary.update_retry_state(info.chunk_id, @as_secondary)
 end
 end
 end
@@ -997,43 +1005,60 @@ def try_flush
 log.trace "done to commit a chunk", chunk: dump_chunk_id
 end
 rescue => e
-log.debug "taking back chunk for errors.", plugin_id: plugin_id, chunk: dump_unique_id_hex(chunk.unique_id)
+log.debug "taking back chunk for errors.", chunk: dump_unique_id_hex(chunk.unique_id)
 if output.delayed_commit
 @dequeued_chunks_mutex.synchronize do
 @dequeued_chunks.delete_if{|d| d.chunk_id == chunk.unique_id }
 end
 end
 @buffer.takeback_chunk(chunk.unique_id)

-@retry_mutex.synchronize do
-if @retry
-@counters_monitor.synchronize{ @num_errors += 1 }
-if @retry.limit?
-records = @buffer.queued_records
-log.error "failed to flush the buffer, and hit limit for retries. dropping all chunks in the buffer queue.", plugin_id: plugin_id, retry_times: @retry.steps, records: records, error: e
-log.error_backtrace e.backtrace
-@buffer.clear_queue!
-log.debug "buffer queue cleared", plugin_id: plugin_id
-@retry = nil
-else
-@retry.step
-msg = if using_secondary
-"failed to flush the buffer with secondary output."
-else
-"failed to flush the buffer."
-end
-log.warn msg, plugin_id: plugin_id, retry_time: @retry.steps, next_retry: @retry.next_time, chunk: dump_unique_id_hex(chunk.unique_id), error: e
-log.warn_backtrace e.backtrace
-end
+update_retry_state(chunk.unique_id, using_secondary, e)
+
+raise if @under_plugin_development && !@retry_for_error_chunk
+end
+end
+
+def update_retry_state(chunk_id, using_secondary, error = nil)
+@retry_mutex.synchronize do
+@counters_monitor.synchronize{ @num_errors += 1 }
+chunk_id_hex = dump_unique_id_hex(chunk_id)
+
+unless @retry
+@retry = retry_state(@buffer_config.retry_randomize)
+if error
+log.warn "failed to flush the buffer.", retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+log.warn_backtrace error.backtrace
+end
+return
+end
+
+# @retry exists
+
+if error
+if @retry.limit?
+records = @buffer.queued_records
+msg = "failed to flush the buffer, and hit limit for retries. dropping all chunks in the buffer queue."
+log.error msg, retry_times: @retry.steps, records: records, error: error
+log.error_backtrace error.backtrace
+elsif using_secondary
+msg = "failed to flush the buffer with secondary output."
+log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+log.warn_backtrace error.backtrace
 else
-@retry = retry_state(@buffer_config.retry_randomize)
-@counters_monitor.synchronize{ @num_errors += 1 }
-log.warn "failed to flush the buffer.", plugin_id: plugin_id, retry_time: @retry.steps, next_retry: @retry.next_time, chunk: dump_unique_id_hex(chunk.unique_id), error: e
-log.warn_backtrace e.backtrace
+msg = "failed to flush the buffer."
+log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+log.warn_backtrace error.backtrace
 end
 end

-raise if @under_plugin_development && !@retry_for_error_chunk
+if @retry.limit?
+@buffer.clear_queue!
+log.debug "buffer queue cleared"
+@retry = nil
+else
+@retry.step
+end
 end
 end
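
For readability, here is the new #update_retry_state assembled from the added lines in the hunk above, with indentation restored; the code is the same as in the diff. Note that the rollback paths earlier in this file now route the call through the primary instance when the plugin runs as a secondary (primary = @as_secondary ? @primary_instance : self), so retry bookkeeping always lives on the primary.

def update_retry_state(chunk_id, using_secondary, error = nil)
  @retry_mutex.synchronize do
    @counters_monitor.synchronize{ @num_errors += 1 }
    chunk_id_hex = dump_unique_id_hex(chunk_id)

    unless @retry
      @retry = retry_state(@buffer_config.retry_randomize)
      if error
        log.warn "failed to flush the buffer.", retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
        log.warn_backtrace error.backtrace
      end
      return
    end

    # @retry exists

    if error
      if @retry.limit?
        records = @buffer.queued_records
        msg = "failed to flush the buffer, and hit limit for retries. dropping all chunks in the buffer queue."
        log.error msg, retry_times: @retry.steps, records: records, error: error
        log.error_backtrace error.backtrace
      elsif using_secondary
        msg = "failed to flush the buffer with secondary output."
        log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
        log.warn_backtrace error.backtrace
      else
        msg = "failed to flush the buffer."
        log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
        log.warn_backtrace error.backtrace
      end
    end

    if @retry.limit?
      @buffer.clear_queue!
      log.debug "buffer queue cleared"
      @retry = nil
    else
      @retry.step
    end
  end
end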

@@ -1060,7 +1085,7 @@ def submit_flush_once
 # Without locks: it is rough but enough to select "next" writer selection
 @output_flush_thread_current_position = (@output_flush_thread_current_position + 1) % @buffer_config.flush_thread_count
 state = @output_flush_threads[@output_flush_thread_current_position]
-state.next_time = 0
+state.next_clock = 0
 if state.thread && state.thread.status # "run"/"sleep"/"aborting" or false(successfully stop) or nil(killed by exception)
 state.thread.run
 else
@@ -1102,7 +1127,7 @@ def enqueue_thread_wait
 # only for tests of output plugin
 def flush_thread_wakeup
 @output_flush_threads.each do |state|
-state.next_time = 0
+state.next_clock = 0
 state.thread.run
 end
 end
@@ -1156,7 +1181,7 @@ def enqueue_thread_run
 end
 rescue => e
 raise if @under_plugin_development
-log.error "unexpected error while checking flushed chunks. ignored.", plugin_id: plugin_id, error: e
+log.error "unexpected error while checking flushed chunks. ignored.", error: e
 log.error_backtrace
 ensure
 @output_enqueue_thread_waiting = false
@@ -1166,7 +1191,7 @@ def enqueue_thread_run
 end
 rescue => e
 # normal errors are rescued by inner begin-rescue clause.
-log.error "error on enqueue thread", plugin_id: plugin_id, error: e
+log.error "error on enqueue thread", error: e
 log.error_backtrace
 raise
 end
@@ -1175,9 +1200,7 @@ def enqueue_thread_run
 def flush_thread_run(state)
 flush_thread_interval = @buffer_config.flush_thread_interval

-# If the given clock_id is not supported, Errno::EINVAL is raised.
-clock_id = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
-state.next_time = Process.clock_gettime(clock_id) + flush_thread_interval
+state.next_clock = Process.clock_gettime(PROCESS_CLOCK_ID) + flush_thread_interval

 while !self.after_started? && !self.stopped?
 sleep 0.5
@@ -1187,16 +1210,18 @@ def flush_thread_run(state)
 begin
 # This thread don't use `thread_current_running?` because this thread should run in `before_shutdown` phase
 while @output_flush_threads_running
-time = Process.clock_gettime(clock_id)
-interval = state.next_time - time
+current_clock = Process.clock_gettime(PROCESS_CLOCK_ID)
+interval = state.next_clock - current_clock

-if state.next_time <= time
+if state.next_clock <= current_clock && (!@retry || @retry_mutex.synchronize{ @retry.next_time } <= Time.now)
 try_flush
-# next_flush_interval uses flush_thread_interval or flush_thread_burst_interval (or retrying)
+
+# next_flush_time uses flush_thread_interval or flush_thread_burst_interval (or retrying)
 interval = next_flush_time.to_f - Time.now.to_f
-# TODO: if secondary && delayed-commit, next_flush_time will be much longer than expected (because @retry still exists)
-# @retry should be cleared if delayed commit is enabled? Or any other solution?
-state.next_time = Process.clock_gettime(clock_id) + interval
+# TODO: if secondary && delayed-commit, next_flush_time will be much longer than expected
+# because @retry still exists (#commit_write is not called yet in #try_flush)
+# @retry should be cleared if delayed commit is enabled? Or any other solution?
+state.next_clock = Process.clock_gettime(PROCESS_CLOCK_ID) + interval
 end

 if @dequeued_chunks_mutex.synchronize{ !@dequeued_chunks.empty? && @dequeued_chunks.first.expired? }
@@ -1210,7 +1235,7 @@ def flush_thread_run(state)
 rescue => e
 # normal errors are rescued by output plugins in #try_flush
 # so this rescue section is for critical & unrecoverable errors
-log.error "error on output thread", plugin_id: plugin_id, error: e
+log.error "error on output thread", error: e
 log.error_backtrace
 raise
 end
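
The flush scheduling above now reads a single PROCESS_CLOCK_ID constant and stores per-thread deadlines in state.next_clock, so flush timing is driven by a monotonic clock and is unaffected by wall-clock adjustments. A minimal standalone sketch of the same pattern (not Fluentd code; the 1-second interval and the puts stand in for flush_thread_interval and try_flush):

# Pick a monotonic clock once. CLOCK_MONOTONIC_RAW is not defined on every
# platform, so fall back to CLOCK_MONOTONIC when the constant is missing.
CLOCK_ID = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC

interval = 1.0                                      # stands in for flush_thread_interval
next_clock = Process.clock_gettime(CLOCK_ID) + interval

3.times do
  now = Process.clock_gettime(CLOCK_ID)
  if next_clock <= now
    puts "flush!"                                   # try_flush would run here
    next_clock = Process.clock_gettime(CLOCK_ID) + interval
  end
  sleep([next_clock - now, 0.1].max)                # wait until the next deadline
end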

lib/fluent/plugin_helper.rb (+1 / -1 lines)

@@ -24,7 +24,7 @@
 require 'fluent/plugin_helper/formatter'
 require 'fluent/plugin_helper/inject'
 require 'fluent/plugin_helper/extract'
-# require 'fluent/plugin_helper/socket'
+require 'fluent/plugin_helper/socket'
 require 'fluent/plugin_helper/server'
 require 'fluent/plugin_helper/retry_state'
 require 'fluent/plugin_helper/compat_parameters'

lib/fluent/plugin_helper/inject.rb (+1 / -1 lines)

@@ -99,7 +99,7 @@ def configure(conf)
 if @_inject_hostname_key
 @_inject_hostname = @inject_config.hostname
 unless @_inject_hostname
-@_inject_hostname = Socket.gethostname
+@_inject_hostname = ::Socket.gethostname
 log.info "using hostname for specified field", host_key: @_inject_hostname_key, host_name: @_inject_hostname
 end
 end
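
The only change here qualifies the constant as ::Socket. This is presumably required because the commit introduces a plugin helper module that is itself named Socket (Fluent::PluginHelper::Socket, required in lib/fluent/plugin_helper.rb above), so an unqualified Socket inside the plugin-helper namespace would resolve to that helper instead of Ruby's stdlib class. A minimal sketch of the constant-lookup behavior, with illustrative module names rather than the real Fluentd ones:

require 'socket'

module PluginHelper
  module Socket   # stands in for the new socket plugin helper
  end

  module Inject
    def self.unqualified
      Socket        # lexical lookup finds PluginHelper::Socket first
    end

    def self.qualified
      ::Socket      # explicit top-level lookup finds Ruby's stdlib Socket
    end
  end
end

puts PluginHelper::Inject.unqualified            # => PluginHelper::Socket
puts PluginHelper::Inject.qualified.gethostname  # => this machine's hostname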

lib/fluent/plugin_helper/server.rb (+24 / -17 lines)
@@ -108,20 +108,24 @@ def server_create_connection(title, port, proto: :tcp, bind: '0.0.0.0', shared:
 # sock.remote_port
 # # ...
 # end
-def server_create(title, port, proto: :tcp, bind: '0.0.0.0', shared: true, backlog: nil, max_bytes: nil, flags: 0, **socket_options, &callback)
+def server_create(title, port, proto: :tcp, bind: '0.0.0.0', shared: true, socket: nil, backlog: nil, max_bytes: nil, flags: 0, **socket_options, &callback)
 raise ArgumentError, "BUG: title must be a symbol" unless title && title.is_a?(Symbol)
 raise ArgumentError, "BUG: port must be an integer" unless port && port.is_a?(Integer)
 raise ArgumentError, "BUG: invalid protocol name" unless PROTOCOLS.include?(proto)

+raise ArgumentError, "BUG: socket option is available only for udp" if socket && proto != :udp
+
 raise ArgumentError, "BUG: block not specified which handles received data" unless block_given?
 raise ArgumentError, "BUG: block must have 1 or 2 arguments" unless callback.arity == 1 || callback.arity == 2

 if proto == :tcp || proto == :tls # default linger_timeout only for server
 socket_options[:linger_timeout] ||= 0
 end

-socket_option_validate!(proto, **socket_options)
-socket_option_setter = ->(sock){ socket_option_set(sock, **socket_options) }
+unless socket
+socket_option_validate!(proto, **socket_options)
+socket_option_setter = ->(sock){ socket_option_set(sock, **socket_options) }
+end

 if proto != :tcp && proto != :tls && proto != :unix # options to listen/accept connections
 raise ArgumentError, "BUG: backlog is available for tcp/tls" if backlog
@@ -140,9 +144,15 @@ def server_create(title, port, proto: :tcp, bind: '0.0.0.0', shared: true, backl
 raise "not implemented yet"
 when :udp
 raise ArgumentError, "BUG: max_bytes must be specified for UDP" unless max_bytes
-sock = server_create_udp_socket(shared, bind, port)
-socket_option_setter.call(sock)
-server = EventHandler::UDPServer.new(sock, max_bytes, flags, @log, @under_plugin_development, &callback)
+if socket
+sock = socket
+close_socket = false
+else
+sock = server_create_udp_socket(shared, bind, port)
+socket_option_setter.call(sock)
+close_socket = true
+end
+server = EventHandler::UDPServer.new(sock, max_bytes, flags, close_socket, @log, @under_plugin_development, &callback)
 when :unix
 raise "not implemented yet"
 else
@@ -267,10 +277,11 @@ def server_create_tls_socket(shared, bind, port)
 end

 class CallbackSocket
-def initialize(server_type, sock, enabled_events = [])
+def initialize(server_type, sock, enabled_events = [], close_socket: true)
 @server_type = server_type
 @sock = sock
 @enabled_events = enabled_events
+@close_socket = close_socket
 end

 def remote_addr
@@ -294,12 +305,7 @@ def write(data)
 end

 def close
-@sock.close
-# close cool.io socket in another thread, not to make deadlock
-# for flushing @_write_buffer when conn.close is called in callback
-# ::Thread.new{
-#   @sock.close
-# }
+@sock.close if @close_socket
 end

 def data(&callback)
@@ -334,8 +340,8 @@ def write(data)
 end

 class UDPCallbackSocket < CallbackSocket
-def initialize(sock, peeraddr)
-super("udp", sock, [])
+def initialize(sock, peeraddr, **kwargs)
+super("udp", sock, [], **kwargs)
 @peeraddr = peeraddr
 end

@@ -358,14 +364,15 @@ def write(data)

 module EventHandler
 class UDPServer < Coolio::IO
-def initialize(sock, max_bytes, flags, log, under_plugin_development, &callback)
+def initialize(sock, max_bytes, flags, close_socket, log, under_plugin_development, &callback)
 raise ArgumentError, "socket must be a UDPSocket: sock = #{sock}" unless sock.is_a?(UDPSocket)

 super(sock)

 @sock = sock
 @max_bytes = max_bytes
 @flags = flags
+@close_socket = close_socket
 @log = log
 @under_plugin_development = under_plugin_development
 @callback = callback
@@ -398,7 +405,7 @@ def on_readable_with_sock
 rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNRESET
 return
 end
-@callback.call(data, UDPCallbackSocket.new(@sock, addr))
+@callback.call(data, UDPCallbackSocket.new(@sock, addr, close_socket: @close_socket))
 rescue => e
 @log.error "unexpected error in processing UDP data", error: e
 @log.error_backtrace
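
Based on the new server_create signature above, a hedged usage sketch of the socket: option: a plugin that already owns a bound UDP socket can hand it to the helper, which skips socket-option setup for it and, because close_socket is false for supplied sockets, will not close it when the callback calls conn.close. The socket: option is accepted only with proto: :udp, and max_bytes is still required. The title, port, and handler below are illustrative, not part of this commit:

require 'socket'

# Somewhere inside a plugin class that includes the server plugin helper:
sock = UDPSocket.new
sock.bind('0.0.0.0', 5160)   # the plugin owns and manages this socket itself

server_create(:example_udp, 5160, proto: :udp, max_bytes: 2048, socket: sock) do |data, conn|
  log.debug "received UDP payload", size: data.bytesize, from: conn.remote_addr
  conn.close   # does not close the supplied socket, since close_socket is false here
end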
