@@ -40,6 +40,8 @@ class Output < Base
 
       CHUNKING_FIELD_WARN_NUM = 4
 
+      PROCESS_CLOCK_ID = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
+
       config_param :time_as_integer, :bool, default: false
 
       # `<buffer>` and `<secondary>` sections are available only when '#format' and '#write' are implemented
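
(Illustrative note, not part of the patch: the new `PROCESS_CLOCK_ID` constant uses the `rescue` modifier to fall back to `Process::CLOCK_MONOTONIC` on platforms that do not expose `CLOCK_MONOTONIC_RAW`. A minimal standalone sketch of the same idiom, with throwaway variable names:)

    # Prefer the raw monotonic clock when the platform defines it; the
    # `rescue` modifier swallows the error raised for a missing constant.
    clock_id = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC

    started = Process.clock_gettime(clock_id)
    sleep 0.1
    elapsed = Process.clock_gettime(clock_id) - started # immune to wall-clock adjustments
    puts format("elapsed: %.3fs", elapsed)
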
@@ -138,7 +140,7 @@ def prefer_delayed_commit
       end
 
       # Internal states
-      FlushThreadState = Struct.new(:thread, :next_time)
+      FlushThreadState = Struct.new(:thread, :next_clock)
       DequeuedChunkInfo = Struct.new(:chunk_id, :time, :timeout) do
         def expired?
           time + timeout < Time.now
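
(Aside, outside the diff: the field rename to `next_clock` marks a unit change, since the value is now a monotonic clock reading in seconds rather than a wall-clock `Time`. A small sketch of how such a struct can drive scheduling, assuming `CLOCK_MONOTONIC` for brevity:)

    FlushThreadState = Struct.new(:thread, :next_clock)

    state = FlushThreadState.new(Thread.current, nil)
    # Schedule the next flush one second from now on the monotonic clock.
    state.next_clock = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 1.0
    # A scheduler later compares against the same clock to decide whether to flush.
    due = Process.clock_gettime(Process::CLOCK_MONOTONIC) >= state.next_clock
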
@@ -898,9 +900,9 @@ def commit_write(chunk_id, delayed: @delayed_commit, secondary: false)
         @retry_mutex.synchronize do
           if @retry # success to flush chunks in retries
             if secondary
-              log.warn "retry succeeded by secondary.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(chunk_id)
+              log.warn "retry succeeded by secondary.", chunk_id: dump_unique_id_hex(chunk_id)
             else
-              log.warn "retry succeeded.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(chunk_id)
+              log.warn "retry succeeded.", chunk_id: dump_unique_id_hex(chunk_id)
             end
             @retry = nil
           end
@@ -918,6 +920,8 @@ def rollback_write(chunk_id)
         # in many cases, false can be just ignored
         if @buffer.takeback_chunk(chunk_id)
           @counters_monitor.synchronize { @rollback_count += 1 }
+          primary = @as_secondary ? @primary_instance : self
+          primary.update_retry_state(chunk_id, @as_secondary)
           true
         else
           false
@@ -930,7 +934,9 @@ def try_rollback_write
             info = @dequeued_chunks.shift
             if @buffer.takeback_chunk(info.chunk_id)
               @counters_monitor.synchronize { @rollback_count += 1 }
-              log.warn "failed to flush the buffer chunk, timeout to commit.", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(info.chunk_id), flushed_at: info.time
+              log.warn "failed to flush the buffer chunk, timeout to commit.", chunk_id: dump_unique_id_hex(info.chunk_id), flushed_at: info.time
+              primary = @as_secondary ? @primary_instance : self
+              primary.update_retry_state(info.chunk_id, @as_secondary)
             end
           end
         end
@@ -943,7 +949,9 @@ def try_rollback_all
             info = @dequeued_chunks.shift
             if @buffer.takeback_chunk(info.chunk_id)
               @counters_monitor.synchronize { @rollback_count += 1 }
-              log.info "delayed commit for buffer chunks was cancelled in shutdown", plugin_id: plugin_id, chunk_id: dump_unique_id_hex(info.chunk_id)
+              log.info "delayed commit for buffer chunks was cancelled in shutdown", chunk_id: dump_unique_id_hex(info.chunk_id)
+              primary = @as_secondary ? @primary_instance : self
+              primary.update_retry_state(info.chunk_id, @as_secondary)
             end
           end
         end
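
(All three rollback paths above now report through the same helper: when the plugin runs as a secondary output, the call is forwarded to the primary instance so retry state is tracked in one place. A condensed sketch of that delegation; the wrapper name `report_rollback` is hypothetical and not part of the patch:)

    # Hypothetical wrapper showing the shared pattern: a secondary output never
    # tracks retries itself, it hands the chunk id to its primary instance.
    def report_rollback(chunk_id)
      primary = @as_secondary ? @primary_instance : self
      primary.update_retry_state(chunk_id, @as_secondary)
    end
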
@@ -997,43 +1005,60 @@ def try_flush
             log.trace "done to commit a chunk", chunk: dump_chunk_id
           end
         rescue => e
-          log.debug "taking back chunk for errors.", plugin_id: plugin_id, chunk: dump_unique_id_hex(chunk.unique_id)
+          log.debug "taking back chunk for errors.", chunk: dump_unique_id_hex(chunk.unique_id)
           if output.delayed_commit
             @dequeued_chunks_mutex.synchronize do
               @dequeued_chunks.delete_if { |d| d.chunk_id == chunk.unique_id }
             end
           end
           @buffer.takeback_chunk(chunk.unique_id)
 
-          @retry_mutex.synchronize do
-            if @retry
-              @counters_monitor.synchronize { @num_errors += 1 }
-              if @retry.limit?
-                records = @buffer.queued_records
-                log.error "failed to flush the buffer, and hit limit for retries. dropping all chunks in the buffer queue.", plugin_id: plugin_id, retry_times: @retry.steps, records: records, error: e
-                log.error_backtrace e.backtrace
-                @buffer.clear_queue!
-                log.debug "buffer queue cleared", plugin_id: plugin_id
-                @retry = nil
-              else
-                @retry.step
-                msg = if using_secondary
-                        "failed to flush the buffer with secondary output."
-                      else
-                        "failed to flush the buffer."
-                      end
-                log.warn msg, plugin_id: plugin_id, retry_time: @retry.steps, next_retry: @retry.next_time, chunk: dump_unique_id_hex(chunk.unique_id), error: e
-                log.warn_backtrace e.backtrace
-              end
+          update_retry_state(chunk.unique_id, using_secondary, e)
+
+          raise if @under_plugin_development && !@retry_for_error_chunk
+        end
+      end
+
+      def update_retry_state(chunk_id, using_secondary, error = nil)
+        @retry_mutex.synchronize do
+          @counters_monitor.synchronize { @num_errors += 1 }
+          chunk_id_hex = dump_unique_id_hex(chunk_id)
+
+          unless @retry
+            @retry = retry_state(@buffer_config.retry_randomize)
+            if error
+              log.warn "failed to flush the buffer.", retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+              log.warn_backtrace error.backtrace
+            end
+            return
+          end
+
+          # @retry exists
+
+          if error
+            if @retry.limit?
+              records = @buffer.queued_records
+              msg = "failed to flush the buffer, and hit limit for retries. dropping all chunks in the buffer queue."
+              log.error msg, retry_times: @retry.steps, records: records, error: error
+              log.error_backtrace error.backtrace
+            elsif using_secondary
+              msg = "failed to flush the buffer with secondary output."
+              log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+              log.warn_backtrace error.backtrace
             else
-              @retry = retry_state(@buffer_config.retry_randomize)
-              @counters_monitor.synchronize { @num_errors += 1 }
-              log.warn "failed to flush the buffer.", plugin_id: plugin_id, retry_time: @retry.steps, next_retry: @retry.next_time, chunk: dump_unique_id_hex(chunk.unique_id), error: e
-              log.warn_backtrace e.backtrace
+              msg = "failed to flush the buffer."
+              log.warn msg, retry_time: @retry.steps, next_retry_seconds: @retry.next_time, chunk: chunk_id_hex, error: error
+              log.warn_backtrace error.backtrace
             end
           end
 
-          raise if @under_plugin_development && !@retry_for_error_chunk
+          if @retry.limit?
+            @buffer.clear_queue!
+            log.debug "buffer queue cleared"
+            @retry = nil
+          else
+            @retry.step
+          end
         end
       end
 
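(The new `update_retry_state` consolidates retry bookkeeping that was previously inlined in `try_flush`: the first failure creates the retry object, later failures log and step it, and hitting the retry limit clears the queue and resets. A simplified sketch of that control flow; `start_retry`, `log_failure`, and `drop_queued_chunks` are placeholder helpers, not Fluentd APIs:)

    # Not the real implementation, just the decision order of update_retry_state.
    def bump_retry(current_retry, error)
      return start_retry unless current_retry       # first failure: begin a retry sequence

      log_failure(current_retry, error) if error    # warn, or error once the limit is hit

      if current_retry.limit?
        drop_queued_chunks                          # give up: clear the buffer queue
        nil                                         # and forget the retry state
      else
        current_retry.step                          # schedule the next attempt with backoff
        current_retry
      end
    end
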
@@ -1060,7 +1085,7 @@ def submit_flush_once
         # Without locks: it is rough but enough to select "next" writer selection
         @output_flush_thread_current_position = (@output_flush_thread_current_position + 1) % @buffer_config.flush_thread_count
         state = @output_flush_threads[@output_flush_thread_current_position]
-        state.next_time = 0
+        state.next_clock = 0
         if state.thread && state.thread.status # "run"/"sleep"/"aborting" or false(successfully stop) or nil(killed by exception)
           state.thread.run
         else
@@ -1102,7 +1127,7 @@ def enqueue_thread_wait
       # only for tests of output plugin
       def flush_thread_wakeup
         @output_flush_threads.each do |state|
-          state.next_time = 0
+          state.next_clock = 0
           state.thread.run
         end
       end
@@ -1156,7 +1181,7 @@ def enqueue_thread_run
              end
            rescue => e
              raise if @under_plugin_development
-              log.error "unexpected error while checking flushed chunks. ignored.", plugin_id: plugin_id, error: e
+              log.error "unexpected error while checking flushed chunks. ignored.", error: e
              log.error_backtrace
            ensure
              @output_enqueue_thread_waiting = false
@@ -1166,7 +1191,7 @@ def enqueue_thread_run
          end
        rescue => e
          # normal errors are rescued by inner begin-rescue clause.
-          log.error "error on enqueue thread", plugin_id: plugin_id, error: e
+          log.error "error on enqueue thread", error: e
          log.error_backtrace
          raise
        end
@@ -1175,9 +1200,7 @@ def flush_thread_run(state)
       def flush_thread_run(state)
         flush_thread_interval = @buffer_config.flush_thread_interval
 
-        # If the given clock_id is not supported, Errno::EINVAL is raised.
-        clock_id = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
-        state.next_time = Process.clock_gettime(clock_id) + flush_thread_interval
+        state.next_clock = Process.clock_gettime(PROCESS_CLOCK_ID) + flush_thread_interval
 
         while !self.after_started? && !self.stopped?
           sleep 0.5
@@ -1187,16 +1210,18 @@ def flush_thread_run(state)
         begin
           # This thread don't use `thread_current_running?` because this thread should run in `before_shutdown` phase
           while @output_flush_threads_running
-            time = Process.clock_gettime(clock_id)
-            interval = state.next_time - time
+            current_clock = Process.clock_gettime(PROCESS_CLOCK_ID)
+            interval = state.next_clock - current_clock
 
-            if state.next_time <= time
+            if state.next_clock <= current_clock && (!@retry || @retry_mutex.synchronize { @retry.next_time } <= Time.now)
               try_flush
-              # next_flush_interval uses flush_thread_interval or flush_thread_burst_interval (or retrying)
+
+              # next_flush_time uses flush_thread_interval or flush_thread_burst_interval (or retrying)
               interval = next_flush_time.to_f - Time.now.to_f
-              # TODO: if secondary && delayed-commit, next_flush_time will be much longer than expected (because @retry still exists)
-              # @retry should be cleared if delayed commit is enabled? Or any other solution?
-              state.next_time = Process.clock_gettime(clock_id) + interval
+              # TODO: if secondary && delayed-commit, next_flush_time will be much longer than expected
+              #       because @retry still exists (#commit_write is not called yet in #try_flush)
+              # @retry should be cleared if delayed commit is enabled? Or any other solution?
+              state.next_clock = Process.clock_gettime(PROCESS_CLOCK_ID) + interval
             end
 
             if @dequeued_chunks_mutex.synchronize { !@dequeued_chunks.empty? && @dequeued_chunks.first.expired? }
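
(In the reworked loop a flush fires only when both gates are open: the per-thread monotonic deadline `state.next_clock` has passed, and either no retry is pending or the wall-clock `@retry.next_time` has arrived. A stripped-down sketch of that scheduling, assuming the instance variables and the `try_flush`/`next_flush_time` methods from the patch; the real method also handles delayed-commit timeouts:)

    # Abbreviated scheduling logic, not the full flush_thread_run method.
    while @output_flush_threads_running
      now = Process.clock_gettime(PROCESS_CLOCK_ID)
      retry_ready = !@retry || @retry_mutex.synchronize { @retry.next_time } <= Time.now

      if state.next_clock <= now && retry_ready
        try_flush
        # Reschedule on the monotonic clock using the configured flush interval.
        state.next_clock = Process.clock_gettime(PROCESS_CLOCK_ID) + (next_flush_time.to_f - Time.now.to_f)
      end

      sleep([state.next_clock - Process.clock_gettime(PROCESS_CLOCK_ID), 0.1].max)
    end
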
@@ -1210,7 +1235,7 @@ def flush_thread_run(state)
         rescue => e
           # normal errors are rescued by output plugins in #try_flush
           # so this rescue section is for critical & unrecoverable errors
-          log.error "error on output thread", plugin_id: plugin_id, error: e
+          log.error "error on output thread", error: e
           log.error_backtrace
           raise
         end