-
Notifications
You must be signed in to change notification settings - Fork 177
Closed
Description
In Riak 3.0.12, vnodes started to hang in eleveldb:write/3, and the process mailbox could grow to 10K messages.
After a while, the whole node starts to behave unstably. The vnode's queue size does not decrease, even when there is no load.
[{current_function,{eleveldb,write,3}},
{initial_call,{proc_lib,init_p,5}},
{status,waiting},
{message_queue_len,10001},
{links,[<0.1687.0>,<0.2146.0>,<0.258.0>]},
{dictionary,[{rand_seed,{#{bits => 58,jump => #Fun<rand.13.8986388>,
next => #Fun<rand.10.8986388>,type => exsss,
uniform => #Fun<rand.11.8986388>,
uniform_n => #Fun<rand.12.8986388>},
[112506433278832578|159401657947672872]}},
{#Ref<0.2265291839.1457520643.98734>,
{bc_state,"/var/lib/riak/bitcask/11417981541647679048466287755595961091061972992",
fresh,undefined,[],2147483648,
[{expiry_secs,-1},
{io_mode,erlang},
{expiry_grace_time,0},
{small_file_threshold,10485760},
{dead_bytes_threshold,134217728},
{frag_threshold,40},
{dead_bytes_merge_trigger,536870912},
{frag_merge_trigger,60},
{max_file_size,2147483648},
{open_timeout,4},
{data_root,"/var/lib/riak/bitcask"},
{sync_strategy,none},
{merge_window,always},
{max_fold_age,-1},
{max_fold_puts,0},
{expiry_secs,-1},
{require_hint_crc,true},
{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},
{read_write,true}],
#Fun<riak_kv_bitcask_backend.1.43379991>,
#Ref<0.2265291839.1457651715.98733>,1,2}},
{'$initial_call',{riak_core_vnode,init,1}},
{'$ancestors',[riak_core_vnode_sup,riak_core_sup,<0.254.0>]},
{bitcask_time_fudge,no_testing},
{hashtree_tokens,47},
{#Ref<0.2265291839.1457520644.95496>,
{bc_state,"/var/lib/riak/bitcask/11417981541647679048466287755595961091061972992",
fresh,undefined,[],2147483648,
[{expiry_secs,-1},
{io_mode,erlang},
{expiry_grace_time,0},
{small_file_threshold,10485760},
{dead_bytes_threshold,134217728},
{frag_threshold,40},
{dead_bytes_merge_trigger,536870912},
{frag_merge_trigger,60},
{max_file_size,2147483648},
{open_timeout,4},
{data_root,"/var/lib/riak/bitcask"},
{sync_strategy,none},
{merge_window,always},
{max_fold_age,-1},
{max_fold_puts,0},
{expiry_secs,-1},
{require_hint_crc,true},
{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},
{read_write,true}],
#Fun<riak_kv_bitcask_backend.1.43379991>,
#Ref<0.2265291839.1457651719.94834>,1,2}},
{bitcask_file_mod,bitcask_file}]},
{trap_exit,true},
{error_handler,error_handler},
{priority,normal},
{group_leader,<0.253.0>},
{total_heap_size,1542816},
{heap_size,10958},
{stack_size,70},
{reductions,29125337},
{garbage_collection,[{max_heap_size,#{error_logger => true,kill => true,size => 0}},
{min_bin_vheap_size,46422},
{min_heap_size,233},
{fullsweep_after,0},
{minor_gcs,0}]},
{suspending,[]}]
{backtrace,<<"Program counter: 0x00007f9e9f962d90 (eleveldb:write/3 + 120)\nCP: 0x0000000000000000 (invalid)\n\n0x00007f9d1db7de20 Return addr 0x00007fa3f6e51e28 (riak_kv_eleveldb_backend:do_put/6 + 608)\ny(0) []\ny(1) []\ny(2) #Ref<0.2265291839.1457782787.52859>\n\n0x00007f9d1db7de40 Return addr 0x00007fa3f719fa00 (riak_kv_multi_backend:put/5 + 200)\ny(0) [{sync,false}]\ny(1) {state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_expiry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}\ny(2) []\ny(3) []\ny(4) []\ny(5) []\ny(6) []\n\n0x00007f9d1db7de80 Return addr 0x00007f9e6c21e490 (riak_kv_vnode:encode_and_put_no_sib_check/8 + 2104)\ny(0) riak_kv_eleveldb_backend\ny(1) 
{<<\"leveldb_mult\">>,riak_kv_eleveldb_backend,{state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_expiry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}}\ny(2) 
{state,[{<<\"leveldb\">>,riak_kv_bitcask_backend,{state,#Ref<0.2265291839.1457520643.98734>,\"11417981541647679048466287755595961091061972992\",[{io_mode,erlang},{expiry_grace_time,0},{small_file_threshold,10485760},{dead_bytes_threshold,134217728},{frag_threshold,40},{dead_bytes_merge_trigger,536870912},{frag_merge_trigger,60},{max_file_size,2147483648},{open_timeout,4},{data_root,\"/var/lib/riak/bitcask\"},{sync_strategy,none},{merge_window,always},{max_fold_age,-1},{max_fold_puts,0},{expiry_secs,-1},{require_hint_crc,true},{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},{read_write,true}],11417981541647679048466287755595961091061972992,\"/var/lib/riak/bitcask\",1}},{<<\"memory_mult\">>,riak_kv_memory_backend,{state,#Ref<0.2265291839.1457651713.96426>,#Ref<0.2265291839.1457651713.96425>,#Ref<0.2265291839.1457651713.96424>,134217728,0,0,129600}},{<<\"leveldb_mult\">>,riak_kv_eleveldb_backend,{state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_exp
iry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}},{<<\"bitcask_mult\">>,riak_kv_bitcask_backend,{state,#Ref<0.2265291839.1457520644.95496>,\"11417981541647679048466287755595961091061972992\",[{io_mode,erlang},{expiry_grace_time,0},{small_file_threshold,10485760},{dead_bytes_threshold,134217728},{frag_threshold,40},{dead_bytes_merge_trigger,536870912},{frag_merge_trigger,60},{max_file_size,2147483648},{open_timeout,4},{data_root,\"/var/lib/riak/bitcask\"},{sync_strategy,none},{merge_window,always},{max_fold_age,-1},{max_fold_puts,0},{expiry_secs,-1},{require_hint_crc,true},{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},{read_write,true}],11417981541647679048466287755595961091061972992,\"/var/lib/riak/bitcask\",1}}],<<\"bitcask_mult\">>}\ny(3) []\ny(4) []\ny(5) []\ny(6) []\n\n0x00007f9d1db7dec0 Return addr 0x00007f9e6c217110 (riak_kv_vnode:actual_put/8 + 264)\ny(0)
Another interesting thing I noticed is that in the vnode state the compression is set to snappy, but in the config it is set to lz4.
{<<"leveldb_mult">>,riak_kv_eleveldb_backend,
{state,#Ref<0.1786542404.3883008000.67404>,
"/var/lib/riak/leveldb/451010270895083322414418366346040463096947933184",
[{block_cache_threshold,33554432},
{block_restart_interval,16},
{block_size_steps,16},
{cache_object_warming,true},
{compression,snappy},
{create_if_missing,true},
{delete_threshold,1000},
{eleveldb_threads,71},
{expiry_enabled,false},
{expiry_minutes,0},
{fadvise_willneed,false},
{limited_developer_mem,false},
{sst_block_size,4096},
{tiered_slow_level,0},
{total_leveldb_mem_percent,35},
{use_bloomfilter,true},
{whole_file_expiry,true},
{write_buffer_size,22791803}],
[{block_cache_threshold,33554432},
{block_restart_interval,16},
{block_size_steps,16},
{cache_object_warming,true},
{compression,snappy},
{create_if_missing,true},
{data_root,"/var/lib/riak/leveldb"},
{delete_threshold,1000},
{eleveldb_threads,71},
{expiry_enabled,false},
{expiry_minutes,0},
{fadvise_willneed,false},
{limited_developer_mem,false},
{sst_block_size,4096},
{sync,false},
{tiered_slow_level,0},
{total_leveldb_mem_percent,35},
{use_bloomfilter,true},
{verify_checksums,true},
{verify_compaction,true},
{whole_file_expiry,true},
{write_buffer_size,22791803},
{write_buffer_size_max,31457280},
{write_buffer_size_min,15728640}],
[{verify_checksums,true}],
[{sync,false}],
[{verify_checksums,true},{fill_cache,false}],
true,false}},
It's not fully confirmed, but it seems that 3.0.9 doesn't have the issue.
Are there any incompatibilities between snappy versions?
Metadata
Metadata
Assignees
Labels
No labels