Skip to content

vnode hangs in {eleveldb,write,3} #273

@systream

Description

@systream

In Riak 3.0.12, vnodes started to hang in eleveldb:write/3, and the process mailbox could grow to 10K messages.
After a while the whole node becomes unstable. The vnode's message queue length does not decrease, even when there is no load.

[{current_function,{eleveldb,write,3}},
 {initial_call,{proc_lib,init_p,5}},
 {status,waiting},
 {message_queue_len,10001},
 {links,[<0.1687.0>,<0.2146.0>,<0.258.0>]},
 {dictionary,[{rand_seed,{#{bits => 58,jump => #Fun<rand.13.8986388>,
                            next => #Fun<rand.10.8986388>,type => exsss,
                            uniform => #Fun<rand.11.8986388>,
                            uniform_n => #Fun<rand.12.8986388>},
                          [112506433278832578|159401657947672872]}},
              {#Ref<0.2265291839.1457520643.98734>,
               {bc_state,"/var/lib/riak/bitcask/11417981541647679048466287755595961091061972992",
                         fresh,undefined,[],2147483648,
                         [{expiry_secs,-1},
                          {io_mode,erlang},
                          {expiry_grace_time,0},
                          {small_file_threshold,10485760},
                          {dead_bytes_threshold,134217728},
                          {frag_threshold,40},
                          {dead_bytes_merge_trigger,536870912},
                          {frag_merge_trigger,60},
                          {max_file_size,2147483648},
                          {open_timeout,4},
                          {data_root,"/var/lib/riak/bitcask"},
                          {sync_strategy,none},
                          {merge_window,always},
                          {max_fold_age,-1},
                          {max_fold_puts,0},
                          {expiry_secs,-1},
                          {require_hint_crc,true},
                          {key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},
                          {read_write,true}], 
                         #Fun<riak_kv_bitcask_backend.1.43379991>,
                         #Ref<0.2265291839.1457651715.98733>,1,2}},
              {'$initial_call',{riak_core_vnode,init,1}},
              {'$ancestors',[riak_core_vnode_sup,riak_core_sup,<0.254.0>]},
              {bitcask_time_fudge,no_testing},
              {hashtree_tokens,47},
              {#Ref<0.2265291839.1457520644.95496>,
               {bc_state,"/var/lib/riak/bitcask/11417981541647679048466287755595961091061972992",
                         fresh,undefined,[],2147483648,
                         [{expiry_secs,-1},
                          {io_mode,erlang},
                          {expiry_grace_time,0},
                          {small_file_threshold,10485760},
                          {dead_bytes_threshold,134217728},
                          {frag_threshold,40},
                          {dead_bytes_merge_trigger,536870912},
                          {frag_merge_trigger,60},
                          {max_file_size,2147483648},
                          {open_timeout,4},
                          {data_root,"/var/lib/riak/bitcask"},
                          {sync_strategy,none},
                          {merge_window,always},
                          {max_fold_age,-1},
                          {max_fold_puts,0},
                          {expiry_secs,-1},
                          {require_hint_crc,true},
                          {key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},
                          {read_write,true}],
                         #Fun<riak_kv_bitcask_backend.1.43379991>,
                         #Ref<0.2265291839.1457651719.94834>,1,2}},
              {bitcask_file_mod,bitcask_file}]},
 {trap_exit,true},
 {error_handler,error_handler},
 {priority,normal},
 {group_leader,<0.253.0>},
 {total_heap_size,1542816},
 {heap_size,10958},
 {stack_size,70},
 {reductions,29125337},
 {garbage_collection,[{max_heap_size,#{error_logger => true,kill => true,size => 0}},
                      {min_bin_vheap_size,46422},
                      {min_heap_size,233},
                      {fullsweep_after,0},
                      {minor_gcs,0}]},
 {suspending,[]}]
{backtrace,<<"Program counter: 0x00007f9e9f962d90 (eleveldb:write/3 + 120)\nCP: 0x0000000000000000 (invalid)\n\n0x00007f9d1db7de20 Return addr 0x00007fa3f6e51e28 (riak_kv_eleveldb_backend:do_put/6 + 608)\ny(0)     []\ny(1)     []\ny(2)     #Ref<0.2265291839.1457782787.52859>\n\n0x00007f9d1db7de40 Return addr 0x00007fa3f719fa00 (riak_kv_multi_backend:put/5 + 200)\ny(0)     [{sync,false}]\ny(1)     {state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_expiry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}\ny(2)     []\ny(3)     []\ny(4)     []\ny(5)     []\ny(6)     []\n\n0x00007f9d1db7de80 Return addr 0x00007f9e6c21e490 (riak_kv_vnode:encode_and_put_no_sib_check/8 + 2104)\ny(0)     riak_kv_eleveldb_backend\ny(1)     
{<<\"leveldb_mult\">>,riak_kv_eleveldb_backend,{state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_expiry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}}\ny(2)     
{state,[{<<\"leveldb\">>,riak_kv_bitcask_backend,{state,#Ref<0.2265291839.1457520643.98734>,\"11417981541647679048466287755595961091061972992\",[{io_mode,erlang},{expiry_grace_time,0},{small_file_threshold,10485760},{dead_bytes_threshold,134217728},{frag_threshold,40},{dead_bytes_merge_trigger,536870912},{frag_merge_trigger,60},{max_file_size,2147483648},{open_timeout,4},{data_root,\"/var/lib/riak/bitcask\"},{sync_strategy,none},{merge_window,always},{max_fold_age,-1},{max_fold_puts,0},{expiry_secs,-1},{require_hint_crc,true},{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},{read_write,true}],11417981541647679048466287755595961091061972992,\"/var/lib/riak/bitcask\",1}},{<<\"memory_mult\">>,riak_kv_memory_backend,{state,#Ref<0.2265291839.1457651713.96426>,#Ref<0.2265291839.1457651713.96425>,#Ref<0.2265291839.1457651713.96424>,134217728,0,0,129600}},{<<\"leveldb_mult\">>,riak_kv_eleveldb_backend,{state,#Ref<0.2265291839.1457651712.94836>,\"/var/lib/riak/leveldb/11417981541647679048466287755595961091061972992\",[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{whole_file_expiry,true},{write_buffer_size,16662690}],[{block_cache_threshold,33554432},{block_restart_interval,16},{block_size_steps,16},{cache_object_warming,true},{compression,snappy},{create_if_missing,true},{data_root,\"/var/lib/riak/leveldb\"},{delete_threshold,1000},{eleveldb_threads,71},{expiry_enabled,false},{expiry_minutes,0},{fadvise_willneed,false},{limited_developer_mem,false},{sst_block_size,4096},{sync,false},{tiered_slow_level,0},{total_leveldb_mem_percent,35},{use_bloomfilter,true},{verify_checksums,true},{verify_compaction,true},{whole_file_exp
iry,true},{write_buffer_size,16662690},{write_buffer_size_max,31457280},{write_buffer_size_min,15728640}],[{verify_checksums,true}],[{sync,false}],[{verify_checksums,true},{fill_cache,false}],false,false}},{<<\"bitcask_mult\">>,riak_kv_bitcask_backend,{state,#Ref<0.2265291839.1457520644.95496>,\"11417981541647679048466287755595961091061972992\",[{io_mode,erlang},{expiry_grace_time,0},{small_file_threshold,10485760},{dead_bytes_threshold,134217728},{frag_threshold,40},{dead_bytes_merge_trigger,536870912},{frag_merge_trigger,60},{max_file_size,2147483648},{open_timeout,4},{data_root,\"/var/lib/riak/bitcask\"},{sync_strategy,none},{merge_window,always},{max_fold_age,-1},{max_fold_puts,0},{expiry_secs,-1},{require_hint_crc,true},{key_transform,#Fun<riak_kv_bitcask_backend.1.43379991>},{read_write,true}],11417981541647679048466287755595961091061972992,\"/var/lib/riak/bitcask\",1}}],<<\"bitcask_mult\">>}\ny(3)     []\ny(4)     []\ny(5)     []\ny(6)     []\n\n0x00007f9d1db7dec0 Return addr 0x00007f9e6c217110 (riak_kv_vnode:actual_put/8 + 264)\ny(0)   

Another interesting thing I noticed is that in the vnode state the compression is set to snappy, but in the config it is set to lz4.

 {<<"leveldb_mult">>,riak_kv_eleveldb_backend,
                               {state,#Ref<0.1786542404.3883008000.67404>,
                                      "/var/lib/riak/leveldb/451010270895083322414418366346040463096947933184",
                                      [{block_cache_threshold,33554432},
                                       {block_restart_interval,16},
                                       {block_size_steps,16},
                                       {cache_object_warming,true},
                                       {compression,snappy},
                                       {create_if_missing,true},
                                       {delete_threshold,1000},
                                       {eleveldb_threads,71},
                                       {expiry_enabled,false},
                                       {expiry_minutes,0},
                                       {fadvise_willneed,false},
                                       {limited_developer_mem,false},
                                       {sst_block_size,4096},
                                       {tiered_slow_level,0},
                                       {total_leveldb_mem_percent,35},
                                       {use_bloomfilter,true},
                                       {whole_file_expiry,true},
                                       {write_buffer_size,22791803}],
                                      [{block_cache_threshold,33554432},
                                       {block_restart_interval,16},
                                       {block_size_steps,16},
                                       {cache_object_warming,true},
                                       {compression,snappy},
                                       {create_if_missing,true},
                                       {data_root,"/var/lib/riak/leveldb"},
                                       {delete_threshold,1000},
                                       {eleveldb_threads,71},
                                       {expiry_enabled,false},
                                       {expiry_minutes,0},
                                       {fadvise_willneed,false},
                                       {limited_developer_mem,false},
                                       {sst_block_size,4096},
                                       {sync,false},
                                       {tiered_slow_level,0},
                                       {total_leveldb_mem_percent,35},
                                       {use_bloomfilter,true},
                                       {verify_checksums,true},
                                       {verify_compaction,true},
                                       {whole_file_expiry,true},
                                       {write_buffer_size,22791803},
                                       {write_buffer_size_max,31457280},
                                       {write_buffer_size_min,15728640}],
                                      [{verify_checksums,true}],
                                      [{sync,false}],
                                      [{verify_checksums,true},{fill_cache,false}],
                                      true,false}},

It's not fully confirmed, but it seems that 3.0.9 doesn't have the issue.
Is there any incompatibility between snappy versions?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions