Skip to content

SIGSEGV on 1.9.4 with stack trace #5753

@PettitWesley

Description

@PettitWesley

Bug Report

Describe the bug
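Fluent Bit 1.9.4 crashes with SIGSEGV, which causes the pod to restart. We captured a stack trace by running it under Valgrind. Valgrind first reports an invalid read in `input_chunk_get` (flb_input_chunk.c:1123) of memory that was freed by `msgpack_unpack_next`, called from `cb_kube_filter` (kubernetes.c:487), followed by a read of address 0x0 in `cio_chunk_is_locked` (cio_chunk.c:375), which triggers the SIGSEGV.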

{"log":"[2022/07/19 06:35:12] [debug] [input:tail:tail.1] inode=54654517 events: IN_MODIFY \n","stream":"stderr","time":"2022-07-19T06:35:12.801792673Z"}
{"log":"[2022/07/19 06:35:12] [debug] [input:tail:tail.1] inode=54654517 events: IN_MODIFY \n","stream":"stderr","time":"2022-07-19T06:35:12.808897828Z"}
{"log":"[2022/07/19 06:35:12] [debug] [input:tail:tail.1] inode=54654517 events: IN_MODIFY \n","stream":"stderr","time":"2022-07-19T06:35:13.044681592Z"}
{"log":"[2022/07/19 06:35:13] [debug] [input:tail:tail.0] inode=141645686 events: IN_MODIFY \n","stream":"stderr","time":"2022-07-19T06:35:13.04887481Z"}
{"log":"==1== Invalid read of size 8\n","stream":"stderr","time":"2022-07-19T06:35:13.05575357Z"}
{"log":"==1==    at 0x5200A1: input_chunk_get (flb_input_chunk.c:1123)\n","stream":"stderr","time":"2022-07-19T06:35:13.055773607Z"}
{"log":"==1==    by 0x520922: input_chunk_append_raw (flb_input_chunk.c:1408)\n","stream":"stderr","time":"2022-07-19T06:35:13.055798924Z"}
{"log":"==1==    by 0x520F34: flb_input_chunk_append_raw2 (flb_input_chunk.c:1600)\n","stream":"stderr","time":"2022-07-19T06:35:13.055802456Z"}
{"log":"==1==    by 0x551B95: process_content (tail_file.c:536)\n","stream":"stderr","time":"2022-07-19T06:35:13.055814303Z"}
{"log":"==1==    by 0x553EF5: flb_tail_file_chunk (tail_file.c:1315)\n","stream":"stderr","time":"2022-07-19T06:35:13.055817402Z"}
{"log":"==1==    by 0x54B79F: in_tail_collect_event (tail.c:310)\n","stream":"stderr","time":"2022-07-19T06:35:13.055820549Z"}
{"log":"==1==    by 0x54CBCF: tail_fs_event (tail_fs_inotify.c:267)\n","stream":"stderr","time":"2022-07-19T06:35:13.055823481Z"}
{"log":"==1==    by 0x4E0D74: flb_input_collector_fd (flb_input.c:1210)\n","stream":"stderr","time":"2022-07-19T06:35:13.055826561Z"}
{"log":"==1==    by 0x4F64B8: flb_engine_handle_event (flb_engine.c:440)\n","stream":"stderr","time":"2022-07-19T06:35:13.055829567Z"}
{"log":"==1==    by 0x4F64B8: flb_engine_start (flb_engine.c:763)\n","stream":"stderr","time":"2022-07-19T06:35:13.055832418Z"}
{"log":"==1==    by 0x4D3A1D: flb_lib_worker (flb_lib.c:626)\n","stream":"stderr","time":"2022-07-19T06:35:13.055835187Z"}
{"log":"==1==    by 0x4E4444A: start_thread (in /usr/lib64/libpthread-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:13.055837982Z"}
{"log":"==1==    by 0x686C56E: clone (in /usr/lib64/libc-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:13.055840961Z"}
{"log":"==1==  Address 0xccb5bf0 is 32 bytes inside a block of size 56 free'd\n","stream":"stderr","time":"2022-07-19T06:35:13.055843677Z"}
{"log":"==1==    at 0x4C2F94F: free (vg_replace_malloc.c:872)\n","stream":"stderr","time":"2022-07-19T06:35:13.055846689Z"}
{"log":"==1==    by 0xA2EE02: msgpack_zone_free (zone.c:221)\n","stream":"stderr","time":"2022-07-19T06:35:13.055849549Z"}
{"log":"==1==    by 0xA2C3F9: msgpack_unpacked_destroy (unpack.h:260)\n","stream":"stderr","time":"2022-07-19T06:35:13.055852243Z"}
{"log":"==1==    by 0xA2E763: msgpack_unpack_next (unpack.c:661)\n","stream":"stderr","time":"2022-07-19T06:35:13.055868284Z"}
{"log":"==1==    by 0x60B8C3: cb_kube_filter (kubernetes.c:487)\n","stream":"stderr","time":"2022-07-19T06:35:13.055871266Z"}
{"log":"==1==    by 0x4E14B4: flb_filter_do (flb_filter.c:124)\n","stream":"stderr","time":"2022-07-19T06:35:13.055874083Z"}
{"log":"==1==    by 0x520C09: input_chunk_append_raw (flb_input_chunk.c:1478)\n","stream":"stderr","time":"2022-07-19T06:35:13.055876913Z"}
{"log":"==1==    by 0x520F34: flb_input_chunk_append_raw2 (flb_input_chunk.c:1600)\n","stream":"stderr","time":"2022-07-19T06:35:13.05587978Z"}
{"log":"==1==    by 0x551B95: process_content (tail_file.c:536)\n","stream":"stderr","time":"2022-07-19T06:35:13.055882587Z"}
{"log":"==1==    by 0x553EF5: flb_tail_file_chunk (tail_file.c:1315)\n","stream":"stderr","time":"2022-07-19T06:35:13.055885484Z"}
{"log":"==1==    by 0x54B79F: in_tail_collect_event (tail.c:310)\n","stream":"stderr","time":"2022-07-19T06:35:13.055888358Z"}
{"log":"==1==    by 0x54CBCF: tail_fs_event (tail_fs_inotify.c:267)\n","stream":"stderr","time":"2022-07-19T06:35:13.055891369Z"}
{"log":"==1==  Block was alloc'd at\n","stream":"stderr","time":"2022-07-19T06:35:13.055894462Z"}
{"log":"==1==    at 0x4C2D065: malloc (vg_replace_malloc.c:381)\n","stream":"stderr","time":"2022-07-19T06:35:13.055897358Z"}
{"log":"==1==    by 0xA2ED76: msgpack_zone_new (zone.c:199)\n","stream":"stderr","time":"2022-07-19T06:35:13.055900093Z"}
{"log":"==1==    by 0xA2C77A: template_callback_array (unpack.c:208)\n","stream":"stderr","time":"2022-07-19T06:35:13.055903139Z"}
{"log":"==1==    by 0xA2CFD1: template_execute (unpack_template.h:231)\n","stream":"stderr","time":"2022-07-19T06:35:13.055905929Z"}
{"log":"==1==    by 0xA2E7D7: msgpack_unpack_next (unpack.c:677)\n","stream":"stderr","time":"2022-07-19T06:35:13.055908837Z"}
{"log":"==1==    by 0x60B8C3: cb_kube_filter (kubernetes.c:487)\n","stream":"stderr","time":"2022-07-19T06:35:13.055911465Z"}
{"log":"==1==    by 0x4E14B4: flb_filter_do (flb_filter.c:124)\n","stream":"stderr","time":"2022-07-19T06:35:13.055915039Z"}
{"log":"==1==    by 0x520C09: input_chunk_append_raw (flb_input_chunk.c:1478)\n","stream":"stderr","time":"2022-07-19T06:35:13.055917886Z"}
{"log":"==1==    by 0x520F34: flb_input_chunk_append_raw2 (flb_input_chunk.c:1600)\n","stream":"stderr","time":"2022-07-19T06:35:13.055923108Z"}
{"log":"==1==    by 0x551B95: process_content (tail_file.c:536)\n","stream":"stderr","time":"2022-07-19T06:35:13.055925973Z"}
{"log":"==1==    by 0x553EF5: flb_tail_file_chunk (tail_file.c:1315)\n","stream":"stderr","time":"2022-07-19T06:35:13.055928806Z"}
{"log":"==1==    by 0x54B79F: in_tail_collect_event (tail.c:310)\n","stream":"stderr","time":"2022-07-19T06:35:13.055944328Z"}
{"log":"==1== \n","stream":"stderr","time":"2022-07-19T06:35:13.055947224Z"}
{"log":"==1== Invalid read of size 4\n","stream":"stderr","time":"2022-07-19T06:35:13.144393339Z"}
{"log":"==1==    at 0x85082F: cio_chunk_is_locked (cio_chunk.c:375)\n","stream":"stderr","time":"2022-07-19T06:35:13.144453424Z"}
{"log":"==1==    by 0x5200AC: input_chunk_get (flb_input_chunk.c:1123)\n","stream":"stderr","time":"2022-07-19T06:35:13.144457459Z"}
{"log":"==1==    by 0x520922: input_chunk_append_raw (flb_input_chunk.c:1408)\n","stream":"stderr","time":"2022-07-19T06:35:13.144460659Z"}
{"log":"==1==    by 0x520F34: flb_input_chunk_append_raw2 (flb_input_chunk.c:1600)\n","stream":"stderr","time":"2022-07-19T06:35:13.144463794Z"}
{"log":"==1==    by 0x551B95: process_content (tail_file.c:536)\n","stream":"stderr","time":"2022-07-19T06:35:13.144466771Z"}
{"log":"==1==    by 0x553EF5: flb_tail_file_chunk (tail_file.c:1315)\n","stream":"stderr","time":"2022-07-19T06:35:13.1444699Z"}
{"log":"==1==    by 0x54B79F: in_tail_collect_event (tail.c:310)\n","stream":"stderr","time":"2022-07-19T06:35:13.144473148Z"}
{"log":"==1==    by 0x54CBCF: tail_fs_event (tail_fs_inotify.c:267)\n","stream":"stderr","time":"2022-07-19T06:35:13.14447631Z"}
{"log":"==1==    by 0x4E0D74: flb_input_collector_fd (flb_input.c:1210)\n","stream":"stderr","time":"2022-07-19T06:35:13.144479381Z"}
{"log":"==1==    by 0x4F64B8: flb_engine_handle_event (flb_engine.c:440)\n","stream":"stderr","time":"2022-07-19T06:35:13.144482721Z"}
{"log":"==1==    by 0x4F64B8: flb_engine_start (flb_engine.c:763)\n","stream":"stderr","time":"2022-07-19T06:35:13.144485806Z"}
{"log":"==1==    by 0x4D3A1D: flb_lib_worker (flb_lib.c:626)\n","stream":"stderr","time":"2022-07-19T06:35:13.144488989Z"}
{"log":"==1==    by 0x4E4444A: start_thread (in /usr/lib64/libpthread-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:13.144492209Z"}
{"log":"==1==  Address 0x0 is not stack'd, malloc'd or (recently) free'd\n","stream":"stderr","time":"2022-07-19T06:35:13.144495562Z"}
{"log":"==1== \n","stream":"stderr","time":"2022-07-19T06:35:13.144498589Z"}
{"log":"[2022/07/19 06:35:13] [engine] caught signal (SIGSEGV)\n","stream":"stderr","time":"2022-07-19T06:35:13.146001201Z"}
{"log":"[2022/07/19 06:35:18] [debug] [upstream] KA connection #23 to kinesis.ap-south-1.amazonaws.com:443 has been disconnected by the remote service\n","stream":"stderr","time":"2022-07-19T06:35:18.949858473Z"}
{"log":"[2022/07/19 06:35:19] [debug] [socket] could not validate socket status for #23 (don't worry)\n","stream":"stderr","time":"2022-07-19T06:35:19.347963492Z"}
{"log":"#0  0x85082f            in  cio_chunk_is_locked() at lib/chunkio/src/cio_chunk.c:375\n","stream":"stderr","time":"2022-07-19T06:35:21.449837408Z"}
{"log":"#1  0x5200ac            in  input_chunk_get() at src/flb_input_chunk.c:1123\n","stream":"stderr","time":"2022-07-19T06:35:21.551075087Z"}
{"log":"#2  0x520922            in  input_chunk_append_raw() at src/flb_input_chunk.c:1408\n","stream":"stderr","time":"2022-07-19T06:35:21.642916468Z"}
{"log":"#3  0x520f34            in  flb_input_chunk_append_raw2() at src/flb_input_chunk.c:1600\n","stream":"stderr","time":"2022-07-19T06:35:21.642980033Z"}
{"log":"#4  0x551b95            in  process_content() at plugins/in_tail/tail_file.c:536\n","stream":"stderr","time":"2022-07-19T06:35:21.650688891Z"}
{"log":"#5  0x553ef5            in  flb_tail_file_chunk() at plugins/in_tail/tail_file.c:1315\n","stream":"stderr","time":"2022-07-19T06:35:21.650775559Z"}
{"log":"#6  0x54b79f            in  in_tail_collect_event() at plugins/in_tail/tail.c:310\n","stream":"stderr","time":"2022-07-19T06:35:21.745962807Z"}
{"log":"#7  0x54cbcf            in  tail_fs_event() at plugins/in_tail/tail_fs_inotify.c:267\n","stream":"stderr","time":"2022-07-19T06:35:21.750300756Z"}
{"log":"#8  0x4e0d74            in  flb_input_collector_fd() at src/flb_input.c:1210\n","stream":"stderr","time":"2022-07-19T06:35:21.851347422Z"}
{"log":"#9  0x4f64b8            in  flb_engine_handle_event() at src/flb_engine.c:440\n","stream":"stderr","time":"2022-07-19T06:35:21.946636774Z"}
{"log":"#10 0x4f64b8            in  flb_engine_start() at src/flb_engine.c:763\n","stream":"stderr","time":"2022-07-19T06:35:21.946839223Z"}
{"log":"#11 0x4d3a1d            in  flb_lib_worker() at src/flb_lib.c:626\n","stream":"stderr","time":"2022-07-19T06:35:21.951180436Z"}
{"log":"#12 0x4e4444a           in  ???() at ???:0\n","stream":"stderr","time":"2022-07-19T06:35:21.9522142Z"}
{"log":"#13 0x686c56e           in  ???() at ???:0\n","stream":"stderr","time":"2022-07-19T06:35:22.043030087Z"}
{"log":"#14 0xffffffffffffffff  in  ???() at ???:0\n","stream":"stderr","time":"2022-07-19T06:35:22.043334147Z"}
{"log":"==1== \n","stream":"stderr","time":"2022-07-19T06:35:22.045334382Z"}
{"log":"==1== Process terminating with default action of signal 6 (SIGABRT): dumping core\n","stream":"stderr","time":"2022-07-19T06:35:22.04534848Z"}
{"log":"==1==    at 0x67B0CA0: raise (in /usr/lib64/libc-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:22.045451907Z"}
{"log":"==1==    by 0x67B2147: abort (in /usr/lib64/libc-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:22.045458085Z"}
{"log":"==1==    by 0x455432: flb_signal_handler (fluent-bit.c:581)\n","stream":"stderr","time":"2022-07-19T06:35:22.045461622Z"}
{"log":"==1==    by 0x67B0D0F: ??? (in /usr/lib64/libc-2.26.so)\n","stream":"stderr","time":"2022-07-19T06:35:22.04546512Z"}
{"log":"==1==    by 0x85082E: cio_chunk_is_locked (cio_chunk.c:375)\n","stream":"stderr","time":"2022-07-19T06:35:22.045468677Z"}
{"log":"==1==    by 0x5200AC: input_chunk_get (flb_input_chunk.c:1123)\n","stream":"stderr","time":"2022-07-19T06:35:22.045472835Z"}
{"log":"==1==    by 0x520922: input_chunk_append_raw (flb_input_chunk.c:1408)\n","stream":"stderr","time":"2022-07-19T06:35:22.045476539Z"}
{"log":"==1==    by 0x520F34: flb_input_chunk_append_raw2 (flb_input_chunk.c:1600)\n","stream":"stderr","time":"2022-07-19T06:35:22.045480246Z"}
{"log":"==1==    by 0x551B95: process_content (tail_file.c:536)\n","stream":"stderr","time":"2022-07-19T06:35:22.04548396Z"}
{"log":"==1==    by 0x553EF5: flb_tail_file_chunk (tail_file.c:1315)\n","stream":"stderr","time":"2022-07-19T06:35:22.045487447Z"}
{"log":"==1==    by 0x54B79F: in_tail_collect_event (tail.c:310)\n","stream":"stderr","time":"2022-07-19T06:35:22.045495495Z"}
{"log":"==1==    by 0x54CBCF: tail_fs_event (tail_fs_inotify.c:267)\n","stream":"stderr","time":"2022-07-19T06:35:22.045499104Z"}
{"log":"==1== \n","stream":"stderr","time":"2022-07-19T06:35:23.145947435Z"}
{"log":"==1== HEAP SUMMARY:\n","stream":"stderr","time":"2022-07-19T06:35:23.147551496Z"}
{"log":"==1==     in use at exit: 14,800,062 bytes in 49,247 blocks\n","stream":"stderr","time":"2022-07-19T06:35:23.147563702Z"}
{"log":"==1==   total heap usage: 726,057 allocs, 676,810 frees, 3,613,374,251 bytes allocated\n","stream":"stderr","time":"2022-07-19T06:35:23.147567117Z"}
{"log":"==1== \n","stream":"stderr","time":"2022-07-19T06:35:23.147570439Z"}
{"log":"==1== Thread 1:\n","stream":"stderr","time":"2022-07-19T06:35:25.347256352Z"}
{"log":"==1== 1 bytes in 1 blocks are possibly lost in loss record 2 of 1,985\n","stream":"stderr","time":"2022-07-19T06:35:25.348750799Z"}

Configuration

apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: amazon-cloudwatch
  labels:
    k8s-app: fluent-bit-template
data:
  # Configuration files: server, input, filters and output
  # ======================================================
  uniform-time-format.lua: |
    -- Fluent Bit Lua filter callback: writes a uniform ISO-8601 UTC timestamp
    -- with microsecond precision (e.g. 2022-07-19T06:35:12.801792Z) into
    -- record['time'].
    --
    -- tag       : tag of the record (unused).
    -- timestamp : table with 'sec' (seconds since the epoch) and 'nsec'
    --             (nanoseconds) keys; requires `time_as_table on`.
    -- record    : the record table; modified in place.
    --
    -- Returns 2, the unchanged timestamp and the modified record.
    function cb_print(tag, timestamp, record)
        -- The leading '!' makes os.date format in UTC, which is what the
        -- trailing 'Z' in the output claims.
        local seconds = os.date('!%Y-%m-%dT%H:%M:%S', timestamp['sec'])
        -- Zero-pad nanoseconds to the full 9 digits BEFORE truncating to
        -- microseconds; padding to only 6 digits turns e.g. 5000000 ns (5 ms)
        -- into ".500000" instead of ".005000".
        local micros = string.sub(string.format('%09d', timestamp['nsec']), 1, 6)
        record['time'] = string.format('%s.%sZ', seconds, micros)
        return 2, timestamp, record
    end
    
  fluent-bit.conf: |
    [SERVICE]
        Flush                       5
        Daemon                      off
        Log_level                   Debug
        Http_server                 On
        Http_listen                 0.0.0.0
        Http_port                   2022
        Parsers_File                parsers.conf
        storage.path                /var/fluent-bit/state/flb-storage/
        storage.sync                normal
        storage.checksum            off
        storage.backlog.mem_limit   5M
    
    [INPUT]
        Name                tail
        Tag                 kube.var.log.containers.*
        Exclude_Path        /var/log/containers/cloudwatch-agent*, /var/log/containers/fluent-bit*, /var/log/containers/aws-node*, /var/log/containers/kube-proxy*
        Path                /var/log/containers/test*default*
        Path_Key            file_name
        Parser              docker
        DB                  /var/fluent-bit/state/flb_container-5aa94922-a0e1-4aaa-9adc-1a3ef6957fb7.cc48429c-2035-471d-a7d4-e033187373b5.db
        DB.locking          True
        Docker_Mode         On
        
        Mem_Buf_Limit       50MB
        Refresh_Interval    10
        Rotate_Wait         30
        storage.type        filesystem
        Read_from_Head      True

    [OUTPUT]
        Name                kinesis_streams
        Match               *
        Region              us-east-1
        Stream              words
        Retry_Limit         False



    [FILTER]
        Name                parser
        Match               kube.var.log.containers.*
        Key_Name            log
        Parser              json_5aa94922-a0e1-4aaa-9adc-1a3ef6957fb7

    [FILTER]
        Name                kubernetes
        Match               kube.var.log.containers.*

        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token

        Merge_Log           On
        Merge_Log_Trim      On
        Merge_Log_Key       log_processed

        Buffer_Size         512k
        Use_Kubelet         True
        Kubelet_Port        10250

    [INPUT]
        Name                tail
        Tag                 kube.var.log.containers.*
        Exclude_Path        /var/log/containers/cloudwatch-agent*, /var/log/containers/fluent-bit*, /var/log/containers/aws-node*, /var/log/containers/kube-proxy*
        Path                /var/log/containers/*.log
        Path_Key            file_name
        Parser              docker
        DB                  /var/fluent-bit/state/flb_container-a9622cf2-c350-4796-ad90-7d74963ed565.2a1cd39a-b9b1-40ca-a10d-3d2a74849ce2.db
        DB.locking          True
        Docker_Mode         On
        Mem_Buf_Limit       50MB
        Refresh_Interval    10
        Rotate_Wait         30
        storage.type        filesystem
        Read_from_Head      True

    [OUTPUT]
        Name                kinesis_streams
        Match               *
        Region              us-west-2
        Stream              rayhan-kinesis-stream
        Retry_Limit         False



    [FILTER]
        Name                parser
        Match               kube.var.log.containers.*
        Key_Name            log
        Parser              json_a9622cf2-c350-4796-ad90-7d74963ed565

    [FILTER]
        Name                kubernetes
        Match               kube.var.log.containers.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token

        Merge_Log           On
        Merge_Log_Trim      On
        Merge_Log_Key       log_processed

        Buffer_Size         512k
        Use_Kubelet         True
        Kubelet_Port        10250


    [FILTER]
        Name                modify
        Match               *
        Set                 cluster test

    [FILTER]
        Name                lua
        Match               *
        time_as_table       on
        script              uniform-time-format.lua
        call                cb_print
    

  parsers.conf: |
    [PARSER]
        Name   json
        Format json
        Time_Key time
        Time_Format %Y-%m-%dT%H:%M:%S.%LZ

    [PARSER]
        Name         docker
        Format       json
        Time_Key     container_log_time
        Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
        Time_Keep    On

    [PARSER]
        Name        cri_regex
        Format      regex
        Regex       ^(?<container_log_time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<message>.*)$      
        Time_Key    container_log_time
        Time_Format %Y-%m-%dT%H:%M:%S.%LZ
        Time_Keep    On        


    [PARSER]
        Name        json_5aa94922-a0e1-4aaa-9adc-1a3ef6957fb7
        Format      json
        
        Time_Key    time
        Time_Format ""

    [PARSER]
        Name        json_a9622cf2-c350-4796-ad90-7d74963ed565
        Format      json
        
        Time_Key    time
        Time_Format ""

Your Environment
Amazon EKS on Amazon EC2 on Amazon Linux 2

Additional context

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions