Skip to content

Commit 42267e4

Browse files
authored
fix: big syscall data truncated may lead to failing to parse HTTP message (#274)
fix: handle big syscall data (truncated) properly When we fail to read the body, it might be due to the response being too large, causing syscall data to be missing when transferred to user space. Here, we attempt to find a boundary. If found, that's ideal and we return immediately. Otherwise, we try to locate a Fake Data Mark (FDM). When user space detects missing data from the kernel (possibly due to exceeding MAX_MSG_SIZE or situations like readv/writev where a buffer array is read/written at once), it supplements with fake data in user space. At the beginning of this fake data, an FDM is set, which is a special string. Following the FDM, the length of the supplemental fake data (minus the length of the FDM) is written.
1 parent 927d0dc commit 42267e4

File tree

8 files changed

+238
-46
lines changed

8 files changed

+238
-46
lines changed

.github/workflows/test.yml

+17
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,23 @@ jobs:
161161
sudo chmod +x /usr/local/bin/docker-compose
162162
docker-compose --version
163163
164+
- name: Test Truncated Data parsing
165+
uses: cilium/little-vm-helper@97c89f004bd0ab4caeacfe92ebc956e13e362e6b # v0.0.19
166+
if: ${{ !contains(fromJSON('["4.19-20240912.022020", "5.4-20240912.022020"]'), matrix.kernel) }}
167+
with:
168+
provision: 'false'
169+
cmd: |
170+
set -euxo pipefail
171+
uname -a
172+
cat /etc/issue
173+
pushd /host
174+
if [ -f "/var/lib/kyanos/btf/current.btf" ]; then
175+
bash /host/testdata/test_truncated_data.sh 'sudo /host/kyanos/kyanos $kyanos_log_option --btf /var/lib/kyanos/btf/current.btf'
176+
else
177+
bash /host/testdata/test_truncated_data.sh 'sudo /host/kyanos/kyanos $kyanos_log_option'
178+
fi
179+
popd
180+
164181
- name: Test Kafka
165182
uses: cilium/little-vm-helper@97c89f004bd0ab4caeacfe92ebc956e13e362e6b # v0.0.19
166183
with:

agent/conn/conntrack.go

+34-2
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,29 @@ func (c *Connection4) OnSslDataEvent(data []byte, event *bpf.SslData, recordChan
507507
}
508508
}
509509
}
510+
511+
func isSyscallFunctionMultiMessage(f bpf.AgentSourceFunctionT) bool {
512+
return f == bpf.AgentSourceFunctionTKSyscallSendMMsg ||
513+
f == bpf.AgentSourceFunctionTKSyscallRecvMMsg ||
514+
f == bpf.AgentSourceFunctionTKSyscallWriteV ||
515+
f == bpf.AgentSourceFunctionTKSyscallReadV
516+
}
517+
func fillSyscallDataIfNeeded(data []byte, event *bpf.SyscallEventData, c *Connection4) []byte {
518+
519+
if event.SyscallEvent.Ke.Len > event.SyscallEvent.BufSize {
520+
common.ConntrackLog.Debugf("syscall read/write data too len and some data can't be captured, so we need to fill a fake data, len: %d, bufsize: %d", event.SyscallEvent.Ke.Len, event.SyscallEvent.BufSize)
521+
522+
fakeData, ok := protocol.MakeNewFakeData(event.SyscallEvent.Ke.Len - event.SyscallEvent.BufSize)
523+
if !ok {
524+
fakeData = make([]byte, event.SyscallEvent.Ke.Len-event.SyscallEvent.BufSize)
525+
}
526+
data = append(data, fakeData...)
527+
return data
528+
} else {
529+
return data
530+
}
531+
}
532+
510533
func (c *Connection4) OnSyscallEvent(data []byte, event *bpf.SyscallEventData, recordChannel chan RecordWithConn) bool {
511534
addedToBuffer := true
512535
if len(data) > 0 {
@@ -515,13 +538,23 @@ func (c *Connection4) OnSyscallEvent(data []byte, event *bpf.SyscallEventData, r
515538
common.ConntrackLog.Warnf("%s is ssl, but receive syscall event with data!", c.ToString())
516539
}
517540
} else {
541+
data = fillSyscallDataIfNeeded(data, event, c)
518542
addedToBuffer = c.addDataToBufferAndTryParse(data, &event.SyscallEvent.Ke)
519543
}
520544
} else if event.SyscallEvent.GetSourceFunction() == bpf.AgentSourceFunctionTKSyscallSendfile {
521545
// sendfile has no data, so we need to fill a fake data
522546
common.ConntrackLog.Debug("sendfile has no data, so we need to fill a fake data")
523-
fakeData := make([]byte, event.SyscallEvent.Ke.Len)
547+
fakeData, ok := protocol.MakeNewFakeData(event.SyscallEvent.Ke.Len)
548+
if !ok {
549+
fakeData = make([]byte, event.SyscallEvent.Ke.Len)
550+
}
524551
addedToBuffer = c.addDataToBufferAndTryParse(fakeData, &event.SyscallEvent.Ke)
552+
} else if isSyscallFunctionMultiMessage(event.SyscallEvent.GetSourceFunction()) && !c.ssl {
553+
common.ConntrackLog.Debug("syscall read/write multiple message and some data can't be captured, so we need to fill a fake data")
554+
fakeData, ok := protocol.MakeNewFakeData(event.SyscallEvent.Ke.Len)
555+
if ok {
556+
addedToBuffer = c.addDataToBufferAndTryParse(fakeData, &event.SyscallEvent.Ke)
557+
}
525558
}
526559
if !addedToBuffer {
527560
return false
@@ -531,7 +564,6 @@ func (c *Connection4) OnSyscallEvent(data []byte, event *bpf.SyscallEventData, r
531564
parser := c.GetProtocolParser(c.Protocol)
532565
if parser == nil {
533566
return true
534-
panic("no protocol parser!")
535567
}
536568

537569
records := parser.Match(c.ReqQueue, c.RespQueue)

agent/conn/processor.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,10 @@ func (p *Processor) run() {
309309
case event := <-p.firstPacketsEvents:
310310
p.handleFirstPacketEvent(event, recordChannel)
311311
case <-ticker.C:
312-
p.processTimedKernEvents(recordChannel)
313-
p.processTimedSyscallEvents(recordChannel)
314312
p.processTimedSslEvents(recordChannel)
313+
p.processTimedKernEvents(recordChannel)
315314
p.processOldFirstPacketEvents(recordChannel)
315+
p.processTimedSyscallEvents(recordChannel)
316316
}
317317
}
318318
}

agent/protocol/fakedata.go

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package protocol
2+
3+
import "strings"
4+
5+
const fakeDataMarkPrefix = "__FAKE_DATA__"
6+
const fakeDataMarkLen = len(fakeDataMarkPrefix) + 4
7+
8+
func MakeNewFakeData(size uint32) ([]byte, bool) {
9+
if size < uint32(fakeDataMarkLen) {
10+
return nil, false
11+
} else {
12+
buf := make([]byte, size)
13+
copy(buf, fakeDataMarkPrefix)
14+
size -= uint32(fakeDataMarkLen)
15+
lenByte := []byte{byte(size >> 24), byte(size >> 16), byte(size >> 8), byte(size)}
16+
buf[fakeDataMarkLen] = lenByte[0]
17+
buf[fakeDataMarkLen+1] = lenByte[1]
18+
buf[fakeDataMarkLen+2] = lenByte[2]
19+
buf[fakeDataMarkLen+3] = lenByte[3]
20+
return buf, true
21+
}
22+
}
23+
24+
func fakeDataMarkIndex(buf []byte) (int, bool) {
25+
if len(buf) < fakeDataMarkLen {
26+
return -1, false
27+
}
28+
idx := strings.Index(string(buf), fakeDataMarkPrefix)
29+
if idx < 0 {
30+
return -1, false
31+
}
32+
return idx, true
33+
}
34+
35+
func getFakeDataSize(buf []byte, pos int) uint32 {
36+
buf = buf[pos:]
37+
if len(buf) < fakeDataMarkLen {
38+
return 0
39+
} else {
40+
return uint32(buf[fakeDataMarkLen])<<24 | uint32(buf[fakeDataMarkLen+1])<<16 | uint32(buf[fakeDataMarkLen+2])<<8 | uint32(buf[fakeDataMarkLen+3])
41+
}
42+
}

agent/protocol/http.go

+68-33
Original file line numberDiff line numberDiff line change
@@ -127,55 +127,90 @@ func (h *HTTPStreamParser) ParseRequest(buf string, messageType MessageType, tim
127127
}
128128
}
129129

130-
func (h HTTPStreamParser) ParseResponse(buf string, messageType MessageType, timestamp uint64, seq uint64) ParseResult {
130+
func (h *HTTPStreamParser) ParseResponse(buf string, messageType MessageType, timestamp uint64, seq uint64, streamBuffer *buffer.StreamBuffer) ParseResult {
131131
reader := strings.NewReader(buf)
132132
bufioReader := bufio.NewReader(reader)
133133
resp, err := http.ReadResponse(bufioReader, nil)
134134
parseResult := ParseResult{}
135135
if err != nil {
136-
if err == io.EOF || err == io.ErrUnexpectedEOF {
137-
return ParseResult{
138-
ParseState: NeedsMoreData,
139-
}
140-
} else {
141-
return ParseResult{
142-
ParseState: Invalid,
143-
}
136+
return h.handleReadResponseError(err, buf, streamBuffer, messageType, timestamp, seq)
137+
}
138+
139+
respBody, err := io.ReadAll(resp.Body)
140+
if err != nil {
141+
return h.handleReadBodyError(err, buf, streamBuffer, messageType, timestamp, seq)
142+
}
143+
144+
readIndex := common.GetBufioReaderReadIndex(bufioReader)
145+
if readIndex == 0 && len(respBody) > 0 {
146+
readIndex = len(buf)
147+
} else if readIndex == 0 {
148+
return ParseResult{
149+
ParseState: NeedsMoreData,
144150
}
145-
} else {
146-
respBody, err := io.ReadAll(resp.Body)
147-
if err != nil {
148-
if err == io.EOF || err == io.ErrUnexpectedEOF {
149-
return ParseResult{
150-
ParseState: NeedsMoreData,
151-
}
152-
} else {
153-
return ParseResult{
154-
ParseState: Invalid,
155-
}
156-
}
151+
}
152+
parseResult.ReadBytes = readIndex
153+
parseResult.ParsedMessages = []ParsedMessage{
154+
&ParsedHttpResponse{
155+
FrameBase: NewFrameBase(timestamp, readIndex, seq),
156+
buf: []byte(buf[:readIndex]),
157+
},
158+
}
159+
parseResult.ParseState = Success
160+
return parseResult
161+
}
162+
163+
func (h *HTTPStreamParser) handleReadResponseError(err error, buf string, streamBuffer *buffer.StreamBuffer, messageType MessageType, timestamp uint64, seq uint64) ParseResult {
164+
if err == io.EOF || err == io.ErrUnexpectedEOF {
165+
return ParseResult{
166+
ParseState: NeedsMoreData,
157167
}
158-
readIndex := common.GetBufioReaderReadIndex(bufioReader)
159-
if readIndex == 0 && len(respBody) > 0 {
160-
readIndex = len(buf)
161-
} else if readIndex == 0 {
162-
return ParseResult{
163-
ParseState: NeedsMoreData,
164-
}
168+
} else {
169+
return ParseResult{
170+
ParseState: Invalid,
165171
}
166-
parseResult.ReadBytes = readIndex
172+
}
173+
}
174+
175+
func (h *HTTPStreamParser) handleReadBodyError(err error, buf string, streamBuffer *buffer.StreamBuffer, messageType MessageType, timestamp uint64, seq uint64) ParseResult {
176+
parseResult := ParseResult{}
177+
boundary := h.FindBoundary(streamBuffer, messageType, 0)
178+
if boundary > 0 {
179+
parseResult.ReadBytes = boundary
167180
parseResult.ParsedMessages = []ParsedMessage{
168181
&ParsedHttpResponse{
169-
FrameBase: NewFrameBase(timestamp, readIndex, seq),
170-
buf: []byte(buf[:readIndex]),
182+
FrameBase: NewFrameBase(timestamp, boundary, seq),
183+
buf: []byte(buf[:boundary]),
171184
},
172185
}
173186
parseResult.ParseState = Success
174187
return parseResult
188+
} else if fakeDataIdx, _ := fakeDataMarkIndex([]byte(buf)); fakeDataIdx != -1 {
189+
fakeDataSize := getFakeDataSize([]byte(buf), fakeDataIdx)
190+
if len(buf) >= fakeDataIdx+int(fakeDataSize)+fakeDataMarkLen {
191+
parseResult.ReadBytes = fakeDataIdx + int(fakeDataSize) + fakeDataMarkLen
192+
parseResult.ParsedMessages = []ParsedMessage{
193+
&ParsedHttpResponse{
194+
FrameBase: NewFrameBase(timestamp, parseResult.ReadBytes, seq),
195+
buf: []byte(buf[:parseResult.ReadBytes]),
196+
},
197+
}
198+
parseResult.ParseState = Success
199+
return parseResult
200+
}
201+
}
202+
if err == io.EOF || err == io.ErrUnexpectedEOF {
203+
return ParseResult{
204+
ParseState: NeedsMoreData,
205+
}
206+
} else {
207+
return ParseResult{
208+
ParseState: Invalid,
209+
}
175210
}
176211
}
177212

178-
func (h HTTPStreamParser) ParseStream(streamBuffer *buffer.StreamBuffer, messageType MessageType) ParseResult {
213+
func (h *HTTPStreamParser) ParseStream(streamBuffer *buffer.StreamBuffer, messageType MessageType) ParseResult {
179214
head := streamBuffer.Head()
180215
buf := string(head.Buffer())
181216
ts, ok := streamBuffer.FindTimestampBySeq(head.LeftBoundary())
@@ -188,7 +223,7 @@ func (h HTTPStreamParser) ParseStream(streamBuffer *buffer.StreamBuffer, message
188223
case Request:
189224
return h.ParseRequest(buf, messageType, ts, head.LeftBoundary())
190225
case Response:
191-
return h.ParseResponse(buf, messageType, ts, head.LeftBoundary())
226+
return h.ParseResponse(buf, messageType, ts, head.LeftBoundary(), streamBuffer)
192227
default:
193228
panic("messageType invalid")
194229
}

agent/protocol/http_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func TestParseResponse(t *testing.T) {
153153

154154
parser := protocol.HTTPStreamParser{}
155155

156-
parseResult := parser.ParseResponse(httpMessage, protocol.Response, 10, 20)
156+
parseResult := parser.ParseResponse(httpMessage, protocol.Response, 10, 20, buffer)
157157

158158
assert.Equal(t, protocol.Success, parseResult.ParseState)
159159
assert.Equal(t, 1, len(parseResult.ParsedMessages))

bpf/data_common.h

+5-8
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,6 @@ static bool __always_inline report_conn_evt(void* ctx, struct conn_info_t *conn_
124124
}
125125

126126
static __inline bool should_trace_conn(struct conn_info_t *conn_info) {
127-
// conn_info->laddr.in4.sin_port
128-
// bpf_printk("conn_info->laddr.in4.sin_port: %d, %d",
129-
// conn_info->laddr.in4.sin_port,conn_info->raddr.in4.sin_port);
130-
// if (conn_info->laddr.in4.sin_port == target_port ||
131-
// conn_info->raddr.in4.sin_port == target_port) {
132-
// return true;
133-
// }
134-
135127
return conn_info->protocol != kProtocolUnknown && conn_info->no_trace <= traceable ;
136128
}
137129

@@ -205,6 +197,8 @@ static void __always_inline report_syscall_buf(void* ctx, uint64_t seq, struct c
205197
evt->buf_size = amount_copied;
206198
size_t __len = sizeof(struct kern_evt) + sizeof(uint32_t) + amount_copied;
207199
bpf_perf_event_output(ctx, &syscall_rb, BPF_F_CURRENT_CPU, evt, __len);
200+
201+
// bpf_printk("len:%d, amount_copied:%d", len, amount_copied);
208202
}
209203
static void __always_inline report_syscall_evt(void* ctx, uint64_t seq, struct conn_id_s_t *conn_id_s, uint32_t len, enum step_t step, struct data_args *args, bool prepend_length_header, uint32_t length_header) {
210204
report_syscall_buf(ctx, seq, conn_id_s, len, step, args->start_ts, args->end_ts - args->start_ts, args->buf, args->source_fn, prepend_length_header, length_header);
@@ -266,6 +260,9 @@ static void __always_inline report_syscall_evt_vecs(void* ctx, uint64_t seq, str
266260
bytes_sent += iov_size;
267261
seq += iov_size;
268262
}
263+
if (bytes_sent < total_size) {
264+
report_syscall_buf_without_data(ctx, seq, conn_id_s, total_size - bytes_sent, step, args->start_ts, args->end_ts - args->start_ts, args->source_fn);
265+
}
269266
}
270267

271268

testdata/test_truncated_data.sh

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env bash
2+
. $(dirname "$0")/common.sh
3+
set -ex
4+
5+
CMD="$1"
6+
DOCKER_REGISTRY="$2"
7+
FILE_PREFIX="/tmp/kyanos"
8+
CLIENT_LNAME="${FILE_PREFIX}_truncated_client.log"
9+
SERVER_LNAME="${FILE_PREFIX}_truncated_server.log"
10+
11+
12+
function test_client() {
13+
if [ -z "$DOCKER_REGISTRY" ]; then
14+
IMAGE_NAME="lobehub/lobe-chat:v1.46.7"
15+
else
16+
IMAGE_NAME=$DOCKER_REGISTRY"/lobehub/lobe-chat:v1.46.7"
17+
fi
18+
docker pull "$IMAGE_NAME"
19+
20+
cname='lobe'
21+
port=3210
22+
docker rm -f $cname
23+
cid1=$(docker run --name $cname -p $port:$port -d "$IMAGE_NAME")
24+
export cid1
25+
echo $cid1
26+
27+
timeout 30 ${CMD} watch --debug-output http --remote-ports $port 2>&1 | tee "${CLIENT_LNAME}" &
28+
sleep 15
29+
curl http://localhost:3210
30+
wait
31+
32+
cat "${CLIENT_LNAME}"
33+
docker rm -f $cid1 || true
34+
check_patterns_in_file "${CLIENT_LNAME}" "localhost:3210"
35+
}
36+
37+
38+
function test_server() {
39+
if [ -z "$DOCKER_REGISTRY" ]; then
40+
IMAGE_NAME="lobehub/lobe-chat:v1.46.7"
41+
else
42+
IMAGE_NAME=$DOCKER_REGISTRY"/lobehub/lobe-chat:v1.46.7"
43+
fi
44+
docker pull "$IMAGE_NAME"
45+
46+
cname='lobe'
47+
port=3210
48+
docker rm -f $cname
49+
cid1=$(docker run --name $cname -p $port:$port -d "$IMAGE_NAME")
50+
export cid1
51+
echo $cid1
52+
53+
timeout 30 ${CMD} watch --debug-output http --local-ports $port 2>&1 | tee "${SERVER_LNAME}" &
54+
sleep 15
55+
curl http://localhost:3210
56+
wait
57+
58+
cat "${SERVER_LNAME}"
59+
docker rm -f $cid1 || true
60+
check_patterns_in_file "${SERVER_LNAME}" "localhost:3210"
61+
}
62+
63+
64+
function main() {
65+
test_client
66+
test_server
67+
}
68+
69+
main

0 commit comments

Comments
 (0)