Fix counting of escape sequences when splitting TXT strings

janik-cloudflare · janik-cloudflare · commit d3869e0b6e98 · 2024-03-01T13:51:24.000+01:00
`endingToTxtSlice`, used by TXT, SPF and a few other types, parses a
string such as `"hello world"` from an RR's content in a zone file.
These strings are limited to 255 characters, and `endingToTxtSlice`
automatically splits them if they're longer than that. However, it
didn't count the length correctly: escape sequences such as `\\` or
`\123` were counted as multiple characters (2 and 4 respectively in
these examples), but they should only count as one character because
they represent a single byte in wire format (which is where this 255
character limit comes from). This commit fixes that.
diff --git a/parse_test.go b/parse_test.go
@@ -1098,18 +1098,21 @@ func TestTXT(t *testing.T) {
 		}
 	}
 
-	// Test TXT record with chunk larger than 255 bytes, they should be split up, by the parser
-	s := ""
-	for i := 0; i < 255; i++ {
-		s += "a"
-	}
-	s += "b"
+	// Test TXT record with chunk larger than 255 bytes; they should be split
+	// up by the parser. Add some escape sequences too to ensure their length
+	// is counted correctly.
+	s := `\;\\\120` + strings.Repeat("a", 255) + "b"
 	rr, err = NewRR(`test.local. 60 IN TXT "` + s + `"`)
 	if err != nil {
 		t.Error("failed to parse empty-string TXT record", err)
 	}
-	if rr.(*TXT).Txt[1] != "b" {
-		t.Errorf("Txt should have two chunk, last one my be 'b', but is %s", rr.(*TXT).Txt[1])
+	if rr.(*TXT).Txt[1] != "aaab" {
+		t.Errorf("Txt should have two chunks, last one must be 'aaab', but is %s", rr.(*TXT).Txt[1])
+	}
+	rrContent := strings.Replace(rr.String(), rr.Header().String(), "", 1)
+	expectedRRContent := `";\\x` + strings.Repeat("a", 252) + `" "aaab"`
+	if expectedRRContent != rrContent {
+		t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent)
 	}
 }
 
diff --git a/scan_rr.go b/scan_rr.go
@@ -51,25 +51,21 @@ func endingToTxtSlice(c *zlexer, errstr string) ([]string, *ParseError) {
 		switch l.value {
 		case zString:
 			empty = false
-			if len(l.token) > 255 {
-				// split up tokens that are larger than 255 into 255-chunks
-				sx := []string{}
-				p, i := 0, 255
-				for {
-					if i <= len(l.token) {
-						sx = append(sx, l.token[p:i])
-					} else {
-						sx = append(sx, l.token[p:])
-						break
-
-					}
-					p, i = p+255, i+255
+			// split up tokens that are larger than 255 into 255-chunks
+			sx := []string{}
+			p := 0
+			for {
+				i := escapedStringOffset(l.token[p:], 255)
+				if i != -1 {
+					sx = append(sx, l.token[p:p+i])
+				} else {
+					sx = append(sx, l.token[p:])
+					break
+
 				}
-				s = append(s, sx...)
-				break
+				p += i
 			}
-
-			s = append(s, l.token)
+			s = append(s, sx...)
 		case zBlank:
 			if quote {
 				// zBlank can only be seen in between txt parts.
@@ -1920,3 +1916,32 @@ func (rr *APL) parse(c *zlexer, o string) *ParseError {
 	rr.Prefixes = prefixes
 	return nil
 }
+
+// escapedStringOffset finds the offset within a string (which may contain escape
+// sequences) that corresponds to a certain byte offset. If the input offset is
+// out of bounds, -1 is returned.
+func escapedStringOffset(s string, byteOffset int) int {
+	if byteOffset == 0 {
+		return 0
+	}
+
+	offset := 0
+	for i := 0; i < len(s); i++ {
+		offset += 1
+
+		// Skip escape sequences
+		if s[i] != '\\' {
+			// Not an escape sequence; nothing to do.
+		} else if isDDD(s[i+1:]) {
+			i += 3
+		} else {
+			i++
+		}
+
+		if offset >= byteOffset {
+			return i + 1
+		}
+	}
+
+	return -1
+}
diff --git a/scan_test.go b/scan_test.go
@@ -427,3 +427,31 @@ func BenchmarkZoneParser(b *testing.B) {
 		}
 	}
 }
+
+func TestEscapedStringOffset(t *testing.T) {
+	var cases = []struct {
+		input          string
+		inputOffset    int
+		expectedOffset int
+	}{
+		{"simple string with no escape sequences", 20, 20},
+		{"simple string with no escape sequences", 500, -1},
+		{`\;\088\\\;\120\\`, 0, 0},
+		{`\;\088\\\;\120\\`, 1, 2},
+		{`\;\088\\\;\120\\`, 2, 6},
+		{`\;\088\\\;\120\\`, 3, 8},
+		{`\;\088\\\;\120\\`, 4, 10},
+		{`\;\088\\\;\120\\`, 5, 14},
+		{`\;\088\\\;\120\\`, 6, 16},
+		{`\;\088\\\;\120\\`, 7, -1},
+	}
+	for i, test := range cases {
+		outputOffset := escapedStringOffset(test.input, test.inputOffset)
+		if outputOffset != test.expectedOffset {
+			t.Errorf(
+				"Test %d (input %#q offset %d) returned offset %d but expected %d",
+				i, test.input, test.inputOffset, outputOffset, test.expectedOffset,
+			)
+		}
+	}
+}

Original file line number	Diff line number	Diff line change
`@@ -1098,18 +1098,21 @@ func TestTXT(t *testing.T) {`
`1098`	`1098`	`}`
`1099`	`1099`	`}`
`1100`	`1100`
`1101`		`- // Test TXT record with chunk larger than 255 bytes, they should be split up, by the parser`
`1102`		`- s := ""`
`1103`		`- for i := 0; i < 255; i++ {`
`1104`		`- s += "a"`
`1105`		`- }`
`1106`		`- s += "b"`
	`1101`	`+ // Test TXT record with chunk larger than 255 bytes; they should be split`
	`1102`	`+ // up by the parser. Add some escape sequences too to ensure their length`
	`1103`	`+ // is counted correctly.`
	`1104`	+ s := `\;\\\120` + strings.Repeat("a", 255) + "b"
`1107`	`1105`	rr, err = NewRR(`test.local. 60 IN TXT "` + s + `"`)
`1108`	`1106`	`if err != nil {`
`1109`	`1107`	`t.Error("failed to parse empty-string TXT record", err)`
`1110`	`1108`	`}`
`1111`		`- if rr.(*TXT).Txt[1] != "b" {`
`1112`		`- t.Errorf("Txt should have two chunk, last one my be 'b', but is %s", rr.(*TXT).Txt[1])`
	`1109`	`+ if rr.(*TXT).Txt[1] != "aaab" {`
	`1110`	`+ t.Errorf("Txt should have two chunks, last one must be 'aaab', but is %s", rr.(*TXT).Txt[1])`
	`1111`	`+ }`
	`1112`	`+ rrContent := strings.Replace(rr.String(), rr.Header().String(), "", 1)`
	`1113`	+ expectedRRContent := `";\\x` + strings.Repeat("a", 252) + `" "aaab"`
	`1114`	`+ if expectedRRContent != rrContent {`
	`1115`	`+ t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent)`
`1113`	`1116`	`}`
`1114`	`1117`	`}`
`1115`	`1118`