diff --git a/testdata/parseYaml.golden b/testdata/parseYaml.golden index 8c1a439b..a5eb13dd 100644 --- a/testdata/parseYaml.golden +++ b/testdata/parseYaml.golden @@ -37,5 +37,22 @@ "---a": 2, "a": 1, "a---": 3 - } + }, + [ + { + "a": 1 + }, + "hello world\n", + 3 + ], + [ + { + "a": 1 + }, + null, + 2 + ], + [ + null + ] ] diff --git a/testdata/parseYaml.jsonnet b/testdata/parseYaml.jsonnet index bc82dc18..9910f8f2 100644 --- a/testdata/parseYaml.jsonnet +++ b/testdata/parseYaml.jsonnet @@ -32,7 +32,7 @@ ||| --- a: 1 - --- + --- a: 2 |||, @@ -42,5 +42,23 @@ ---a: 2 a---: 3 |||, + + // Scalar documents can start on the same line as the document-start marker + ||| + a: 1 + --- > + hello + world + --- 3 + |||, + + // Documents can be empty; this is interpreted as null + ||| + a: 1 + --- + --- 2 + |||, + + "---", ] ] diff --git a/yaml.go b/yaml.go index 4f08694e..a2bf3517 100644 --- a/yaml.go +++ b/yaml.go @@ -20,14 +20,10 @@ import ( "bufio" "bytes" "io" - "strings" - "unicode" "sigs.k8s.io/yaml" ) -const separator = "---" - // YAMLToJSONDecoder decodes YAML documents from an io.Reader by // separating individual documents. It first converts the YAML // body to JSON, then unmarshals the JSON. @@ -76,6 +72,7 @@ type Reader interface { // YAMLReader reads YAML type YAMLReader struct { reader Reader + buffer bytes.Buffer stream bool } @@ -86,38 +83,44 @@ func NewYAMLReader(r *bufio.Reader) *YAMLReader { } } +var docStartMarker = []byte("---") + // Read returns a full YAML document. func (r *YAMLReader) read() ([]byte, error) { - var buffer bytes.Buffer for { line, err := r.reader.Read() if err != nil && err != io.EOF { return nil, err } - sep := len([]byte(separator)) - if i := bytes.Index(line, []byte(separator)); i == 0 { - // We have a potential document terminator - i += sep - after := line[i:] - if len(strings.TrimRightFunc(string(after), unicode.IsSpace)) == 0 { + // Per https://yaml.org/spec/1.2.2/#912-document-markers + // Document content lines are forbidden to contain marker sequences. + // The marker sequences are `---` or `...` at the start of the line, + // followed by space, tab, CR, LF, or EOF. + if bytes.HasPrefix(line, docStartMarker) { + end := len(docStartMarker) + if end == len(line) || line[end] == '\n' || line[end] == ' ' || line[end] == '\t' { r.stream = true - if buffer.Len() != 0 { - return buffer.Bytes(), nil - } - if err == io.EOF { - return nil, err + if r.buffer.Len() != 0 { + out := append([]byte(nil), r.buffer.Bytes()...) + r.buffer.Reset() + // The document start marker should be included in the next document. + r.buffer.Write(line) + return out, nil } } } + if err == io.EOF { - if buffer.Len() != 0 { + if r.buffer.Len() != 0 { // If we're at EOF, we have a final, non-terminated line. Return it. - return buffer.Bytes(), nil + out := append([]byte(nil), r.buffer.Bytes()...) + r.buffer.Reset() + return out, nil } return nil, err } - buffer.Write(line) + r.buffer.Write(line) } }