Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
23e80ea
Configure max sizes
jsoriano Feb 11, 2022
ee16618
Add helper types for content type and file sizes
jsoriano Feb 11, 2022
a80b486
Check size limits based on content size
jsoriano Feb 11, 2022
c798d32
Fix file size yaml parsing
jsoriano Feb 11, 2022
84b6ffc
Fix yaml marshaler
jsoriano Feb 16, 2022
02ca9ad
Add fuzz tests
jsoriano Feb 16, 2022
e6bdd5a
Validate max size
jsoriano Feb 17, 2022
60ec912
Read item content only if it is going to be used
jsoriano Feb 17, 2022
7c00668
Add limits to files referenced in relative paths
jsoriano Feb 17, 2022
f7c781f
Check limit of files in folder
jsoriano Feb 17, 2022
fd71e99
Global limit per file
jsoriano Feb 17, 2022
5257b7d
Validate limit of fields
jsoriano Feb 17, 2022
dd3c3f6
Fix validation of nested fields
jsoriano Feb 17, 2022
a0ed324
Fix linting
jsoriano Feb 17, 2022
a64bd33
strings.Cut is not available yet :)
jsoriano Feb 17, 2022
0df97f3
Merge remote-tracking branch 'origin/main' into package-limits
jsoriano Feb 21, 2022
2f87d26
Define limits in the spec
jsoriano Feb 21, 2022
9b909f3
Propagate limits to children content
jsoriano Feb 22, 2022
96b7804
Extend comments on spec types
jsoriano Feb 22, 2022
c373452
Linting
jsoriano Feb 22, 2022
050dd75
Implement global limits
jsoriano Feb 22, 2022
2c4c8ac
Linting
jsoriano Feb 22, 2022
b524c92
Use fs.FS
jsoriano Feb 22, 2022
38c0256
Merge remote-tracking branch 'origin/main' into package-limits
jsoriano Feb 24, 2022
9453025
Move limit to data stream fields to the spec
jsoriano Feb 24, 2022
3ca2145
Add tests
jsoriano Feb 24, 2022
48e4b51
Refactor fs mock
jsoriano Feb 28, 2022
f3efa84
More tests for limits
jsoriano Feb 28, 2022
e462211
Add changelog entry
jsoriano Feb 28, 2022
a0f59d3
Adjust test file size
jsoriano Feb 28, 2022
43a3811
Feedback from Review
jsoriano Mar 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea/
temp/
temp/
fuzz
76 changes: 76 additions & 0 deletions code/go/internal/spectypes/contenttype.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package spectypes

import (
"encoding/json"
"fmt"
"mime"

"gopkg.in/yaml.v3"
)

// ContentType contains a content media type with its parameters.
type ContentType struct {
MediaType string
Params map[string]string
}

// Ensure ContentType implements these interfaces.
var (
_ json.Marshaler = new(ContentType)
_ json.Unmarshaler = new(ContentType)
_ yaml.Marshaler = new(ContentType)
_ yaml.Unmarshaler = new(ContentType)
)

// MarshalJSON implements the json.Marshaler interface for ContentType. Returned
// value is a string representation of the content media type and its parameters.
func (t ContentType) MarshalJSON() ([]byte, error) {
return []byte(`"` + t.String() + `"`), nil
}

// MarshalYAML implements the json.Marshaler interface for ContentType. Returned
// value is a string representation of the content media type and its parameters.
func (t ContentType) MarshalYAML() (interface{}, error) {
return t.String(), nil
}

// UnmarshalJSON implements the json.Marshaler interface for ContentType.
func (t *ContentType) UnmarshalJSON(d []byte) error {
var raw string
err := json.Unmarshal(d, &raw)
if err != nil {
return err
}

return t.unmarshalString(raw)
}

// UnmarshalYAML implements the yaml.Marshaler interface for ContentType.
func (t *ContentType) UnmarshalYAML(value *yaml.Node) error {
// For some reason go-yaml doesn't like the UnmarshalJSON function above.
return t.unmarshalString(value.Value)
}

func (t *ContentType) unmarshalString(text string) error {
mediatype, params, err := mime.ParseMediaType(text)
if err != nil {
return err
}
if mime.FormatMediaType(mediatype, params) == "" {
// Bug in mime library? Happens when parsing something like "0;*0=0"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have any deeper knowledge of this. Did you find it by accident with unit tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return fmt.Errorf("invalid token in media type")
}

t.MediaType = mediatype
t.Params = params
return nil
}

// String returns a string representation of the content type.
func (t ContentType) String() string {
return mime.FormatMediaType(t.MediaType, t.Params)
}
120 changes: 120 additions & 0 deletions code/go/internal/spectypes/contenttype_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package spectypes

import (
"encoding/json"
"testing"

"gopkg.in/yaml.v3"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestContentTypeMarshalJSON(t *testing.T) {
jsonContentType := ContentType{"application/json", nil}
yamlContentType := ContentType{"application/x-yaml", map[string]string{
"require-document-dashes": "true",
}}

cases := []struct {
contentType ContentType
expected string
}{
{ContentType{}, `""`},
{jsonContentType, `"application/json"`},
{yamlContentType, `"application/x-yaml; require-document-dashes=true"`},
}

for _, c := range cases {
t.Run(c.expected, func(t *testing.T) {
d, err := json.Marshal(c.contentType)
require.NoError(t, err)
assert.Equal(t, c.expected, string(d))
})
}
}

func TestContentTypeMarshalYAML(t *testing.T) {
jsonContentType := ContentType{"application/json", nil}
yamlContentType := ContentType{"application/x-yaml", map[string]string{
"require-document-dashes": "true",
}}

cases := []struct {
contentType ContentType
expected string
}{
{ContentType{}, "\"\"\n"},
{jsonContentType, "application/json\n"},
{yamlContentType, "application/x-yaml; require-document-dashes=true\n"},
}

for _, c := range cases {
t.Run(c.expected, func(t *testing.T) {
d, err := yaml.Marshal(c.contentType)
require.NoError(t, err)
assert.Equal(t, c.expected, string(d))
})
}
}

func TestContentTypeUnmarshal(t *testing.T) {
t.Run("json", func(t *testing.T) {
testContentTypeUnmarshalFormat(t, json.Unmarshal)
})
t.Run("yaml", func(t *testing.T) {
testContentTypeUnmarshalFormat(t, yaml.Unmarshal)
})
}

func testContentTypeUnmarshalFormat(t *testing.T, unmarshaler func([]byte, interface{}) error) {
cases := []struct {
json string
expectedType string
expectedParams map[string]string
valid bool
}{
{`"application/json"`, "application/json", nil, true},
{
`"application/x-yaml; require-document-dashes=true"`,
"application/x-yaml",
map[string]string{"require-document-dashes": "true"},
true,
},
{
`"application/x-yaml; require-document-dashes=true; charset=utf-8"`,
"application/x-yaml",
map[string]string{
"require-document-dashes": "true",
"charset": "utf-8",
},
true,
},
{`"application`, "", nil, false},
{`""`, "", nil, false},
{`"application/json; charset"`, "", nil, false},
}

for _, c := range cases {
t.Run(c.json, func(t *testing.T) {
var found ContentType
err := unmarshaler([]byte(c.json), &found)
if c.valid {
require.NoError(t, err)
assert.Equal(t, c.expectedType, found.MediaType)
if len(c.expectedParams) == 0 {
assert.Empty(t, found.Params)
} else {
assert.EqualValues(t, c.expectedParams, found.Params)
}
} else {
t.Log(found)
require.Error(t, err)
}
})
}
}
133 changes: 133 additions & 0 deletions code/go/internal/spectypes/filesize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package spectypes

import (
"encoding/json"
"fmt"
"regexp"
"strconv"

"gopkg.in/yaml.v3"
)

// Common units for file sizes.
const (
Byte = FileSize(1)
KiloByte = 1024 * Byte
MegaByte = 1024 * KiloByte
)

const (
byteString = "B"
kiloByteString = "KB"
megaByteString = "MB"
)

// FileSize represents the size of a file.
type FileSize uint64

// Ensure FileSize implements these interfaces.
var (
_ json.Marshaler = new(FileSize)
_ json.Unmarshaler = new(FileSize)
_ yaml.Marshaler = new(FileSize)
_ yaml.Unmarshaler = new(FileSize)
)

func parseFileSizeInt(s string) (uint64, error) {
// os.FileInfo reports size as int64, don't support bigger numbers.
maxBitSize := 63
return strconv.ParseUint(s, 10, maxBitSize)
}

// MarshalJSON implements the json.Marshaler interface for FileSize, it returns
// the string representation in a format that can be unmarshaled back to an
// equivalent value.
func (s FileSize) MarshalJSON() ([]byte, error) {
return []byte(`"` + s.String() + `"`), nil
}

// MarshalYAML implements the yaml.Marshaler interface for FileSize, it returns
// the string representation in a format that can be unmarshaled back to an
// equivalent value.
func (s FileSize) MarshalYAML() (interface{}, error) {
return s.String(), nil
}

// UnmarshalJSON implements the json.Unmarshaler interface for FileSize.
func (s *FileSize) UnmarshalJSON(d []byte) error {
// Support unquoted plain numbers.
bytes, err := parseFileSizeInt(string(d))
if err == nil {
*s = FileSize(bytes)
return nil
}

var text string
err = json.Unmarshal(d, &text)
if err != nil {
return err
}

return s.unmarshalString(text)
}

// UnmarshalYAML implements the yaml.Unmarshaler interface for FileSize.
func (s *FileSize) UnmarshalYAML(value *yaml.Node) error {
// Support unquoted plain numbers.
bytes, err := parseFileSizeInt(value.Value)
if err == nil {
*s = FileSize(bytes)
return nil
}

return s.unmarshalString(value.Value)
}

var bytesPattern = regexp.MustCompile(fmt.Sprintf(`^(\d+)(%s|%s|%s|)$`, byteString, kiloByteString, megaByteString))

func (s *FileSize) unmarshalString(text string) error {
match := bytesPattern.FindStringSubmatch(text)
if len(match) < 3 {
return fmt.Errorf("invalid format for file size (%s)", text)
}

q, err := parseFileSizeInt(match[1])
if err != nil {
return fmt.Errorf("invalid format for file size (%s): %w", text, err)
}

unit := match[2]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be a good idea to have less code here.

But there are a couple of small concerns I have with humanize:

  • It is strict with the interpretation of unit prefixes (Kilo means 1000, KiByte needs to be used for 1024), we would need to change from MB to MiB for example.
  • Related to previous point, given that the parser supports units with both prefixes, we may end up having a mix in the spec (250MB and 250MiB would be correctly parsed, but they don't mean the same).
  • It rounds-up when giving the string representation ("1024B" and "1025B" are both converted to "1.0 KiB"). We need to be able to convert back and forth because we parse yaml and convert it to json, and we would be losing precision. We could workaround this problem by marshalling always to the number of bytes, and using humanize to parse and for String(). Or we could ignore this lose of precission.

Wdyt, should I give a try to humanize?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to previous point, given that the parser supports units with both prefixes, we may end up having a mix in the spec (250MB and 250MiB would be correctly parsed, but they don't mean the same).

they would be parsed correctly, but would mean different amount of units: 25010241024 vs 25010001000), so I would say it isn't a bug.

It rounds-up when giving the string representation ("1024B" and "1025B" are both converted to "1.0 KiB"). We need to be able to convert back and forth because we parse yaml and convert it to json, and we would be losing precision. We could workaround this problem by marshalling always to the number of bytes, and using humanize to parse and for String(). Or we could ignore this lose of precission.

It rounds-up when giving the string representation ("1024B" and "1025B" are both converted to "1.0 KiB").

Definitely, we can't lose precision. I would give the go-humanize library a try only if there is an option to strictly convert values, without any rounding. If we can't achieve this, let's stick to what you implemented.

BTW I'm fine with improving it in follow-ups as what you have here works well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to previous point, given that the parser supports units with both prefixes, we may end up having a mix in the spec (250MB and 250MiB would be correctly parsed, but they don't mean the same).

they would be parsed correctly, but would mean different amount of units: 250_1024_1024 vs 250_1000_1000), so I would say it isn't a bug.

Well, 250MB are parsed as 250000000 bytes, while 250MiB are parsed as 262144000 bytes, a difference of 11MiB, or 12MB.

Let's revisit this in follow ups if needed.

switch unit {
case megaByteString:
*s = FileSize(q) * MegaByte
case kiloByteString:
*s = FileSize(q) * KiloByte
case byteString, "":
*s = FileSize(q) * Byte
default:
return fmt.Errorf("invalid unit for filesize (%s): %s", text, unit)
}

return nil
}

// String returns the string representation of the FileSize.
func (s FileSize) String() string {
format := func(q FileSize, unit string) string {
return fmt.Sprintf("%d%s", q, unit)
}

if s >= MegaByte && (s%MegaByte == 0) {
return format(s/MegaByte, megaByteString)
}

if s >= KiloByte && (s%KiloByte == 0) {
return format(s/KiloByte, kiloByteString)
}

return format(s, byteString)
}
Loading