Merge pull request #22 from bvobart/testing-linter

Implements Testing linter and implements a test suite framework for mllint
bvobart · Jun 25, 2021 · 183a6f9 · 183a6f9
2 parents 9e88a75 + 5876ada
commit 183a6f9
Show file tree

Hide file tree

Showing 29 changed files with 1,971 additions and 38 deletions.
diff --git a/.goreleaser.yml b/.goreleaser.yml
@@ -24,6 +24,7 @@ dockers:
       - --build-arg=python_version=3.6
     extra_files:
       - build/requirements-tools.txt
+    use_buildx: true
 
   - image_templates:
       - bvobart/mllint:latest-py3.7
@@ -32,6 +33,7 @@ dockers:
       - --build-arg=python_version=3.7
     extra_files:
       - build/requirements-tools.txt
+    use_buildx: true
 
   - image_templates:
       - bvobart/mllint:latest-py3.8
@@ -40,6 +42,7 @@ dockers:
       - --build-arg=python_version=3.8
     extra_files:
       - build/requirements-tools.txt
+    use_buildx: true
 
   - image_templates:
       - bvobart/mllint:latest
@@ -50,6 +53,7 @@ dockers:
       - --build-arg=python_version=3.9
     extra_files:
       - build/requirements-tools.txt
+    use_buildx: true
 
 archives:
   - format: tar.gz

diff --git a/categories/categories.go b/categories/categories.go
@@ -109,10 +109,34 @@ and may also include dynamic checks on the data that is currently in the reposit
 var Testing = api.Category{
 	Name: "Testing",
 	Slug: "testing",
-	Description: `This category deals with the way your project is being tested.
-
-It is not implemented yet. The idea is that this will contain some rules to check whether you have tests, what your latest test results were, how good your test coverage is,
-and probably also something on whether you're actually testing your ML code or not.`,
+	Description: `Testing in the context of Software Engineering refers to the practice of writing automated checks to ensure that something works as intended.
+Testing ML systems is, however, different from testing traditional software systems.
+In traditional software systems, **humans write** all the logic that processes whatever data the system handles,
+whereas in ML systems, **humans provide examples** (training data) of what we want the desired behaviour to be and the **machine learns** the logic required to produce this behaviour.
+
+Properly testing ML systems is not only limited to testing the output behaviour of the system, but also entails, e.g.:
+- ensuring that data preparation is done correctly and consistently
+- ensuring that data featurisation is done correctly and consistently
+- ensuring that the data is fed into the learning process correctly, e.g. testing helper functions
+- ensuring that the learned logic consistently and accurately produces the desired behaviour
+
+This category contains several rules relating to whether and to what degree you are testing the code of your ML project.
+Per default, ` + "`mllint`" + ` expects **at least one test file** to be implemented in your project ` + "(i.e. a Python file starting with `test_` or ending with `_test.py`)" + `
+and recommends that you have **at least 1 test file** for **every 4 non-test files**, though both these targets are configurable.
+See the default configuration and the description of rule ` + "`testing/has-tests`" + ` for more information on how to configure this.
+
+For ` + "`mllint`" + ` to be able to assess whether your project's tests pass and what coverage these tests achieve,
+we will **not** actually run your tests. Instead, we expect you to run your project's tests yourself and provide 
+the filenames to a JUnit-compatible XML test report and a Cobertura-compatible XML coverage report in your project's ` + "`mllint`" + ` configuration.
+See the descriptions of rules ` + "`testing/pass` and `testing/coverage`" + ` for more information on how to generate and configure these.
+
+---
+
+Here are some links to interesting blogs that give more in-depth information about different techniques for testing ML systems:
+- [MadeWithML - Testing ML Systems: Code, Data and Models](https://madewithml.com/courses/mlops/testing/)
+- [Jeremy Jordan - Effective testing for machine learning systems](https://www.jeremyjordan.me/testing-ml/)
+
+> *"When writing tests for machine learning systems, one must not only test the student (the ML model), but also the teacher (the code that produces the ML model)." — Bart van Oort (bvobart)*`,
 }
 
 var ContinuousIntegration = api.Category{

diff --git a/commands/config.go b/commands/config.go
@@ -6,12 +6,13 @@ import (
 	"github.com/fatih/color"
 	"github.com/google/go-cmp/cmp"
 	"github.com/spf13/cobra"
-	"gopkg.in/yaml.v3"
 
 	"github.com/bvobart/mllint/config"
 	"github.com/bvobart/mllint/utils"
 )
 
+var outputToml = false
+
 func NewConfigCommand() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "config [dir]",
@@ -28,6 +29,7 @@ Specifying %s or %s will cause this command to purely print the current or defau
 		RunE: runConfig,
 		Args: cobra.MaximumNArgs(1),
 	}
+	cmd.Flags().BoolVarP(&outputToml, "toml", "t", false, "Specify this flag to output the config in TOML format instead of the default YAML output.")
 	return cmd
 }
 
@@ -50,7 +52,12 @@ func runConfig(_ *cobra.Command, args []string) error {
 	shush(func() { fmt.Print("---\n\n") })
 
 	// print the config
-	output, err := yaml.Marshal(conf)
+	var output []byte
+	if outputToml {
+		output, err = conf.TOML()
+	} else {
+		output, err = conf.YAML()
+	}
 	if err != nil {
 		return err
 	}
@@ -63,7 +70,13 @@ func runConfig(_ *cobra.Command, args []string) error {
 func runConfigDefault() error {
 	shush(func() { color.Green("Using default configuration\n\n") })
 
-	output, err := yaml.Marshal(config.Default())
+	var output []byte
+	var err error
+	if outputToml {
+		output, err = config.Default().TOML()
+	} else {
+		output, err = config.Default().YAML()
+	}
 	if err != nil {
 		return err
 	}

diff --git a/config/config.go b/config/config.go
@@ -18,6 +18,7 @@ type Config struct {
 	Rules       RuleConfig        `yaml:"rules" toml:"rules"`
 	Git         GitConfig         `yaml:"git" toml:"git"`
 	CodeQuality CodeQualityConfig `yaml:"code-quality" toml:"code-quality"`
+	Testing     TestingConfig     `yaml:"testing" toml:"testing"`
 }
 
 // RuleConfig contains info about which rules are enabled / disabled.
@@ -32,19 +33,83 @@ type GitConfig struct {
 	MaxFileSize uint64 `yaml:"maxFileSize" toml:"maxFileSize"`
 }
 
+// CodeQualityConfig contains the configuration for the CQ linters used in the Code Quality category
 type CodeQualityConfig struct {
 	// Defines all code linters to use in the Code Quality category
 	Linters []string `yaml:"linters" toml:"linters"`
 }
 
+// TestingConfig contains the configuration for the rules in the Testing category.
+type TestingConfig struct {
+	// Filename of the project's test execution report, either absolute or relative to the project's root.
+	// Expects a JUnit XML file, which when using `pytest` can be generated with `pytest --junitxml=tests-report.xml`
+	Report string `yaml:"report" toml:"report"`
+
+	// Settings about how many tests there should be in a project.
+	Targets TestingTargets `yaml:"targets" toml:"targets"`
+
+	// Settings about the rules for checking project test coverage.
+	Coverage TestCoverage `yaml:"coverage" toml:"coverage"`
+}
+
+type TestingTargets struct {
+	// Minimum amount of test files to have in a project. Absolute number. Defaults to 1.
+	Minimum uint64 `yaml:"minimum" toml:"minimum"`
+
+	// Ratio of test files to have in a project, i.e. number of test files per other Python file.
+	// Defaults to 1 part tests to 4 parts non-tests
+	Ratio TestingTargetsRatio `yaml:"ratio" toml:"ratio"`
+}
+
+type TestingTargetsRatio struct {
+	// Number of parts of test files.
+	Tests uint64 `yaml:"tests" toml:"tests"`
+	// Number of parts of other Python files.
+	Other uint64 `yaml:"other" toml:"other"`
+}
+
+type TestCoverage struct {
+	// Filename of the project's test coverage report, either absolute or relative to the project's root.
+	// Expects a Cobertura-compatible XML file, which can be generated after `coverage run -m pytest --junitxml=tests-report.xml`
+	// with `coverage xml -o tests-coverage.xml`, or using the `pytest-cov` plugin.
+	Report string `yaml:"report" toml:"report"`
+
+	// Specifies the target amount of line / branch / whatever coverage that the user wants to have in the project.
+	// Only line coverage is implemented so far.
+	Targets TestCoverageTargets `yaml:"targets" toml:"targets"`
+}
+
+type TestCoverageTargets struct {
+	// Target amount of overall line coverage to achieve in tests.
+	Line float64 `yaml:"line" toml:"line"`
+}
+
+//---------------------------------------------------------------------------------------
+
 func Default() *Config {
 	return &Config{
 		Rules:       RuleConfig{Disabled: []string{}},
 		Git:         GitConfig{MaxFileSize: 10_000_000}, // 10 MB
 		CodeQuality: CodeQualityConfig{Linters: []string{"pylint", "mypy", "black", "isort", "bandit"}},
+		Testing: TestingConfig{
+			Targets: TestingTargets{
+				Minimum: 1,
+				Ratio: TestingTargetsRatio{
+					Tests: 1,
+					Other: 4,
+				},
+			},
+			Coverage: TestCoverage{
+				Targets: TestCoverageTargets{
+					Line: 80,
+				},
+			},
+		},
 	}
 }
 
+//---------------------------------------------------------------------------------------
+
 type FileType string
 
 const (
@@ -60,6 +125,8 @@ func (t FileType) String() string {
 	return string(t)
 }
 
+//---------------------------------------------------------------------------------------
+
 // ParseFromDir parses the mllint config from the given project directory.
 // If an `.mllint.yml` file is present, then this will be used,
 // otherwise, if a `pyproject.toml` file is present, then this will be used,
@@ -131,3 +198,15 @@ func ParseTOML(reader io.Reader) (*Config, error) {
 
 	return tomlFile.Tool.Mllint, nil
 }
+
+//---------------------------------------------------------------------------------------
+
+func (conf *Config) YAML() ([]byte, error) {
+	return yaml.Marshal(conf)
+}
+
+func (conf *Config) TOML() ([]byte, error) {
+	pyproject := pyprojectTOML{}
+	pyproject.Tool.Mllint = conf
+	return toml.Marshal(pyproject)
+}
diff --git a/config/config_test.go b/config/config_test.go
@@ -30,6 +30,20 @@ code-quality:
     - black
 `
 
+const yamlTesting = `
+testing:
+    report: junit-report.xml
+    targets:
+        minimum: 2
+        ratio:
+            tests: 2
+            other: 8
+    coverage:
+        report: coverage.xml
+        targets:
+            line: 50 # percent line coverage.
+`
+
 const yamlInvalid = `
 rules:
   disabled: nothing
@@ -49,6 +63,13 @@ const tomlLinters = `
 linters = ["pylint", "mypy"]
 `
 
+const tomlTesting = `
+[tool.mllint.testing]
+report = "tests-report.xml"
+targets = { minimum = 2, ratio = { tests = 2, other = 8 }}
+coverage = { report = "coverage.xml", targets = { line = 100.0 }}
+`
+
 const tomlInvalid = `
 [tool.mllint.rules]
 disabled = "nothing"
@@ -97,6 +118,21 @@ func TestParseYAML(t *testing.T) {
 			}(),
 			Err: nil,
 		},
+		{
+			Name: "YamlTesting",
+			File: strings.NewReader(yamlTesting),
+			Expected: func() *config.Config {
+				c := config.Default()
+				c.Testing.Report = "junit-report.xml"
+				c.Testing.Targets.Minimum = 2
+				c.Testing.Targets.Ratio.Tests = 2
+				c.Testing.Targets.Ratio.Other = 8
+				c.Testing.Coverage.Report = "coverage.xml"
+				c.Testing.Coverage.Targets.Line = 50
+				return c
+			}(),
+			Err: nil,
+		},
 		{
 			Name:     "YamlError",
 			File:     strings.NewReader(yamlInvalid),
@@ -166,6 +202,21 @@ func TestParseTOML(t *testing.T) {
 			}(),
 			Err: nil,
 		},
+		{
+			Name: "TomlTesting",
+			File: strings.NewReader(tomlTesting),
+			Expected: func() *config.Config {
+				c := config.Default()
+				c.Testing.Report = "tests-report.xml"
+				c.Testing.Targets.Minimum = 2
+				c.Testing.Targets.Ratio.Tests = 2
+				c.Testing.Targets.Ratio.Other = 8
+				c.Testing.Coverage.Report = "coverage.xml"
+				c.Testing.Coverage.Targets.Line = 100
+				return c
+			}(),
+			Err: nil,
+		},
 		{
 			Name:     "TomlError",
 			File:     strings.NewReader(tomlInvalid),

diff --git a/go.mod b/go.mod
@@ -5,6 +5,7 @@ go 1.16
 require (
 	github.com/MichaelMure/go-term-markdown v0.1.4
 	github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 // indirect
+	github.com/bvobart/gocover-cobertura v0.0.0-20210621150944-54fde689e823
 	github.com/dustin/go-humanize v1.0.0
 	github.com/fatih/color v1.11.0
 	github.com/go-enry/go-enry/v2 v2.7.0 // indirect
@@ -13,6 +14,7 @@ require (
 	github.com/gosuri/uilive v0.0.4
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/hhatto/gocloc v0.4.1
+	github.com/joshdk/go-junit v0.0.0-20210226021600-6145f504ca0d
 	github.com/juliangruber/go-intersect v1.0.0
 	github.com/mattn/go-isatty v0.0.12
 	github.com/nathan-fiscaletti/consolesize-go v0.0.0-20210105204122-a87d9f614b9d

diff --git a/go.sum b/go.sum
@@ -38,6 +38,10 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
 github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
+github.com/bvobart/gocover-cobertura v0.0.0-20210620145524-460c2a91acaa h1:g9IO47WKVOumt7O3M4++yM//ToeKV5SOEqE2XvVasMs=
+github.com/bvobart/gocover-cobertura v0.0.0-20210620145524-460c2a91acaa/go.mod h1:b4ErSlLH34szWzul7CTNT0Yq/+5kS6zBkfQYKDCuiCg=
+github.com/bvobart/gocover-cobertura v0.0.0-20210621150944-54fde689e823 h1:gLHluPmnlN5Gy9KDg6rmle4F3AU77LARkfkhMy82+bc=
+github.com/bvobart/gocover-cobertura v0.0.0-20210621150944-54fde689e823/go.mod h1:b4ErSlLH34szWzul7CTNT0Yq/+5kS6zBkfQYKDCuiCg=
 github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
@@ -139,6 +143,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
 github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
 github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
+github.com/joshdk/go-junit v0.0.0-20210226021600-6145f504ca0d h1:lcSbmPJf3b19MTZtGDLI6Y2Jnk3VBDT8UG/8IVCEMxA=
+github.com/joshdk/go-junit v0.0.0-20210226021600-6145f504ca0d/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=

diff --git a/linters/codequality/rules.go b/linters/codequality/rules.go
@@ -31,7 +31,7 @@ Linter | Why?
 
 This rule will be satisfied, iff for each of these linters:
 - **Either** there is a configuration file for this linter in the project
-- **Or** the linter is a dependency of the project`,
+- **Or** the linter is a dependency of the project (preferably a dev dependency)`,
 	Weight: 1,
 }