Finally fixes the implementation of testing/has-tests such that it conforms to the intended intuition, updates details and descriptions of all `testing/` rules and `testing` category
bvobart committed Jun 25, 2021
1 parent 40599ae commit 5876ada
Showing 4 changed files with 144 additions and 32 deletions.
27 changes: 21 additions & 6 deletions categories/categories.go
@@ -111,17 +111,32 @@ var Testing = api.Category{
Slug: "testing",
Description: `Testing in the context of Software Engineering refers to the practice of writing automated checks to ensure that something works as intended.
Testing ML systems is, however, different from testing traditional software systems.
In traditional software systems, humans **write** all the logic that processes whatever data the system handles,
whereas in ML systems, humans provide examples (training data) of what we want the desired behaviour to be and the machine **learns** the logic required to produce this behaviour.
Properly testing ML systems therefore entails ensuring that the learned logic always and consistently produces the desired behaviour.
In traditional software systems, **humans write** all the logic that processes whatever data the system handles,
whereas in ML systems, **humans provide examples** (training data) of what we want the desired behaviour to be and the **machine learns** the logic required to produce this behaviour.
Properly testing ML systems is not only limited to testing the output behaviour of the system, but also entails, e.g.:
- ensuring that data preparation is done correctly and consistently
- ensuring that data featurisation is done correctly and consistently
- ensuring that the data is fed into the learning process correctly, e.g. testing helper functions
- ensuring that the learned logic consistently and accurately produces the desired behaviour
This category contains several rules relating to whether and to what degree you are testing the code of your ML project.
By default, ` + "`mllint`" + ` expects **at least one test file** to be implemented in your project ` + "(i.e. a Python file starting with `test_` or ending with `_test.py`)" + `
and recommends that you have **at least 1 test file** for **every 4 non-test files**, though both these targets are configurable.
See the default configuration and the description of rule ` + "`testing/has-tests`" + ` for more information on how to configure this.
` + "`mllint`" + ` will **not** actually run your tests to assess whether they pass and what coverage they achieve.
Instead, we expect you to run your project's tests yourself and provide
the filenames of a JUnit-compatible XML test report and a Cobertura-compatible XML coverage report in your project's ` + "`mllint`" + ` configuration.
See the descriptions of rules ` + "`testing/pass` and `testing/coverage`" + ` for more information on how to generate and configure these.
---
Here are some links to interesting blogs that give more in-depth information about different techniques for testing ML systems:
- [MadeWithML - Testing ML Systems: Code, Data and Models](https://madewithml.com/courses/mlops/testing/)
- [Jeremy Jordan - Effective testing for machine learning systems](https://www.jeremyjordan.me/testing-ml/)
This category contains several rules relating to whether and to what degree you are testing the code of your ML project.
TODO: update this description again once fully implemented`,
> *"When writing tests for machine learning systems, one must not only test the student (the ML model), but also the teacher (the code that produces the ML model)." — Bart van Oort (bvobart)*`,
}

var ContinuousIntegration = api.Category{
10 changes: 6 additions & 4 deletions linters/testing/linter.go
@@ -82,8 +82,9 @@ func (l *TestingLinter) ScoreRuleHasTests(report *api.Report, project api.Projec
}

// determine expected and actual ratio of test files vs. other Python files
expectedRatio := float64(l.Config.Targets.Ratio.Tests) / float64(l.Config.Targets.Ratio.Other)
actualRatio := float64(numTests) / float64(len(project.PythonFiles)-numTests) // project.PythonFiles includes the test files, so we subtract them
ratioTotal := float64(l.Config.Targets.Ratio.Tests + l.Config.Targets.Ratio.Other)
expectedRatio := float64(l.Config.Targets.Ratio.Tests) / ratioTotal
actualRatio := float64(numTests) / float64(len(project.PythonFiles))
// score is basically: actual ratio / expected ratio
report.Scores[RuleHasTests] = math.Min(100*actualRatio/expectedRatio, 100)
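For clarity, here is a quick worked example of the corrected formula as a standalone sketch (it is not part of the diff): with the default target ratio of 1 test file per 4 other Python files, a project containing 1 test file among 16 Python files in total now scores 31.25 instead of 25.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Default targets: 1 test file for every 4 other Python files,
	// i.e. 20% of all Python files are expected to be tests.
	tests, other := 1.0, 4.0
	expectedRatio := tests / (tests + other) // 0.2

	// Example project: 1 test file among 16 Python files in total.
	numTests, totalPythonFiles := 1.0, 16.0
	actualRatio := numTests / totalPythonFiles // 0.0625, i.e. 6.25%

	// Score is the actual ratio relative to the expected ratio, capped at 100.
	score := math.Min(100*actualRatio/expectedRatio, 100)
	fmt.Println(score) // 31.25, matching the updated test case below
}
```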

@@ -95,6 +96,7 @@ func (l *TestingLinter) ScoreRuleHasTests(report *api.Report, project api.Projec
} else {
report.Details[RuleHasTests] = fmt.Sprintf(
`Great! Your project contains **%d** test %s, which meets the minimum of **%d** test files required.
This equates to **%s%%** of Python files in your project being tests, which meets the target ratio of **%s%%**`,
numTests, fileStr, l.Config.Targets.Minimum, humanize.Ftoa(100*actualRatio), humanize.Ftoa(100*expectedRatio),
)
@@ -257,11 +259,11 @@ Please make sure your test report file is a valid Cobertura-compatible XML file.
//---------------------------------------------------------------------------------------

const howToMakeJUnitXML = "When using `pytest` to run your project's tests, use the `--junitxml=<filename>` option to generate such a test report, e.g.:" + `
` + "```" + `
` + "```sh" + `
pytest --junitxml=tests-report.xml
` + "```\n"

const howToMakeCoverageXML = "Generating a test coverage report with `pytest` can be done by adding and installing `pytest-cov` as a development dependency of your project. Then use the following command to run your tests and generate both a test report as well as a coverage report:" + `
` + "```" + `
` + "```sh" + `
pytest --junitxml=tests-report.xml --cov=path_to_package_under_test --cov-report=xml
` + "```\n"
12 changes: 6 additions & 6 deletions linters/testing/linter_test.go
@@ -52,16 +52,16 @@ func TestTestingLinter(t *stdtesting.T) {
},
},
{
Name: "OneTestSixteenFiles",
Name: "OneTestFifteenFiles", // 1 + 15 = 16, which makes the percentages easier to calculate.
Dir: ".",
Options: testutils.NewOptions().UsePythonFiles(createPythonFilenames(16).Concat(createPythonTestFilenames(1))),
Options: testutils.NewOptions().UsePythonFiles(createPythonFilenames(15).Concat(createPythonTestFilenames(1))),
Expect: func(t *stdtesting.T, report api.Report, err error) {
require.NoError(t, err)
require.EqualValues(t, 25, report.Scores[testing.RuleHasTests])
require.EqualValues(t, 31.25, report.Scores[testing.RuleHasTests])
require.Contains(t, report.Details[testing.RuleHasTests], "There is **1** test file in your project")
require.Contains(t, report.Details[testing.RuleHasTests], "minimum of **1** test file required")
require.Contains(t, report.Details[testing.RuleHasTests], "equates to **6.25%** of Python files in your project being tests")
require.Contains(t, report.Details[testing.RuleHasTests], "`mllint` expects that **25%** of your project's Python files are tests")
require.Contains(t, report.Details[testing.RuleHasTests], "`mllint` expects that **20%** of your project's Python files are tests")
require.EqualValues(t, 0, report.Scores[testing.RuleTestsPass])
require.EqualValues(t, 0, report.Scores[testing.RuleTestCoverage])
require.EqualValues(t, 0, report.Scores[testing.RuleTestsFolder])
Expand All @@ -76,8 +76,8 @@ func TestTestingLinter(t *stdtesting.T) {
require.EqualValues(t, 100, report.Scores[testing.RuleHasTests])
require.Contains(t, report.Details[testing.RuleHasTests], "Your project contains **4** test files")
require.Contains(t, report.Details[testing.RuleHasTests], "meets the minimum of **1** test files required")
require.Contains(t, report.Details[testing.RuleHasTests], "equates to **25%** of Python files in your project being tests")
require.Contains(t, report.Details[testing.RuleHasTests], "meets the target ratio of **25%**")
require.Contains(t, report.Details[testing.RuleHasTests], "equates to **20%** of Python files in your project being tests")
require.Contains(t, report.Details[testing.RuleHasTests], "meets the target ratio of **20%**")
require.EqualValues(t, 0, report.Scores[testing.RuleTestsPass])
require.EqualValues(t, 0, report.Scores[testing.RuleTestCoverage])
require.EqualValues(t, 0, report.Scores[testing.RuleTestsFolder])
127 changes: 111 additions & 16 deletions linters/testing/rules.go
@@ -3,29 +3,124 @@ package testing
import "github.com/bvobart/mllint/api"

var RuleHasTests = api.Rule{
Name: "Project has automated tests",
Slug: "testing/has-tests",
Details: "TODO, https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery",
Weight: 1,
Name: "Project has automated tests",
Slug: "testing/has-tests",
Details: `Every ML project should have a set of automated tests to assess the quality, consistency and correctness of their application in a repeatable and reproducible manner.
This rule checks how many test files your project contains. ` + "In accordance with `pytest`'s [conventions](https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery) for Python tests, test files are Python files starting with `test_` or ending with `_test.py`." + `
By default, ` + "`mllint`" + ` expects **at least one test file** to be implemented in your project ` + "(i.e. a Python file starting with `test_` or ending with `_test.py`)" + `
and recommends that you have **at least 1 test file** for **every 4 non-test files**, though both these targets are configurable.
In order to configure different targets for how many tests a project should have, use the following ` + "`mllint`" + ` configuration snippet:
` + "```yaml" + `
testing:
  report: tests-report.xml # JUnit report for rule testing/pass
  # Specify targets for testing/has-tests.
  # Both the minimum required amount of tests as well as the desired ratio of tests to other Python files will be checked.
  targets:
    # Minimum number of tests expected to be in your project. Default: 1
    minimum: 1
    # Define a target ratio of Python test files to other Python files in your project.
    # By default, mllint expects that 20% of all Python files in your project are tests,
    # i.e., 1 test file is implemented for every 4 other Python files.
    ratio:
      tests: 1
      other: 4
` + "```" + `
or the equivalent TOML (without the explanatory comments):
` + "```toml" + `
[tool.mllint.testing]
report = "tests-report.xml"
targets = { minimum = 1, ratio = { tests = 1, other = 4 }}
` + "```",
Weight: 1,
}
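As an illustration of the naming convention referenced in the description above, a minimal standalone sketch of the check (for illustration only; not necessarily how mllint implements it) could look like this:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// isTestFile reports whether a Python file follows pytest's naming
// convention for test files: test_*.py or *_test.py.
func isTestFile(path string) bool {
	name := filepath.Base(path)
	if !strings.HasSuffix(name, ".py") {
		return false
	}
	return strings.HasPrefix(name, "test_") || strings.HasSuffix(name, "_test.py")
}

func main() {
	fmt.Println(isTestFile("tests/test_model.py"))  // true
	fmt.Println(isTestFile("src/features_test.py")) // true
	fmt.Println(isTestFile("src/features.py"))      // false
}
```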

var RuleTestsPass = api.Rule{
Name: "Project passes all of its automated tests",
Slug: "testing/pass",
Details: "TODO",
Weight: 1,
Name: "Project passes all of its automated tests",
Slug: "testing/pass",
Details: `Of course, the point of having automated tests is to ensure that they pass.
While ` + "`mllint`" + ` will **not run** your tests as part of its static analysis, ` + "`mllint`" + ` expects you to run these on your own terms
and provide the filenames of a JUnit-compatible XML test report and a Cobertura-compatible XML coverage
report in your project's ` + "`mllint`" + ` configuration. Specifically for this rule, the JUnit test report is analysed.
` + howToMakeJUnitXML + `
You can then configure mllint to pick up your test report as follows:
` + "```yaml" + `
testing:
  report: tests-report.xml # JUnit report for rule testing/pass
` + "```" + `
or equivalent TOML:
` + "```toml" + `
[tool.mllint.testing]
report = "tests-report.xml"
` + "```" + `
`,
Weight: 1,
}
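To make concrete what analysing the JUnit test report involves, here is a hedged standalone sketch that reads such a report and checks for failures. The struct fields assume the common JUnit XML attributes (`tests`, `failures`, `errors`) and are not taken from mllint's actual parser:

```go
package main

import (
	"encoding/xml"
	"fmt"
	"os"
)

// junitSuite models the handful of JUnit XML attributes needed to tell
// whether a test run passed. Real reports may nest <testsuite> elements
// inside a <testsuites> root; this sketch only handles a single suite.
type junitSuite struct {
	XMLName  xml.Name `xml:"testsuite"`
	Tests    int      `xml:"tests,attr"`
	Failures int      `xml:"failures,attr"`
	Errors   int      `xml:"errors,attr"`
}

func main() {
	data, err := os.ReadFile("tests-report.xml")
	if err != nil {
		fmt.Println("no test report found:", err)
		return
	}
	var suite junitSuite
	if err := xml.Unmarshal(data, &suite); err != nil {
		fmt.Println("not a valid JUnit XML report:", err)
		return
	}
	passed := suite.Failures == 0 && suite.Errors == 0
	fmt.Printf("%d tests, passed: %v\n", suite.Tests, passed)
}
```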

var RuleTestCoverage = api.Rule{
Name: "Project provides a test coverage report",
Slug: "testing/coverage",
Details: "TODO: fill this in with details about what the rule checks, what the reasoning behind it is, where people can get more information on how to implement what this rule checks.",
Weight: 1,
Name: "Project provides a test coverage report",
Slug: "testing/coverage",
Details: `One way of measuring the effectiveness of automated tests is by measuring how many lines of code are touched while the tests are being executed.
This is called **test coverage**. The idea is that the more lines are being executed by your tests, the more of your code's behaviour is being exercised,
thus yielding a greater probability of bugs surfacing and being detected or prevented.
Note, however, that line test coverage only measures whether a line of code is executed.
This does **not** mean that the result or side-effect of that line's execution is being assessed for correctness.
Additionally, one line may give rise to two different paths through your application, e.g., an *if*-statement.
When testing one such path, line test coverage will show that the *if*-statement was covered,
yet it will not reveal that only one of the possible paths through your application has been exercised.
This can especially occur in complex one-line operations. For this use-case, there is also the concept of **branch coverage**,
though ` + "`mllint`" + ` currently does not assess this.
Furthermore, for testing ML systems, there is also academic discussion as to whether line coverage or branch coverage makes sense,
or whether different forms of coverage are required. While ` + "`mllint`" + ` currently does not check or support any of these novel forms of test coverage for ML,
we are looking for suggestions on what novel forms of ML code coverage should be assessed and how these can be measured.
---
While ` + "`mllint`" + ` will **not run** your tests as part of its static analysis, ` + "`mllint`" + ` expects you to run these on your own terms
and provide the filenames of a JUnit-compatible XML test report and a Cobertura-compatible XML coverage
report in your project's ` + "`mllint`" + ` configuration. Specifically for this rule, the Cobertura-compatible coverage report is analysed.
` + howToMakeCoverageXML + `
You can then configure mllint to pick up your coverage report as follows:
` + "```yaml" + `
testing:
  coverage:
    report: coverage.xml
    targets:
      line: 80 # percent line coverage. Default is 80%
` + "```" + `
or equivalent TOML:
` + "```toml" + `
[tool.mllint.testing.coverage]
report = "coverage.xml"
targets = { line = 80.0 }
# Note: unlike YAML, TOML distinguishes between floats and integers, so be sure to use 80.0 instead of 80
` + "```" + `
`,
Weight: 1,
}
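As an illustration of the line-coverage target, the following standalone sketch reads the standard `line-rate` attribute from a Cobertura report's root `<coverage>` element and compares it against the default 80% target; it is a simplified example, not mllint's actual implementation:

```go
package main

import (
	"encoding/xml"
	"fmt"
	"os"
)

// coberturaReport captures only the overall line coverage from a
// Cobertura XML report, expressed as a fraction between 0 and 1.
type coberturaReport struct {
	XMLName  xml.Name `xml:"coverage"`
	LineRate float64  `xml:"line-rate,attr"`
}

func main() {
	const targetLineCoverage = 80.0 // percent, the default target

	data, err := os.ReadFile("coverage.xml")
	if err != nil {
		fmt.Println("no coverage report found:", err)
		return
	}
	var report coberturaReport
	if err := xml.Unmarshal(data, &report); err != nil {
		fmt.Println("not a valid Cobertura XML report:", err)
		return
	}
	coverage := 100 * report.LineRate
	fmt.Printf("line coverage: %.1f%% (target: %.0f%%, met: %v)\n",
		coverage, targetLineCoverage, coverage >= targetLineCoverage)
}
```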

var RuleTestsFolder = api.Rule{
Name: "Tests should be placed in the tests folder",
Slug: "testing/tests-folder",
Details: "TODO",
Weight: 1,
Name: "Tests should be placed in the tests folder",
Slug: "testing/tests-folder",
Details: "In accordance with `pytest`'s [conventions](https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery) for Python tests and [recommendations on test layout](https://docs.pytest.org/en/6.2.x/goodpractices.html#tests-outside-application-code), test files are Python files starting with `test_` or ending with `_test.py`" + `
and should be placed in a folder called ` + "`tests`" + ` at the root of your project.
This rule therefore simply checks whether all test files in your project are indeed inside this ` + "`tests`" + ` folder.`,
Weight: 1,
}
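Finally, the tests-folder convention itself is simple to express; a standalone sketch of such a check (illustration only, not mllint's implementation) might be:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// inTestsFolder reports whether a test file (given as a path relative to
// the project root) lives inside the top-level tests/ folder.
func inTestsFolder(relPath string) bool {
	parts := strings.Split(filepath.ToSlash(relPath), "/")
	return len(parts) > 1 && parts[0] == "tests"
}

func main() {
	fmt.Println(inTestsFolder("tests/test_model.py"))  // true
	fmt.Println(inTestsFolder("src/test_features.py")) // false
}
```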
