Skip to content

Commit

Permalink
Some improvements to pythonlib (#3992)
Browse files Browse the repository at this point in the history
This is in view of #3928, specifically some essential things required
for the first task, `example/pythonlib/basic/`.

You can look at commits individually if you like. The gist of this PR is
to improve the scaffolding we currently have to unblock the previously
mentioned tasks. It can be summarized by the following points (roughly
each corresponding to one commit):

- [x] make `run` task the default, following the same convention used in
`ScalaModule` and `JavaModule`
- [x] add a command to run an interactive REPL. This is named `console`,
again following the conventions of scalalib
- [x] rework the way source files are handled:
- support multiple source directories, again following similar
conventions of scalalib
- instead of aggregating python scripts in one synthetic directory, keep
them where they are and instead manipulate PYTHONPATH or pass in
parameters to various directories where appropriate
- [x] define a module for unit tests, similar to the TestModule of
scalalib
  • Loading branch information
jodersky authored Nov 20, 2024
1 parent 422ef40 commit 4729afc
Show file tree
Hide file tree
Showing 10 changed files with 294 additions and 51 deletions.
9 changes: 8 additions & 1 deletion example/pythonlib/basic/1-simple/build.mill
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,15 @@ object foo extends PythonModule {
def pythonDeps = Seq("numpy==2.1.3")
}

object qux extends PythonModule {
object qux extends PythonModule { q =>
def moduleDeps = Seq(foo, foo.bar)

object test extends PythonTests with TestModule.Unittest
object test2 extends PythonTests with TestModule.Pytest {
override def sources = T {
q.test.sources()
}
}
}

/** Usage
Expand Down
6 changes: 3 additions & 3 deletions example/pythonlib/basic/1-simple/qux/src/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/usr/bin/python3
import numpy as np

from foo.src.foo import data
from foo.bar.src.bar import df
from foo import data
from bar import df

def main() -> None:
print(f"Numpy : Sum: {np.sum(data)} | Pandas: Mean: {df['Values'].mean()}, Max: {df['Values'].max()}")

if __name__ == "__main__":
main()
main()
17 changes: 17 additions & 0 deletions example/pythonlib/basic/1-simple/qux/test/src/test_dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import unittest

class TestStringMethods(unittest.TestCase):
    """Sample unittest suite exercising a few built-in ``str`` methods."""

    def test_upper(self):
        """``upper`` turns a lower-case word into its upper-case form."""
        self.assertEqual('foo'.upper(), 'FOO')

    def test_isupper(self):
        """``isupper`` is true for all-caps text and false for mixed case."""
        self.assertTrue('FOO'.isupper())
        self.assertFalse('Foo'.isupper())

    def test_split(self):
        """``split`` breaks on whitespace and rejects a non-string separator."""
        phrase = 'hello world'
        words = phrase.split()
        self.assertEqual(words, ['hello', 'world'])
        # check that split fails when the separator is not a string
        with self.assertRaises(TypeError):
            phrase.split(2)
181 changes: 142 additions & 39 deletions pythonlib/src/mill/pythonlib/PythonModule.scala
Original file line number Diff line number Diff line change
@@ -1,18 +1,62 @@
package mill.pythonlib

import mill._
import mill.api.Result
import mill.util.Util
import mill.util.Jvm
import mill.api.Ctx

trait PythonModule extends Module with TaskModule { outer =>

trait PythonModule extends Module {
/**
* The direct dependencies of this module.
* This is meant to be overridden to add dependencies.
*/
def moduleDeps: Seq[PythonModule] = Nil
def mainFileName: T[String] = Task { "main.py" }
def sources: T[PathRef] = Task.Source(millSourcePath / "src")

/**
* The folders where the source files for this mill module live.
*
* Python modules will be defined relative to these directories.
*/
def sources: T[Seq[PathRef]] = Task.Sources { millSourcePath / "src" }

/**
* The script to run. This file may not exist if this module is only a library.
*/
def script: T[PathRef] = Task.Source { millSourcePath / "src" / "main.py" }

/**
* Any python dependencies you want to add to this module. The format of each
* dependency should be the same as used with `pip install`, or as you would
* find in a `requirements.txt` file. E.g. `def pythonDeps =
* Seq("numpy==2.1.3")`
*/
def pythonDeps: T[Seq[String]] = Task { Seq.empty[String] }

/**
 * Python dependencies of this module, and all other modules that this module
 * depends on, recursively.
 *
 * This module's own [[pythonDeps]] come first, followed by the transitive
 * dependencies of every entry in [[moduleDeps]], in order. A requirement
 * string that is reached more than once (for example because two modules in
 * [[moduleDeps]] share a common upstream module) is only listed once, at the
 * position of its first occurrence.
 */
def transitivePythonDeps: T[Seq[String]] = Task {
  val upstreamDependencies = Task.traverse(moduleDeps)(_.transitivePythonDeps)().flatten
  // `distinct` keeps the first occurrence, so the relative order is preserved.
  (pythonDeps() ++ upstreamDependencies).distinct
}

/**
 * Source directories of this module, and all other modules that this module
 * depends on, recursively.
 *
 * This module's own [[sources]] come first, followed by the transitive
 * sources of every entry in [[moduleDeps]], in order. A directory that is
 * reached through more than one dependency path is only listed once, at the
 * position of its first occurrence, so that consumers building a search path
 * or a list of command-line flags from this value do not repeat entries.
 */
def transitiveSources: T[Seq[PathRef]] = Task {
  val upstreamSources = Task.traverse(moduleDeps)(_.transitiveSources)().flatten
  // `distinct` keeps the first occurrence, so the relative order is preserved.
  (sources() ++ upstreamSources).distinct
}

/**
* An executable python interpreter. This interpreter is set up to run in a
* virtual environment which has been initialized to contain all libraries and
* tools needed by this module and its dependencies.
*/
def pythonExe: T[PathRef] = Task {
os.call(("python3", "-m", "venv", Task.dest / "venv"))
val python = Task.dest / "venv" / "bin" / "python3"
Expand All @@ -21,59 +65,118 @@ trait PythonModule extends Module {
PathRef(python)
}

def typeCheck: T[Unit] = Task {
Task.traverse(moduleDeps)(_.typeCheck)()

os.call(
(pythonExe().path, "-m", "mypy", "--strict", sources().path),
stdout = os.Inherit,
cwd = T.workspace
// TODO: right now, any task that calls this helper will have its own python
// cache. This is slow. Look into sharing the cache between tasks.
/**
 * Builds the [[PythonModule.Runner]] used by this module's tasks to launch
 * Python subprocesses.
 *
 * The runner's defaults are:
 *  - command: the interpreter returned by `pythonExe`
 *  - environment: `PYTHONPATH` set to the `transitiveSources` directories
 *    joined with the platform path separator, `PYTHONPYCACHEPREFIX` set to
 *    `cache` under `T.dest`, and either `FORCE_COLOR=1` or `NO_COLOR=1`
 *    depending on whether the task logger reports colored output
 *  - working directory: `Task.workspace`
 */
def runner: Task[PythonModule.Runner] = Task.Anon {
new PythonModule.RunnerImpl(
command0 = pythonExe().path.toString,
env0 = Map(
"PYTHONPATH" -> transitiveSources().map(_.path).mkString(java.io.File.pathSeparator),
"PYTHONPYCACHEPREFIX" -> (T.dest / "cache").toString,
if (Task.log.colored) { "FORCE_COLOR" -> "1" }
else { "NO_COLOR" -> "1" }
),
workingDir0 = Task.workspace
)
}

def gatherScripts(upstream: Seq[(PathRef, PythonModule)]) = {
for ((sourcesFolder, mod) <- upstream) {
val destinationPath =
os.pwd / mod.millSourcePath.subRelativeTo(mill.api.WorkspaceRoot.workspaceRoot)
os.copy.over(sourcesFolder.path / os.up, destinationPath)
}
/**
 * Run a typechecker on this module.
 *
 * Invokes `mypy --strict` through `runner` on this module's own `sources`
 * directories, pointing mypy's cache at `mypycache` under `T.dest`.
 */
def typeCheck: T[Unit] = Task {
runner().run(
(
// format: off
"-m", "mypy",
"--strict",
"--cache-dir", (T.dest / "mypycache").toString,
sources().map(_.path)
// format: on
)
)
}

/**
* Run the main python script of this module.
*
* @see [[script]]
*/
def run(args: mill.define.Args) = Task.Command {
gatherScripts(Task.traverse(moduleDeps)(_.sources)().zip(moduleDeps))

os.call(
(pythonExe().path, sources().path / mainFileName(), args.value),
env = Map("PYTHONPATH" -> Task.dest.toString),
stdout = os.Inherit
runner().run(
(
script().path,
args.value
)
)
}

override def defaultCommandName(): String = "run"

/**
 * Starts an interactive Python session through this module's `runner`, so
 * that the module and its dependencies can be explored by hand.
 *
 * The command fails with an explanatory message when Mill is not running in
 * interactive mode.
 */
def console(): Command[Unit] = Task.Command(exclusive = true) {
  if (Util.isInteractive()) {
    // No arguments: the runner's default command is started on its own.
    runner().run()
    Result.Success(())
  } else {
    Result.Failure("console needs to be run with the -i/--interactive flag")
  }
}

/** Bundles the project into a single PEX executable(bundle.pex). */
def bundle = Task {
gatherScripts(Task.traverse(moduleDeps)(_.sources)().zip(moduleDeps))

val pexFile = Task.dest / "bundle.pex"
os.call(
runner().run(
(
pythonExe().path,
"-m",
"pex",
// format: off
"-m", "pex",
transitivePythonDeps(),
"-D",
Task.dest,
"-c",
sources().path / mainFileName(),
"-o",
pexFile,
"--scie",
"eager"
transitiveSources().flatMap(pr =>
Seq("-D", pr.path.toString)
),
"--exe", script().path,
"-o", pexFile,
"--scie", "eager",
// format: on
),
env = Map("PYTHONPATH" -> Task.dest.toString),
stdout = os.Inherit
workingDir = T.dest
)

PathRef(pexFile)
}

trait PythonTests extends PythonModule {
override def moduleDeps: Seq[PythonModule] = Seq(outer)
}

}

object PythonModule {

  /**
   * Something that can launch a subprocess on behalf of a [[PythonModule]].
   *
   * @see [[RunnerImpl]] for how the `null` defaults are interpreted.
   */
  trait Runner {
    def run(
        args: os.Shellable = Seq(),
        command: String = null,
        env: Map[String, String] = null,
        workingDir: os.Path = null
    )(implicit ctx: Ctx): Unit
  }

  /**
   * [[Runner]] backed by `Jvm.runSubprocess`.
   *
   * Each of `command`, `env` and `workingDir` passed to `run` replaces the
   * corresponding constructor value when it is non-null; a `null` argument
   * means "use the value this runner was created with".
   *
   * @param command0 executable used when `run` is not given a `command`
   * @param env0 environment used when `run` is not given an `env`
   * @param workingDir0 working directory used when `run` is not given a `workingDir`
   */
  private class RunnerImpl(
      command0: String,
      env0: Map[String, String],
      workingDir0: os.Path
  ) extends Runner {
    def run(
        args: os.Shellable = Seq(),
        command: String = null,
        env: Map[String, String] = null,
        workingDir: os.Path = null
    )(implicit ctx: Ctx): Unit = {
      // Resolve each optional override against the constructor defaults.
      val executable = Option(command).getOrElse(command0)
      val environment = Option(env).getOrElse(env0)
      val directory = Option(workingDir).getOrElse(workingDir0)

      Jvm.runSubprocess(
        commandArgs = executable +: args.value,
        envArgs = environment,
        workingDir = directory
      )
    }
  }
}
87 changes: 87 additions & 0 deletions pythonlib/src/mill/pythonlib/TestModule.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package mill.pythonlib

import mill.Task
import mill.Command
import mill.TaskModule
import mill.T

/**
 * Common interface of Python test modules: exposes a `test` command and a
 * cached `testCached` task, both of which delegate to [[testTask]].
 */
trait TestModule extends TaskModule {
import TestModule.TestResult

/**
 * Discovers and runs the module's tests in a subprocess, reporting the
 * results to the console.
 *
 * @param args arguments handed to [[testTask]], wrapped in an anonymous task
 * @see [[testCached]]
 */
def test(args: String*): Command[Seq[TestResult]] =
Task.Command {
testTask(Task.Anon { args })()
}

/**
 * Args to be used by [[testCached]]. Empty by default.
 */
def testCachedArgs: T[Seq[String]] = Task { Seq[String]() }

/**
 * Discovers and runs the module's tests in a subprocess, reporting the
 * results to the console.
 * If no input has changed since the last run, no tests are executed.
 * @see [[test()]]
 */
def testCached: T[Seq[TestResult]] = Task {
testTask(testCachedArgs)()
}

/**
 * The actual task shared by `test`-tasks.
 *
 * @param args task producing the arguments for the test run
 */
protected def testTask(args: Task[Seq[String]]): Task[Seq[TestResult]]

/** Names `test` as the default command of this module. */
override def defaultCommandName() = "test"
}

object TestModule {

  // TODO: this is a dummy for now, however we should look into re-using
  // mill.testrunner.TestResults
  type TestResult = Unit

  /**
   * TestModule that uses Python's standard unittest module to run tests.
   *
   * When no arguments are given, `unittest discover` is invoked with one
   * `-s <dir>` pair per entry of `sources`; otherwise the given arguments are
   * passed to `python -m unittest` unchanged.
   */
  trait Unittest extends PythonModule with TestModule {
    protected def testTask(args: Task[Seq[String]]) = Task.Anon {
      // NOTE(review): every source directory is passed as its own `-s` flag in
      // a single invocation; confirm that unittest honours more than one.
      val testArgs = if (args().isEmpty) {
        Seq("discover") ++ sources().flatMap(pr => Seq("-s", pr.path.toString))
      } else {
        args()
      }
      runner().run(
        ("-m", "unittest", testArgs)
      )
      // TestResult is a placeholder for now, so there is nothing to report.
      Seq()
    }
  }

  /**
   * TestModule that uses pytest to run tests.
   *
   * Adds `pytest==8.3.3` to the module's `pythonDeps` and runs
   * `python -m pytest` on every entry of `sources`, followed by any
   * user-supplied arguments. The pytest cache is placed in `cache` under
   * `Task.dest`.
   */
  trait Pytest extends PythonModule with TestModule {

    override def pythonDeps: T[Seq[String]] = Task {
      super.pythonDeps() ++ Seq("pytest==8.3.3")
    }

    protected def testTask(args: Task[Seq[String]]) = Task.Anon {
      runner().run(
        (
          // format: off
          "-m", "pytest",
          "-o", s"cache_dir=${Task.dest / "cache"}",
          sources().map(_.path),
          args()
          // format: on
        )
      )
      // TestResult is a placeholder for now, so there is nothing to report.
      Seq()
    }
  }

}
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import sys

bar_val: int = 42

def main() -> None: print("Hello, " + " ".join(sys.argv[1:]) + " Foo Bar!")
if __name__ == "__main__":
main()
main()
6 changes: 5 additions & 1 deletion pythonlib/test/resources/hello-world-python/foo/src/foo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import sys
import bar

foo_val: int = bar.bar_val

def main() -> None: print("Hello, " + " ".join(sys.argv[1:]) + " Foo!")
if __name__ == "__main__":
main()
main()
6 changes: 5 additions & 1 deletion pythonlib/test/resources/hello-world-python/qux/src/qux.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import sys
import foo

qux_val: int = foo.foo_val

def main() -> None: print("Hello, " + " ".join(sys.argv[1:]) + " Qux!")
if __name__ == "__main__":
main()
main()
Loading

0 comments on commit 4729afc

Please sign in to comment.