diff --git a/.github/workflows/primary.yml b/.github/workflows/primary.yml index 75298b2e8..c068c802d 100644 --- a/.github/workflows/primary.yml +++ b/.github/workflows/primary.yml @@ -10,6 +10,10 @@ on: - ".github/workflows/primary.yml" branches: - canary + # need to run this periodically on the default branch to populate the build cache + schedule: + # daily at 2am PST + - cron: 0 10 * * * merge_group: types: [checks_requested] workflow_dispatch: {} @@ -106,3 +110,19 @@ jobs: - name: Build rust for wasm32 run: cargo build --target=wasm32-unknown-unknown working-directory: engine/baml-schema-wasm + integ-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: jdx/mise-action@v2 + - uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: Swatinem/rust-cache@v2 + with: + workspaces: engine + - name: run python tests + run: | + cd integ-tests/python + poetry install + ./run_tests.sh diff --git a/.mise.toml b/.mise.toml index 5fcb9cda1..401be3817 100644 --- a/.mise.toml +++ b/.mise.toml @@ -2,3 +2,4 @@ node = "20.14" ruby = "3.1" pnpm = "9.9" +poetry = "1.8.4" diff --git a/engine/language_client_python/Cargo.toml b/engine/language_client_python/Cargo.toml index 622f32b83..4c0ab7c76 100644 --- a/engine/language_client_python/Cargo.toml +++ b/engine/language_client_python/Cargo.toml @@ -46,7 +46,11 @@ regex.workspace = true serde.workspace = true serde_json.workspace = true tokio = { version = "1", features = ["full"] } -tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter","valuable"] } +tracing-subscriber = { version = "0.3.18", features = [ + "json", + "env-filter", + "valuable", +] } [build-dependencies] pyo3-build-config = "0.21.2" diff --git a/engine/language_client_python/src/lib.rs b/engine/language_client_python/src/lib.rs index 694a0a28f..1818fa7dc 100644 --- a/engine/language_client_python/src/lib.rs +++ b/engine/language_client_python/src/lib.rs @@ -20,6 +20,8 @@ fn invoke_runtime_cli(py: Python) -> PyResult<()> { .map_err(errors::BamlError::from_anyhow) } +pub(crate) const MODULE_NAME: &str = "baml_py.baml_py"; + #[pymodule] fn baml_py(m: Bound<'_, PyModule>) -> PyResult<()> { let use_json = match std::env::var("BAML_LOG_JSON") { @@ -74,11 +76,6 @@ fn baml_py(m: Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(invoke_runtime_cli))?; - // m.add( - // "BamlValidationError", - // m.py().get_type_bound::(), - // )?; - // m.add_class::()?; errors::errors(&m)?; Ok(()) diff --git a/engine/language_client_python/src/types/audio.rs b/engine/language_client_python/src/types/audio.rs index a0f9dc7b9..2ce520f6c 100644 --- a/engine/language_client_python/src/types/audio.rs +++ b/engine/language_client_python/src/types/audio.rs @@ -1,6 +1,6 @@ use baml_types::BamlMediaContent; use pyo3::prelude::{pymethods, PyResult}; -use pyo3::types::PyType; +use pyo3::types::{PyTuple, PyType}; use pyo3::{Bound, PyAny, PyObject, Python}; use pythonize::{depythonize_bound, pythonize}; @@ -50,6 +50,20 @@ impl BamlAudioPy { } } + /// Defines the default constructor: https://pyo3.rs/v0.23.3/class#constructor + /// + /// Used for `pickle.load`: https://docs.python.org/3/library/pickle.html#object.__getnewargs__ + #[new] + pub fn py_new(data: PyObject, py: Python<'_>) -> PyResult { + Self::baml_deserialize(data, py) + } + + /// Used for `pickle.dump`: https://docs.python.org/3/library/pickle.html#object.__getnewargs__ + pub fn __getnewargs__<'py>(&self, py: Python<'py>) -> PyResult> { + let o = self.baml_serialize(py)?; + Ok(PyTuple::new_bound(py, vec![o])) + } + pub fn __repr__(&self) -> String { match &self.inner.content { BamlMediaContent::Url(url) => { diff --git a/engine/language_client_python/src/types/image.rs b/engine/language_client_python/src/types/image.rs index 8bc2496c6..01dbae941 100644 --- a/engine/language_client_python/src/types/image.rs +++ b/engine/language_client_python/src/types/image.rs @@ -1,5 +1,5 @@ use pyo3::prelude::{pymethods, PyResult}; -use pyo3::types::PyType; +use pyo3::types::{PyTuple, PyType}; use pyo3::{Bound, PyAny, PyObject, Python}; use pythonize::{depythonize_bound, pythonize}; @@ -49,6 +49,20 @@ impl BamlImagePy { } } + /// Defines the default constructor: https://pyo3.rs/v0.23.3/class#constructor + /// + /// Used for `pickle.load`: https://docs.python.org/3/library/pickle.html#object.__getnewargs__ + #[new] + pub fn py_new(data: PyObject, py: Python<'_>) -> PyResult { + Self::baml_deserialize(data, py) + } + + /// Used for `pickle.dump`: https://docs.python.org/3/library/pickle.html#object.__getnewargs__ + pub fn __getnewargs__<'py>(&self, py: Python<'py>) -> PyResult> { + let o = self.baml_serialize(py)?; + Ok(PyTuple::new_bound(py, vec![o])) + } + pub fn __repr__(&self) -> String { match &self.inner.content { baml_types::BamlMediaContent::Url(url) => { diff --git a/engine/language_client_python/src/types/lang_wrapper.rs b/engine/language_client_python/src/types/lang_wrapper.rs index 34142f029..90cd41f49 100644 --- a/engine/language_client_python/src/types/lang_wrapper.rs +++ b/engine/language_client_python/src/types/lang_wrapper.rs @@ -1,7 +1,7 @@ #[macro_export] macro_rules! lang_wrapper { ($name:ident, $type:ty, clone_safe $(, $attr_name:ident : $attr_type:ty = $default:expr)*) => { - #[pyo3::prelude::pyclass] + #[pyo3::prelude::pyclass(module = "baml_py.baml_py")] pub struct $name { pub(crate) inner: std::sync::Arc<$type>, $($attr_name: $attr_type),* @@ -18,7 +18,7 @@ macro_rules! lang_wrapper { }; ($name:ident, $type:ty, thread_safe $(, $attr_name:ident : $attr_type:ty)*) => { - #[pyo3::prelude::pyclass] + #[pyo3::prelude::pyclass(module = "baml_py.baml_py")] pub struct $name { pub(crate) inner: std::sync::Arc>, $($attr_name: $attr_type),* @@ -35,7 +35,7 @@ macro_rules! lang_wrapper { }; ($name:ident, $type:ty, sync_thread_safe $(, $attr_name:ident : $attr_type:ty)*) => { - #[pyo3::prelude::pyclass] + #[pyo3::prelude::pyclass(module = "baml_py.baml_py")] pub struct $name { pub(crate) inner: std::sync::Arc>, $($attr_name: $attr_type),* @@ -62,7 +62,7 @@ macro_rules! lang_wrapper { }; ($name:ident, $type:ty $(, $attr_name:ident : $attr_type:ty = $default:expr)*) => { - #[pyo3::prelude::pyclass] + #[pyo3::prelude::pyclass(module = "baml_py.baml_py")] pub struct $name { pub(crate) inner: $type, $($attr_name: $attr_type),* @@ -79,7 +79,7 @@ macro_rules! lang_wrapper { }; ($name:ident, $type:ty, no_from $(, $attr_name:ident : $attr_type:ty)*) => { - #[pyo3::prelude::pyclass] + #[pyo3::prelude::pyclass(module = "baml_py.baml_py")] pub struct $name { pub(crate) inner: $type, $($attr_name: $attr_type),* diff --git a/engine/language_client_python/src/types/media_repr.rs b/engine/language_client_python/src/types/media_repr.rs index ecab71382..5e07688ff 100644 --- a/engine/language_client_python/src/types/media_repr.rs +++ b/engine/language_client_python/src/types/media_repr.rs @@ -73,6 +73,9 @@ impl TryInto for &BamlMedia { /// can't implement this in internal_monkeypatch without adding a hard dependency /// on pydantic. And we don't want to do _that_, because that will make it harder /// to implement output_type python/vanilla in the future. +/// +/// See docs: +/// https://docs.pydantic.dev/latest/concepts/types/#customizing-validation-with-__get_pydantic_core_schema__ pub fn __get_pydantic_core_schema__( _cls: Bound<'_, PyType>, _source_type: Bound<'_, PyAny>, @@ -129,7 +132,7 @@ def get_schema(): ret = get_schema() "#; // py.run(code, None, Some(ret_dict)); - let fun: Py = PyModule::from_code_bound(py, code, "", "")? + let fun: Py = PyModule::from_code_bound(py, code, file!(), crate::MODULE_NAME)? .getattr("ret")? .into(); Ok(fun.to_object(py)) diff --git a/integ-tests/python/run_tests.sh b/integ-tests/python/run_tests.sh new file mode 100755 index 000000000..5289cc822 --- /dev/null +++ b/integ-tests/python/run_tests.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Run tests for CI + +set -euxo pipefail + +env -u CONDA_PREFIX poetry run maturin develop --manifest-path ../../engine/language_client_python/Cargo.toml +poetry run baml-cli generate --from ../baml_src + +# test_functions.py is excluded because it requires credentials +poetry run pytest "$@" --ignore=tests/test_functions.py diff --git a/integ-tests/python/tests/test_python.py b/integ-tests/python/tests/test_python.py new file mode 100644 index 000000000..be3f505a0 --- /dev/null +++ b/integ-tests/python/tests/test_python.py @@ -0,0 +1,44 @@ +"""Test the compatibility of baml_py with the Python ecosystem.""" + +import baml_py +import inspect +import pickle +import pydantic +import pytest + + +def test_inspect(): + """Assert that baml_py is compatible with the inspect module. + + This is a regression test for a bug where `inspect.stack()` would implode if the + pyo3 code called `PyModule::from_code` without specifying the `file_name` arg (i.e. + without specifying the source file metadata for the inline Python snippet). + """ + + class LoremIpsum(pydantic.BaseModel): # pyright: ignore[reportUnusedClass] + """Defining this Pydantic model alone is sufficient to trigger the bug.""" + + my_image: baml_py.Image + my_audio: baml_py.Audio + + try: + inspect.stack() + except Exception as e: + pytest.fail(f"inspect.stack() raised an unexpected exception: {e}") + + +def test_pickle(): + i = baml_py.Image.from_url("https://example.com/image.png") + p = pickle.dumps(i) + assert i == pickle.loads(pickle.dumps(i)) + assert p == pickle.dumps(pickle.loads(p)) + + i2 = baml_py.Image.from_url("https://example.com/image.jpg") + p2 = pickle.dumps(i2) + assert i2 == pickle.loads(pickle.dumps(i2)) + assert p2 == pickle.dumps(pickle.loads(p2)) + + i3 = baml_py.Image.from_base64("image/png", "iVBORw0KGgoAAAANSUhEUgAAAAUA") + p3 = pickle.dumps(i3) + assert i3 == pickle.loads(pickle.dumps(i3)) + assert p3 == pickle.dumps(pickle.loads(p3))