Skip to content

Commit aa0ccbe

Browse files
committed
[internal] Add debug output for intermittent abort in poll_workunits. (pantsbuild#14253)
Adds debug output to attempt to catch what we thought was a panic being converted into an abort, but which might just be an all-natural abort from another source. See PyO3/pyo3#2102 for some discussion of the relevant problem. Beyond just being debug output, this patch actually seems to avoid the issue for the user who was experiencing it. [ci skip-build-wheels]
1 parent e287aac commit aa0ccbe

File tree

1 file changed

+44
-28
lines changed

1 file changed

+44
-28
lines changed

src/rust/engine/src/externs/interface.rs

+44-28
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use pyo3::prelude::{
3636
PyResult as PyO3Result, Python, ToPyObject,
3737
};
3838
use pyo3::types::{PyBytes, PyDict, PyList, PyTuple, PyType};
39-
use pyo3::{create_exception, IntoPy, PyAny};
39+
use pyo3::{create_exception, IntoPy, PyAny, PyRef};
4040
use regex::Regex;
4141
use rule_graph::{self, RuleGraph};
4242
use task_executor::Executor;
@@ -843,36 +843,52 @@ async fn workunits_to_py_tuple_value(
843843

844844
#[pyfunction]
845845
fn session_poll_workunits(
846-
py: Python,
847-
py_scheduler: &PyScheduler,
848-
py_session: &PySession,
846+
py_scheduler: PyObject,
847+
py_session: PyObject,
849848
max_log_verbosity_level: u64,
850849
) -> PyO3Result<PyObject> {
851-
let py_level: PythonLogLevel = max_log_verbosity_level
852-
.try_into()
853-
.map_err(|e| PyException::new_err(format!("{}", e)))?;
854-
let core = &py_scheduler.0.core;
855-
core.executor.enter(|| {
856-
let (started, completed) = py.allow_threads(|| {
857-
py_session
858-
.0
859-
.workunit_store()
860-
.latest_workunits(py_level.into())
861-
});
850+
// TODO: Black magic. PyObject is not marked UnwindSafe, and contains an UnsafeCell. Since PyO3
851+
// only allows us to receive `pyfunction` arguments as `PyObject` (or references under a held
852+
// GIL), we cannot do what it does to use `catch_unwind` which would be interacting with
853+
// `catch_unwind` while the object is still a raw pointer, and unchecked.
854+
//
855+
// Instead, we wrap the call, and assert that it is safe. It really might not be though. So this
856+
// code should only live long enough to shake out the current issue, and an upstream issue with
857+
// PyO3 will be the long term solution.
858+
//
859+
// see https://github.com/PyO3/pyo3/issues/2102 for more info.
860+
let py_scheduler = std::panic::AssertUnwindSafe(py_scheduler);
861+
let py_session = std::panic::AssertUnwindSafe(py_session);
862+
std::panic::catch_unwind(|| {
863+
let (core, session, py_level) = {
864+
let gil = Python::acquire_gil();
865+
let py = gil.python();
866+
867+
let py_scheduler = py_scheduler.extract::<PyRef<PyScheduler>>(py)?;
868+
let py_session = py_session.extract::<PyRef<PySession>>(py)?;
869+
let py_level: PythonLogLevel = max_log_verbosity_level
870+
.try_into()
871+
.map_err(|e| PyException::new_err(format!("{}", e)))?;
872+
(py_scheduler.0.core.clone(), py_session.0.clone(), py_level)
873+
};
874+
core.executor.enter(|| {
875+
let (started, completed) = session.workunit_store().latest_workunits(py_level.into());
862876

863-
let started_val = core.executor.block_on(workunits_to_py_tuple_value(
864-
py,
865-
started,
866-
core,
867-
&py_session.0,
868-
))?;
869-
let completed_val = core.executor.block_on(workunits_to_py_tuple_value(
870-
py,
871-
completed,
872-
core,
873-
&py_session.0,
874-
))?;
875-
Ok(externs::store_tuple(py, vec![started_val, completed_val]).into())
877+
let gil = Python::acquire_gil();
878+
let py = gil.python();
879+
880+
let started_val = core
881+
.executor
882+
.block_on(workunits_to_py_tuple_value(py, started, &core, &session))?;
883+
let completed_val = core
884+
.executor
885+
.block_on(workunits_to_py_tuple_value(py, completed, &core, &session))?;
886+
Ok(externs::store_tuple(py, vec![started_val, completed_val]).into())
887+
})
888+
})
889+
.unwrap_or_else(|e| {
890+
log::warn!("Panic in `session_poll_workunits`: {:?}", e);
891+
std::panic::resume_unwind(e);
876892
})
877893
}
878894

0 commit comments

Comments
 (0)