Skip to content
Merged
7 changes: 7 additions & 0 deletions doc/man1/flux-jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,13 @@ the state of the job or other context:
Returns the job runtime for jobs in RUN state or later, otherwise the
job duration (if set) is returned.

**inactive_reason**
If the job is inactive, returns the reason that the job is no
longer active. Generally speaking, the output will be "Exit", "Timeout",
"Canceled", or a description of the signal that terminated the job.
If available, other contextual information will also be provided,
such as the exit ``returncode`` or cancellation message.

CONFIGURATION
=============

Expand Down
1 change: 1 addition & 0 deletions src/bindings/python/flux/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ nobase_fluxpy_PYTHON = \
message.py \
constants.py \
util.py \
compat36.py \
future.py \
memoized_property.py \
debugged.py \
Expand Down
79 changes: 79 additions & 0 deletions src/bindings/python/flux/compat36.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
###############################################################
# Copyright 2023 Lawrence Livermore National Security, LLC
# (c.f. AUTHORS, NOTICE.LLNS, COPYING)
#
# This file is part of the Flux resource manager framework.
# For details, see https://github.com/flux-framework.
#
# SPDX-License-Identifier: LGPL-3.0
###############################################################

import signal


# strsignal() is only available on Python 3.8 and up
#
# (signal attribute name, description) pairs.  Names are resolved with
# getattr() when the table is built because the signal module does not
# define every name on every platform or Python version; a direct
# attribute reference would raise AttributeError instead of the ValueError
# that callers expect.
# N.B. signal.SIGSTKFLT not defined until Python 3.11
_SIGNAL_NAMES = (
    ("SIGHUP", "Hangup"),
    ("SIGINT", "Interrupt"),
    ("SIGQUIT", "Quit"),
    ("SIGILL", "Illegal instruction"),
    ("SIGTRAP", "Trace/breakpoint trap"),
    ("SIGABRT", "Aborted"),
    ("SIGIOT", "Aborted"),
    ("SIGBUS", "Bus error"),
    ("SIGFPE", "Floating point exception"),
    ("SIGKILL", "Killed"),
    ("SIGUSR1", "User defined signal 1"),
    ("SIGSEGV", "Segmentation Fault"),
    ("SIGUSR2", "User defined signal 2"),
    ("SIGPIPE", "Broken pipe"),
    ("SIGALRM", "Alarm clock"),
    ("SIGTERM", "Terminated"),
    ("SIGSTKFLT", "Stack fault"),
    ("SIGCHLD", "Child exited"),
    ("SIGCONT", "Continued"),
    ("SIGSTOP", "Stopped (signal)"),
    ("SIGTSTP", "Stopped"),
    ("SIGTTIN", "Stopped (tty input)"),
    ("SIGTTOU", "Stopped (tty output)"),
    ("SIGURG", "Urgent I/O condition"),
    ("SIGXCPU", "CPU time limit exceeded"),
    ("SIGXFSZ", "File size limit exceeded"),
    ("SIGVTALRM", "Virtual timer expired"),
    ("SIGPROF", "Profiling timer expired"),
    ("SIGWINCH", "Window changed"),
    ("SIGIO", "I/O possible"),
    ("SIGPOLL", "I/O possible"),
    ("SIGPWR", "Power failure"),
    ("SIGSYS", "Bad system call"),
)


def _build_signal_table():
    """Map each signal number defined by the signal module to its description."""
    table = {}
    for name, description in _SIGNAL_NAMES:
        number = getattr(signal, name, None)
        if number is not None:
            # setdefault() keeps the first description when two names share
            # a number, preserving the first-match order of the name list.
            table.setdefault(int(number), description)
    return table


_SIGNAL_DESCRIPTIONS = _build_signal_table()


def strsignal(signum):
    """
    Return a description of signal number ``signum``.

    Fallback for signal.strsignal() on Python versions that lack it.

    :param signum: signal number (an int or a ``signal.Signals`` member)
    :returns: the description string for ``signum``
    :raises ValueError: if ``signum`` has no entry in the description table
    """
    try:
        return _SIGNAL_DESCRIPTIONS[signum]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which can never match
        raise ValueError(f"unknown signal number: {signum!r}") from None
66 changes: 66 additions & 0 deletions src/bindings/python/flux/job/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@
except ImportError:
SchedResourceList = None

# strsignal() is only available in Python 3.8 and up.
# flux-core's minimum is 3.6. Use compat library if not available.
try:
from signal import strsignal # novermin
except ImportError:
from flux.compat36 import strsignal


def statetostr(stateid, fmt="L"):
    """
    Return the name of job state ``stateid`` as a ``str``.

    :param stateid: job state id passed through to flux_job_statetostr()
    :param fmt: format argument passed through to flux_job_statetostr()
    """
    encoded = raw.flux_job_statetostr(stateid, fmt)
    # the binding hands back bytes; decode to text for Python callers
    return encoded.decode("utf-8")
Expand Down Expand Up @@ -541,6 +548,55 @@ def zero_remove(key):

return result

@memoized_property
def inactive_reason(self):
    """
    Generate contextual exit reason based on how the job ended

    Returns an empty string if the job is not in the INACTIVE state.
    Otherwise the reason is derived from the job result:

    - CANCELED: "Canceled", with the cancel exception note appended
      when one is available.
    - FAILED: a description of a severity 0, non-"exec" exception if
      one occurred; otherwise a description based on the returncode
      (signal description for values above 128, fixed messages for
      126, 127 and 128, "Exit N" for anything else).
    - TIMEOUT: "Timeout".
    - any other result: "Exit N" where N is the returncode.
    """
    if str(self.state) != "INACTIVE":
        return ""

    result = str(self.result)
    if result == "CANCELED":
        if (
            self.exception.occurred
            and self.exception.type == "cancel"
            and self.exception.note
        ):
            return f"Canceled: {self.exception.note}"
        return "Canceled"

    if result == "FAILED":
        # exception.type == "exec" is special case, handled by returncode
        if (
            self.exception.occurred
            and self.exception.type != "exec"
            and self.exception.severity == 0
        ):
            note = ""
            if self.exception.note:
                note = f" note={self.exception.note}"
            return f"Exception: type={self.exception.type}{note}"
        if self.returncode > 128:
            signum = self.returncode - 128
            try:
                sigdesc = strsignal(signum)
            except ValueError:
                sigdesc = None
            # signal.strsignal() returns None instead of raising when an
            # in-range signal number has no description, so apply the
            # fallback in both cases rather than returning None.
            return sigdesc or f"Signaled {signum}"
        if self.returncode == 126:
            return "Command invoked cannot execute"
        if self.returncode == 127:
            return "command not found"
        if self.returncode == 128:
            return "Invalid argument to exit"
        return f"Exit {self.returncode}"

    if result == "TIMEOUT":
        return "Timeout"
    return f"Exit {self.returncode}"


def job_fields_to_attrs(fields):
# Note there is no attr for "id", it is always returned
Expand Down Expand Up @@ -593,6 +649,15 @@ def job_fields_to_attrs(fields):
"dependencies": ("dependencies",),
"contextual_info": ("state", "dependencies", "annotations", "nodelist"),
"contextual_time": ("state", "t_run", "t_cleanup", "duration"),
"inactive_reason": (
"state",
"result",
"waitstatus",
"exception_occurred",
"exception_severity",
"exception_type",
"exception_note",
),
# Special cases, pointers to sub-dicts in annotations
"sched": ("annotations",),
"user": ("annotations",),
Expand Down Expand Up @@ -676,6 +741,7 @@ class JobInfoFormat(flux.util.OutputFormat):
"dependencies": "DEPENDENCIES",
"contextual_info": "INFO",
"contextual_time": "TIME",
"inactive_reason": "INACTIVE-REASON",
# The following are special pre-defined cases per RFC27
"annotations.sched.t_estimate": "T_ESTIMATE",
"annotations.sched.reason_pending": "REASON",
Expand Down
11 changes: 8 additions & 3 deletions src/bindings/python/flux/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,12 @@ def fsd(secs):
return strtmp


def empty_outputs():
    """
    Return the tuple of formatted strings that count as "empty" output.

    The tuple holds the empty string, zero-valued duration/time strings,
    the epoch as a UTC literal, and the epoch rendered by
    datetime.fromtimestamp() (i.e. in local time).
    """
    # computed on every call rather than hard-coded, since the rendered
    # string comes from datetime.fromtimestamp() at run time
    epoch_local = datetime.fromtimestamp(0.0).strftime("%FT%T")
    return ("", "0s", "0.0", "0:00:00", "1970-01-01T00:00:00", epoch_local)


class UtilFormatter(Formatter):
# pylint: disable=too-many-branches

Expand Down Expand Up @@ -439,8 +445,7 @@ def format_field(self, value, spec):
# must be deferred to the UtilDatetime format() method, since
# that method will be called after this one:
if spec.endswith("h") and not isinstance(value, UtilDatetime):
localepoch = datetime.fromtimestamp(0.0).strftime("%FT%T")
basecases = ("", "0s", "0.0", "0:00:00", "1970-01-01T00:00:00", localepoch)
basecases = empty_outputs()
value = "-" if str(value) in basecases else str(value)
spec = spec[:-1] + "s"
retval = super().format_field(value, spec)
Expand Down Expand Up @@ -639,7 +644,7 @@ def filter_empty(self, items):

# Iterate over all items, rebuilding lst each time to contain
# only those fields that resulted in non-"empty" strings:
empty = ("", "0", "0s", "0.0", "0:00:00", "1970-01-01T00:00:00")
empty = empty_outputs()
for item in items:
lst = [x for x in lst if formatter.format(x["fmt"], item) in empty]

Expand Down
7 changes: 7 additions & 0 deletions src/cmd/flux-jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ class FluxJobsConfig(UtilConfig):
"{priority:<12} {state:<8.8} {dependencies}"
),
},
"endreason": {
"description": "Show why each job ended",
"format": (
"{id.f58:>12} ?:{queue:<8.8} {username:<8.8} {name:<10.10+} "
"{status_abbrev:>2.2} {t_inactive!d:%b%d %R::>12h} {inactive_reason}"
),
},
}

def __init__(self):
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ TESTSCRIPTS = \
python/t0025-uri.py \
python/t0026-tree.py \
python/t0027-constraint-parser.py \
python/t0028-compat36.py \
python/t1000-service-add-remove.py

if HAVE_FLUX_SECURITY
Expand Down
49 changes: 49 additions & 0 deletions t/python/t0028-compat36.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

###############################################################
# Copyright 2021 Lawrence Livermore National Security, LLC
# (c.f. AUTHORS, NOTICE.LLNS, COPYING)
#
# This file is part of the Flux resource manager framework.
# For details, see https://github.com/flux-framework.
#
# SPDX-License-Identifier: LGPL-3.0
###############################################################

import unittest
import subflux # To set up PYTHONPATH
from pycotap import TAPTestRunner
import signal
import flux.compat36


class TestCompat36(unittest.TestCase):
    """Tests for the fallback implementations in flux.compat36."""

    def test_strsignal(self):
        """strsignal() returns a non-empty string or raises ValueError."""
        # Cover getting all signal strings.  Iterate over every signal
        # number up to signal.NSIG so that no described signal is skipped.
        # N.B. SIGSTKFLT not defined until Python 3.11, will get ValueError
        for signum in range(1, signal.NSIG):
            try:
                desc = flux.compat36.strsignal(signum)
            except ValueError:
                # no description for this number, which is acceptable
                continue
            self.assertIsInstance(desc, str)
            self.assertTrue(desc)

        # Sanity check values of the most common ones.
        common = (
            (signal.SIGHUP, "Hangup"),
            (signal.SIGINT, "Interrupt"),
            (signal.SIGKILL, "Killed"),
            (signal.SIGSEGV, "Segmentation Fault"),
            (signal.SIGTERM, "Terminated"),
        )
        for signum, expected in common:
            self.assertEqual(flux.compat36.strsignal(signum), expected)

    def test_strsignal_invalid(self):
        """strsignal() raises ValueError for a number that is not a signal."""
        with self.assertRaises(ValueError):
            flux.compat36.strsignal(0)


# Run the tests with TAP output when executed as a script.
if __name__ == "__main__":
    tap_runner = TAPTestRunner()
    unittest.main(testRunner=tap_runner)
Loading