Skip to content

Commit ec19e59

Browse files
author
Neil Williams
committed
Base for new dispatcher design
Merge in pipeline support Add YAML line number retrieval. Merge CompositeAction into Pipeline Change-Id: I75cfec232c83340adb77d3ca85cce32da2086846
1 parent 0ceadc3 commit ec19e59

23 files changed

+1431
-24
lines changed

ci-run

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ set -e
44

55
pep8 --ignore E501 .
66

7-
python setup.py test
7+
TMPDIR=. python setup.py test

lava/dispatcher/commands.py

+55-22
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import logging
44
import os
55
import sys
6+
import yaml
67

78
from json_schema_validator.errors import ValidationError
89
from lava.tool.command import Command
@@ -11,6 +12,7 @@
1112
import lava_dispatcher.config
1213
from lava_dispatcher.config import get_config, get_device_config, list_devices
1314
from lava_dispatcher.job import LavaTestJob, validate_job_data
15+
import lava_dispatcher.pipeline.parser
1416

1517

1618
class SetUserConfigDirAction(argparse.Action):
@@ -48,6 +50,43 @@ def invoke(self):
4850
print d
4951

5052

53+
def run_legacy_job(job_data, oob_file, config, output_dir, validate):
    """Run (or merely validate) a legacy JSON dispatcher job.

    :param job_data: deserialized job definition (dict)
    :param oob_file: file object for out-of-band notifications
    :param config: dispatcher configuration object
    :param output_dir: directory for job output artifacts
    :param validate: when True, only validate the job data; do not run
    """
    # The legacy dispatcher drives hardware directly, so root is required.
    if os.getuid() != 0:
        logging.error("lava dispatch has to be run as root")
        sys.exit(1)

    # LavaTestJob expects the serialized JSON text, not the dict.
    json_job_data = json.dumps(job_data)
    job = LavaTestJob(json_job_data, oob_file, config, output_dir)

    # FIXME Return status
    if validate:
        try:
            validate_job_data(job.job_data)
        except ValidationError as e:
            print(e)
    else:
        job.run()
70+
71+
72+
def get_pipeline_runner(job):
    """Return a callable that validates and then executes *job*.

    The callable matches the runner signature used by dispatch.invoke();
    its arguments are currently ignored.
    """
    # TODO make use of the arguments passed in
    def run_pipeline_job(job_data, oob_file, config, output_dir, validate_only):
        # FIXME use job_data (?)
        # FIXME use oob_file (!)
        # FIXME use config (!)
        # FIXME use output_dir (!)
        try:
            job.validate()
            if validate_only:
                return
            job.run()
        except lava_dispatcher.pipeline.JobError as exc:
            print(exc)
            sys.exit(2)

    return run_pipeline_job
88+
89+
5190
class dispatch(DispatcherCommand):
5291
"""
5392
Run test scenarios on virtual and physical hardware
@@ -86,10 +125,6 @@ def invoke(self):
86125

87126
config = None
88127

89-
if os.getuid() != 0:
90-
logging.error("lava dispatch has to be run as root")
91-
exit(1)
92-
93128
if self.args.oob_fd:
94129
oob_file = os.fdopen(self.args.oob_fd, 'w')
95130
else:
@@ -128,38 +163,36 @@ def invoke(self):
128163
setproctitle("%s [job: %s]" % (
129164
getproctitle(), self.args.job_id))
130165

131-
# Load the scenario file
132-
with open(self.args.job_file) as stream:
133-
jobdata = stream.read()
134-
json_jobdata = json.loads(jobdata)
166+
# Load the job file
167+
job_runner, job_data = self.parse_job_file(self.args.job_file)
135168

136169
# detect multinode and start a NodeDispatcher to work with the LAVA Coordinator.
137170
if not self.args.validate:
138-
if 'target_group' in json_jobdata:
139-
node = NodeDispatcher(json_jobdata, oob_file, self.args.output_dir)
171+
if 'target_group' in job_data:
172+
node = NodeDispatcher(job_data, oob_file, self.args.output_dir)
140173
node.run()
141174
# the NodeDispatcher has started and closed.
142175
exit(0)
143176
if self.args.target is None:
144-
if 'target' not in json_jobdata:
177+
if 'target' not in job_data:
145178
logging.error("The job file does not specify a target device. "
146179
"You must specify one using the --target option.")
147180
exit(1)
148181
else:
149-
json_jobdata['target'] = self.args.target
150-
jobdata = json.dumps(json_jobdata)
182+
job_data['target'] = self.args.target
151183
if self.args.output_dir and not os.path.isdir(self.args.output_dir):
152184
os.makedirs(self.args.output_dir)
153-
job = LavaTestJob(jobdata, oob_file, config, self.args.output_dir)
154185

155-
#FIXME Return status
156-
if self.args.validate:
157-
try:
158-
validate_job_data(job.job_data)
159-
except ValidationError as e:
160-
print e
161-
else:
162-
job.run()
186+
job_runner(job_data, oob_file, config, self.args.output_dir, self.args.validate)
187+
188+
def parse_job_file(self, filename):
    """Pick a runner and load the job data based on the file extension.

    YAML files are handled by the new pipeline parser and paired with a
    pipeline runner; anything else is assumed to be a legacy JSON job
    and paired with the legacy runner.

    :param filename: path to the submitted job file
    :return: (runner_callable, job_data) tuple
    """
    if filename.lower().endswith(('.yaml', '.yml')):
        parser = lava_dispatcher.pipeline.parser.JobParser()
        # close the handle after parsing instead of leaking it
        with open(filename) as job_file:
            job = parser.parse(job_file)
        return get_pipeline_runner(job), job.parameters

    # everything else is assumed to be JSON
    with open(filename) as job_file:
        return run_legacy_job, json.load(job_file)
163196

164197

165198
class DeviceCommand(DispatcherCommand):

lava_dispatcher/pipeline/README.rst

+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
Requirements
2+
============
3+
4+
Functional:
5+
6+
- It should be possible to have multiple simultaneous connections to the
7+
device and run parallel tests on it. Example: one serial connection
8+
and one SSH connection.
9+
10+
- It should be possible to interact not only with "high-level" software
11+
such as bootloader and OS, but with bare metal as well.
12+
13+
- Support for different types of images:
14+
15+
- pre-built image
16+
- linaro hwpack + rootfs
17+
- kernel + ramdisk/rootfs
18+
- tftp
19+
- nfsroot
20+
21+
- Support for different bootloaders on same platform. Example: uboot,
22+
uefi, and second stage (grub) pipeline
23+
24+
- It should be possible to choose which device to boot from. This
25+
impacts both the deployment code and the boot code
26+
27+
- It must be possible to test advanced multi boot test cases with
28+
repetition - suspend, kexec, wake test cases. In special it is
29+
necessary to test wake, suspend, reboot, kexec etc.
30+
31+
- The dispatcher should be able to provide interactive support to
32+
low-level serial. For some new devices, remote bringup is often
33+
necessary because developers can't have a device on their desks. When
34+
necessary, interact with the scheduler to put board online/offline.
35+
36+
Non-functional:
37+
38+
- Speed. Avoid as much overhead as possible.
39+
40+
- Security. Should not require to be run as root. If necessary, let's
41+
have a separate helper program that can be setuid to do the stuff that
42+
actually needs root privileges.
43+
44+
- Simplicity.
45+
46+
- Having master image and test system on the same device makes several
47+
actions harder than they need to be. Master images must be booted
48+
from the network so that the actual storage on the devices is left
49+
entirely to the test system. When possible, deployment to the test
50+
system should be done by "just" dd'ing an image to the desired
51+
device.
52+
53+
- Avoid as much as possible running commands on the target. When
54+
it is possible to perform some operation in the dispatcher host,
55+
let's not perform it on the target.
56+
57+
Design
58+
======
59+
60+
The proposed design is based around the Pipes and Filters architectural
61+
pattern, which is reified for instance in the UNIX pipes system. The
62+
idea is to have every piece of functionality as self-contained as
63+
possible, and to be able to compose them in sequence to achieve the
64+
desired high-level functionality.
65+
66+
Main concepts in the design
67+
---------------------------
68+
69+
- *Device* represents the device under test.
70+
71+
- *Connection* is a data connection between the dispatcher host and the
72+
device under test. Examples of connections: serial connection, SSH
73+
connection, adb shell, etc.
74+
75+
- *Action* an action that has to be performed. An Action can be a
76+
shell command run on the target, an operation run on
77+
the dispatcher host, or anything. Actions should be as constrained as
78+
possible so that all possible errors can be easily tracked. Where
79+
multiple operations are required, use an action which contains
80+
an internal pipeline and add the individual commands as actions
81+
within that pipeline.
82+
83+
Actions must be aggregated into a *Pipeline* - the top level object is
84+
always a pipeline. Pipelines can repeat actions and actions can include
85+
internal pipelines containing more actions. Actions have parameters which
86+
are set during the parsing of the YAML submission. Parameter data is
87+
static within each action and is used to validate the action before any
88+
pipeline is run. Dynamic data is set in the context which is available
89+
via the parent pipeline of any action. Actions must be idempotent and
90+
must raise a RuntimeError exception if the dynamic data is absent or
91+
unusable. Errors in parameter data must raise a JobError exception.
92+
Each command will receive a connection as an input parameter and can
93+
optionally provide a different connection to the command that
94+
comes after it. Usually, the first command in a pipeline will receive
95+
*None* as connection, and must provide a connection to the subsequent
96+
command.
97+
98+
See `Connection Management`_ below for other requirements that
99+
Actions must observe.
100+
101+
- *Image* represents the test system that needs to be deployed to the
102+
target.
103+
104+
Each command in a pipeline will be given a chance to insert data into
105+
the root filesystem of the image, before the pipeline starts to run.
106+
107+
- *Deployment* is a strategy to deploy a given image to a given device.
108+
Subclasses of deployment represent the different ways of deploying
109+
images to device, which depend on both the type of image and on the
110+
capabilities of the device.
111+
112+
- *Job*. A Job aggregates a *Device* representing the target device to
113+
be used, an *Image* to be deployed, and *Action* to be executed. The
114+
Action can be, and usually *will* be, a composite command composed
115+
of several subcommands.
116+
117+
The deployment strategy will be chosen based on the image and
118+
the device.
119+
120+
Connection management
121+
---------------------
122+
123+
Connections to devices under test are often unreliable and have been a
124+
major source of problems in automation. This way, in the case where a
125+
connection failure (disconnection, serial corruption) occurs during the
126+
execution of a command, that command will be re-tried. Because of this,
127+
every step performed by a command must be prepared to be idempotent,
128+
i.e. to do nothing in the case where it has been performed before, and
129+
more importantly, to not crash if it has been performed before.
130+
131+
Exceptions
132+
----------
133+
134+
LAVA must be clear on what was the likely cause of an incomplete test
135+
job or a failed test result. Any one failure must trigger only one
136+
exception. e.g. A JobError which results in a RuntimeError is still
137+
a bug in the dispatcher code as it should have been caught during
138+
the validation step.
139+
140+
- *JobError*: An Error arising from the information supplied as part of
141+
the TestJob. e.g. HTTP404 on a file to be downloaded as part of the
142+
preparation of the TestJob or a download which results in a file
143+
which tar or gzip does not recognise. This exception is used when
144+
data supplied as the parameters to an Action causes that action
145+
to fail. Job errors should always be supported by a unit test.
146+
147+
- *InfrastructureError*: Exceptions based on an error raised by a component
148+
of the test which is neither the LAVA dispatcher code nor the
149+
code being executed on the device under test. This includes
150+
errors arising from the device (like the arndale SD controller
151+
issue) and errors arising from the hardware to which the device
152+
is connected (serial console connection, ethernet switches or
153+
internet connection beyond the control of the device under test).
154+
Actions are required to include code to check for likely
155+
infrastructure errors so that pipelines can retry or fail the
156+
test, recording whether a retry fixed the infrastructure error.
157+
158+
- *TestError*: exceptions raised when the device under test did not
159+
behave as expected.
160+
161+
- *RuntimeError*: Exceptions arising from dynamic data prepared by
162+
LAVA Dispatcher and failures of Actions not already handled by
163+
the code. Runtime errors are bugs in lava-dispatcher code. (It is
164+
also a bug to use the wrong exception type). Fixes for runtime
165+
error bugs should always include a unit test.

lava_dispatcher/pipeline/__init__.py

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import sys
2+
from collections import OrderedDict
3+
from lava_dispatcher.pipeline.action import *
4+
from lava_dispatcher.pipeline.serial import *
5+
from lava_dispatcher.pipeline.ssh import *
6+
from lava_dispatcher.pipeline.shell import *
7+
from lava_dispatcher.pipeline.run import *
8+
9+
from lava_dispatcher.pipeline.job_actions.boot import *
10+
from lava_dispatcher.pipeline.job_actions.commands import *
11+
from lava_dispatcher.pipeline.job_actions.deploy import *
12+
from lava_dispatcher.pipeline.job_actions.logic import *
13+
from lava_dispatcher.pipeline.job_actions.submit import *
14+
from lava_dispatcher.pipeline.job_actions.test import *
15+
16+
17+
class Pipeline(object):
    """An ordered sequence of Actions forming (part of) a job.

    The top level object of a job is always a Pipeline. Actions may
    carry an internal Pipeline of their own, producing a tree of
    actions addressed by dotted "level" strings (e.g. "2.1.3").
    """

    def __init__(self, parent=None):
        self.children = {}
        self.actions = []
        self.summary = "pipeline"
        self.parent = None
        self.branch_level = 1  # the level of the last added child
        if not parent:
            # a top level pipeline tracks its own actions directly
            self.children = {self: self.actions}
        elif not parent.level:
            # the parent Action must already have been added to an
            # outer pipeline (which assigns its level) before nesting.
            raise RuntimeError("Tried to create a pipeline with an invalid parent.")
        else:
            # parent must be an Action; isinstance accepts subclasses,
            # matching the check in add_action (type() did not).
            if not isinstance(parent, Action):
                raise RuntimeError("Internal pipelines need an Action as a parent")
            self.parent = parent
            self.branch_level = parent.level

    def add_action(self, action):
        """Append *action* to this pipeline and assign its level string."""
        # a second "if not action" check here was unreachable and has
        # been removed.
        if not action or not isinstance(action, Action):
            raise RuntimeError("Only actions can be added to a pipeline: %s" % action)
        self.actions.append(action)
        action.level = "%s.%s" % (self.branch_level, len(self.actions))
        if self.parent:
            self.children.update({self: self.actions})
            self.parent.pipeline = self
        else:
            # top level actions use a plain counter, no dotted prefix
            action.level = "%s" % (len(self.actions))

    def _describe(self, structure):
        # Flatten this pipeline (and any nested ones) into *structure*,
        # keyed by action level strings.
        for action in self.actions:
            structure[action.level] = {
                'description': action.description,
                'summary': action.summary,
                'content': action.explode()
            }
            if not action.pipeline:
                continue
            action.pipeline._describe(structure)

    def describe(self):
        """
        Describe the current pipeline, recursing through any
        internal pipelines.
        :return: OrderedDict of the structure
        """
        structure = OrderedDict()
        self._describe(structure)
        return structure

    @property
    def errors(self):
        """Accumulated errors from every action in this pipeline.

        Returns an empty list for an empty pipeline; the previous
        reduce() call had no initializer and raised TypeError in that
        case (and relied on the Python 2 reduce builtin).
        """
        errors = []
        for action in self.actions:
            errors.extend(action.errors)
        return errors

    def run_actions(self, connection, args=None):
        """Run each action in turn, threading the connection through.

        An action may return a replacement connection for subsequent
        actions; otherwise the current one is reused.
        """
        for action in self.actions:
            new_connection = action.run(connection, args)
            if new_connection:
                connection = new_connection
        return connection

    def prepare_actions(self):
        for action in self.actions:
            action.prepare()

    def post_process_actions(self):
        for action in self.actions:
            action.post_process()

0 commit comments

Comments
 (0)