Skip to content

Commit

Permalink
src/patchset.py: Implement Patchset service
Browse files Browse the repository at this point in the history
The patchset service processes patchset nodes:
- Wait for parent checkout node to be available
- Download checkout node tarball
- Apply patches and calculate patchset hash
- Upload new tarball

Signed-off-by: Nikolay Yurin <[email protected]>
  • Loading branch information
yurinnick committed Oct 21, 2023
1 parent 3711ed6 commit f4a6ebe
Show file tree
Hide file tree
Showing 4 changed files with 297 additions and 19 deletions.
5 changes: 5 additions & 0 deletions config/kernelci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ kdir = "/home/kernelci/data/src/linux"
output = "/home/kernelci/data/output"
storage_config = "docker-host"

[patchset]
kdir = "/home/kernelci/data/src/linux-patchset"
output = "/home/kernelci/data/output"
storage_config = "docker-host"

[scheduler]
output = "/home/kernelci/output"

Expand Down
14 changes: 14 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,17 @@ services:
- '--settings=${KCI_SETTINGS:-/home/kernelci/config/kernelci.toml}'
- 'run'
- '--mode=holdoff'

patchset:
<<: *base-service
container_name: 'kernelci-pipeline-patchset'
command:
- './pipeline/patchset.py'
- '--settings=${KCI_SETTINGS:-/home/kernelci/config/kernelci.toml}'
- 'run'
volumes:
- './src:/home/kernelci/pipeline'
- './config:/home/kernelci/config'
- './data/ssh:/home/kernelci/data/ssh'
- './data/src:/home/kernelci/data/src'
- './data/output:/home/kernelci/data/output'
250 changes: 250 additions & 0 deletions src/patchset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#!/usr/bin/env python3
#
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Copyright (C) 2022 Collabora Limited
# Author: Nikolay Yurin <[email protected]>

import os
import sys
import json
import requests
import time
import tempfile
import hashlib
from datetime import datetime, timedelta
from urllib.parse import urlparse
from urllib.request import urlopen

import kernelci
import kernelci.build
import kernelci.config
from kernelci.legacy.cli import Args, Command, parse_opts
import kernelci.storage

from tarball import Tarball

# FIXME: make patchset service configuration option
ALLOWED_DOMAINS = {"patchwork.kernel.org"}
PATCHSET_SHORT_HASH_LEN = 13


class Patchset(Tarball):
    """Service processing 'patchset' nodes.

    For each running patchset node with an available parent checkout node,
    this service downloads the checkout source tarball, applies the patch
    files listed in the patchset node artifacts, computes a patchset hash
    and uploads the resulting source+patches tarball.
    """

    TAR_CREATE_CMD = """\
set -e
cd {target_dir}/
tar --create --transform "s/^/{prefix}\\//" * | gzip > {tarball_path}
"""

    APPLY_PATCH_SHELL_CMD = """\
set -e
cd {kdir}
patch -p1 < {patch_file}
"""

    # FIXME: This code probably needs rework and put into kernelci.patch
    def _hash_patch(self, patch_file):
        """Return the sha256 hex digest of the normalized patch text.

        Only lines starting with '-' or '+' contribute to the hash, so
        context-only or hunk-offset changes don't affect it.
        """
        allowed_prefixes = {
            "-",  # This covers both removed lines and source file
            "+",  # This covers both added lines and target file
            # "@" Hunk headers are not included in the hash yet
        }
        norm_patch_lines = [
            line for line in patch_file.readlines()
            if line and line[0] in allowed_prefixes
        ]
        # Join with a real newline ("\n"); the original "/n" was a typo
        norm_patch_str = "\n".join(norm_patch_lines)
        return hashlib.sha256(norm_patch_str.encode("utf-8")).hexdigest()

    # FIXME: move into kernelci.patch
    def _apply_patch(self, kdir, patch_url):
        """Download a patch, apply it to the tree in kdir, return its hash.

        Raises FileNotFoundError when the download fails.
        """
        charsets = urlopen(patch_url).headers.get_charsets()
        # Fall back to UTF-8 when the server doesn't declare a charset
        encoding = charsets[0] if charsets and charsets[0] else "utf-8"
        with tempfile.NamedTemporaryFile(
            mode="w+",  # text mode is required when passing an encoding
            prefix="kernel-patch-",
            encoding=encoding
        ) as tmp_f:
            if not kernelci.build._download_file(patch_url, tmp_f.name):
                raise FileNotFoundError(
                    f"Error downloading patch from {patch_url}"
                )

            kernelci.shell_cmd(self.APPLY_PATCH_SHELL_CMD.format(
                kdir=kdir,
                patch_file=tmp_f.name,
            ))

            # The download wrote to tmp_f.name; the handle is still at
            # offset 0, so the hash reads the full patch contents
            return self._hash_patch(tmp_f)

    # FIXME: move into kernelci.patch
    def _apply_patches(self, kdir, patch_artifacts):
        """Apply every patch in order and return the combined hash.

        The patchset hash is the sha256 over the concatenated per-patch
        hashes, so it depends on both patch contents and ordering.
        """
        patchset_hash = hashlib.sha256()
        for patch_name, patch_url in patch_artifacts.items():
            self.log.info(
                f"Applying patch {patch_name}, url: {patch_url}"
            )
            patch_hash = self._apply_patch(kdir, patch_url)
            patchset_hash.update(patch_hash.encode("utf-8"))

        return patchset_hash.hexdigest()

    def _download_checkout_archive(self, tarball_url, retries=3):
        """Download and unpack the parent checkout tarball into kdir."""
        self.log.info(f"Downloading checkout tarball, url: {tarball_url}")
        urlpath = urlparse(tarball_url).path
        tar_filename = os.path.basename(urlpath)
        kernelci.build.pull_tarball(
            kdir=self._kdir,
            url=tarball_url,
            dest_filename=tar_filename,
            retries=retries,
            delete=True
        )

    def _update_node(
        self,
        patchset_node,
        revision,
        tarball_url,
    ):
        """Mark a patchset node 'available' with its revision and tarball.

        API errors are logged rather than raised: the node stays in its
        current state and will be retried or time out.
        """
        node = patchset_node.copy()
        node.update({
            'revision': revision,
            'state': 'available',
            'artifacts': {
                'tarball': tarball_url,
            },
            'holdoff': str(datetime.utcnow() + timedelta(minutes=10))
        })
        try:
            self._api.update_node(node)
        except requests.exceptions.HTTPError as err:
            err_msg = json.loads(err.response.content).get("detail", [])
            self.log.error(err_msg)

    def _setup(self, *args):
        """Subscribe to events for newly created running patchset nodes."""
        return self._api_helper.subscribe_filters({
            'op': 'created',
            'name': 'patchset',
            'state': 'running',
        })

    def _has_approved_domain(self, url):
        """Return True when the url's hostname is an allowed patch source."""
        return urlparse(url).hostname in ALLOWED_DOMAINS

    def _process_patchset(self, checkout_node, patchset_node):
        """Build and publish a patched source tarball for one node.

        Raises RuntimeError/ValueError on configuration mismatches,
        missing artifacts or forbidden patch URLs.
        """
        build_config = self._find_build_config(checkout_node)
        if not build_config:
            # f-string, not logging-style args: RuntimeError doesn't format
            raise RuntimeError(
                "No build config found for checkout node "
                f"{checkout_node['id']}"
            )

        patch_artifacts = patchset_node.get("artifacts")
        if not patch_artifacts:
            raise ValueError(
                f"No patch artifacts available for node {patchset_node['id']}"
            )

        # Refuse to download patches from unapproved hosts
        if not all(
            self._has_approved_domain(patch_mbox_url)
            for patch_mbox_url in patch_artifacts.values()
        ):
            raise RuntimeError("Forbidden domain")

        patchset_build_config = self._find_build_config(patchset_node)
        if build_config != patchset_build_config:
            raise ValueError(
                f"Patchsets node {patchset_node['id']} build config "
                f"doesn't match to the parent node {checkout_node['id']}"
            )

        self._download_checkout_archive(checkout_node["artifacts"]["tarball"])

        checkout_name = '-'.join([
            'linux',
            checkout_node["revision"]["tree"],
            checkout_node["revision"]["branch"],
            checkout_node["revision"]["describe"],
        ])
        checkout_path = os.path.join(self._kdir, checkout_name)
        patchset_hash = self._apply_patches(checkout_path, patch_artifacts)
        patchset_hash_short = patchset_hash[:PATCHSET_SHORT_HASH_LEN]
        tarball_path = self._make_tarball(
            target_dir=checkout_path,
            tarball_name=f"{checkout_name}-{patchset_hash_short}"
        )
        tarball_url = self._push_tarball(tarball_path)

        # The patchset revision is the checkout revision plus the hash
        patchset_revision = checkout_node["revision"].copy()
        patchset_revision['patchset'] = patchset_hash

        self._update_node(
            patchset_node,
            patchset_revision,
            tarball_url
        )

    def _mark_failed(self, patchset_node):
        """Set a patchset node to done/fail; API errors are only logged."""
        node = patchset_node.copy()
        node.update({
            'state': 'done',
            'result': 'fail',
        })
        try:
            self._api.update_node(node)
        except requests.exceptions.HTTPError as err:
            err_msg = json.loads(err.response.content).get("detail", [])
            self.log.error(err_msg)

    def _run(self, sub_id):
        """Poll for running patchset nodes and process them forever."""
        self.log.info("Listening for new trigger events")
        self.log.info("Press Ctrl-C to stop.")

        while True:
            patchset_nodes = self._api.get_nodes({
                "name": "patchset",
                "state": "running",
            })

            for patchset_node in patchset_nodes:
                # A patchset node without a parent checkout can't be built
                if not patchset_node["parent"]:
                    continue

                checkout_node = self._api.get_node(patchset_node["parent"])
                # Wait until the parent checkout tarball is available
                if checkout_node["state"] == 'running':
                    continue

                try:
                    self._process_patchset(checkout_node, patchset_node)
                except Exception as e:
                    # Best-effort service loop: mark the node failed,
                    # log, and keep processing the remaining nodes
                    self._mark_failed(patchset_node)
                    self.log.error(e)

            time.sleep(10)


class cmd_run(Command):
    """CLI command that runs the Patchset service."""

    help = (
        "Wait for a checkout node to be available "
        "and push a source+patchset tarball"
    )
    # Required arguments for the service
    args = [
        Args.kdir,
        Args.output,
        Args.api_config,
        Args.storage_config,
    ]
    # Optional arguments
    opt_args = [
        Args.verbose,
        Args.storage_cred,
    ]

    def __call__(self, configs, args):
        service = Patchset(configs, args)
        return service.run(args)


if __name__ == '__main__':
    # Parse CLI options, load the pipeline config and dispatch the command
    opts = parse_opts('patchset', globals())
    pipeline_configs = kernelci.config.load('config/pipeline.yaml')
    status = opts.command(pipeline_configs, opts)
    exit_code = 0 if status is True else 1
    sys.exit(exit_code)
47 changes: 28 additions & 19 deletions src/tarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
# Copyright (C) 2022 Collabora Limited
# Author: Guillaume Tucker <[email protected]>
# Author: Jeny Sadadia <[email protected]>
# Author: Nikolay Yurin <[email protected]>

from datetime import datetime, timedelta
import logging
import os
import re
import sys
import urllib.parse
import json
import requests

Expand All @@ -32,6 +31,11 @@


class Tarball(Service):
TAR_CREATE_CMD = """\
set -e
cd {target_dir}
git archive --format=tar --prefix={prefix}/ HEAD | gzip > {tarball_path}
"""

def __init__(self, configs, args):
super().__init__(configs, args, 'tarball')
Expand All @@ -50,7 +54,7 @@ def _find_build_config(self, node):
revision = node['revision']
tree = revision['tree']
branch = revision['branch']
for name, config in self._build_configs.items():
for config in self._build_configs.values():
if config.tree.name == tree and config.branch == branch:
return config

Expand All @@ -59,25 +63,22 @@ def _update_repo(self, config):
kernelci.build.update_repo(config, self._kdir)
self.log.info("Repo updated")

def _make_tarball(self, config, describe):
name = '-'.join(['linux', config.tree.name, config.branch, describe])
tarball = f"{name}.tar.gz"
self.log.info(f"Making tarball {tarball}")
output_path = os.path.relpath(self._output, self._kdir)
cmd = """\
set -e
cd {kdir}
git archive --format=tar --prefix={name}/ HEAD | gzip > {output}/{tarball}
""".format(kdir=self._kdir, name=name, output=output_path, tarball=tarball)
def _make_tarball(self, target_dir, tarball_name):
self.log.info(f"Making tarball {tarball_name}")
tarball_path = os.path.join(self._output, f"{tarball_name}.tar.gz")
cmd = self.TAR_CREATE_CMD.format(
target_dir=target_dir,
prefix=tarball_name,
tarball_path=tarball_path
)
self.log.info(cmd)
kernelci.shell_cmd(cmd)
self.log.info("Tarball created")
return tarball
return tarball_path

def _push_tarball(self, config, describe):
tarball_name = self._make_tarball(config, describe)
tarball_path = os.path.join(self._output, tarball_name)
self.log.info(f"Uploading {tarball_path}")
def _push_tarball(self, tarball_path):
tarball_name = os.path.basename(tarball_path)
self.log.info(f"Uploading {tarball_name}")
tarball_url = self._storage.upload_single((tarball_path, tarball_name))
self.log.info(f"Upload complete: {tarball_url}")
os.unlink(tarball_path)
Expand Down Expand Up @@ -134,11 +135,19 @@ def _run(self, sub_id):
continue

self._update_repo(build_config)

describe = kernelci.build.git_describe(
build_config.tree.name, self._kdir
)
version = self._get_version_from_describe()
tarball_url = self._push_tarball(build_config, describe)
tarball_name = '-'.join([
'linux',
build_config.tree.name,
build_config.branch,
describe
])
tarball_path = self._make_tarball(self._kdir, tarball_name)
tarball_url = self._push_tarball(tarball_path)
self._update_node(checkout_node, describe, version, tarball_url)

return True
Expand Down

0 comments on commit f4a6ebe

Please sign in to comment.