Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ require (
github.com/opencontainers/selinux v1.9.1
github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913
github.com/pkg/errors v0.9.1
github.com/satori/go.uuid v1.2.0
github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0
github.com/ulikunitz/xz v0.5.10
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4=
github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
Expand Down
238 changes: 238 additions & 0 deletions sif/internal/sif_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// +build linux

package internal

import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"

"github.com/containers/image/v5/sif/sif"
"github.com/pkg/errors"

imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
)

type SifImage struct {
fimg sif.FileImage
rootfs *sif.Descriptor
deffile *sif.Descriptor
defReader *io.SectionReader
cmdlist []string
runscript *bytes.Buffer
env *sif.Descriptor
envReader *io.SectionReader
envlist []string
}

func LoadSIFImage(path string) (image SifImage, err error) {
// open up the SIF file and get its header
image.fimg, err = sif.LoadContainer(path, true)
if err != nil {
return
}

// check for a system partition and save it
image.rootfs, _, err = image.fimg.GetPartPrimSys()
if err != nil {
return SifImage{}, errors.Wrap(err, "looking up rootfs from SIF file")
}

// look for a definition file object
searchDesc := sif.Descriptor{Datatype: sif.DataDeffile}
resultDescs, _, err := image.fimg.GetFromDescr(searchDesc)
if err == nil && resultDescs != nil {
// we assume in practice that typical SIF files don't hold multiple deffiles
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there be a warning, or at least a debug log, in that case?

image.deffile = resultDescs[0]
image.defReader = io.NewSectionReader(image.fimg.Fp, image.deffile.Fileoff, image.deffile.Filelen)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defReader is only used in generateConfig immediately below, so maybe it can just be a local variable.

}
if err = image.generateConfig(); err != nil {
return SifImage{}, err
}

// look for an environment variable set object
searchDesc = sif.Descriptor{Datatype: sif.DataEnvVar}
resultDescs, _, err = image.fimg.GetFromDescr(searchDesc)
if err == nil && resultDescs != nil {
// we assume in practice that typical SIF files don't hold multiple EnvVar sets
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there be a warning, or at least a debug log, in that case?

image.env = resultDescs[0]
image.envReader = io.NewSectionReader(image.fimg.Fp, image.env.Fileoff, image.env.Filelen)
}
Comment on lines +59 to +66
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can’t see any use of .env or .envReader. What does this code do?


return image, nil
}

func (image *SifImage) parseEnvironment(scanner *bufio.Scanner) error {
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || strings.HasPrefix(s, "#") {
continue
}
if strings.HasPrefix(s, "%") {
return nil
}
image.envlist = append(image.envlist, s)
}
if err := scanner.Err(); err != nil {
return errors.Wrap(err, "parsing environment from SIF definition file object")
}
return nil
}

func (image *SifImage) parseRunscript(scanner *bufio.Scanner) error {
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(s, "%") {
return nil
}
image.cmdlist = append(image.cmdlist, s)
}
if err := scanner.Err(); err != nil {
return errors.Wrap(err, "parsing runscript from SIF definition file object")
}
return nil
}

func (image *SifImage) generateRunscript() error {
base := `#!/bin/bash
`
image.runscript = bytes.NewBufferString(base)
for _, s := range image.envlist {
_, err := image.runscript.WriteString(fmt.Sprintln(s))
if err != nil {
return errors.Wrap(err, "writing to runscript buffer")
}
}
for _, s := range image.cmdlist {
_, err := image.runscript.WriteString(fmt.Sprintln(s))
Comment on lines +107 to +113
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Is it intentional that Sprintln interprets Go format strings in the data? That seems very surprising.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how so?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if cmdlist contains Hello %s%50d%n? That’s not going to result in anything useful in the generated script AFAICS.

Luckily, with Go, it’s not a trivially-exploited arbitrary-code-execution vulnerability, but it’s both useless and it exposes the format string processing code to arbitrary potentially-malicious input, something it’s not really intended for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After two months I need to confess that I need to sit down and get back into the context, and I will do that sometime soon. Thanks for the thorough review BTW, but are we not talking about an Sprint(a) vs an Sprintf(format, a) construct here, where Sprintln(a) does not interpret anything? Or are you trying to make another point?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You’re right, my mistake; this is not interpreting as a format string, so it is safe.


(It doesn’t quite make sense to me to do it this way — either performance and memory use is critical, in which case runscript.WriteString(entry); runscript.WriteByte('\n') (avoiding an allocated copy for the string with newline, and all the reflection implied by Sprintln)), or it isn’t so much, and those loops can be replaced by something like strings.Join(image.cmdlist, "\n").

Assuming the performance of this not that much of a concern, that can be deferred for some indeterminate future.)

if err != nil {
return errors.Wrap(err, "writing to runscript buffer")
}
}
return nil
}

func (image *SifImage) generateConfig() error {
if image.deffile == nil {
image.cmdlist = append(image.cmdlist, "bash")
return nil
}

// extract %environment/%runscript from definition file
var err error
scanner := bufio.NewScanner(image.defReader)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
again:
if s == `%environment` {
if err = image.parseEnvironment(scanner); err != nil {
return err
}
} else if s == `%runscript` {
if err = image.parseRunscript(scanner); err != nil {
return err
}
}
s = strings.TrimSpace(scanner.Text())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be redundant with the line at the top of the loop at a first glance, and is very non-obvious. At the very least

for scanner.Scan() {
again:
	// parseEnvironment and parseRunscript, will, on success, scan one line past its section, so we need to
	// return here without going through scanner.Scan()
	s := strings.TrimSpace(scanner.Text())
	if s == `%environment` {
		if err = image.parseEnvironment(scanner); err != nil {
			return err
		}
		goto again
	} else if s == `%runscript` {
		if err = image.parseRunscript(scanner); err != nil {
			return err
		}
		goto again
	}
}

including the comment, but that AFAICS doesn’t handle EOF in parseEnvironment/parseRunscript correctly, calling .Text() after EOF. That’s not invalid but the value is not obvious — I’d expect a trailing %environment line to cause an infinite loop, without checking in detail.

So, how about something vaguely like

// enum state = parsingEnvironment/parsingRunscript/parsingOther
for scanner.Scan() {
	s := strings.TrimSpace(scanner.Text())
	switch {
	case s == "%environment": state = parsingEnvironment
	case s == "%runscript": state = parsingRunscript
	case strings.HasPrefix(s, "%"): state = parsingOther // Warn on unrecognized?
	case state == parsingEnvironment: image.parseEnvironmentEntry(s)
	case state == parsingRunscript: image.parseRunscriptEntry(s)
	default: // ignore s, or warn?
    }
}

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have you tested it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. I’m not claiming that the code as is doesn’t work (well, apart from a trailing directive?), just that it looks very non-obvious.

If there are even more non-obvious gotchas that I didn’t notice by reading the code as is, those gotchas should be commented in detail and/or covered by unit tests.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I’m not sure which “it” I was supposed to test, but looking at https://github.com/mtrmac/image/tree/sif-wip :

  • A trailing %… section header does not cause an infinite loop, because scanner.Scan(), after EOF, returns an empty string in scanner.Text(); AFAICS that’s totally undocumented (and possibly even contrary to the strict text of the existing documentation). So it’s good that there isn’t an infinite loop but we shouldn’t rely on that, unless there’s some guarantee I’m missing.
  • A state-machine approach like the above does work, at least enough to pass a trivial smoke test I added.

if s == `%environment` || s == `%runscript` {
goto again
}
}
if err := scanner.Err(); err != nil {
return errors.Wrap(err, "reading lines from SIF definition file object")
}

if len(image.cmdlist) == 0 && len(image.envlist) == 0 {
image.cmdlist = append(image.cmdlist, "bash")
} else {
if err = image.generateRunscript(); err != nil {
return errors.Wrap(err, "generating runscript")
}
image.cmdlist = []string{"/podman/runscript"}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image.cmdlist serves two different purposes — a list of bash commands (e.g. input to generateRunscript, and a single command to be by the runtime. Those should be two separate variables.

}

return nil
}

func (image SifImage) GetConfig(config *imgspecv1.Image) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of this function doesn’t match what it does. Maybe a ) Command() []string that just returns the command to run?

config.Config.Cmd = append(config.Config.Cmd, image.cmdlist...)
return nil
}

func (image SifImage) UnloadSIFImage() (err error) {
err = image.fimg.UnloadContainer()
return
Comment on lines +168 to +170
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
func (image SifImage) UnloadSIFImage() (err error) {
err = image.fimg.UnloadContainer()
return
func (image SifImage) UnloadSIFImage() error {
return image.fimg.UnloadContainer()

}

func (image SifImage) GetSIFID() string {
return image.fimg.Header.ID.String()
}

func (image SifImage) GetSIFArch() string {
return sif.GetGoArch(string(image.fimg.Header.Arch[:sif.HdrArchLen-1]))
}

const squashFilename = "rootfs.squashfs"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Absolutely non-blocking: Below is not too much code that it would be unmanageable, still, the way every function gets a tempdir and implicitly knowns its full structure has unnecessary implicit dependencies.

A top-level SquashFSToTarLayer “driver” that determines squashPath/tarPath/extractedPath and passes those to its subroutines, which don’t make any assumptions about the complete structure of the directory, would be a bit more explicit.)

const tarFilename = "rootfs.tar"

func runUnSquashFSTar(tempdir string) (err error) {
script := `
#!/bin/sh
unsquashfs -f ` + squashFilename + ` && tar --acls --xattrs -C ./squashfs-root -cpf ` + tarFilename + ` ./
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn’t shquashfs-root be removed immediately afterwards, to decrease disk space usage?

`

if err = ioutil.WriteFile(filepath.Join(tempdir, "script"), []byte(script), 0755); err != nil {
return err
}
cmd := []string{"fakeroot", "--", "./script"}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocking: I don’t think we need this single-use variable; this can be just constants passed to exec.Command.

And then s/xcmd/cmd/.


xcmd := exec.Command(cmd[0], cmd[1:]...)
xcmd.Stderr = os.Stderr
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the output should be captured and provided to the caller (on error only, I guess); a library like c/image shouldn’t just write unexpected data to the standard streams. Probably using xcmd.CombinedOutput().

xcmd.Dir = tempdir
err = xcmd.Run()
return
}

func (image *SifImage) writeRunscript(tempdir string) (err error) {
if image.runscript == nil {
return nil
}
rsPath := filepath.Join(tempdir, "squashfs-root", "podman")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If runUnSquashFSTar depends on the squashfs-root name, it would be nice to make it a Go constant to make that dependency visible. (Also see elsewhere.)

if err = os.MkdirAll(rsPath, 0755); err != nil {
return
}
if err = ioutil.WriteFile(filepath.Join(rsPath, "runscript"), image.runscript.Bytes(), 0755); err != nil {
return errors.Wrap(err, "writing /podman/runscript")
}
return nil
}

func (image SifImage) SquashFSToTarLayer(tempdir string) (tarpath string, err error) {
if _, err = image.fimg.Fp.Seek(image.rootfs.Fileoff, 0); err != nil {
return
}
f, err := os.Create(filepath.Join(tempdir, squashFilename))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn’t the file be removed by the time this function returns, so that we don’t unnecessarily use up disk space for another copy of the data?


(Do we even need a copy? unsquashfs has an -offset option — but I know nothing about the format and I haven’t tried using it.)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we even need a copy? unsquashfs has an -offset option

This does work, but sadly isn’t available in RHEL 8.

if err != nil {
return
}
defer f.Close()
if _, err = io.CopyN(f, image.fimg.Fp, image.rootfs.Filelen); err != nil {
return
}
if err = f.Sync(); err != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this necessary?

return
}
if err = image.writeRunscript(tempdir); err != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Non-blocking? It’s non-obvious that writeRunscripts affects the output of runUnSquashFSTar, which at a first glance seems to create the complete input for the tar file. That seems with at least a comment somewhere, or possibly some kind of restructuring — maybe inlining writeRunscript into runUnSquashFSTar, and renaming the latter one to something like createTar.)

return
}
if err = runUnSquashFSTar(tempdir); err != nil {
return
}
return filepath.Join(tempdir, tarFilename), nil
}
27 changes: 27 additions & 0 deletions sif/sif/LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Copyright (c) 2018, Sylabs Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Loading