commit 4c7afb8afa5232c6d51722b024afaf1f261d2c20 (tree)
parent ce8ce9f59edeb0eaa2390dd3f18727d02e721318
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Mon, 24 May 2021 00:11:58 +0300
doc
Diffstat:
5 files changed, 247 insertions(+), 88 deletions(-)
diff --git a/main.go b/main.go
@@ -50,9 +50,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
if err != nil {
return err
}
- defer func() {
- err = multierr.Append(err, in.Close())
- }()
+ defer func() { err = multierr.Append(err, in.Close()) }()
var out *os.File
outf := string(r.PositionalArgs.Outfile)
@@ -64,9 +62,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
return err
}
}
- defer func() {
- err = multierr.Append(err, out.Close())
- }()
+ defer func() { err = multierr.Append(err, out.Close()) }()
return rootfs.RootFS(in, out)
}
diff --git a/rootfs/BUILD b/rootfs/BUILD
@@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
- srcs = ["rootfs.go"],
+ srcs = [
+ "doc.go",
+ "rootfs.go",
+ ],
importpath = "github.com/motiejus/code/undocker/rootfs",
visibility = ["//visibility:public"],
deps = ["@org_uber_go_multierr//:go_default_library"],
diff --git a/rootfs/doc.go b/rootfs/doc.go
@@ -0,0 +1,70 @@
+// Package rootfs extracts all layers of a Docker container image to a single
+// tarball. It will go through all layers in order and copy every file to the
+// destination archive.
+//
+// In addition, it processes those files according to the rules below.
+//
+// == Non-directory will be copied only once ==
+// A non-directory will be copied only once, and only from its last
+// occurrence. I.e. if file /a/b was found in layers 0 and 2, only the file
+// from layer 2 will be used.
+// Directories will always be copied, even if there are duplicates. This is
+// to avoid a situation like this:
+// layer0:
+// - ./dir/
+// - ./dir/file
+// layer1:
+// - ./dir/
+// - ./dir/file
+// In theory, the directory from layer 1 takes precedence, so a tarball like
+// this could be created:
+// - ./dir/ (from layer1)
+// - ./dir/file (from layer1)
+// However, imagine the following:
+// layer0:
+// - ./dir/
+// - ./dir/file1
+// layer1:
+// - ./dir/
+// Then the resulting tarball would have:
+// - ./dir/file1 (from layer0)
+// - ./dir/ (from layer1)
+// Which would mean `untar` would try to untar a file to a directory which
+// was not yet created. Therefore directories will be copied to the resulting
+// tar in the order they appear in the layers.
+//
+// == Special files: .dockerenv ==
+//
+// .dockerenv is present in all docker containers, and is likely to remain
+// such. So if you do `docker export <container>`, the resulting tarball will
+// have this file. rootfs will not add it. You are welcome to append one
+// yourself.
+//
+// == Special files: opaque files and dirs (.wh.*) ==
+//
+// From mount.aufs(8)[1]:
+//
+// The whiteout is for hiding files on lower branches. Also it is applied to
+// stop readdir going lower branches. The latter case is called ‘opaque
+// directory.’ Any whiteout is an empty file, it means whiteout is just an
+// mark. In the case of hiding lower files, the name of whiteout is
+// ‘.wh.<filename>.’ And in the case of stopping readdir, the name is
+// ‘.wh..wh..opq’. All whiteouts are hardlinked, including ‘<writable branch
+// top dir>/.wh..wh.aufs.’
+//
+// My interpretation:
+// - a hardlink called `.wh..wh..opq` means that directory contents from the
+// layers below the mentioned file should be ignored. Higher layers may add
+// files on top.
+// - if hardlink `.wh.([^/]+)` is found, $1 should be deleted from the current
+// and lower layers.
+//
+// == Tar format ==
+//
+// Since we do care about long filenames and large file sizes (>8GB), we are
+// using "classic" GNU Tar. However, at least NetBSD pax is known to have
+// problems reading it[2].
+//
+// [1]: https://manpages.debian.org/unstable/aufs-tools/mount.aufs.8.en.html
+// [2]: https://mgorny.pl/articles/portability-of-tar-features.html
+package rootfs
diff --git a/rootfs/rootfs.go b/rootfs/rootfs.go
@@ -4,6 +4,7 @@ import (
"archive/tar"
"encoding/json"
"errors"
+ "fmt"
"io"
"path/filepath"
"strings"
@@ -13,10 +14,11 @@ import (
const (
_manifestJSON = "manifest.json"
+ _layerSuffix = "/layer.tar"
)
var (
- ErrBadManifest = errors.New("bad or missing manifest.json")
+ errBadManifest = errors.New("bad or missing manifest.json")
)
type dockerManifestJSON []struct {
@@ -24,7 +26,7 @@ type dockerManifestJSON []struct {
Layers []string `json:"Layers"`
}
-// Rootfs accepts a docker layer tarball and writes it to outfile.
+// RootFS accepts a docker layer tarball and writes it to outfile.
// 1. create map[string]io.ReadSeeker for each layer.
// 2. parse manifest.json and get the layer order.
// 3. go through each layer in order and write:
@@ -36,9 +38,7 @@ type dockerManifestJSON []struct {
func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
tr := tar.NewReader(in)
tw := tar.NewWriter(out)
- defer func() {
- err = multierr.Append(err, tw.Close())
- }()
+ defer func() { err = multierr.Append(err, tw.Close()) }()
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to it's offset
layerOffsets := map[string]int64{}
@@ -60,9 +60,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
case filepath.Clean(hdr.Name) == _manifestJSON:
dec := json.NewDecoder(tr)
if err := dec.Decode(&manifest); err != nil {
- return err
+ return fmt.Errorf("parse %s: %w", _manifestJSON, err)
}
- case strings.HasSuffix(hdr.Name, "/layer.tar"):
+ case strings.HasSuffix(hdr.Name, _layerSuffix):
here, err := in.Seek(0, io.SeekCurrent)
if err != nil {
return err
@@ -71,15 +71,11 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
}
}
- if len(manifest) == 0 {
- return ErrBadManifest
- }
-
- if len(layerOffsets) != len(manifest[0].Layers) {
- return ErrBadManifest
+ if len(manifest) == 0 || len(layerOffsets) != len(manifest[0].Layers) {
+ return errBadManifest
}
- // phase 1.5: enumerate layers
+ // enumerate layers the way they would be laid down in the image
layers := make([]int64, len(layerOffsets))
for i, name := range manifest[0].Layers {
layers[i] = layerOffsets[name]
@@ -88,8 +84,7 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
// file2layer maps a filename to layer number (index in "layers")
file2layer := map[string]int{}
- // phase 2: iterate through all layers and save filenames
- // for all kinds of files.
+ // iterate through all layers and save filenames for all kinds of files.
for i, offset := range layers {
if _, err := in.Seek(offset, io.SeekStart); err != nil {
return err
@@ -124,37 +119,44 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
return err
}
- // only directories can have multiple entries with the same name.
+ // Only directories can have multiple entries with the same name.
// all other file types cannot.
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
continue
}
- hdrOut := &tar.Header{
- Typeflag: hdr.Typeflag,
- Name: hdr.Name,
- Linkname: hdr.Linkname,
- Size: hdr.Size,
- Mode: int64(hdr.Mode & 0777),
- Uid: hdr.Uid,
- Gid: hdr.Gid,
- Uname: hdr.Uname,
- Gname: hdr.Gname,
- ModTime: hdr.ModTime,
- Devmajor: hdr.Devmajor,
- Devminor: hdr.Devminor,
- Format: tar.FormatPAX,
- }
-
- if err := tw.WriteHeader(hdrOut); err != nil {
+ if err := writeFile(tr, tw, hdr); err != nil {
return err
}
+ }
+ }
+ return nil
+}
- if hdr.Typeflag == tar.TypeReg {
- if _, err := io.Copy(tw, tr); err != nil {
- return err
- }
- }
+func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
+ hdrOut := &tar.Header{
+ Typeflag: hdr.Typeflag,
+ Name: hdr.Name,
+ Linkname: hdr.Linkname,
+ Size: hdr.Size,
+ Mode: int64(hdr.Mode & 0777),
+ Uid: hdr.Uid,
+ Gid: hdr.Gid,
+ Uname: hdr.Uname,
+ Gname: hdr.Gname,
+ ModTime: hdr.ModTime,
+ Devmajor: hdr.Devmajor,
+ Devminor: hdr.Devminor,
+ Format: tar.FormatGNU,
+ }
+
+ if err := tw.WriteHeader(hdrOut); err != nil {
+ return err
+ }
+
+ if hdr.Typeflag == tar.TypeReg {
+ if _, err := io.Copy(tw, tr); err != nil {
+ return err
}
}
diff --git a/rootfs/rootfs_test.go b/rootfs/rootfs_test.go
@@ -14,11 +14,11 @@ import (
func TestRootFS(t *testing.T) {
layer0 := tarball{
dir{name: "/", uid: 0},
- file{name: "/file", uid: 0, contents: []byte("from 0")},
+ file{name: "/file", uid: 0, contents: bytes.NewBufferString("from 0")},
}
layer1 := tarball{
- file{name: "/file", uid: 1, contents: []byte("from 1")},
+ file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
}
layer2 := tarball{
@@ -44,34 +44,95 @@ func TestRootFS(t *testing.T) {
{
name: "basic file overwrite, layer order mixed",
image: tarball{
- file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
- file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
+ file{name: "layer1/layer.tar", contents: layer1},
+ file{name: "layer0/layer.tar", contents: layer0},
manifest{"layer0/layer.tar", "layer1/layer.tar"},
},
want: []extractable{
dir{name: "/", uid: 0},
- file{name: "/file", uid: 1, contents: []byte("from 1")},
+ file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
},
},
{
name: "directory overwrite retains original dir",
image: tarball{
- file{name: "layer2/layer.tar", contents: layer2.bytes(t)},
- file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
- file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
+ file{name: "layer2/layer.tar", contents: layer2},
+ file{name: "layer0/layer.tar", contents: layer0},
+ file{name: "layer1/layer.tar", contents: layer1},
manifest{"layer0/layer.tar", "layer1/layer.tar", "layer2/layer.tar"},
},
want: []extractable{
dir{name: "/", uid: 0},
- file{name: "/file", uid: 1, contents: []byte("from 1")},
+ file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
dir{name: "/", uid: 2},
},
},
+ {
+ name: "simple whiteout",
+ image: tarball{
+ file{name: "layer0/layer.tar", contents: tarball{
+ file{name: "filea"},
+ file{name: "fileb"},
+ dir{name: "dira"},
+ dir{name: "dirb"},
+ }},
+ file{name: "layer1/layer.tar", contents: tarball{
+ hardlink{name: ".wh.filea"},
+ hardlink{name: ".wh.dira"},
+ }},
+ manifest{"layer0/layer.tar", "layer1/layer.tar"},
+ },
+ want: []extractable{
+ file{name: "fileb"},
+ dir{name: "dirb"},
+ },
+ },
+ {
+ name: "whiteout with override",
+ image: tarball{
+ file{name: "layer0/layer.tar", contents: tarball{
+ file{name: "filea", contents: bytes.NewBufferString("from 0")},
+ }},
+ file{name: "layer1/layer.tar", contents: tarball{
+ hardlink{name: ".wh.filea"},
+ }},
+ file{name: "layer2/layer.tar", contents: tarball{
+ file{name: "filea", contents: bytes.NewBufferString("from 3")},
+ }},
+ manifest{
+ "layer0/layer.tar",
+ "layer1/layer.tar",
+ "layer2/layer.tar",
+ },
+ },
+ want: []extractable{
+ file{name: "filea", contents: bytes.NewBufferString("from 3")},
+ },
+ },
+ {
+ name: "files and directories do not whiteout",
+ image: tarball{
+ file{name: "layer0/layer.tar", contents: tarball{
+ dir{name: "dir"},
+ file{name: "file"},
+ }},
+ file{name: "layer1/layer.tar", contents: tarball{
+ dir{name: ".wh.dir"},
+ file{name: ".wh.file"},
+ }},
+ },
+ want: []extractable{
+ dir{name: "dir"},
+ dir{name: ".wh.dir"},
+ file{name: "file"},
+ file{name: ".wh.file"},
+ },
+ },
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- in := bytes.NewReader(tt.image.bytes(t))
+ in := bytes.NewReader(tt.image.Bytes())
out := bytes.Buffer{}
err := RootFS(in, &out)
@@ -81,15 +142,31 @@ func TestRootFS(t *testing.T) {
}
require.NoError(t, err)
got := extract(t, &out)
- assert.Equal(t, got, tt.want)
+ assert.Equal(t, tt.want, got)
})
}
}
// Helpers
-type tarrable interface {
- tar(*testing.T, *tar.Writer)
+type tarrer interface {
+ tar(*tar.Writer)
+}
+
+type byter interface {
+ Bytes() []byte
+}
+
+type tarball []tarrer
+
+func (tb tarball) Bytes() []byte {
+ buf := bytes.Buffer{}
+ tw := tar.NewWriter(&buf)
+ for _, member := range tb {
+ member.tar(tw)
+ }
+ tw.Close()
+ return buf.Bytes()
}
// extractable is an empty interface for comparing extracted outputs in tests.
@@ -101,63 +178,70 @@ type dir struct {
uid int
}
-func (d dir) tar(t *testing.T, tw *tar.Writer) {
- t.Helper()
+func (d dir) tar(tw *tar.Writer) {
hdr := &tar.Header{
Typeflag: tar.TypeDir,
Name: d.name,
Mode: 0644,
Uid: d.uid,
}
- require.NoError(t, tw.WriteHeader(hdr))
+ tw.WriteHeader(hdr)
}
type file struct {
name string
uid int
- contents []byte
+ contents byter
}
-func (f file) tar(t *testing.T, tw *tar.Writer) {
- t.Helper()
+func (f file) tar(tw *tar.Writer) {
+ var contentbytes []byte
+ if f.contents != nil {
+ contentbytes = f.contents.Bytes()
+ }
hdr := &tar.Header{
Typeflag: tar.TypeReg,
Name: f.name,
Mode: 0644,
Uid: f.uid,
- Size: int64(len(f.contents)),
+ Size: int64(len(contentbytes)),
}
- require.NoError(t, tw.WriteHeader(hdr))
- _, err := tw.Write(f.contents)
- require.NoError(t, err)
+ tw.WriteHeader(hdr)
+ tw.Write(contentbytes)
}
type manifest []string
-func (m manifest) tar(t *testing.T, tw *tar.Writer) {
- t.Helper()
+func (m manifest) tar(tw *tar.Writer) {
b, err := json.Marshal(dockerManifestJSON{{Layers: m}})
- require.NoError(t, err)
- file{name: "manifest.json", uid: 0, contents: b}.tar(t, tw)
+ if err != nil {
+ panic("testerr")
+ }
+ file{
+ name: "manifest.json",
+ uid: 0,
+ contents: bytes.NewBuffer(b),
+ }.tar(tw)
}
-type tarball []tarrable
+type hardlink struct {
+ name string
+ uid int
+}
-func (tb tarball) bytes(t *testing.T) []byte {
- t.Helper()
- buf := bytes.Buffer{}
- tw := tar.NewWriter(&buf)
- for _, member := range tb {
- member.tar(t, tw)
- }
- require.NoError(t, tw.Close())
- return buf.Bytes()
+func (h hardlink) tar(tw *tar.Writer) {
+ tw.WriteHeader(&tar.Header{
+ Typeflag: tar.TypeLink,
+ Name: h.name,
+ Mode: 0644,
+ Uid: h.uid,
+ })
}
-func extract(t *testing.T, f io.Reader) []extractable {
+func extract(t *testing.T, r io.Reader) []extractable {
t.Helper()
ret := []extractable{}
- tr := tar.NewReader(f)
+ tr := tar.NewReader(r)
for {
hdr, err := tr.Next()
if err == io.EOF {
@@ -170,9 +254,13 @@ func extract(t *testing.T, f io.Reader) []extractable {
case tar.TypeDir:
elem = dir{name: hdr.Name, uid: hdr.Uid}
case tar.TypeReg:
- buf := bytes.Buffer{}
- io.Copy(&buf, tr)
- elem = file{name: hdr.Name, uid: hdr.Uid, contents: buf.Bytes()}
+ f := file{name: hdr.Name, uid: hdr.Uid}
+ if hdr.Size > 0 {
+ var buf bytes.Buffer
+ io.Copy(&buf, tr)
+ f.contents = &buf
+ }
+ elem = f
}
ret = append(ret, elem)
}