This commit is contained in:
Motiejus Jakštys 2021-05-24 00:11:58 +03:00
parent ce8ce9f59e
commit 4c7afb8afa
5 changed files with 249 additions and 90 deletions

View File

@ -50,9 +50,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
if err != nil {
return err
}
defer func() {
err = multierr.Append(err, in.Close())
}()
defer func() { err = multierr.Append(err, in.Close()) }()
var out *os.File
outf := string(r.PositionalArgs.Outfile)
@ -64,9 +62,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
return err
}
}
defer func() {
err = multierr.Append(err, out.Close())
}()
defer func() { err = multierr.Append(err, out.Close()) }()
return rootfs.RootFS(in, out)
}

View File

@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["rootfs.go"],
srcs = [
"doc.go",
"rootfs.go",
],
importpath = "github.com/motiejus/code/undocker/rootfs",
visibility = ["//visibility:public"],
deps = ["@org_uber_go_multierr//:go_default_library"],

70
rootfs/doc.go Normal file
View File

@ -0,0 +1,70 @@
// Package rootfs extracts all layers of a Docker container image to a single
// tarball. It will go through all layers in order and copy every file to the
// destination archive.
//
// Except it will also reasonably process those files.
//
// == Non-directory will be copied only once ==
// A non-directory will be copied only once, from its last
// occurrence. I.e. if file /a/b was found in layers 0 and 2, only the file
// from layer 2 will be used.
// Directories will always be copied, even if there are duplicates. This is
// to avoid a situation like this:
// layer0:
// - ./dir/
// - ./dir/file
// layer1:
// - ./dir/
// - ./dir/file
// In theory, the directory from layer 1 takes precedence, so a tarball like
// this could be created:
// - ./dir/ (from layer1)
// - ./dir/file (from layer1)
// However, imagine the following:
// layer0:
// - ./dir/
// - ./dir/file1
// layer1:
// - ./dir/
// Then the resulting tarball would have:
// - ./dir/file1 (from layer0)
// - ./dir/ (from layer1)
// Which would mean `untar` would try to untar a file to a directory which
// was not yet created. Therefore directories will be copied to the resulting
// tar in the order they appear in the layers.
//
// == Special files: .dockerenv ==
//
// .dockerenv is present in all docker containers, and is likely to remain
// such. So if you do `docker export <container>`, the resulting tarball will
// have this file. rootfs will not add it. You are welcome to append one
// yourself.
//
// == Special files: opaque files and dirs (.wh.*) ==
//
// From mount.aufs(8)[1]:
//
// The whiteout is for hiding files on lower branches. Also it is applied to
// stop readdir going lower branches. The latter case is called opaque
// directory. Any whiteout is an empty file, it means whiteout is just an
// mark. In the case of hiding lower files, the name of whiteout is
// .wh.<filename>. And in the case of stopping readdir, the name is
// .wh..wh..opq. All whiteouts are hardlinked, including <writable branch
// top dir>/.wh..wh.aufs`.
//
// My interpretation:
// - a hardlink called `.wh..wh..opq` means that directory contents from the
// layers below the mentioned file should be ignored. Higher layers may add
// files on top.
// - if hardlink `.wh.([^/]+)` is found, $1 should be deleted from the current
// and lower layers.
//
// == Tar format ==
//
// Since we do care about long filenames and large file sizes (>8GB), we are
// using "classic" GNU Tar. However, at least NetBSD pax is known to have
// problems reading it[2].
//
// [1]: https://manpages.debian.org/unstable/aufs-tools/mount.aufs.8.en.html
// [2]: https://mgorny.pl/articles/portability-of-tar-features.html
package rootfs

View File

@ -4,6 +4,7 @@ import (
"archive/tar"
"encoding/json"
"errors"
"fmt"
"io"
"path/filepath"
"strings"
@ -13,10 +14,11 @@ import (
const (
_manifestJSON = "manifest.json"
_layerSuffix = "/layer.tar"
)
var (
ErrBadManifest = errors.New("bad or missing manifest.json")
errBadManifest = errors.New("bad or missing manifest.json")
)
type dockerManifestJSON []struct {
@ -24,7 +26,7 @@ type dockerManifestJSON []struct {
Layers []string `json:"Layers"`
}
// Rootfs accepts a docker layer tarball and writes it to outfile.
// RootFS accepts a docker layer tarball and writes it to outfile.
// 1. create map[string]io.ReadSeeker for each layer.
// 2. parse manifest.json and get the layer order.
// 3. go through each layer in order and write:
@ -36,9 +38,7 @@ type dockerManifestJSON []struct {
func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
tr := tar.NewReader(in)
tw := tar.NewWriter(out)
defer func() {
err = multierr.Append(err, tw.Close())
}()
defer func() { err = multierr.Append(err, tw.Close()) }()
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to its offset
layerOffsets := map[string]int64{}
@ -60,9 +60,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
case filepath.Clean(hdr.Name) == _manifestJSON:
dec := json.NewDecoder(tr)
if err := dec.Decode(&manifest); err != nil {
return err
return fmt.Errorf("parse %s: %w", _manifestJSON, err)
}
case strings.HasSuffix(hdr.Name, "/layer.tar"):
case strings.HasSuffix(hdr.Name, _layerSuffix):
here, err := in.Seek(0, io.SeekCurrent)
if err != nil {
return err
@ -71,15 +71,11 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
}
}
if len(manifest) == 0 {
return ErrBadManifest
if len(manifest) == 0 || len(layerOffsets) != len(manifest[0].Layers) {
return errBadManifest
}
if len(layerOffsets) != len(manifest[0].Layers) {
return ErrBadManifest
}
// phase 1.5: enumerate layers
// enumerate layers the way they would be laid down in the image
layers := make([]int64, len(layerOffsets))
for i, name := range manifest[0].Layers {
layers[i] = layerOffsets[name]
@ -88,8 +84,7 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
// file2layer maps a filename to layer number (index in "layers")
file2layer := map[string]int{}
// phase 2: iterate through all layers and save filenames
// for all kinds of files.
// iterate through all layers and save filenames for all kinds of files.
for i, offset := range layers {
if _, err := in.Seek(offset, io.SeekStart); err != nil {
return err
@ -124,12 +119,21 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
return err
}
// only directories can have multiple entries with the same name.
// Only directories can have multiple entries with the same name.
// all other file types cannot.
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
continue
}
if err := writeFile(tr, tw, hdr); err != nil {
return err
}
}
}
return nil
}
func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
hdrOut := &tar.Header{
Typeflag: hdr.Typeflag,
Name: hdr.Name,
@ -143,7 +147,7 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
ModTime: hdr.ModTime,
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Format: tar.FormatPAX,
Format: tar.FormatGNU,
}
if err := tw.WriteHeader(hdrOut); err != nil {
@ -155,8 +159,6 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
return err
}
}
}
}
return nil
}

View File

@ -14,11 +14,11 @@ import (
func TestRootFS(t *testing.T) {
layer0 := tarball{
dir{name: "/", uid: 0},
file{name: "/file", uid: 0, contents: []byte("from 0")},
file{name: "/file", uid: 0, contents: bytes.NewBufferString("from 0")},
}
layer1 := tarball{
file{name: "/file", uid: 1, contents: []byte("from 1")},
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
}
layer2 := tarball{
@ -44,34 +44,95 @@ func TestRootFS(t *testing.T) {
{
name: "basic file overwrite, layer order mixed",
image: tarball{
file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
file{name: "layer1/layer.tar", contents: layer1},
file{name: "layer0/layer.tar", contents: layer0},
manifest{"layer0/layer.tar", "layer1/layer.tar"},
},
want: []extractable{
dir{name: "/", uid: 0},
file{name: "/file", uid: 1, contents: []byte("from 1")},
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
},
},
{
name: "directory overwrite retains original dir",
image: tarball{
file{name: "layer2/layer.tar", contents: layer2.bytes(t)},
file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
file{name: "layer2/layer.tar", contents: layer2},
file{name: "layer0/layer.tar", contents: layer0},
file{name: "layer1/layer.tar", contents: layer1},
manifest{"layer0/layer.tar", "layer1/layer.tar", "layer2/layer.tar"},
},
want: []extractable{
dir{name: "/", uid: 0},
file{name: "/file", uid: 1, contents: []byte("from 1")},
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
dir{name: "/", uid: 2},
},
},
{
name: "simple whiteout",
image: tarball{
file{name: "layer0/layer.tar", contents: tarball{
file{name: "filea"},
file{name: "fileb"},
dir{name: "dira"},
dir{name: "dirb"},
}},
file{name: "layer1/layer.tar", contents: tarball{
hardlink{name: ".wh.filea"},
hardlink{name: ".wh.dira"},
}},
manifest{"layer0/layer.tar", "layer1/layer.tar"},
},
want: []extractable{
file{name: "fileb"},
dir{name: "dirb"},
},
},
{
name: "whiteout with override",
image: tarball{
file{name: "layer0/layer.tar", contents: tarball{
file{name: "filea", contents: bytes.NewBufferString("from 0")},
}},
file{name: "layer1/layer.tar", contents: tarball{
hardlink{name: ".wh.filea"},
}},
file{name: "layer2/layer.tar", contents: tarball{
file{name: "filea", contents: bytes.NewBufferString("from 3")},
}},
manifest{
"layer0/layer.tar",
"layer1/layer.tar",
"layer2/layer.tar",
},
},
want: []extractable{
file{name: "filea", contents: bytes.NewBufferString("from 3")},
},
},
{
name: "files and directories do not whiteout",
image: tarball{
file{name: "layer0/layer.tar", contents: tarball{
dir{name: "dir"},
file{name: "file"},
}},
file{name: "layer1/layer.tar", contents: tarball{
dir{name: ".wh.dir"},
file{name: ".wh.file"},
}},
},
want: []extractable{
dir{name: "dir"},
dir{name: ".wh.dir"},
file{name: "file"},
file{name: ".wh.file"},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
in := bytes.NewReader(tt.image.bytes(t))
in := bytes.NewReader(tt.image.Bytes())
out := bytes.Buffer{}
err := RootFS(in, &out)
@ -81,15 +142,31 @@ func TestRootFS(t *testing.T) {
}
require.NoError(t, err)
got := extract(t, &out)
assert.Equal(t, got, tt.want)
assert.Equal(t, tt.want, got)
})
}
}
// Helpers
type tarrable interface {
tar(*testing.T, *tar.Writer)
type tarrer interface {
tar(*tar.Writer)
}
// byter is implemented by test fixtures that can serialize themselves
// into a raw byte slice (e.g. tarball).
type byter interface {
	Bytes() []byte
}
// tarball is an ordered collection of members that together form a tar
// archive in tests.
type tarball []tarrer

// Bytes renders each member, in order, into a tar stream and returns
// the archive's raw bytes.
func (tb tarball) Bytes() []byte {
	var archive bytes.Buffer
	w := tar.NewWriter(&archive)
	for _, item := range tb {
		item.tar(w)
	}
	w.Close()
	return archive.Bytes()
}
// extractable is an empty interface for comparing extracted outputs in tests.
@ -101,63 +178,70 @@ type dir struct {
uid int
}
func (d dir) tar(t *testing.T, tw *tar.Writer) {
t.Helper()
func (d dir) tar(tw *tar.Writer) {
hdr := &tar.Header{
Typeflag: tar.TypeDir,
Name: d.name,
Mode: 0644,
Uid: d.uid,
}
require.NoError(t, tw.WriteHeader(hdr))
tw.WriteHeader(hdr)
}
type file struct {
name string
uid int
contents []byte
contents byter
}
func (f file) tar(t *testing.T, tw *tar.Writer) {
t.Helper()
func (f file) tar(tw *tar.Writer) {
var contentbytes []byte
if f.contents != nil {
contentbytes = f.contents.Bytes()
}
hdr := &tar.Header{
Typeflag: tar.TypeReg,
Name: f.name,
Mode: 0644,
Uid: f.uid,
Size: int64(len(f.contents)),
Size: int64(len(contentbytes)),
}
require.NoError(t, tw.WriteHeader(hdr))
_, err := tw.Write(f.contents)
require.NoError(t, err)
tw.WriteHeader(hdr)
tw.Write(contentbytes)
}
// manifest is a test fixture: the ordered list of layer tarball names,
// marshaled into a Docker-style manifest.json ("Layers" field).
type manifest []string
func (m manifest) tar(t *testing.T, tw *tar.Writer) {
t.Helper()
func (m manifest) tar(tw *tar.Writer) {
b, err := json.Marshal(dockerManifestJSON{{Layers: m}})
require.NoError(t, err)
file{name: "manifest.json", uid: 0, contents: b}.tar(t, tw)
if err != nil {
panic("testerr")
}
file{
name: "manifest.json",
uid: 0,
contents: bytes.NewBuffer(b),
}.tar(tw)
}
type tarball []tarrable
func (tb tarball) bytes(t *testing.T) []byte {
t.Helper()
buf := bytes.Buffer{}
tw := tar.NewWriter(&buf)
for _, member := range tb {
member.tar(t, tw)
}
require.NoError(t, tw.Close())
return buf.Bytes()
// hardlink is a test fixture describing a tar hard-link entry; the tests
// use it to emulate aufs whiteout markers (.wh.*) inside layer tarballs.
type hardlink struct {
	name string
	uid  int
}
func extract(t *testing.T, f io.Reader) []extractable {
// tar writes a hard-link header for h to tw. The write error is
// deliberately ignored, matching the other test fixtures.
func (h hardlink) tar(tw *tar.Writer) {
	hdr := tar.Header{
		Typeflag: tar.TypeLink,
		Name:     h.name,
		Mode:     0644,
		Uid:      h.uid,
	}
	tw.WriteHeader(&hdr)
}
func extract(t *testing.T, r io.Reader) []extractable {
t.Helper()
ret := []extractable{}
tr := tar.NewReader(f)
tr := tar.NewReader(r)
for {
hdr, err := tr.Next()
if err == io.EOF {
@ -170,9 +254,13 @@ func extract(t *testing.T, f io.Reader) []extractable {
case tar.TypeDir:
elem = dir{name: hdr.Name, uid: hdr.Uid}
case tar.TypeReg:
buf := bytes.Buffer{}
f := file{name: hdr.Name, uid: hdr.Uid}
if hdr.Size > 0 {
var buf bytes.Buffer
io.Copy(&buf, tr)
elem = file{name: hdr.Name, uid: hdr.Uid, contents: buf.Bytes()}
f.contents = &buf
}
elem = f
}
ret = append(ret, elem)
}