doc
This commit is contained in:
parent
ce8ce9f59e
commit
4c7afb8afa
8
main.go
8
main.go
|
@ -50,9 +50,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
err = multierr.Append(err, in.Close())
|
||||
}()
|
||||
defer func() { err = multierr.Append(err, in.Close()) }()
|
||||
|
||||
var out *os.File
|
||||
outf := string(r.PositionalArgs.Outfile)
|
||||
|
@ -64,9 +62,7 @@ func (r *cmdRootFS) Execute(args []string) (err error) {
|
|||
return err
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
err = multierr.Append(err, out.Close())
|
||||
}()
|
||||
defer func() { err = multierr.Append(err, out.Close()) }()
|
||||
|
||||
return rootfs.RootFS(in, out)
|
||||
}
|
||||
|
|
|
@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
|
|||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["rootfs.go"],
|
||||
srcs = [
|
||||
"doc.go",
|
||||
"rootfs.go",
|
||||
],
|
||||
importpath = "github.com/motiejus/code/undocker/rootfs",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["@org_uber_go_multierr//:go_default_library"],
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
// Package rootfs extracts all layers of a Docker container image to a single
|
||||
// tarball. It will go through all layers in order and copy every file to the
|
||||
// destination archive.
|
||||
//
|
||||
// Except it will also reasonably process those files.
|
||||
//
|
||||
// == Non-directory will be copied only once ==
|
||||
// A non-directory will be copied only once, and only from its last
|
||||
// occurrence. I.e. if file /a/b was found in layers 0 and 2, only the file
|
||||
// from layer 2 will be used.
|
||||
// Directories will always be copied, even if there are duplicates. This is
|
||||
// to avoid a situation like this:
|
||||
// layer0:
|
||||
// - ./dir/
|
||||
// - ./dir/file
|
||||
// layer1:
|
||||
// - ./dir/
|
||||
// - ./dir/file
|
||||
// In theory, the directory from layer 1 takes precedence, so a tarball like
|
||||
// this could be created:
|
||||
// - ./dir/ (from layer1)
|
||||
// - ./dir/file (from layer1)
|
||||
// However, imagine the following:
|
||||
// layer0:
|
||||
// - ./dir/
|
||||
// - ./dir/file1
|
||||
// layer1:
|
||||
// - ./dir/
|
||||
// Then the resulting tarball would have:
|
||||
// - ./dir/file1 (from layer0)
|
||||
// - ./dir/ (from layer1)
|
||||
// Which would mean `untar` would try to untar a file to a directory which
|
||||
// was not yet created. Therefore directories will be copied to the resulting
|
||||
// tar in the order they appear in the layers.
|
||||
//
|
||||
// == Special files: .dockerenv ==
|
||||
//
|
||||
// .dockerenv is present in all docker containers, and is likely to remain
|
||||
// such. So if you do `docker export <container>`, the resulting tarball will
|
||||
// have this file. rootfs will not add it. You are welcome to append one
|
||||
// yourself.
|
||||
//
|
||||
// == Special files: opaque files and dirs (.wh.*) ==
|
||||
//
|
||||
// From mount.aufs(8)[1]:
|
||||
//
|
||||
// The whiteout is for hiding files on lower branches. Also it is applied to
|
||||
// stop readdir going lower branches. The latter case is called ‘opaque
|
||||
// directory.’ Any whiteout is an empty file, it means whiteout is just an
|
||||
// mark. In the case of hiding lower files, the name of whiteout is
|
||||
// ‘.wh.<filename>.’ And in the case of stopping readdir, the name is
|
||||
// ‘.wh..wh..opq’. All whiteouts are hardlinked, including ‘<writable branch
|
||||
// top dir>/.wh..wh.aufs`.
|
||||
//
|
||||
// My interpretation:
|
||||
// - a hardlink called `.wh..wh..opq` means that directory contents from the
|
||||
// layers below the mentioned file should be ignored. Higher layers may add
|
||||
// files on top.
|
||||
// - if hardlink `.wh.([^/]+)` is found, $1 should be deleted from the current
|
||||
// and lower layers.
|
||||
//
|
||||
// == Tar format ==
|
||||
//
|
||||
// Since we do care about long filenames and large file sizes (>8GB), we are
|
||||
// using "classic" GNU Tar. However, at least NetBSD pax is known to have
|
||||
// problems reading it[2].
|
||||
//
|
||||
// [1]: https://manpages.debian.org/unstable/aufs-tools/mount.aufs.8.en.html
|
||||
// [2]: https://mgorny.pl/articles/portability-of-tar-features.html
|
||||
package rootfs
|
|
@ -4,6 +4,7 @@ import (
|
|||
"archive/tar"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
@ -13,10 +14,11 @@ import (
|
|||
|
||||
const (
|
||||
_manifestJSON = "manifest.json"
|
||||
_layerSuffix = "/layer.tar"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrBadManifest = errors.New("bad or missing manifest.json")
|
||||
errBadManifest = errors.New("bad or missing manifest.json")
|
||||
)
|
||||
|
||||
type dockerManifestJSON []struct {
|
||||
|
@ -24,7 +26,7 @@ type dockerManifestJSON []struct {
|
|||
Layers []string `json:"Layers"`
|
||||
}
|
||||
|
||||
// Rootfs accepts a docker layer tarball and writes it to outfile.
|
||||
// RootFS accepts a docker layer tarball and writes it to outfile.
|
||||
// 1. create map[string]io.ReadSeeker for each layer.
|
||||
// 2. parse manifest.json and get the layer order.
|
||||
// 3. go through each layer in order and write:
|
||||
|
@ -36,9 +38,7 @@ type dockerManifestJSON []struct {
|
|||
func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
tr := tar.NewReader(in)
|
||||
tw := tar.NewWriter(out)
|
||||
defer func() {
|
||||
err = multierr.Append(err, tw.Close())
|
||||
}()
|
||||
defer func() { err = multierr.Append(err, tw.Close()) }()
|
||||
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to its offset
|
||||
layerOffsets := map[string]int64{}
|
||||
|
||||
|
@ -60,9 +60,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
|||
case filepath.Clean(hdr.Name) == _manifestJSON:
|
||||
dec := json.NewDecoder(tr)
|
||||
if err := dec.Decode(&manifest); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("parse %s: %w", _manifestJSON, err)
|
||||
}
|
||||
case strings.HasSuffix(hdr.Name, "/layer.tar"):
|
||||
case strings.HasSuffix(hdr.Name, _layerSuffix):
|
||||
here, err := in.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -71,15 +71,11 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
|||
}
|
||||
}
|
||||
|
||||
if len(manifest) == 0 {
|
||||
return ErrBadManifest
|
||||
if len(manifest) == 0 || len(layerOffsets) != len(manifest[0].Layers) {
|
||||
return errBadManifest
|
||||
}
|
||||
|
||||
if len(layerOffsets) != len(manifest[0].Layers) {
|
||||
return ErrBadManifest
|
||||
}
|
||||
|
||||
// phase 1.5: enumerate layers
|
||||
// enumerate layers the way they would be laid down in the image
|
||||
layers := make([]int64, len(layerOffsets))
|
||||
for i, name := range manifest[0].Layers {
|
||||
layers[i] = layerOffsets[name]
|
||||
|
@ -88,8 +84,7 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
|||
// file2layer maps a filename to layer number (index in "layers")
|
||||
file2layer := map[string]int{}
|
||||
|
||||
// phase 2: iterate through all layers and save filenames
|
||||
// for all kinds of files.
|
||||
// iterate through all layers and save filenames for all kinds of files.
|
||||
for i, offset := range layers {
|
||||
if _, err := in.Seek(offset, io.SeekStart); err != nil {
|
||||
return err
|
||||
|
@ -124,37 +119,44 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
|||
return err
|
||||
}
|
||||
|
||||
// only directories can have multiple entries with the same name.
|
||||
// Only directories can have multiple entries with the same name.
|
||||
// all other file types cannot.
|
||||
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
|
||||
continue
|
||||
}
|
||||
|
||||
hdrOut := &tar.Header{
|
||||
Typeflag: hdr.Typeflag,
|
||||
Name: hdr.Name,
|
||||
Linkname: hdr.Linkname,
|
||||
Size: hdr.Size,
|
||||
Mode: int64(hdr.Mode & 0777),
|
||||
Uid: hdr.Uid,
|
||||
Gid: hdr.Gid,
|
||||
Uname: hdr.Uname,
|
||||
Gname: hdr.Gname,
|
||||
ModTime: hdr.ModTime,
|
||||
Devmajor: hdr.Devmajor,
|
||||
Devminor: hdr.Devminor,
|
||||
Format: tar.FormatPAX,
|
||||
}
|
||||
|
||||
if err := tw.WriteHeader(hdrOut); err != nil {
|
||||
if err := writeFile(tr, tw, hdr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if hdr.Typeflag == tar.TypeReg {
|
||||
if _, err := io.Copy(tw, tr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
|
||||
hdrOut := &tar.Header{
|
||||
Typeflag: hdr.Typeflag,
|
||||
Name: hdr.Name,
|
||||
Linkname: hdr.Linkname,
|
||||
Size: hdr.Size,
|
||||
Mode: int64(hdr.Mode & 0777),
|
||||
Uid: hdr.Uid,
|
||||
Gid: hdr.Gid,
|
||||
Uname: hdr.Uname,
|
||||
Gname: hdr.Gname,
|
||||
ModTime: hdr.ModTime,
|
||||
Devmajor: hdr.Devmajor,
|
||||
Devminor: hdr.Devminor,
|
||||
Format: tar.FormatGNU,
|
||||
}
|
||||
|
||||
if err := tw.WriteHeader(hdrOut); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if hdr.Typeflag == tar.TypeReg {
|
||||
if _, err := io.Copy(tw, tr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,11 +14,11 @@ import (
|
|||
func TestRootFS(t *testing.T) {
|
||||
layer0 := tarball{
|
||||
dir{name: "/", uid: 0},
|
||||
file{name: "/file", uid: 0, contents: []byte("from 0")},
|
||||
file{name: "/file", uid: 0, contents: bytes.NewBufferString("from 0")},
|
||||
}
|
||||
|
||||
layer1 := tarball{
|
||||
file{name: "/file", uid: 1, contents: []byte("from 1")},
|
||||
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
|
||||
}
|
||||
|
||||
layer2 := tarball{
|
||||
|
@ -44,34 +44,95 @@ func TestRootFS(t *testing.T) {
|
|||
{
|
||||
name: "basic file overwrite, layer order mixed",
|
||||
image: tarball{
|
||||
file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
|
||||
file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
|
||||
file{name: "layer1/layer.tar", contents: layer1},
|
||||
file{name: "layer0/layer.tar", contents: layer0},
|
||||
manifest{"layer0/layer.tar", "layer1/layer.tar"},
|
||||
},
|
||||
want: []extractable{
|
||||
dir{name: "/", uid: 0},
|
||||
file{name: "/file", uid: 1, contents: []byte("from 1")},
|
||||
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "directory overwrite retains original dir",
|
||||
image: tarball{
|
||||
file{name: "layer2/layer.tar", contents: layer2.bytes(t)},
|
||||
file{name: "layer0/layer.tar", contents: layer0.bytes(t)},
|
||||
file{name: "layer1/layer.tar", contents: layer1.bytes(t)},
|
||||
file{name: "layer2/layer.tar", contents: layer2},
|
||||
file{name: "layer0/layer.tar", contents: layer0},
|
||||
file{name: "layer1/layer.tar", contents: layer1},
|
||||
manifest{"layer0/layer.tar", "layer1/layer.tar", "layer2/layer.tar"},
|
||||
},
|
||||
want: []extractable{
|
||||
dir{name: "/", uid: 0},
|
||||
file{name: "/file", uid: 1, contents: []byte("from 1")},
|
||||
file{name: "/file", uid: 1, contents: bytes.NewBufferString("from 1")},
|
||||
dir{name: "/", uid: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "simple whiteout",
|
||||
image: tarball{
|
||||
file{name: "layer0/layer.tar", contents: tarball{
|
||||
file{name: "filea"},
|
||||
file{name: "fileb"},
|
||||
dir{name: "dira"},
|
||||
dir{name: "dirb"},
|
||||
}},
|
||||
file{name: "layer1/layer.tar", contents: tarball{
|
||||
hardlink{name: ".wh.filea"},
|
||||
hardlink{name: ".wh.dira"},
|
||||
}},
|
||||
manifest{"layer0/layer.tar", "layer1/layer.tar"},
|
||||
},
|
||||
want: []extractable{
|
||||
file{name: "fileb"},
|
||||
dir{name: "dirb"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "whiteout with override",
|
||||
image: tarball{
|
||||
file{name: "layer0/layer.tar", contents: tarball{
|
||||
file{name: "filea", contents: bytes.NewBufferString("from 0")},
|
||||
}},
|
||||
file{name: "layer1/layer.tar", contents: tarball{
|
||||
hardlink{name: ".wh.filea"},
|
||||
}},
|
||||
file{name: "layer2/layer.tar", contents: tarball{
|
||||
file{name: "filea", contents: bytes.NewBufferString("from 3")},
|
||||
}},
|
||||
manifest{
|
||||
"layer0/layer.tar",
|
||||
"layer1/layer.tar",
|
||||
"layer2/layer.tar",
|
||||
},
|
||||
},
|
||||
want: []extractable{
|
||||
file{name: "filea", contents: bytes.NewBufferString("from 3")},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "files and directories do not whiteout",
|
||||
image: tarball{
|
||||
file{name: "layer0/layer.tar", contents: tarball{
|
||||
dir{name: "dir"},
|
||||
file{name: "file"},
|
||||
}},
|
||||
file{name: "layer1/layer.tar", contents: tarball{
|
||||
dir{name: ".wh.dir"},
|
||||
file{name: ".wh.file"},
|
||||
}},
|
||||
},
|
||||
want: []extractable{
|
||||
dir{name: "dir"},
|
||||
dir{name: ".wh.dir"},
|
||||
file{name: "file"},
|
||||
file{name: ".wh.file"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
in := bytes.NewReader(tt.image.bytes(t))
|
||||
in := bytes.NewReader(tt.image.Bytes())
|
||||
out := bytes.Buffer{}
|
||||
|
||||
err := RootFS(in, &out)
|
||||
|
@ -81,15 +142,31 @@ func TestRootFS(t *testing.T) {
|
|||
}
|
||||
require.NoError(t, err)
|
||||
got := extract(t, &out)
|
||||
assert.Equal(t, got, tt.want)
|
||||
assert.Equal(t, tt.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Helpers
|
||||
|
||||
type tarrable interface {
|
||||
tar(*testing.T, *tar.Writer)
|
||||
type tarrer interface {
|
||||
tar(*tar.Writer)
|
||||
}
|
||||
|
||||
type byter interface {
|
||||
Bytes() []byte
|
||||
}
|
||||
|
||||
type tarball []tarrer
|
||||
|
||||
func (tb tarball) Bytes() []byte {
|
||||
buf := bytes.Buffer{}
|
||||
tw := tar.NewWriter(&buf)
|
||||
for _, member := range tb {
|
||||
member.tar(tw)
|
||||
}
|
||||
tw.Close()
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// extractable is an empty interface for comparing extracted outputs in tests.
|
||||
|
@ -101,63 +178,70 @@ type dir struct {
|
|||
uid int
|
||||
}
|
||||
|
||||
func (d dir) tar(t *testing.T, tw *tar.Writer) {
|
||||
t.Helper()
|
||||
func (d dir) tar(tw *tar.Writer) {
|
||||
hdr := &tar.Header{
|
||||
Typeflag: tar.TypeDir,
|
||||
Name: d.name,
|
||||
Mode: 0644,
|
||||
Uid: d.uid,
|
||||
}
|
||||
require.NoError(t, tw.WriteHeader(hdr))
|
||||
tw.WriteHeader(hdr)
|
||||
}
|
||||
|
||||
type file struct {
|
||||
name string
|
||||
uid int
|
||||
contents []byte
|
||||
contents byter
|
||||
}
|
||||
|
||||
func (f file) tar(t *testing.T, tw *tar.Writer) {
|
||||
t.Helper()
|
||||
func (f file) tar(tw *tar.Writer) {
|
||||
var contentbytes []byte
|
||||
if f.contents != nil {
|
||||
contentbytes = f.contents.Bytes()
|
||||
}
|
||||
hdr := &tar.Header{
|
||||
Typeflag: tar.TypeReg,
|
||||
Name: f.name,
|
||||
Mode: 0644,
|
||||
Uid: f.uid,
|
||||
Size: int64(len(f.contents)),
|
||||
Size: int64(len(contentbytes)),
|
||||
}
|
||||
require.NoError(t, tw.WriteHeader(hdr))
|
||||
_, err := tw.Write(f.contents)
|
||||
require.NoError(t, err)
|
||||
tw.WriteHeader(hdr)
|
||||
tw.Write(contentbytes)
|
||||
}
|
||||
|
||||
type manifest []string
|
||||
|
||||
func (m manifest) tar(t *testing.T, tw *tar.Writer) {
|
||||
t.Helper()
|
||||
func (m manifest) tar(tw *tar.Writer) {
|
||||
b, err := json.Marshal(dockerManifestJSON{{Layers: m}})
|
||||
require.NoError(t, err)
|
||||
file{name: "manifest.json", uid: 0, contents: b}.tar(t, tw)
|
||||
}
|
||||
|
||||
type tarball []tarrable
|
||||
|
||||
func (tb tarball) bytes(t *testing.T) []byte {
|
||||
t.Helper()
|
||||
buf := bytes.Buffer{}
|
||||
tw := tar.NewWriter(&buf)
|
||||
for _, member := range tb {
|
||||
member.tar(t, tw)
|
||||
if err != nil {
|
||||
panic("testerr")
|
||||
}
|
||||
require.NoError(t, tw.Close())
|
||||
return buf.Bytes()
|
||||
file{
|
||||
name: "manifest.json",
|
||||
uid: 0,
|
||||
contents: bytes.NewBuffer(b),
|
||||
}.tar(tw)
|
||||
}
|
||||
|
||||
func extract(t *testing.T, f io.Reader) []extractable {
|
||||
type hardlink struct {
|
||||
name string
|
||||
uid int
|
||||
}
|
||||
|
||||
func (h hardlink) tar(tw *tar.Writer) {
|
||||
tw.WriteHeader(&tar.Header{
|
||||
Typeflag: tar.TypeLink,
|
||||
Name: h.name,
|
||||
Mode: 0644,
|
||||
Uid: h.uid,
|
||||
})
|
||||
}
|
||||
|
||||
func extract(t *testing.T, r io.Reader) []extractable {
|
||||
t.Helper()
|
||||
ret := []extractable{}
|
||||
tr := tar.NewReader(f)
|
||||
tr := tar.NewReader(r)
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
|
@ -170,9 +254,13 @@ func extract(t *testing.T, f io.Reader) []extractable {
|
|||
case tar.TypeDir:
|
||||
elem = dir{name: hdr.Name, uid: hdr.Uid}
|
||||
case tar.TypeReg:
|
||||
buf := bytes.Buffer{}
|
||||
io.Copy(&buf, tr)
|
||||
elem = file{name: hdr.Name, uid: hdr.Uid, contents: buf.Bytes()}
|
||||
f := file{name: hdr.Name, uid: hdr.Uid}
|
||||
if hdr.Size > 0 {
|
||||
var buf bytes.Buffer
|
||||
io.Copy(&buf, tr)
|
||||
f.contents = &buf
|
||||
}
|
||||
elem = f
|
||||
}
|
||||
ret = append(ret, elem)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue