From 628b178e231ad89757fc39d4ef2061bcd22ea9c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 24 May 2021 00:11:58 +0300 Subject: [PATCH] start using tree --- internal/tree/tree.go | 2 +- internal/tree/tree_test.go | 13 +++++++++---- rootfs/BUILD | 5 ++++- rootfs/doc.go | 13 +++++++++---- rootfs/rootfs.go | 38 +++++++++++++++++++++++++++++++++----- rootfs/rootfs_test.go | 10 ++-------- 6 files changed, 58 insertions(+), 23 deletions(-) diff --git a/internal/tree/tree.go b/internal/tree/tree.go index 0e82890..518b0ef 100644 --- a/internal/tree/tree.go +++ b/internal/tree/tree.go @@ -16,7 +16,7 @@ type Tree struct { } // New creates a new tree from a given path. -func New(paths []string) *Tree { +func New(paths ...string) *Tree { t := &Tree{name: ".", children: []*Tree{}} for _, path := range paths { t.Add(path) diff --git a/internal/tree/tree_test.go b/internal/tree/tree_test.go index 961202c..66b9632 100644 --- a/internal/tree/tree_test.go +++ b/internal/tree/tree_test.go @@ -18,6 +18,11 @@ func TestTree(t *testing.T) { paths: []string{}, matchFalse: []string{"a", "b"}, }, + { + name: "directory", + paths: []string{"a/b"}, + matchTrue: []string{"a/b/"}, + }, { name: "a few sequences", paths: []string{"a", "b", "c/b/a"}, @@ -28,7 +33,7 @@ func TestTree(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - tree := New(tt.paths) + tree := New(tt.paths...) for _, path := range tt.matchTrue { t.Run(path, func(t *testing.T) { @@ -48,8 +53,8 @@ func TestTree(t *testing.T) { } func TestTreeMerge(t *testing.T) { - tree1 := New([]string{"bin/ar", "var/cache/apt"}) - tree2 := New([]string{"bin/ar", "bin/busybox", "usr/share/doc"}) + tree1 := New("bin/ar", "var/cache/apt") + tree2 := New("bin/ar", "bin/busybox", "usr/share/doc") tree1.Merge(tree2) assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc:./var/cache/apt", tree1.String()) assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc", tree2.String()) @@ -85,7 +90,7 @@ func TestString(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - tree := New(tt.paths) + tree := New(tt.paths...) assert.Equal(t, tt.wantStr, tree.String()) }) } diff --git a/rootfs/BUILD b/rootfs/BUILD index ba78de1..7a278a0 100644 --- a/rootfs/BUILD +++ b/rootfs/BUILD @@ -8,7 +8,10 @@ go_library( ], importpath = "github.com/motiejus/code/undocker/rootfs", visibility = ["//visibility:public"], - deps = ["@org_uber_go_multierr//:go_default_library"], + deps = [ + "//src/undocker/internal/tree:go_default_library", + "@org_uber_go_multierr//:go_default_library", + ], ) go_test( diff --git a/rootfs/doc.go b/rootfs/doc.go index 14b18ce..d919f6d 100644 --- a/rootfs/doc.go +++ b/rootfs/doc.go @@ -53,11 +53,16 @@ // top dir>/.wh..wh.aufs`. // // My interpretation: -// - a hardlink called `.wh..wh..opq` means that directory contents from the -// layers below the mentioned file should be ignored. Higher layers may add +// - a file/hardlink called `.wh..wh..opq` means that directory contents from +// the layers below the mentioned file should be ignored. Higher layers may add // files on top. -// - if hardlink `.wh.([^/]+)` is found, $1 should be deleted from the current -// and lower layers. +// * Ambiguity: should the directory from the lower layers be removed? I am +// assuming yes, but this assumptions is baseless. +// - if file/hardlink `.wh.([^/]+)` is found, $1 should be deleted from the +// current and lower layers. +// +// Note: these may be regular files in practice. So this implementation will +// match either. // // == Tar format == // diff --git a/rootfs/rootfs.go b/rootfs/rootfs.go index 5e11df4..06d7d7b 100644 --- a/rootfs/rootfs.go +++ b/rootfs/rootfs.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strings" + "github.com/motiejus/code/undocker/internal/tree" "go.uber.org/multierr" ) @@ -85,8 +86,7 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) { // file2layer maps a filename to layer number (index in "layers") file2layer := map[string]int{} - // whreaddir maps a directory to a layer number until which - // its contents should be ignored, exclusively. + // whreaddir maps `wh..wh..opq` file to a layer. whreaddir := map[string]int{} // wh maps a filename to a layer until which it should be ignored, @@ -111,7 +111,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) { continue } - if hdr.Typeflag == tar.TypeLink { + // according to aufs documentation, whiteout files should be hardlinks. + // I saw at least one docker container using regular files for whiteout. + if hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg { basename := filepath.Base(hdr.Name) basedir := filepath.Dir(hdr.Name) if basename == _whReaddir { @@ -128,7 +130,10 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) { } } - // phase 3: iterate through all layers and write files. + // construct directories to ignore, by layer. + whIgnore := whiteoutDirs(whreaddir, len(layers)) + + // iterate through all layers and write files. for i, offset := range layers { if _, err := in.Seek(offset, io.SeekStart); err != nil { return err @@ -142,7 +147,13 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) { if err != nil { return err } - if file2layer[hdr.Name] != i { + if layer, ok := wh[hdr.Name]; ok && layer >= i { + continue + } + if whIgnore[i].HasPrefix(hdr.Name) { + continue + } + if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i { continue } if err := writeFile(tr, tw, hdr); err != nil { @@ -182,3 +193,20 @@ func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error { return nil } + +func whiteoutDirs(whreaddir map[string]int, nlayers int) []*tree.Tree { + ret := make([]*tree.Tree, nlayers) + for i := range ret { + ret[i] = tree.New() + } + for fname, layer := range whreaddir { + if layer == 0 { + continue + } + ret[layer-1].Add(fname) + } + for i := nlayers - 1; i > 0; i-- { + ret[i-1].Merge(ret[i]) + } + return ret +} diff --git a/rootfs/rootfs_test.go b/rootfs/rootfs_test.go index f16eca7..f4e0fe2 100644 --- a/rootfs/rootfs_test.go +++ b/rootfs/rootfs_test.go @@ -110,26 +110,19 @@ func TestRootFS(t *testing.T) { }, }, { - name: "files and directories do not whiteout", + name: "directories do not whiteout", image: tarball{ file{name: "layer0/layer.tar", contents: tarball{ dir{name: "dir"}, - file{name: "file"}, - file{name: ".wh..wh..opq", uid: 0}, }}, file{name: "layer1/layer.tar", contents: tarball{ dir{name: ".wh.dir"}, - file{name: ".wh.file"}, - file{name: ".wh..wh..opq", uid: 1}, }}, manifest{"layer0/layer.tar", "layer1/layer.tar"}, }, want: []extractable{ dir{name: "dir"}, dir{name: ".wh.dir"}, - file{name: "file"}, - file{name: ".wh.file"}, - file{name: ".wh..wh..opq", uid: 1}, }, }, { @@ -140,6 +133,7 @@ func TestRootFS(t *testing.T) { file{name: "a/filea"}, }}, file{name: "layer1/layer.tar", contents: tarball{ + dir{name: "a"}, file{name: "a/fileb"}, hardlink{name: "a/.wh..wh..opq"}, }},