From 0f08cef777f66f00546852adb9a31db15e3bae67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Mon, 24 May 2021 00:11:58 +0300
Subject: [PATCH] add tree

---
 internal/tree/BUILD        |  15 +++++
 internal/tree/tree.go      | 120 +++++++++++++++++++++++++++++++++++++
 internal/tree/tree_test.go |  92 ++++++++++++++++++++++++++++
 rootfs/rootfs.go           |  42 +++++++++----
 4 files changed, 258 insertions(+), 11 deletions(-)
 create mode 100644 internal/tree/BUILD
 create mode 100644 internal/tree/tree.go
 create mode 100644 internal/tree/tree_test.go

diff --git a/internal/tree/BUILD b/internal/tree/BUILD
new file mode 100644
index 0000000..08fc59b
--- /dev/null
+++ b/internal/tree/BUILD
@@ -0,0 +1,15 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["tree.go"],
+    importpath = "github.com/motiejus/code/undocker/internal/tree",
+    visibility = ["//src:__subpackages__"],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = ["tree_test.go"],
+    embed = [":go_default_library"],
+    deps = ["@com_github_stretchr_testify//assert:go_default_library"],
+)
diff --git a/internal/tree/tree.go b/internal/tree/tree.go
new file mode 100644
index 0000000..0e82890
--- /dev/null
+++ b/internal/tree/tree.go
@@ -0,0 +1,120 @@
+package tree
+
+import (
+	"path/filepath"
+	"sort"
+	"strings"
+)
+
+// Tree is a way to store directory paths for whiteouts.
+// It is semi-optimized for reads and non-optimized for writes;
+// see Merge and HasPrefix for the trade-offs.
+type Tree struct {
+	name     string
+	children []*Tree
+	end      bool
+}
+
+// New creates a new tree from the given paths.
+func New(paths []string) *Tree {
+	t := &Tree{name: ".", children: []*Tree{}}
+	for _, path := range paths {
+		t.Add(path)
+	}
+	return t
+}
+
+// Add adds a path to the tree.
+func (t *Tree) Add(path string) {
+	t.add(strings.Split(filepath.Clean(path), "/"))
+}
+
+// HasPrefix reports whether the tree contains a prefix of the given path.
+// The search algorithm is naive: it scans the children linearly instead of
+// doing a binary search. Since the number of children is expected to be
+// very small (usually 1 or 2), this does not matter in practice. If you find
+// a real-world container with 30+ whiteout paths on a single path, please
+// ping the author/maintainer of this code.
+func (t *Tree) HasPrefix(path string) bool {
+	return t.hasprefix(strings.Split(filepath.Clean(path), "/"))
+}
+
+// Merge adds t2 to t. It is not optimized for speed, since it re-walks t
+// from the root for every branch of t2.
+func (t *Tree) Merge(t2 *Tree) {
+	t.merge(t2, []string{})
+}
+
+// String stringifies a tree.
+func (t *Tree) String() string {
+	if len(t.children) == 0 {
+		return ""
+	}
+
+	res := &stringer{[]string{}}
+	res.stringify(t, []string{})
+	sort.Strings(res.res)
+	return strings.Join(res.res, ":")
+}
+
+func (t *Tree) add(nodes []string) {
+	if len(nodes) == 0 {
+		t.end = true
+		return
+	}
+	for i := range t.children {
+		if t.children[i].name == nodes[0] {
+			t.children[i].add(nodes[1:])
+			return
+		}
+	}
+
+	newNode := &Tree{name: nodes[0]}
+	t.children = append(t.children, newNode)
+	newNode.add(nodes[1:])
+}
+
+func (t *Tree) hasprefix(nodes []string) bool {
+	if len(nodes) == 0 {
+		return t.end
+	}
+	if t.end {
+		return true
+	}
+
+	for i := range t.children {
+		if t.children[i].name == nodes[0] {
+			return t.children[i].hasprefix(nodes[1:])
+		}
+	}
+
+	return false
+}
+
+type stringer struct {
+	res []string
+}
+
+func (s *stringer) stringify(t *Tree, acc []string) {
+	if t.name == "" {
+		return
+	}
+	acc = append(acc, t.name)
+	if t.end {
+		s.res = append(s.res, strings.Join(acc, "/"))
+	}
+
+	for _, child := range t.children {
+		s.stringify(child, acc)
+	}
+}
+
+func (t *Tree) merge(t2 *Tree, acc []string) {
+	if t2.end {
+		t.add(append(acc[1:], t2.name))
+	}
+	acc = append(acc, t2.name)
+	for _, child := range t2.children {
+		t.merge(child, acc)
+	}
+}
diff --git a/internal/tree/tree_test.go b/internal/tree/tree_test.go
new file mode 100644
index 0000000..961202c
--- /dev/null
+++ b/internal/tree/tree_test.go
@@ -0,0 +1,92 @@
+package tree
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestTree(t *testing.T) {
+	tests := []struct {
+		name       string
+		paths      []string
+		matchTrue  []string
+		matchFalse []string
+	}{
+		{
+			name:       "empty sequence matches nothing",
+			paths:      []string{},
+			matchFalse: []string{"a", "b"},
+		},
+		{
+			name:       "a few sequences",
+			paths:      []string{"a", "b", "c/b/a"},
+			matchTrue:  []string{"a", "a/b/c", "c/b/a", "c/b/a/d"},
+			matchFalse: []string{"c/d", "c", "c/b"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tree := New(tt.paths)
+
+			for _, path := range tt.matchTrue {
+				t.Run(path, func(t *testing.T) {
+					assert.True(t, tree.HasPrefix(path),
+						"expected %s to be a prefix of %s", path, tree)
+				})
+			}
+
+			for _, path := range tt.matchFalse {
+				t.Run(path, func(t *testing.T) {
+					assert.False(t, tree.HasPrefix(path),
+						"expected %s to not be a prefix of %s", path, tree)
+				})
+			}
+		})
+	}
+}
+
+func TestTreeMerge(t *testing.T) {
+	tree1 := New([]string{"bin/ar", "var/cache/apt"})
+	tree2 := New([]string{"bin/ar", "bin/busybox", "usr/share/doc"})
+	tree1.Merge(tree2)
+	assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc:./var/cache/apt", tree1.String())
+	assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc", tree2.String())
+}
+
+func TestString(t *testing.T) {
+	tests := []struct {
+		name    string
+		paths   []string
+		wantStr string
+	}{
+		{
+			name:    "empty",
+			paths:   []string{},
+			wantStr: "",
+		},
+		{
+			name:    "simple path",
+			paths:   []string{"a/b/c"},
+			wantStr: "./a/b/c",
+		},
+		{
+			name:    "duplicate paths",
+			paths:   []string{"a/a", "a//a"},
+			wantStr: "./a/a",
+		},
+		{
+			name:    "a few sequences",
+			paths:   []string{"bin/ar", "bin/busybox", "var/cache/apt/archives"},
+			wantStr: "./bin/ar:./bin/busybox:./var/cache/apt/archives",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tree := New(tt.paths)
+			assert.Equal(t, tt.wantStr, tree.String())
+		})
+	}
+}
diff --git a/rootfs/rootfs.go b/rootfs/rootfs.go
index 9826652..5e11df4 100644
--- a/rootfs/rootfs.go
+++ b/rootfs/rootfs.go
@@ -15,6 +15,8 @@ import (
 const (
 	_manifestJSON = "manifest.json"
 	_layerSuffix  = "/layer.tar"
+	_whReaddir    = ".wh..wh..opq"
+	_whPrefix     = ".wh."
 )
 
 var (
@@ -39,23 +41,22 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
 	tr := tar.NewReader(in)
 	tw := tar.NewWriter(out)
 	defer func() { err = multierr.Append(err, tw.Close()) }()
 
+	// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to its offset
 	layerOffsets := map[string]int64{}
 
 	// manifest is the docker manifest in the image
 	var manifest dockerManifestJSON
 
-	// phase 1: get layer offsets and manifest.json
+	// get layer offsets and manifest.json
 	for {
 		hdr, err := tr.Next()
 		if err == io.EOF {
 			break
 		}
-
 		if hdr.Typeflag != tar.TypeReg {
 			continue
 		}
-
 		switch {
 		case filepath.Clean(hdr.Name) == _manifestJSON:
 			dec := json.NewDecoder(tr)
@@ -84,13 +85,20 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
 	// file2layer maps a filename to layer number (index in "layers")
 	file2layer := map[string]int{}
 
-	// iterate through all layers and save filenames for all kinds of files.
+	// whreaddir maps a directory to a layer number until which
+	// its contents should be ignored, exclusively.
+	whreaddir := map[string]int{}
+
+	// wh maps a filename to a layer until which it should be ignored,
+	// inclusively.
+	wh := map[string]int{}
+
+	// build up `file2layer`, `whreaddir`, `wh`
 	for i, offset := range layers {
 		if _, err := in.Seek(offset, io.SeekStart); err != nil {
 			return err
 		}
 		tr = tar.NewReader(in)
-
 		for {
 			hdr, err := tr.Next()
 			if err == io.EOF {
@@ -99,6 +107,23 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
 			if err != nil {
 				return err
 			}
+			if hdr.Typeflag == tar.TypeDir {
+				continue
+			}
+
+			if hdr.Typeflag == tar.TypeLink {
+				basename := filepath.Base(hdr.Name)
+				basedir := filepath.Dir(hdr.Name)
+				if basename == _whReaddir {
+					whreaddir[basedir] = i
+					continue
+				} else if strings.HasPrefix(basename, _whPrefix) {
+					fname := strings.TrimPrefix(basename, _whPrefix)
+					wh[filepath.Join(basedir, fname)] = i
+					continue
+				}
+			}
+
 			file2layer[hdr.Name] = i
 		}
 	}
@@ -109,7 +134,6 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
 			return err
 		}
 		tr = tar.NewReader(in)
-
 		for {
 			hdr, err := tr.Next()
 			if err == io.EOF {
@@ -118,13 +142,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
 			if err != nil {
 				return err
 			}
-
-			// Only directories can have multiple entries with the same name.
-			// all other file types cannot.
-			if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
+			if file2layer[hdr.Name] != i {
 				continue
 			}
-
 			if err := writeFile(tr, tw, hdr); err != nil {
 				return err
 			}
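
A minimal usage sketch of the tree package added above, assuming a caller inside the same module (the package is internal; the import path below is the importpath declared in internal/tree/BUILD). The example program, its sample whiteout paths, and the netcat.deb entry are illustrative only and are not part of this patch:

package main

import (
	"fmt"

	// importpath as declared in internal/tree/BUILD; resolvable only from
	// within this module, since the package is internal.
	"github.com/motiejus/code/undocker/internal/tree"
)

func main() {
	// Whiteout paths collected from one layer (".wh." prefix already stripped).
	whiteouts := tree.New([]string{"bin/ar", "var/cache/apt"})

	// Whiteouts from another layer can be folded in with Merge.
	whiteouts.Merge(tree.New([]string{"usr/share/doc"}))

	// HasPrefix reports whether a file sits under any whited-out path;
	// that is the check a flattener would use to decide to skip the file.
	fmt.Println(whiteouts.HasPrefix("var/cache/apt/archives/netcat.deb")) // true
	fmt.Println(whiteouts.HasPrefix("bin/busybox"))                       // false

	// String joins all whiteout branches, sorted and ":"-separated:
	// ./bin/ar:./usr/share/doc:./var/cache/apt
	fmt.Println(whiteouts)
}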