This commit is contained in:
Motiejus Jakštys 2021-05-24 00:11:58 +03:00
parent 48dd16fe27
commit 0f08cef777
4 changed files with 258 additions and 11 deletions

15
internal/tree/BUILD Normal file
View File

@ -0,0 +1,15 @@
# Bazel targets for the internal/tree Go package.
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
# Library target built from tree.go.
go_library(
name = "go_default_library",
srcs = ["tree.go"],
importpath = "github.com/motiejus/code/undocker/internal/tree",
# NOTE(review): visibility is limited to packages under //src; confirm that
# this is where the importers of this package live.
visibility = ["//src:__subpackages__"],
)
# Test target built from tree_test.go; it embeds the library target above and
# depends on testify's assert package.
go_test(
name = "go_default_test",
srcs = ["tree_test.go"],
embed = [":go_default_library"],
deps = ["@com_github_stretchr_testify//assert:go_default_library"],
)

120
internal/tree/tree.go Normal file
View File

@ -0,0 +1,120 @@
package tree
import (
"path/filepath"
"sort"
"strings"
)
// Tree is a way to store directory paths for whiteouts: a prefix tree in
// which every node holds a single path component.
// It is semi-optimized for reads and non-optimized for writes;
// see Merge and HasPrefix for the trade-offs.
type Tree struct {
	// name is the path component of this node; New names the root ".".
	name string
	// children hold the next path components, in insertion order.
	children []*Tree
	// end is true when a stored path terminates at this node.
	end bool
}

// New creates a new tree containing all the given paths.
func New(paths []string) *Tree {
	t := &Tree{name: ".", children: []*Tree{}}
	for _, path := range paths {
		t.Add(path)
	}
	return t
}

// Add adds a path to the tree. The path is cleaned first, so "a//b" and
// "a/b/" denote the same entry as "a/b". Adding the root directory ("." or
// the empty string) marks the root itself, after which HasPrefix matches
// every path.
func (t *Tree) Add(path string) {
	t.add(components(path))
}

// HasPrefix reports whether the tree contains a path that is equal to the
// given path or is one of its parent directories.
// Search algorithm is naive: it does linear search when going through the
// nodes instead of binary-search. Since we expect number of children to be
// really small (usually 1 or 2), it does not really matter. If you find a
// real-world container with 30+ whiteout paths on a single path, please ping
// the author/maintainer of this code.
func (t *Tree) HasPrefix(path string) bool {
	return t.hasprefix(components(path))
}

// Merge adds every path stored in t2 to t; t2 itself is not modified. It is
// not optimized for speed, since each path of t2 is re-inserted starting
// from the root of t. A nil t2 is a no-op.
func (t *Tree) Merge(t2 *Tree) {
	if t2 == nil {
		return
	}
	t.merge(t2, nil)
}

// String stringifies a tree: all stored paths, each prefixed with the root
// name, sorted and joined with ":". A tree that stores nothing is rendered
// as "<empty>".
func (t *Tree) String() string {
	// A terminal root stores a path (the root itself) even without children.
	if len(t.children) == 0 && !t.end {
		return "<empty>"
	}
	res := &stringer{}
	res.stringify(t, nil)
	sort.Strings(res.res)
	return strings.Join(res.res, ":")
}

// components cleans path and splits it into its slash-separated components.
// The root directory ("." after cleaning, which includes the empty string)
// has no components, so nil is returned for it.
func components(path string) []string {
	// filepath.Clean emits OS-specific separators; convert them back so that
	// splitting on "/" works on every platform.
	clean := filepath.ToSlash(filepath.Clean(path))
	if clean == "." {
		return nil
	}
	return strings.Split(clean, "/")
}

// add inserts the path given as a component sequence below t, creating the
// missing nodes and marking the last one as terminal.
func (t *Tree) add(nodes []string) {
	if len(nodes) == 0 {
		t.end = true
		return
	}
	for _, child := range t.children {
		if child.name == nodes[0] {
			child.add(nodes[1:])
			return
		}
	}
	newNode := &Tree{name: nodes[0]}
	t.children = append(t.children, newNode)
	newNode.add(nodes[1:])
}

// hasprefix reports whether a terminal node is reached while following nodes
// downwards from t, including t itself.
func (t *Tree) hasprefix(nodes []string) bool {
	if t.end {
		return true
	}
	if len(nodes) == 0 {
		return false
	}
	for _, child := range t.children {
		if child.name == nodes[0] {
			return child.hasprefix(nodes[1:])
		}
	}
	return false
}

// stringer accumulates the textual form of every path stored in a tree.
type stringer struct {
	res []string
}

// stringify walks t depth-first and records the slash-joined path of every
// terminal node; acc holds the names of the ancestors of t. A node with an
// empty name is skipped together with everything below it.
func (s *stringer) stringify(t *Tree, acc []string) {
	if t.name == "" {
		return
	}
	acc = append(acc, t.name)
	if t.end {
		s.res = append(s.res, strings.Join(acc, "/"))
	}
	for _, child := range t.children {
		s.stringify(child, acc)
	}
}

// merge re-inserts into t every path stored at or below t2. acc is the
// component sequence leading from the root of the merged tree (exclusive) to
// t2 (inclusive); it is empty for the root, so a terminal root marks t itself
// instead of slicing past the end of an empty sequence.
func (t *Tree) merge(t2 *Tree, acc []string) {
	if t2.end {
		t.add(acc)
	}
	for _, child := range t2.children {
		// The full slice expression caps the capacity, forcing append to
		// copy so that siblings never share a backing array.
		t.merge(child, append(acc[:len(acc):len(acc)], child.name))
	}
}

View File

@ -0,0 +1,92 @@
package tree
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestTree builds a tree for every table entry and verifies which paths
// HasPrefix accepts and which it rejects.
func TestTree(t *testing.T) {
	type prefixCase struct {
		name       string
		paths      []string
		matchTrue  []string
		matchFalse []string
	}
	cases := []prefixCase{
		{
			name:       "empty sequence matches nothing",
			paths:      []string{},
			matchFalse: []string{"a", "b"},
		},
		{
			name:       "a few sequences",
			paths:      []string{"a", "b", "c/b/a"},
			matchTrue:  []string{"a", "a/b/c", "c/b/a", "c/b/a/d"},
			matchFalse: []string{"c/d", "c", "c/b"},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			tree := New(tc.paths)

			// Paths that must be covered by the tree.
			for _, p := range tc.matchTrue {
				t.Run(p, func(t *testing.T) {
					matched := tree.HasPrefix(p)
					assert.True(t, matched,
						"expected %s to be a prefix of %s", p, tree)
				})
			}

			// Paths that must not be covered by the tree.
			for _, p := range tc.matchFalse {
				t.Run(p, func(t *testing.T) {
					matched := tree.HasPrefix(p)
					assert.False(t, matched,
						"expected %s to not be a prefix of %s", p, tree)
				})
			}
		})
	}
}
// TestTreeMerge verifies that Merge copies all paths of the source tree into
// the destination and leaves the source unchanged.
func TestTreeMerge(t *testing.T) {
	const (
		wantDst = "./bin/ar:./bin/busybox:./usr/share/doc:./var/cache/apt"
		wantSrc = "./bin/ar:./bin/busybox:./usr/share/doc"
	)

	dst := New([]string{"bin/ar", "var/cache/apt"})
	src := New([]string{"bin/ar", "bin/busybox", "usr/share/doc"})

	dst.Merge(src)

	assert.Equal(t, wantDst, dst.String())
	assert.Equal(t, wantSrc, src.String())
}
// TestString verifies the textual rendering of trees built from different
// sets of paths.
func TestString(t *testing.T) {
	type stringCase struct {
		name    string
		paths   []string
		wantStr string
	}
	cases := []stringCase{
		{name: "empty", paths: []string{}, wantStr: "<empty>"},
		{name: "simple path", paths: []string{"a/b/c"}, wantStr: "./a/b/c"},
		{name: "duplicate paths", paths: []string{"a/a", "a//a"}, wantStr: "./a/a"},
		{
			name:    "a few sequences",
			paths:   []string{"bin/ar", "bin/busybox", "var/cache/apt/archives"},
			wantStr: "./bin/ar:./bin/busybox:./var/cache/apt/archives",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.paths).String()
			assert.Equal(t, tc.wantStr, got)
		})
	}
}

View File

@ -15,6 +15,8 @@ import (
const ( const (
_manifestJSON = "manifest.json" _manifestJSON = "manifest.json"
_layerSuffix = "/layer.tar" _layerSuffix = "/layer.tar"
_whReaddir = ".wh..wh..opq"
_whPrefix = ".wh."
) )
var ( var (
@ -39,23 +41,22 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
tr := tar.NewReader(in) tr := tar.NewReader(in)
tw := tar.NewWriter(out) tw := tar.NewWriter(out)
defer func() { err = multierr.Append(err, tw.Close()) }() defer func() { err = multierr.Append(err, tw.Close()) }()
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to it's offset // layerOffsets maps a layer name (a9b123c0daa/layer.tar) to it's offset
layerOffsets := map[string]int64{} layerOffsets := map[string]int64{}
// manifest is the docker manifest in the image // manifest is the docker manifest in the image
var manifest dockerManifestJSON var manifest dockerManifestJSON
// phase 1: get layer offsets and manifest.json // get layer offsets and manifest.json
for { for {
hdr, err := tr.Next() hdr, err := tr.Next()
if err == io.EOF { if err == io.EOF {
break break
} }
if hdr.Typeflag != tar.TypeReg { if hdr.Typeflag != tar.TypeReg {
continue continue
} }
switch { switch {
case filepath.Clean(hdr.Name) == _manifestJSON: case filepath.Clean(hdr.Name) == _manifestJSON:
dec := json.NewDecoder(tr) dec := json.NewDecoder(tr)
@ -84,13 +85,20 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
// file2layer maps a filename to layer number (index in "layers") // file2layer maps a filename to layer number (index in "layers")
file2layer := map[string]int{} file2layer := map[string]int{}
// iterate through all layers and save filenames for all kinds of files. // whreaddir maps a directory to a layer number until which
// its contents should be ignored, exclusively.
whreaddir := map[string]int{}
// wh maps a filename to a layer until which it should be ignored,
// inclusively.
wh := map[string]int{}
// build up `file2layer`, `whreaddir`, `wh`
for i, offset := range layers { for i, offset := range layers {
if _, err := in.Seek(offset, io.SeekStart); err != nil { if _, err := in.Seek(offset, io.SeekStart); err != nil {
return err return err
} }
tr = tar.NewReader(in) tr = tar.NewReader(in)
for { for {
hdr, err := tr.Next() hdr, err := tr.Next()
if err == io.EOF { if err == io.EOF {
@ -99,6 +107,23 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
if err != nil { if err != nil {
return err return err
} }
if hdr.Typeflag == tar.TypeDir {
continue
}
if hdr.Typeflag == tar.TypeLink {
basename := filepath.Base(hdr.Name)
basedir := filepath.Dir(hdr.Name)
if basename == _whReaddir {
whreaddir[basedir] = i
continue
} else if strings.HasPrefix(basename, _whPrefix) {
fname := strings.TrimPrefix(basename, _whPrefix)
wh[filepath.Join(basedir, fname)] = i
continue
}
}
file2layer[hdr.Name] = i file2layer[hdr.Name] = i
} }
} }
@ -109,7 +134,6 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
return err return err
} }
tr = tar.NewReader(in) tr = tar.NewReader(in)
for { for {
hdr, err := tr.Next() hdr, err := tr.Next()
if err == io.EOF { if err == io.EOF {
@ -118,13 +142,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
if err != nil { if err != nil {
return err return err
} }
if file2layer[hdr.Name] != i {
// Only directories can have multiple entries with the same name.
// all other file types cannot.
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
continue continue
} }
if err := writeFile(tr, tw, hdr); err != nil { if err := writeFile(tr, tw, hdr); err != nil {
return err return err
} }