add tree
This commit is contained in:
parent
48dd16fe27
commit
0f08cef777
15
internal/tree/BUILD
Normal file
15
internal/tree/BUILD
Normal file
@ -0,0 +1,15 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Library target built from tree.go.
go_library(
    name = "go_default_library",
    srcs = ["tree.go"],
    importpath = "github.com/motiejus/code/undocker/internal/tree",
    visibility = ["//src:__subpackages__"],
)

# Test target; embeds the library so tests compile into the same package.
go_test(
    name = "go_default_test",
    srcs = ["tree_test.go"],
    embed = [":go_default_library"],
    deps = ["@com_github_stretchr_testify//assert:go_default_library"],
)
|
120
internal/tree/tree.go
Normal file
120
internal/tree/tree.go
Normal file
@ -0,0 +1,120 @@
|
||||
package tree
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Tree is a way to store directory paths for whiteouts.
// It is semi-optimized for reads and non-optimized for writes;
// see Merge and HasPrefix for the trade-offs.
type Tree struct {
	name     string  // path component held by this node; "." for a root made by New
	children []*Tree // child components, in insertion order
	end      bool    // true when a stored path terminates at this node
}

// New creates a new tree holding the given paths.
func New(paths []string) *Tree {
	t := &Tree{name: "."}
	for _, path := range paths {
		t.Add(path)
	}
	return t
}

// splitPath cleans path and breaks it into slash-separated components.
// filepath.Clean emits the OS-specific separator, so the result is converted
// back to forward slashes before splitting; otherwise, on platforms whose
// separator is not "/", the whole path would collapse into one component.
func splitPath(path string) []string {
	return strings.Split(filepath.ToSlash(filepath.Clean(path)), "/")
}

// Add adds a path to the tree. The path is cleaned first, so "a//b/" and
// "a/b" refer to the same entry.
func (t *Tree) Add(path string) {
	t.add(splitPath(path))
}

// HasPrefix reports whether the tree contains a stored path that is equal to,
// or a leading directory of, the given path.
// Search algorithm is naive: it does linear search when going through the
// nodes instead of binary-search. Since we expect number of children to be
// really small (usually 1 or 2), it does not really matter. If you find a
// real-world container with 30+ whiteout paths on a single path, please ping
// the author/maintainer of this code.
func (t *Tree) HasPrefix(path string) bool {
	return t.hasprefix(splitPath(path))
}

// Merge adds every path stored in t2 to t, leaving t2 untouched. It is not
// optimized for speed: each stored path of t2 is re-added to t starting from
// the root. A nil t2 is a no-op.
func (t *Tree) Merge(t2 *Tree) {
	if t2 == nil {
		return
	}
	// Start below t2's root: the root's own name is not part of any stored
	// path, so only its descendants contribute components.
	for _, child := range t2.children {
		t.merge(child, nil)
	}
}

// String renders the stored paths, each prefixed with the root's name, sorted
// and joined with ":". A tree without children renders as "<empty>".
func (t *Tree) String() string {
	if len(t.children) == 0 {
		return "<empty>"
	}

	var res stringer
	res.stringify(t, nil)
	sort.Strings(res.res)
	return strings.Join(res.res, ":")
}

// add inserts the remaining components below t, creating nodes as needed, and
// marks the node reached after the last component as the end of a path.
func (t *Tree) add(nodes []string) {
	if len(nodes) == 0 {
		t.end = true
		return
	}
	for _, child := range t.children {
		if child.name == nodes[0] {
			child.add(nodes[1:])
			return
		}
	}

	newNode := &Tree{name: nodes[0]}
	t.children = append(t.children, newNode)
	newNode.add(nodes[1:])
}

// hasprefix walks the tree along nodes and reports whether a stored path ends
// at, or anywhere above, the position the components lead to.
func (t *Tree) hasprefix(nodes []string) bool {
	if len(nodes) == 0 {
		return t.end
	}
	// A stored path ends here, so everything below it matches.
	if t.end {
		return true
	}

	for _, child := range t.children {
		if child.name == nodes[0] {
			return child.hasprefix(nodes[1:])
		}
	}

	return false
}

// stringer accumulates the rendered paths while a tree is being walked.
type stringer struct {
	res []string
}

// stringify appends to s.res every stored path found in t, with acc holding
// the components of t's ancestors. A node with an empty name is skipped
// together with its whole subtree.
func (s *stringer) stringify(t *Tree, acc []string) {
	if t.name == "" {
		return
	}
	acc = append(acc, t.name)
	if t.end {
		s.res = append(s.res, strings.Join(acc, "/"))
	}

	for _, child := range t.children {
		s.stringify(child, acc)
	}
}

// merge re-adds to t every path stored in the subtree rooted at t2; acc holds
// the components leading to t2, not counting the source tree's root.
func (t *Tree) merge(t2 *Tree, acc []string) {
	// Cap the slice at its length so that append always copies instead of
	// writing into backing storage shared with sibling branches.
	acc = append(acc[:len(acc):len(acc)], t2.name)
	if t2.end {
		t.add(acc)
	}
	for _, child := range t2.children {
		t.merge(child, acc)
	}
}
|
92
internal/tree/tree_test.go
Normal file
92
internal/tree/tree_test.go
Normal file
@ -0,0 +1,92 @@
|
||||
package tree
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestTree(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
paths []string
|
||||
matchTrue []string
|
||||
matchFalse []string
|
||||
}{
|
||||
{
|
||||
name: "empty sequence matches nothing",
|
||||
paths: []string{},
|
||||
matchFalse: []string{"a", "b"},
|
||||
},
|
||||
{
|
||||
name: "a few sequences",
|
||||
paths: []string{"a", "b", "c/b/a"},
|
||||
matchTrue: []string{"a", "a/b/c", "c/b/a", "c/b/a/d"},
|
||||
matchFalse: []string{"c/d", "c", "c/b"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tree := New(tt.paths)
|
||||
|
||||
for _, path := range tt.matchTrue {
|
||||
t.Run(path, func(t *testing.T) {
|
||||
assert.True(t, tree.HasPrefix(path),
|
||||
"expected %s to be a prefix of %s", path, tree)
|
||||
})
|
||||
}
|
||||
|
||||
for _, path := range tt.matchFalse {
|
||||
t.Run(path, func(t *testing.T) {
|
||||
assert.False(t, tree.HasPrefix(path),
|
||||
"expected %s to not be a prefix of %s", path, tree)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTreeMerge(t *testing.T) {
|
||||
tree1 := New([]string{"bin/ar", "var/cache/apt"})
|
||||
tree2 := New([]string{"bin/ar", "bin/busybox", "usr/share/doc"})
|
||||
tree1.Merge(tree2)
|
||||
assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc:./var/cache/apt", tree1.String())
|
||||
assert.Equal(t, "./bin/ar:./bin/busybox:./usr/share/doc", tree2.String())
|
||||
}
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
paths []string
|
||||
wantStr string
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
paths: []string{},
|
||||
wantStr: "<empty>",
|
||||
},
|
||||
{
|
||||
name: "simple path",
|
||||
paths: []string{"a/b/c"},
|
||||
wantStr: "./a/b/c",
|
||||
},
|
||||
{
|
||||
name: "duplicate paths",
|
||||
paths: []string{"a/a", "a//a"},
|
||||
wantStr: "./a/a",
|
||||
},
|
||||
{
|
||||
name: "a few sequences",
|
||||
paths: []string{"bin/ar", "bin/busybox", "var/cache/apt/archives"},
|
||||
wantStr: "./bin/ar:./bin/busybox:./var/cache/apt/archives",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tree := New(tt.paths)
|
||||
assert.Equal(t, tt.wantStr, tree.String())
|
||||
})
|
||||
}
|
||||
}
|
@ -15,6 +15,8 @@ import (
|
||||
const (
|
||||
_manifestJSON = "manifest.json"
|
||||
_layerSuffix = "/layer.tar"
|
||||
_whReaddir = ".wh..wh..opq"
|
||||
_whPrefix = ".wh."
|
||||
)
|
||||
|
||||
var (
|
||||
@ -39,23 +41,22 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
tr := tar.NewReader(in)
|
||||
tw := tar.NewWriter(out)
|
||||
defer func() { err = multierr.Append(err, tw.Close()) }()
|
||||
|
||||
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to its offset
|
||||
layerOffsets := map[string]int64{}
|
||||
|
||||
// manifest is the docker manifest in the image
|
||||
var manifest dockerManifestJSON
|
||||
|
||||
// phase 1: get layer offsets and manifest.json
|
||||
// get layer offsets and manifest.json
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if hdr.Typeflag != tar.TypeReg {
|
||||
continue
|
||||
}
|
||||
|
||||
switch {
|
||||
case filepath.Clean(hdr.Name) == _manifestJSON:
|
||||
dec := json.NewDecoder(tr)
|
||||
@ -84,13 +85,20 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
// file2layer maps a filename to layer number (index in "layers")
|
||||
file2layer := map[string]int{}
|
||||
|
||||
// iterate through all layers and save filenames for all kinds of files.
|
||||
// whreaddir maps a directory to a layer number until which
|
||||
// its contents should be ignored, exclusively.
|
||||
whreaddir := map[string]int{}
|
||||
|
||||
// wh maps a filename to a layer until which it should be ignored,
|
||||
// inclusively.
|
||||
wh := map[string]int{}
|
||||
|
||||
// build up `file2layer`, `whreaddir`, `wh`
|
||||
for i, offset := range layers {
|
||||
if _, err := in.Seek(offset, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
tr = tar.NewReader(in)
|
||||
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
@ -99,6 +107,23 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hdr.Typeflag == tar.TypeDir {
|
||||
continue
|
||||
}
|
||||
|
||||
if hdr.Typeflag == tar.TypeLink {
|
||||
basename := filepath.Base(hdr.Name)
|
||||
basedir := filepath.Dir(hdr.Name)
|
||||
if basename == _whReaddir {
|
||||
whreaddir[basedir] = i
|
||||
continue
|
||||
} else if strings.HasPrefix(basename, _whPrefix) {
|
||||
fname := strings.TrimPrefix(basename, _whPrefix)
|
||||
wh[filepath.Join(basedir, fname)] = i
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
file2layer[hdr.Name] = i
|
||||
}
|
||||
}
|
||||
@ -109,7 +134,6 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
return err
|
||||
}
|
||||
tr = tar.NewReader(in)
|
||||
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
@ -118,13 +142,9 @@ func RootFS(in io.ReadSeeker, out io.Writer) (err error) {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Only directories can have multiple entries with the same name.
|
||||
// all other file types cannot.
|
||||
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
|
||||
if file2layer[hdr.Name] != i {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := writeFile(tr, tw, hdr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user