undocker/rootfs/rootfs.go

package rootfs

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"path/filepath"
	"strings"

	"github.com/motiejus/code/undocker/internal/bytecounter"
	"go.uber.org/multierr"
)

const (
	_manifestJSON = "manifest.json"
	_layerSuffix  = "/layer.tar"
	_whReaddir    = ".wh..wh..opq"
	_whPrefix     = ".wh."
)

var errBadManifest = errors.New("bad or missing manifest.json")

type (
	// RootFS accepts a docker image tarball (as produced by
	// `docker save`) and flattens it into a single root-filesystem
	// tarball.
	RootFS struct {
		rd io.ReadSeeker
	}

	dockerManifestJSON []struct {
		Layers []string `json:"Layers"`
	}

	nameOffset struct {
		name   string
		offset int64
	}
)
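
// manifest.json, as written by `docker save`, is a JSON array with one entry
// per image; only the Layers field is used here. An abridged illustration
// (field values are placeholders):
//
//	[{"Config":"<id>.json","RepoTags":["img:latest"],"Layers":["<id>/layer.tar"]}]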

// New creates a new RootFS'er.
func New(rd io.ReadSeeker) *RootFS {
	return &RootFS{rd: rd}
}
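
// A minimal usage sketch (hypothetical paths, error handling elided):
//
//	in, _ := os.Open("image.tar")      // output of `docker save`
//	out, _ := os.Create("rootfs.tar")  // flattened single-layer tarball
//	_, err := rootfs.New(in).WriteTo(out)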

// WriteTo writes the flattened root filesystem to w as an uncompressed
// tarball and returns the number of bytes written.
func (r *RootFS) WriteTo(w io.Writer) (n int64, err error) {
	tr := tar.NewReader(r.rd)
	var closer func() error

	// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to its offset
	layerOffsets := map[string]int64{}

	// manifest is the docker manifest in the image
	var manifest dockerManifestJSON

	// get layer offsets and manifest.json
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return n, err
		}
		if hdr.Typeflag != tar.TypeReg {
			continue
		}
		switch {
		case filepath.Clean(hdr.Name) == _manifestJSON:
			dec := json.NewDecoder(tr)
			if err := dec.Decode(&manifest); err != nil {
				return n, fmt.Errorf("decode %s: %w", _manifestJSON, err)
			}
		case strings.HasSuffix(hdr.Name, _layerSuffix):
			here, err := r.rd.Seek(0, io.SeekCurrent)
			if err != nil {
				return n, err
			}
			layerOffsets[hdr.Name] = here
		}
	}

	if len(manifest) == 0 || len(layerOffsets) != len(manifest[0].Layers) {
		return n, errBadManifest
	}

	// enumerate layers the way they would be laid down in the image
	layers := make([]nameOffset, len(layerOffsets))
	for i, name := range manifest[0].Layers {
		layers[i] = nameOffset{
			name:   name,
			offset: layerOffsets[name],
		}
	}

	// file2layer maps a filename to the highest layer (index in "layers")
	// that contains it
	file2layer := map[string]int{}

	// whreaddir maps a directory containing a `.wh..wh..opq` (opaque
	// directory) marker to the layer the marker appears in; see doc.go
	whreaddir := map[string]int{}

	// wh maps a whited-out filename to the layer until which it should be
	// ignored, inclusively; see doc.go
	wh := map[string]int{}
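
	// For example, if layer 2 contains usr/bin/.wh.vi, then
	// wh["usr/bin/vi"] = 2, and copies of usr/bin/vi coming from layers
	// 0 through 2 are skipped when the flattened image is written.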

	// iterate over all files, constructing `file2layer`, `whreaddir`, `wh`
	for i, no := range layers {
		if _, err := r.rd.Seek(no.offset, io.SeekStart); err != nil {
			return n, err
		}
		tr, closer = openTargz(r.rd)
		for {
			hdr, err := tr.Next()
			if err == io.EOF {
				break
			}
			if err != nil {
				return n, fmt.Errorf("decode %s: %w", no.name, err)
			}
			if hdr.Typeflag == tar.TypeDir {
				continue
			}
			// According to the aufs documentation, whiteout files
			// should be hardlinks. At least one docker container
			// in the wild uses regular files for whiteouts, so
			// both are accepted here.
			if hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
				basename := filepath.Base(hdr.Name)
				basedir := filepath.Dir(hdr.Name)
				if basename == _whReaddir {
					whreaddir[basedir] = i
					continue
				} else if strings.HasPrefix(basename, _whPrefix) {
					fname := strings.TrimPrefix(basename, _whPrefix)
					wh[filepath.Join(basedir, fname)] = i
					continue
				}
			}
			file2layer[hdr.Name] = i
		}
		if err := closer(); err != nil {
			return n, err
		}
	}

	// construct directories to whiteout, for each layer
	whIgnore := whiteoutDirs(whreaddir, len(layers))

	wr := bytecounter.New(w)
	tw := tar.NewWriter(wr)
	defer func() {
		err = multierr.Append(err, tw.Close())
		n = wr.N
	}()
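
	// Note: the deferred function runs at return and closes tw (flushing
	// the tar footer) before reading wr.N, so the returned n includes the
	// trailing zero blocks.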

	// iterate through all layers, all files, and write files.
	for i, no := range layers {
		if _, err := r.rd.Seek(no.offset, io.SeekStart); err != nil {
			return n, err
		}
		tr, closer = openTargz(r.rd)
		for {
			hdr, err := tr.Next()
			if err == io.EOF {
				break
			}
			if err != nil {
				return n, fmt.Errorf("decode %s: %w", no.name, err)
			}
			// skip the file if this or a later layer whites it out
			if layer, ok := wh[hdr.Name]; ok && layer >= i {
				continue
			}
			// skip the file if a later layer marks its directory opaque
			if whIgnore[i].HasPrefix(hdr.Name) {
				continue
			}
			// write non-directories only from the highest layer
			// that contains them
			if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
				continue
			}
			if err := writeFile(tr, tw, hdr); err != nil {
				return n, err
			}
		}
		if err := closer(); err != nil {
			return n, err
		}
	}

	return n, nil
}

// writeFile copies hdr and, for regular files, the file contents from tr
// to tw, normalizing the header to the GNU tar format and keeping only the
// permission bits of the mode.
func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
	hdrOut := &tar.Header{
		Typeflag: hdr.Typeflag,
		Name:     hdr.Name,
		Linkname: hdr.Linkname,
		Size:     hdr.Size,
		Mode:     int64(hdr.Mode & 0777),
		Uid:      hdr.Uid,
		Gid:      hdr.Gid,
		Uname:    hdr.Uname,
		Gname:    hdr.Gname,
		ModTime:  hdr.ModTime,
		Devmajor: hdr.Devmajor,
		Devminor: hdr.Devminor,
		Format:   tar.FormatGNU,
	}
	if err := tw.WriteHeader(hdrOut); err != nil {
		return err
	}
	if hdr.Typeflag == tar.TypeReg {
		if _, err := io.Copy(tw, tr); err != nil {
			return err
		}
	}

	return nil
}

// whiteoutDirs builds, for each layer, the set of directories that a higher
// layer marks as opaque: an opaque-directory marker in layer N hides that
// directory's contents in all layers below N.
func whiteoutDirs(whreaddir map[string]int, nlayers int) []*tree {
	ret := make([]*tree, nlayers)
	for i := range ret {
		ret[i] = newTree()
	}
	for fname, layer := range whreaddir {
		if layer == 0 {
			continue
		}
		ret[layer-1].Add(fname)
	}
	// propagate downwards: a directory ignored at layer i is also
	// ignored at layer i-1
	for i := nlayers - 1; i > 0; i-- {
		ret[i-1].Merge(ret[i])
	}
	return ret
}
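
// For example, a usr/share/.wh..wh..opq marker in layer 3 puts usr/share
// into the ignore trees of layers 0, 1 and 2: anything under usr/share in
// those layers is dropped, while layer 3's own usr/share contents are kept.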

// openTargz creates a tar reader from a gzip-compressed or plain tar stream.
//
// We could sniff the magic values for tar and/or gzip ourselves, which
// would mean "cleaner" code (e.g. no proxyWriter), but that would mean
// re-implementing gzip.readHeader(), which is already in the stdlib.
func openTargz(r io.Reader) (*tar.Reader, func() error) {
	hdrbuf := &bytes.Buffer{}
	hdrw := &proxyWriter{w: hdrbuf}
	gz, err := gzip.NewReader(io.TeeReader(r, hdrw))
	if err == nil {
		// gzip: stop buffering the probed bytes; they are not needed.
		hdrw.w = ioutil.Discard
		hdrbuf = nil
		return tar.NewReader(gz), gz.Close
	}
	// plain tar: replay the probed bytes, then the rest of r.
	return tar.NewReader(io.MultiReader(hdrbuf, r)), func() error { return nil }
}
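
// When the stream is not gzipped, gzip.NewReader still consumes some bytes
// from r while probing for the gzip header; the TeeReader in openTargz
// copies exactly those bytes into hdrbuf, and io.MultiReader stitches them
// back in front of the unread remainder of r.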

// proxyWriter is a pass-through writer whose underlying writer can be
// swapped on the fly. Useful when the tail of a stream needs to be
// discarded (point the underlying writer at, say, ioutil.Discard).
type proxyWriter struct {
	w io.Writer
}

// Write writes p to the underlying writer.
func (pw *proxyWriter) Write(p []byte) (int, error) {
	return pw.w.Write(p)
}