2021-05-24 00:11:57 +03:00
|
|
|
package rootfs
|
2021-05-24 00:11:57 +03:00
|
|
|
|
|
|
|
import (
|
2021-05-24 00:11:57 +03:00
|
|
|
"archive/tar"
|
2021-05-24 00:11:58 +03:00
|
|
|
"bytes"
|
|
|
|
"compress/gzip"
|
2021-05-24 00:11:57 +03:00
|
|
|
"encoding/json"
|
2021-08-13 15:27:44 +03:00
|
|
|
"errors"
|
2021-05-24 00:11:58 +03:00
|
|
|
"fmt"
|
2021-05-24 00:11:57 +03:00
|
|
|
"io"
|
2021-05-24 00:11:57 +03:00
|
|
|
"path/filepath"
|
2021-05-24 00:11:57 +03:00
|
|
|
"strings"
|
2021-05-24 00:11:57 +03:00
|
|
|
)
|
|
|
|
|
2021-05-24 00:11:57 +03:00
|
|
|
// Names and markers looked for inside a docker image and its layers.
const (
	// _whPrefix is the basename prefix of a whiteout marker; the remainder
	// of the basename names the sibling entry to be hidden.
	_whPrefix = ".wh."

	// _whReaddir is the basename of the marker file recorded per directory
	// so that contents of that directory coming from lower layers are
	// ignored.
	_whReaddir = ".wh..wh..opq"

	// _manifestJSON is the name of the manifest file inside the image.
	_manifestJSON = "manifest.json"

	// _tarSuffix is the file name suffix identifying a layer tarball inside
	// the image.
	_tarSuffix = ".tar"
)
|
|
|
|
|
2021-08-13 15:27:44 +03:00
|
|
|
// _gzipMagic holds the two bytes a gzip stream starts with; openTargz compares
// the head of its input against it to tell gzip from plain tar.
var _gzipMagic = []byte("\x1f\x8b")
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// dockerManifestJSON is the subset of the image's manifest.json that this
// package decodes: a list of entries, each carrying its layer names.
type dockerManifestJSON []struct {
	Layers []string `json:"Layers"`
}

// nameOffset couples a layer name with the offset in the image stream at
// which that layer's contents begin.
type nameOffset struct {
	name   string
	offset int64
}
|
2021-05-24 00:11:58 +03:00
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// Flatten flattens a docker image to a tarball. The underlying io.Writer
|
|
|
|
// should be an open file handle, which the caller is responsible for closing
|
|
|
|
// themselves
|
2021-08-29 18:39:46 +03:00
|
|
|
func Flatten(rd io.ReadSeeker, w io.Writer) (_err error) {
|
2021-05-24 00:11:58 +03:00
|
|
|
tr := tar.NewReader(rd)
|
2021-05-24 00:11:58 +03:00
|
|
|
var closer func() error
|
2021-08-13 15:27:44 +03:00
|
|
|
var err error
|
2021-05-24 00:11:58 +03:00
|
|
|
|
2021-05-24 00:11:57 +03:00
|
|
|
// layerOffsets maps a layer name (a9b123c0daa/layer.tar) to it's offset
|
2021-05-24 00:11:57 +03:00
|
|
|
layerOffsets := map[string]int64{}
|
2021-05-24 00:11:57 +03:00
|
|
|
|
|
|
|
// manifest is the docker manifest in the image
|
2021-05-24 00:11:57 +03:00
|
|
|
var manifest dockerManifestJSON
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// get layer offsets and manifest.json
|
2021-05-24 00:11:57 +03:00
|
|
|
for {
|
2021-05-24 00:11:57 +03:00
|
|
|
hdr, err := tr.Next()
|
2021-05-24 00:11:57 +03:00
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
2021-08-19 16:32:22 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
if hdr.Typeflag != tar.TypeReg {
|
2021-05-24 00:11:57 +03:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
switch {
|
2021-05-24 00:11:57 +03:00
|
|
|
case filepath.Clean(hdr.Name) == _manifestJSON:
|
2021-05-24 00:11:57 +03:00
|
|
|
dec := json.NewDecoder(tr)
|
|
|
|
if err := dec.Decode(&manifest); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return fmt.Errorf("decode %s: %w", _manifestJSON, err)
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
case strings.HasSuffix(hdr.Name, _tarSuffix):
|
2021-05-24 00:11:58 +03:00
|
|
|
here, err := rd.Seek(0, io.SeekCurrent)
|
2021-05-24 00:11:57 +03:00
|
|
|
if err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2022-06-09 14:22:19 +03:00
|
|
|
layerOffsets[strings.TrimPrefix(hdr.Name, "./")] = here
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
if err := validateManifest(layerOffsets, manifest); err != nil {
|
|
|
|
return err
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// enumerate layers the way they would be laid down in the image
|
2021-05-24 00:11:58 +03:00
|
|
|
layers := make([]nameOffset, len(layerOffsets))
|
2021-05-24 00:11:57 +03:00
|
|
|
for i, name := range manifest[0].Layers {
|
2021-05-24 00:11:58 +03:00
|
|
|
layers[i] = nameOffset{
|
|
|
|
name: name,
|
2022-06-09 14:22:19 +03:00
|
|
|
offset: layerOffsets[strings.TrimPrefix(name, "./")],
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:57 +03:00
|
|
|
// file2layer maps a filename to layer number (index in "layers")
|
|
|
|
file2layer := map[string]int{}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// whreaddir maps `wh..wh..opq` file to a layer; see doc.go
|
2021-05-24 00:11:58 +03:00
|
|
|
whreaddir := map[string]int{}
|
|
|
|
|
|
|
|
// wh maps a filename to a layer until which it should be ignored,
|
2021-05-24 00:11:58 +03:00
|
|
|
// inclusively; see doc.go
|
2021-05-24 00:11:58 +03:00
|
|
|
wh := map[string]int{}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// iterate over all files, construct `file2layer`, `whreaddir`, `wh`
|
2021-05-24 00:11:58 +03:00
|
|
|
for i, no := range layers {
|
2021-05-24 00:11:58 +03:00
|
|
|
if _, err := rd.Seek(no.offset, io.SeekStart); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-08-13 15:27:44 +03:00
|
|
|
tr, closer, err = openTargz(rd)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
for {
|
|
|
|
hdr, err := tr.Next()
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
if err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return fmt.Errorf("decode %s: %w", no.name, err)
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
if hdr.Typeflag == tar.TypeDir {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// according to aufs documentation, whiteout files should be
|
|
|
|
// hardlinks. I saw at least one docker container using regular
|
|
|
|
// files for whiteouts.
|
2021-05-24 00:11:58 +03:00
|
|
|
if hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
|
2021-05-24 00:11:58 +03:00
|
|
|
basename := filepath.Base(hdr.Name)
|
|
|
|
basedir := filepath.Dir(hdr.Name)
|
|
|
|
if basename == _whReaddir {
|
|
|
|
whreaddir[basedir] = i
|
|
|
|
continue
|
|
|
|
} else if strings.HasPrefix(basename, _whPrefix) {
|
|
|
|
fname := strings.TrimPrefix(basename, _whPrefix)
|
|
|
|
wh[filepath.Join(basedir, fname)] = i
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
file2layer[hdr.Name] = i
|
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
if err := closer(); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// construct directories to whiteout, for each layer.
|
2021-05-24 00:11:58 +03:00
|
|
|
whIgnore := whiteoutDirs(whreaddir, len(layers))
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
tw := tar.NewWriter(w)
|
2021-05-24 00:11:58 +03:00
|
|
|
defer func() {
|
2023-05-19 15:01:44 +03:00
|
|
|
_err = errors.Join(_err, tw.Close())
|
2021-05-24 00:11:58 +03:00
|
|
|
}()
|
2021-05-24 00:11:58 +03:00
|
|
|
// iterate through all layers, all files, and write files.
|
2021-05-24 00:11:58 +03:00
|
|
|
for i, no := range layers {
|
2021-05-24 00:11:58 +03:00
|
|
|
if _, err := rd.Seek(no.offset, io.SeekStart); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-08-13 15:27:44 +03:00
|
|
|
tr, closer, err = openTargz(rd)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
for {
|
|
|
|
hdr, err := tr.Next()
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
if err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return fmt.Errorf("decode %s: %w", no.name, err)
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
if layer, ok := wh[hdr.Name]; ok && layer >= i {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if whIgnore[i].HasPrefix(hdr.Name) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
|
2021-05-24 00:11:57 +03:00
|
|
|
continue
|
|
|
|
}
|
2021-08-29 18:39:46 +03:00
|
|
|
if err := writeFile(tr, tw, hdr); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:57 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
if err := closer(); err != nil {
|
2021-05-24 00:11:58 +03:00
|
|
|
return err
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
return nil
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
2021-05-24 00:11:57 +03:00
|
|
|
|
2021-08-29 18:39:46 +03:00
|
|
|
// writeFile emits the entry described by hdr to tw, taking the contents of
// regular files from tr.
//
// The output header is built from scratch in GNU format: only the type, name,
// link name, size, ownership, modification time and device numbers are carried
// over, and the mode is reduced to its lower nine permission bits. Entries
// other than regular files are written as a header only.
func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
	out := tar.Header{
		Format:   tar.FormatGNU,
		Typeflag: hdr.Typeflag,
		Name:     hdr.Name,
		Linkname: hdr.Linkname,
		Size:     hdr.Size,
		Mode:     hdr.Mode & 0777,
		ModTime:  hdr.ModTime,
		Uid:      hdr.Uid,
		Uname:    hdr.Uname,
		Gid:      hdr.Gid,
		Gname:    hdr.Gname,
		Devmajor: hdr.Devmajor,
		Devminor: hdr.Devminor,
	}

	err := tw.WriteHeader(&out)
	if err != nil || hdr.Typeflag != tar.TypeReg {
		return err
	}

	// only regular files have contents to copy
	_, err = io.Copy(tw, tr)
	return err
}
|
2021-05-24 00:11:58 +03:00
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
func whiteoutDirs(whreaddir map[string]int, nlayers int) []*tree {
|
|
|
|
ret := make([]*tree, nlayers)
|
2021-05-24 00:11:58 +03:00
|
|
|
for i := range ret {
|
2021-05-24 00:11:58 +03:00
|
|
|
ret[i] = newTree()
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
|
|
|
for fname, layer := range whreaddir {
|
|
|
|
if layer == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
ret[layer-1].Add(fname)
|
|
|
|
}
|
|
|
|
for i := nlayers - 1; i > 0; i-- {
|
|
|
|
ret[i-1].Merge(ret[i])
|
|
|
|
}
|
|
|
|
return ret
|
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// validateManifest
|
2021-05-24 00:11:58 +03:00
|
|
|
func validateManifest(
|
|
|
|
layerOffsets map[string]int64,
|
|
|
|
manifest dockerManifestJSON,
|
|
|
|
) error {
|
2021-05-24 00:11:58 +03:00
|
|
|
if len(manifest) == 0 {
|
|
|
|
return fmt.Errorf("empty or missing manifest")
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, layer := range manifest[0].Layers {
|
|
|
|
if _, ok := layerOffsets[layer]; !ok {
|
|
|
|
return fmt.Errorf("%s defined in manifest, missing in tarball", layer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:11:58 +03:00
|
|
|
// openTargz creates a tar reader from a targzip or tar.
|
2021-08-13 15:27:44 +03:00
|
|
|
func openTargz(rs io.ReadSeeker) (*tar.Reader, func() error, error) {
|
|
|
|
// find out whether the given file is targz or tar
|
|
|
|
head := make([]byte, 2)
|
|
|
|
_, err := io.ReadFull(rs, head)
|
|
|
|
switch {
|
|
|
|
case err == io.ErrUnexpectedEOF:
|
|
|
|
return nil, nil, errors.New("tarball or gzipfile too small")
|
|
|
|
case err != nil:
|
|
|
|
return nil, nil, fmt.Errorf("read error: %w", err)
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|
|
|
|
|
2021-08-13 15:27:44 +03:00
|
|
|
if _, err := rs.Seek(-2, io.SeekCurrent); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("seek: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
r := rs.(io.Reader)
|
|
|
|
closer := func() error { return nil }
|
|
|
|
if bytes.Equal(head, _gzipMagic) {
|
|
|
|
gzipr, err := gzip.NewReader(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("gzip.NewReader: %w", err)
|
|
|
|
}
|
|
|
|
closer = gzipr.Close
|
|
|
|
r = gzipr
|
|
|
|
}
|
2021-05-24 00:11:58 +03:00
|
|
|
|
2021-08-13 15:27:44 +03:00
|
|
|
return tar.NewReader(r), closer, nil
|
2021-05-24 00:11:58 +03:00
|
|
|
}
|