undocker

extract docker archives
Log | Files | Refs | README | LICENSE

rootfs.go (6253B) - Raw


      1 package rootfs
      2 
      3 import (
      4 	"archive/tar"
      5 	"bytes"
      6 	"compress/gzip"
      7 	"encoding/json"
      8 	"errors"
      9 	"fmt"
     10 	"io"
     11 	"path/filepath"
     12 	"strings"
     13 )
     14 
const (
	// _manifestJSON is the name of the manifest file inside the image
	// archive; Flatten decodes the regular file whose cleaned name equals it.
	_manifestJSON = "manifest.json"
	// _tarSuffix is not referenced in this part of the file.
	// NOTE(review): presumably the layer tarball suffix — confirm usage.
	_tarSuffix    = ".tar"
	// _whReaddir is the basename of the marker file that Flatten records
	// per directory in its whreaddir map.
	_whReaddir    = ".wh..wh..opq"
	// _whPrefix is the basename prefix of a whiteout file; the remainder of
	// the basename is the name of the file being whited out.
	_whPrefix     = ".wh."
)

// _gzipMagic holds the two leading bytes openTargz compares against to decide
// whether a stream is gzip-compressed.
var _gzipMagic = []byte{0x1f, 0x8b}

type (
	// dockerManifestJSON is the decoded form of manifest.json. Only the
	// "Layers" field is decoded, and Flatten only uses the first element.
	dockerManifestJSON []struct {
		Layers []string `json:"Layers"`
	}

	// nameOffset pairs a layer's name, as listed in the manifest, with the
	// byte offset of its data inside the image archive.
	nameOffset struct {
		name   string
		offset int64
	}
)
     34 
     35 // Flatten flattens a docker image to a tarball. The underlying io.Writer
     36 // should be an open file handle, which the caller is responsible for closing
     37 // themselves
     38 func Flatten(rd io.ReadSeeker, w io.Writer) (_err error) {
     39 	tr := tar.NewReader(rd)
     40 	var closer func() error
     41 	var err error
     42 
     43 	// fileOffsets maps a file name (a9b123c0daa/layer.tar) to it's offset
     44 	fileOffsets := map[string]int64{}
     45 
     46 	// manifest is the docker manifest in the image
     47 	var manifest dockerManifestJSON
     48 
     49 	// get layer offsets and manifest.json
     50 	for {
     51 		hdr, err := tr.Next()
     52 		if err == io.EOF {
     53 			break
     54 		}
     55 		if err != nil {
     56 			return err
     57 		}
     58 		if hdr.Typeflag != tar.TypeReg {
     59 			continue
     60 		}
     61 		here, err := rd.Seek(0, io.SeekCurrent)
     62 		if err != nil {
     63 			return err
     64 		}
     65 		fileOffsets[strings.TrimPrefix(hdr.Name, "./")] = here
     66 		if filepath.Clean(hdr.Name) == _manifestJSON {
     67 			dec := json.NewDecoder(tr)
     68 			if err := dec.Decode(&manifest); err != nil {
     69 				return fmt.Errorf("decode %s: %w", _manifestJSON, err)
     70 			}
     71 		}
     72 	}
     73 
     74 	if err := validateManifest(fileOffsets, manifest); err != nil {
     75 		return err
     76 	}
     77 
     78 	// enumerate layers the way they would be laid down in the image
     79 	layers := make([]nameOffset, len(manifest[0].Layers))
     80 	for i, name := range manifest[0].Layers {
     81 		layers[i] = nameOffset{
     82 			name:   name,
     83 			offset: fileOffsets[strings.TrimPrefix(name, "./")],
     84 		}
     85 	}
     86 
     87 	// file2layer maps a filename to layer number (index in "layers")
     88 	file2layer := map[string]int{}
     89 
     90 	// whreaddir maps `wh..wh..opq` file to a layer; see doc.go
     91 	whreaddir := map[string]int{}
     92 
     93 	// wh maps a filename to a layer until which it should be ignored,
     94 	// inclusively; see doc.go
     95 	wh := map[string]int{}
     96 
     97 	// iterate over all files, construct `file2layer`, `whreaddir`, `wh`
     98 	for i, no := range layers {
     99 		if _, err := rd.Seek(no.offset, io.SeekStart); err != nil {
    100 			return err
    101 		}
    102 		tr, closer, err = openTargz(rd)
    103 		if err != nil {
    104 			return err
    105 		}
    106 		for {
    107 			hdr, err := tr.Next()
    108 			if err == io.EOF {
    109 				break
    110 			}
    111 			if err != nil {
    112 				return fmt.Errorf("decode %s: %w", no.name, err)
    113 			}
    114 			if hdr.Typeflag == tar.TypeDir {
    115 				continue
    116 			}
    117 
    118 			// according to aufs documentation, whiteout files should be
    119 			// hardlinks. I saw at least one docker container using regular
    120 			// files for whiteouts.
    121 			if hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
    122 				basename := filepath.Base(hdr.Name)
    123 				basedir := filepath.Dir(hdr.Name)
    124 				if basename == _whReaddir {
    125 					whreaddir[basedir] = i
    126 					continue
    127 				} else if strings.HasPrefix(basename, _whPrefix) {
    128 					fname := strings.TrimPrefix(basename, _whPrefix)
    129 					wh[filepath.Join(basedir, fname)] = i
    130 					continue
    131 				}
    132 			}
    133 			file2layer[hdr.Name] = i
    134 		}
    135 		if err := closer(); err != nil {
    136 			return err
    137 		}
    138 	}
    139 
    140 	// construct directories to whiteout, for each layer.
    141 	whIgnore := whiteoutDirs(whreaddir, len(layers))
    142 
    143 	tw := tar.NewWriter(w)
    144 	defer func() {
    145 		_err = errors.Join(_err, tw.Close())
    146 	}()
    147 	// iterate through all layers, all files, and write files.
    148 	for i, no := range layers {
    149 		if _, err := rd.Seek(no.offset, io.SeekStart); err != nil {
    150 			return err
    151 		}
    152 		tr, closer, err = openTargz(rd)
    153 		if err != nil {
    154 			return err
    155 		}
    156 		for {
    157 			hdr, err := tr.Next()
    158 			if err == io.EOF {
    159 				break
    160 			}
    161 			if err != nil {
    162 				return fmt.Errorf("decode %s: %w", no.name, err)
    163 			}
    164 			if layer, ok := wh[hdr.Name]; ok && layer >= i {
    165 				continue
    166 			}
    167 			if whIgnore[i].HasPrefix(hdr.Name) {
    168 				continue
    169 			}
    170 			if hdr.Typeflag != tar.TypeDir && file2layer[hdr.Name] != i {
    171 				continue
    172 			}
    173 			if err := writeFile(tr, tw, hdr); err != nil {
    174 				return err
    175 			}
    176 		}
    177 		if err := closer(); err != nil {
    178 			return err
    179 		}
    180 	}
    181 	return nil
    182 }
    183 
    184 func writeFile(tr *tar.Reader, tw *tar.Writer, hdr *tar.Header) error {
    185 	hdrOut := &tar.Header{
    186 		Typeflag: hdr.Typeflag,
    187 		Name:     hdr.Name,
    188 		Linkname: hdr.Linkname,
    189 		Size:     hdr.Size,
    190 		Mode:     int64(hdr.Mode & 0777),
    191 		Uid:      hdr.Uid,
    192 		Gid:      hdr.Gid,
    193 		Uname:    hdr.Uname,
    194 		Gname:    hdr.Gname,
    195 		ModTime:  hdr.ModTime,
    196 		Devmajor: hdr.Devmajor,
    197 		Devminor: hdr.Devminor,
    198 		Format:   tar.FormatGNU,
    199 	}
    200 
    201 	if err := tw.WriteHeader(hdrOut); err != nil {
    202 		return err
    203 	}
    204 
    205 	if hdr.Typeflag == tar.TypeReg {
    206 		if _, err := io.Copy(tw, tr); err != nil {
    207 			return err
    208 		}
    209 	}
    210 
    211 	return nil
    212 }
    213 
    214 func whiteoutDirs(whreaddir map[string]int, nlayers int) []*tree {
    215 	ret := make([]*tree, nlayers)
    216 	for i := range ret {
    217 		ret[i] = newTree()
    218 	}
    219 	for fname, layer := range whreaddir {
    220 		if layer == 0 {
    221 			continue
    222 		}
    223 		ret[layer-1].Add(fname)
    224 	}
    225 	for i := nlayers - 1; i > 0; i-- {
    226 		ret[i-1].Merge(ret[i])
    227 	}
    228 	return ret
    229 }
    230 
    231 // validateManifest
    232 func validateManifest(
    233 	fileOffsets map[string]int64,
    234 	manifest dockerManifestJSON,
    235 ) error {
    236 	if len(manifest) == 0 {
    237 		return fmt.Errorf("empty or missing manifest")
    238 	}
    239 
    240 	for _, layer := range manifest[0].Layers {
    241 		if _, ok := fileOffsets[layer]; !ok {
    242 			return fmt.Errorf("%s defined in manifest, missing in tarball", layer)
    243 		}
    244 	}
    245 
    246 	return nil
    247 }
    248 
    249 // openTargz creates a tar reader from a targzip or tar.
    250 func openTargz(rs io.ReadSeeker) (*tar.Reader, func() error, error) {
    251 	// find out whether the given file is targz or tar
    252 	head := make([]byte, 2)
    253 	_, err := io.ReadFull(rs, head)
    254 	switch {
    255 	case err == io.ErrUnexpectedEOF:
    256 		return nil, nil, errors.New("tarball or gzipfile too small")
    257 	case err != nil:
    258 		return nil, nil, fmt.Errorf("read error: %w", err)
    259 	}
    260 
    261 	if _, err := rs.Seek(-2, io.SeekCurrent); err != nil {
    262 		return nil, nil, fmt.Errorf("seek: %w", err)
    263 	}
    264 
    265 	r := rs.(io.Reader)
    266 	closer := func() error { return nil }
    267 	if bytes.Equal(head, _gzipMagic) {
    268 		gzipr, err := gzip.NewReader(r)
    269 		if err != nil {
    270 			return nil, nil, fmt.Errorf("gzip.NewReader: %w", err)
    271 		}
    272 		closer = gzipr.Close
    273 		r = gzipr
    274 	}
    275 
    276 	return tar.NewReader(r), closer, nil
    277 }