Skip to content

Commit 9d3d370

Browse files
committed
archive/tar: support reporting and selecting the format
The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int; const FormatUnknown Format = 0 ...; type Header struct { ...; Format Format }. The new Header.Format field is populated by the Reader with its best guess of what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reason for this is that the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format (usually USTAR or GNU) that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 9a9a0fc commit 9d3d370

11 files changed

+277
-154
lines changed

src/archive/tar/common.go

+40-26
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,6 @@
55
// Package tar implements access to tar archives.
66
// It aims to cover most of the variations, including those produced
77
// by GNU and BSD tars.
8-
//
9-
// References:
10-
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
11-
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
12-
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
138
package tar
149

1510
import (
@@ -76,13 +71,26 @@ type Header struct {
7671
// SparseHoles represents a sequence of holes in a sparse file.
7772
//
7873
// A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
74+
// If TypeGNUSparse is set, then the format is GNU, otherwise
75+
// the PAX format with GNU-specific records is used.
76+
//
7977
// A sparse file consists of fragments of data, intermixed with holes
8078
// (described by this field). A hole is semantically a block of NUL-bytes,
81-
// but does not actually exist within the TAR file.
79+
// but does not actually exist within the tar file.
8280
// The logical size of the file is stored in the Size field, while
8381
// the holes must be sorted in ascending order,
8482
// not overlap with each other, and not extend past the specified Size.
8583
SparseHoles []SparseEntry
84+
85+
// Format specifies the format of the tar header.
86+
//
87+
// This is set by Reader.Next as a best-effort guess at the format.
88+
// Since the Reader liberally reads some non-compliant files,
89+
// it is possible for this to be FormatUnknown.
90+
//
91+
// When writing, if this is not FormatUnknown, then Writer.WriteHeader
92+
// uses this as the format to encode the header.
93+
Format Format
8694
}
8795

8896
// SparseEntry represents a Length-sized fragment at Offset in the file.
@@ -209,12 +217,12 @@ func (h *Header) FileInfo() os.FileInfo {
209217

210218
// allowedFormats determines which formats can be used. The value returned
211219
// is the logical OR of multiple possible formats. If the value is
212-
// formatUnknown, then the input Header cannot be encoded.
220+
// FormatUnknown, then the input Header cannot be encoded.
213221
//
214222
// As a by-product of checking the fields, this function returns paxHdrs, which
215223
// contain all fields that could not be directly encoded.
216-
func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
217-
format = formatUSTAR | formatPAX | formatGNU
224+
func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string) {
225+
format = FormatUSTAR | FormatPAX | FormatGNU
218226
paxHdrs = make(map[string]string)
219227

220228
verifyString := func(s string, size int, paxKey string) {
@@ -224,28 +232,28 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
224232
tooLong := len(s) > size
225233
allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
226234
if hasNUL(s) || (tooLong && !allowLongGNU) {
227-
format &^= formatGNU // No GNU
235+
format.mustNotBe(FormatGNU)
228236
}
229237
if !isASCII(s) || tooLong {
230238
canSplitUSTAR := paxKey == paxPath
231239
if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
232-
format &^= formatUSTAR // No USTAR
240+
format.mustNotBe(FormatUSTAR)
233241
}
234242
if paxKey == paxNone {
235-
format &^= formatPAX // No PAX
243+
format.mustNotBe(FormatPAX)
236244
} else {
237245
paxHdrs[paxKey] = s
238246
}
239247
}
240248
}
241249
verifyNumeric := func(n int64, size int, paxKey string) {
242250
if !fitsInBase256(size, n) {
243-
format &^= formatGNU // No GNU
251+
format.mustNotBe(FormatGNU)
244252
}
245253
if !fitsInOctal(size, n) {
246-
format &^= formatUSTAR // No USTAR
254+
format.mustNotBe(FormatUSTAR)
247255
if paxKey == paxNone {
248-
format &^= formatPAX // No PAX
256+
format.mustNotBe(FormatPAX)
249257
} else {
250258
paxHdrs[paxKey] = strconv.FormatInt(n, 10)
251259
}
@@ -258,12 +266,12 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
258266
needsNano := ts.Nanosecond() != 0
259267
hasFieldUSTAR := paxKey == paxMtime
260268
if !fitsInBase256(size, ts.Unix()) || needsNano {
261-
format &^= formatGNU // No GNU
269+
format.mustNotBe(FormatGNU)
262270
}
263271
if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR {
264-
format &^= formatUSTAR // No USTAR
272+
format.mustNotBe(FormatUSTAR)
265273
if paxKey == paxNone {
266-
format &^= formatPAX // No PAX
274+
format.mustNotBe(FormatPAX)
267275
} else {
268276
paxHdrs[paxKey] = formatPAXTime(ts)
269277
}
@@ -289,34 +297,40 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
289297
verifyTime(h.ChangeTime, len(gnu.ChangeTime()), paxCtime)
290298

291299
if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
292-
return formatUnknown, nil
300+
return FormatUnknown, nil
293301
}
294302
if len(h.Xattrs) > 0 {
295303
for k, v := range h.Xattrs {
296304
paxHdrs[paxXattr+k] = v
297305
}
298-
format &= formatPAX // PAX only
306+
format.mayOnlyBe(FormatPAX)
299307
}
300308
for k, v := range paxHdrs {
301309
// Forbid empty values (which represent deletion) since usage of
302310
// them is nonsensical without global PAX record support.
303311
if !validPAXRecord(k, v) || v == "" {
304-
return formatUnknown, nil // Invalid PAX key
312+
return FormatUnknown, nil // Invalid PAX key
305313
}
306314
}
307315
if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
308316
if isHeaderOnlyType(h.Typeflag) {
309-
return formatUnknown, nil // Cannot have sparse data on header-only file
317+
return FormatUnknown, nil // Cannot have sparse data on header-only file
310318
}
311319
if !validateSparseEntries(h.SparseHoles, h.Size) {
312-
return formatUnknown, nil
320+
return FormatUnknown, nil
313321
}
314322
if h.Typeflag == TypeGNUSparse {
315-
format &= formatGNU // GNU only
323+
format.mayOnlyBe(FormatGNU)
316324
} else {
317-
format &^= formatGNU // No GNU
325+
format.mustNotBe(FormatGNU)
326+
}
327+
format.mustNotBe(FormatUSTAR)
328+
}
329+
if wantFormat := h.Format; wantFormat != FormatUnknown {
330+
if wantFormat.has(FormatPAX) {
331+
wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
318332
}
319-
format &^= formatUSTAR // No USTAR
333+
format.mayOnlyBe(wantFormat) // Set intersection of formats allowed and format wanted
320334
}
321335
return format, paxHdrs
322336
}

src/archive/tar/format.go

+86-29
Original file line numberDiff line numberDiff line change
@@ -4,38 +4,95 @@
44

55
package tar
66

7+
import "strings"
8+
9+
type Format int
10+
711
// Constants to identify various tar formats.
812
const (
9-
// The format is unknown.
10-
formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc...
13+
// Deliberately hide the meaning of constants from public API.
14+
_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...
15+
16+
// FormatUnknown indicates that the format is unknown.
17+
FormatUnknown
1118

1219
// The format of the original Unix V7 tar tool prior to standardization.
1320
formatV7
1421

15-
// The old and new GNU formats, which are incompatible with USTAR.
16-
// This does cover the old GNU sparse extension.
17-
// This does not cover the GNU sparse extensions using PAX headers,
18-
// versions 0.0, 0.1, and 1.0; these fall under the PAX format.
19-
formatGNU
22+
// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
23+
//
24+
// While this format is compatible with most tar readers,
25+
// the format has several limitations making it unsuitable for some usages.
26+
// Most notably, it cannot support sparse files, files larger than 8GiB,
27+
// filenames longer than 256 characters, and non-ASCII filenames.
28+
//
29+
// Reference:
30+
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
31+
FormatUSTAR
32+
33+
// FormatPAX represents the PAX header format defined in POSIX.1-2001.
34+
//
35+
// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
36+
// preceding the original header. This file contains a set of key-value
37+
// records, which are used to overcome USTAR's shortcomings.
38+
//
39+
// Some newer formats add their own extensions to PAX by defining their
40+
// own keys and assigning certain semantic meaning to the associated values.
41+
// For example, sparse file support in PAX is implemented using keys
42+
// defined by the GNU manual (e.g., "GNU.sparse.map").
43+
//
44+
// Reference:
45+
// http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
46+
FormatPAX
47+
48+
// FormatGNU represents the GNU header format.
49+
//
50+
// The GNU header format is older than the USTAR and PAX standards and
51+
// is not compatible with them. The GNU format supports
52+
// arbitrary file sizes, filenames of arbitrary encoding and length,
53+
// sparse files, and other features.
54+
//
55+
// It is recommended that PAX be chosen over GNU unless the target
56+
// application can only parse GNU formatted archives.
57+
//
58+
// Reference:
59+
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
60+
FormatGNU
2061

2162
// Schily's tar format, which is incompatible with USTAR.
2263
// This does not cover STAR extensions to the PAX format; these fall under
2364
// the PAX format.
2465
formatSTAR
2566

26-
// USTAR is the former standardization of tar defined in POSIX.1-1988.
27-
// This is incompatible with the GNU and STAR formats.
28-
formatUSTAR
29-
30-
// PAX is the latest standardization of tar defined in POSIX.1-2001.
31-
// This is an extension of USTAR and is "backwards compatible" with it.
32-
//
33-
// Some newer formats add their own extensions to PAX, such as GNU sparse
34-
// files and SCHILY extended attributes. Since they are backwards compatible
35-
// with PAX, they will be labelled as "PAX".
36-
formatPAX
67+
formatMax
3768
)
3869

70+
func (f Format) has(f2 Format) bool { return f&f2 != 0 }
71+
func (f *Format) mayBe(f2 Format) { *f |= f2 }
72+
func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
73+
func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
74+
75+
var formatNames = map[Format]string{
76+
formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
77+
}
78+
79+
func (f Format) String() string {
80+
var ss []string
81+
for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
82+
if f.has(f2) {
83+
ss = append(ss, formatNames[f2])
84+
}
85+
}
86+
switch len(ss) {
87+
case 0:
88+
return "<unknown>"
89+
case 1:
90+
return ss[0]
91+
default:
92+
return "(" + strings.Join(ss, " | ") + ")"
93+
}
94+
}
95+
3996
// Magics used to identify various formats.
4097
const (
4198
magicGNU, versionGNU = "ustar ", " \x00"
@@ -69,14 +126,14 @@ func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
69126

70127
// GetFormat checks that the block is a valid tar header based on the checksum.
71128
// It then attempts to guess the specific format based on magic values.
72-
// If the checksum fails, then formatUnknown is returned.
73-
func (b *block) GetFormat() (format int) {
129+
// If the checksum fails, then FormatUnknown is returned.
130+
func (b *block) GetFormat() Format {
74131
// Verify checksum.
75132
var p parser
76133
value := p.parseOctal(b.V7().Chksum())
77134
chksum1, chksum2 := b.ComputeChecksum()
78135
if p.err != nil || (value != chksum1 && value != chksum2) {
79-
return formatUnknown
136+
return FormatUnknown
80137
}
81138

82139
// Guess the magic values.
@@ -87,29 +144,29 @@ func (b *block) GetFormat() (format int) {
87144
case magic == magicUSTAR && trailer == trailerSTAR:
88145
return formatSTAR
89146
case magic == magicUSTAR:
90-
return formatUSTAR
147+
return FormatUSTAR | FormatPAX
91148
case magic == magicGNU && version == versionGNU:
92-
return formatGNU
149+
return FormatGNU
93150
default:
94151
return formatV7
95152
}
96153
}
97154

98155
// SetFormat writes the magic values necessary for the specified format
99156
// and then updates the checksum accordingly.
100-
func (b *block) SetFormat(format int) {
157+
func (b *block) SetFormat(format Format) {
101158
// Set the magic values.
102-
switch format {
103-
case formatV7:
159+
switch {
160+
case format.has(formatV7):
104161
// Do nothing.
105-
case formatGNU:
162+
case format.has(FormatGNU):
106163
copy(b.GNU().Magic(), magicGNU)
107164
copy(b.GNU().Version(), versionGNU)
108-
case formatSTAR:
165+
case format.has(formatSTAR):
109166
copy(b.STAR().Magic(), magicUSTAR)
110167
copy(b.STAR().Version(), versionUSTAR)
111168
copy(b.STAR().Trailer(), trailerSTAR)
112-
case formatUSTAR, formatPAX:
169+
case format.has(FormatUSTAR | FormatPAX):
113170
copy(b.USTAR().Magic(), magicUSTAR)
114171
copy(b.USTAR().Version(), versionUSTAR)
115172
default:

0 commit comments

Comments
 (0)