Skip to content

Commit 7fc5cda

Browse files
authored
docs(archive): improve docs and general code cleanup (#3593)
1 parent eaaabe4 commit 7fc5cda

File tree

4 files changed

+161
-77
lines changed

4 files changed

+161
-77
lines changed

archive/_common.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,18 @@ export interface TarInfo {
1515

1616
export interface TarOptions extends TarInfo {
1717
/**
18-
* append file
18+
* Filepath of the file to append to the archive
1919
*/
2020
filePath?: string;
2121

2222
/**
23-
* append any arbitrary content
23+
* A Reader of any arbitrary content to append to the archive
2424
*/
2525
reader?: Reader;
2626

2727
/**
28-
* size of the content to be appended
28+
* Size of the content to be appended. This is only required
29+
* when passing a reader to the archive.
2930
*/
3031
contentSize?: number;
3132
}
@@ -46,7 +47,7 @@ export enum FileTypes {
4647
"contiguous-file" = 7,
4748
}
4849

49-
export const recordSize = 512;
50+
export const HEADER_LENGTH = 512;
5051

5152
/*
5253
struct posix_header { // byte offset

archive/mod.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
* Copyright (c) 2011 T. Jameson Little
1010
* Copyright (c) 2019 Jun Kato
11-
* Copyright (c) 2018-2022 the Deno authors
11+
* Copyright (c) 2018-2023 the Deno authors
1212
*
1313
* Permission is hereby granted, free of charge, to any person obtaining a copy
1414
* of this software and associated documentation files (the "Software"), to deal
@@ -30,8 +30,21 @@
3030
*/
3131

3232
/**
33-
* Provides a `Tar` and `Untar` classes for compressing and decompressing
34-
* arbitrary data.
33+
* Tar is a utility for collecting multiple files (or any arbitrary data) into one
34+
* archive file, while untar is the inverse utility to extract the files from an
35+
* archive. Files are not compressed, only collected into the archive.
36+
*
37+
* ### File format and limitations
38+
*
39+
* The ustar file format is used for creating the archive file.
40+
* While this format is compatible with most tar readers,
41+
* the format has several limitations, including:
42+
* * Files must be smaller than 8GiB
43+
* * Filenames (including path) must be shorter than 256 characters
44+
* * Filenames (including path) cannot contain non-ASCII characters
45+
* * Sparse files are not supported
46+
* In addition to the ustar format, untar may also read from the pax format.
47+
* However, additional features, such as longer filenames, may be ignored.
3548
*
3649
* @module
3750
*/

archive/tar.ts

Lines changed: 94 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,4 @@
11
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
2-
3-
import {
4-
FileTypes,
5-
type TarInfo,
6-
type TarMeta,
7-
type TarOptions,
8-
ustarStructure,
9-
} from "./_common.ts";
10-
import type { Reader } from "../types.d.ts";
11-
12-
export { type TarInfo, type TarMeta, type TarOptions };
13-
142
/*!
153
* Ported and modified from: https://github.com/beatgammit/tar-js and
164
* licensed as:
@@ -19,7 +7,7 @@ export { type TarInfo, type TarMeta, type TarOptions };
197
*
208
* Copyright (c) 2011 T. Jameson Little
219
* Copyright (c) 2019 Jun Kato
22-
* Copyright (c) 2018-2022 the Deno authors
10+
* Copyright (c) 2018-2023 the Deno authors
2311
*
2412
* Permission is hereby granted, free of charge, to any person obtaining a copy
2513
* of this software and associated documentation files (the "Software"), to deal
@@ -40,12 +28,22 @@ export { type TarInfo, type TarMeta, type TarOptions };
4028
* THE SOFTWARE.
4129
*/
4230

31+
import {
32+
FileTypes,
33+
type TarInfo,
34+
type TarMeta,
35+
type TarOptions,
36+
ustarStructure,
37+
} from "./_common.ts";
38+
import type { Reader } from "../types.d.ts";
4339
import { MultiReader } from "../io/multi_reader.ts";
4440
import { Buffer } from "../io/buffer.ts";
4541
import { assert } from "../assert/assert.ts";
46-
import { recordSize } from "./_common.ts";
42+
import { HEADER_LENGTH } from "./_common.ts";
43+
44+
export { type TarInfo, type TarMeta, type TarOptions };
4745

48-
const ustar = "ustar\u000000";
46+
const USTAR_MAGIC_HEADER = "ustar\u000000";
4947

5048
/**
5149
* Simple file reader
@@ -86,8 +84,8 @@ function pad(num: number, bytes: number, base = 8): string {
8684
* Create header for a file in a tar archive
8785
*/
8886
function formatHeader(data: TarData): Uint8Array {
89-
const encoder = new TextEncoder(),
90-
buffer = clean(512);
87+
const encoder = new TextEncoder();
88+
const buffer = clean(HEADER_LENGTH);
9189
let offset = 0;
9290
ustarStructure.forEach(function (value) {
9391
const entry = encoder.encode(data[value.field as keyof TarData] || "");
@@ -124,7 +122,30 @@ export interface TarDataWithSource extends TarData {
124122
}
125123

126124
/**
127-
* A class to create a tar archive
125+
* ### Overview
126+
* A class to create a tar archive. Tar archives allow for storing multiple files in a
127+
* single file (called an archive, or sometimes a tarball). These archives typically
128+
* have the '.tar' extension.
129+
*
130+
* ### Usage
131+
* The workflow is to create a Tar instance, append files to it, and then write the
132+
* tar archive to the filesystem (or other output stream). See the worked example
133+
* below for details.
134+
*
135+
* ### Compression
136+
* Tar archives are not compressed. If you want to compress the archive,
137+
* you may compress the tar archive after creation, but this capability is not provided
138+
* here.
139+
*
140+
* ### File format and limitations
141+
*
142+
* The ustar file format is used for creating the archive file.
143+
* While this format is compatible with most tar readers,
144+
* the format has several limitations, including:
145+
* * Files must be smaller than 8GiB
146+
* * Filenames (including path) must be shorter than 256 characters
147+
* * Filenames (including path) cannot contain non-ASCII characters
148+
* * Sparse files are not supported
128149
*
129150
* @example
130151
* ```ts
@@ -133,17 +154,21 @@ export interface TarDataWithSource extends TarData {
133154
* import { copy } from "https://deno.land/std@$STD_VERSION/streams/copy.ts";
134155
*
135156
* const tar = new Tar();
136-
* const content = new TextEncoder().encode("Deno.land");
157+
*
158+
* // Now that we've created our tar, let's add some files to it:
159+
*
160+
* const content = new TextEncoder().encode("Some arbitrary content");
137161
* await tar.append("deno.txt", {
138162
* reader: new Buffer(content),
139163
* contentSize: content.byteLength,
140164
* });
141165
*
142-
* // Or specifying a filePath.
143-
* await tar.append("land.txt", {
144-
* filePath: "./land.txt",
166+
* // This file is sourced from the filesystem (and renamed in the archive)
167+
* await tar.append("filename_in_archive.txt", {
168+
* filePath: "./filename_on_filesystem.txt",
145169
* });
146170
*
171+
* // Now let's write the tar (with its two files) to the filesystem
147172
* // use tar.getReader() to read the contents.
148173
*
149174
* const writer = await Deno.open("./out.tar", { write: true, create: true });
@@ -159,16 +184,31 @@ export class Tar {
159184
}
160185

161186
/**
162-
* Append a file to this tar archive
163-
* @param fn file name
187+
* Append a file or reader of arbitrary content to this tar archive. Appending
188+
* a directory adds only the directory entry itself to the archive, not
189+
* its contents. To add a directory and its contents, recursively append the
190+
* directory's contents. Directories and subdirectories will be created automatically
191+
* in the archive as required.
192+
*
193+
* @param filenameInArchive file name of the content in the archive
164194
* e.g., test.txt; use slash for directory separators
165-
* @param opts options
195+
* @param source details of the source of the content including the
196+
* reference to the content itself and potentially any
197+
* related metadata.
166198
*/
167-
async append(fn: string, opts: TarOptions) {
168-
if (typeof fn !== "string") {
199+
async append(filenameInArchive: string, source: TarOptions) {
200+
if (typeof filenameInArchive !== "string") {
169201
throw new Error("file name not specified");
170202
}
171-
let fileName = fn;
203+
let fileName = filenameInArchive;
204+
205+
/**
206+
* The ustar format limits the length of file names. Specifically:
207+
* 1. File names can contain at most 255 bytes.
208+
* 2. File names longer than 100 bytes must be split at a directory separator into two parts,
209+
* the first being at most 155 bytes long. So, in most cases file names must be a bit shorter
210+
* than 255 bytes.
211+
*/
172212
// separate file name into two parts if needed
173213
let fileNamePrefix: string | undefined;
174214
if (fileName.length > 100) {
@@ -195,41 +235,42 @@ export class Tar {
195235
}
196236
}
197237

198-
opts = opts || {};
238+
source = source || {};
199239

200240
// set meta data
201241
let info: Deno.FileInfo | undefined;
202-
if (opts.filePath) {
203-
info = await Deno.stat(opts.filePath);
242+
if (source.filePath) {
243+
info = await Deno.stat(source.filePath);
204244
if (info.isDirectory) {
205245
info.size = 0;
206-
opts.reader = new Buffer();
246+
source.reader = new Buffer();
207247
}
208248
}
209249

210-
const mode = opts.fileMode || (info && info.mode) ||
211-
parseInt("777", 8) & 0xfff,
212-
mtime = Math.floor(
213-
opts.mtime ?? (info?.mtime ?? new Date()).valueOf() / 1000,
214-
),
215-
uid = opts.uid || 0,
216-
gid = opts.gid || 0;
217-
if (typeof opts.owner === "string" && opts.owner.length >= 32) {
250+
const mode = source.fileMode || (info && info.mode) ||
251+
parseInt("777", 8) & 0xfff /* 511 */;
252+
const mtime = Math.floor(
253+
source.mtime ?? (info?.mtime ?? new Date()).valueOf() / 1000,
254+
);
255+
const uid = source.uid || 0;
256+
const gid = source.gid || 0;
257+
258+
if (typeof source.owner === "string" && source.owner.length >= 32) {
218259
throw new Error(
219260
"ustar format does not allow owner name length >= 32 bytes",
220261
);
221262
}
222-
if (typeof opts.group === "string" && opts.group.length >= 32) {
263+
if (typeof source.group === "string" && source.group.length >= 32) {
223264
throw new Error(
224265
"ustar format does not allow group name length >= 32 bytes",
225266
);
226267
}
227268

228-
const fileSize = info?.size ?? opts.contentSize;
269+
const fileSize = info?.size ?? source.contentSize;
229270
assert(fileSize !== undefined, "fileSize must be set");
230271

231-
const type = opts.type
232-
? FileTypes[opts.type as keyof typeof FileTypes]
272+
const type = source.type
273+
? FileTypes[source.type as keyof typeof FileTypes]
233274
: (info?.isDirectory ? FileTypes.directory : FileTypes.file);
234275
const tarData: TarDataWithSource = {
235276
fileName,
@@ -241,11 +282,11 @@ export class Tar {
241282
mtime: pad(mtime, 11),
242283
checksum: " ",
243284
type: type.toString(),
244-
ustar,
245-
owner: opts.owner || "",
246-
group: opts.group || "",
247-
filePath: opts.filePath,
248-
reader: opts.reader,
285+
ustar: USTAR_MAGIC_HEADER,
286+
owner: source.owner || "",
287+
group: source.group || "",
288+
filePath: source.filePath,
289+
reader: source.reader,
249290
};
250291

251292
// calculate the checksum
@@ -264,7 +305,7 @@ export class Tar {
264305
}
265306

266307
/**
267-
* Get a Reader instance for this tar data
308+
* Get a Reader instance for this tar archive.
268309
*/
269310
getReader(): Reader {
270311
const readers: Reader[] = [];
@@ -284,15 +325,15 @@ export class Tar {
284325
readers.push(
285326
new Buffer(
286327
clean(
287-
recordSize -
288-
(parseInt(tarData.fileSize, 8) % recordSize || recordSize),
328+
HEADER_LENGTH -
329+
(parseInt(tarData.fileSize, 8) % HEADER_LENGTH || HEADER_LENGTH),
289330
),
290331
),
291332
);
292333
});
293334

294335
// append 2 empty records
295-
readers.push(new Buffer(clean(recordSize * 2)));
336+
readers.push(new Buffer(clean(HEADER_LENGTH * 2)));
296337
return new MultiReader(readers);
297338
}
298339
}

0 commit comments

Comments
 (0)