Skip to content

Commit

Permalink
Make static package archives deterministic
Browse files Browse the repository at this point in the history
I noticed that the Jest test that verifies the stability of these hashes
was flaking and determined that the output was, in fact, not
determinstic.

I beleve there are two possibile sources of nondeterminism in these
zips:

1. The order of files being globbed may be nondeterministic
2. Concurrency in archiver's stat queue causes nondeterminism

In the end, I believe most of the problem is likely caused by 2, but I
figured that we might as well address the theoretical glob order at the
same time.

More info:

archiverjs/node-archiver#383 (comment)
  • Loading branch information
lencioni committed Jul 26, 2024
1 parent ce37241 commit 0d88711
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 9 deletions.
70 changes: 64 additions & 6 deletions src/createStaticPackage.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { Writable } from 'stream';
import crypto from 'crypto';
import path from 'path';
import fs from 'fs';

import Archiver from 'archiver';

Expand All @@ -21,15 +23,56 @@ const IFRAME_CONTENT = `
</html>
`;

/**
* Gets all files in a directory and all of its subdirectories
*
* The returned value is sorted for deterministic output.
*
* @param {string} dir
* @returns {Array<string>}
*/
function getFilesRecursive(dir) {
const files = [];
const dirs = [dir];

while (dirs.length > 0) {
const currentDir = dirs.shift();
// Node 18 adds `recursive` option to `fs.readdirSync`, which would
// make this function simpler.
const dirents = fs.readdirSync(currentDir, { withFileTypes: true });

for (const dirent of dirents) {
const res = path.resolve(currentDir, dirent.name);

if (dirent.isDirectory()) {
// This is a directory, so we are going to add it to the list of directories to recurse into.
dirs.push(res);
files.push(res);
} else {
files.push(res);
}
}
}

// Sort the files for deterministic output. This is important so that
// the archive hash is the same.
files.sort((a, b) => a.localeCompare(b));
return files;
}

export default function createStaticPackage({ tmpdir, publicFolders }) {
return new Promise((resolve, reject) => {
const archive = new Archiver('zip');
const archive = new Archiver('zip', {
// Concurrency in the stat queue leads to non-deterministic output.
// https:/archiverjs/node-archiver/issues/383#issuecomment-2253139948
statConcurrency: 1,
});

const stream = new Writable();
const data = [];

// eslint-disable-next-line no-underscore-dangle
stream._write = (chunk, enc, done) => {
stream._write = (chunk, _enc, done) => {
data.push(...chunk);
done();
};
Expand All @@ -43,19 +86,34 @@ export default function createStaticPackage({ tmpdir, publicFolders }) {
validateArchive(archive.pointer(), entries);
const buffer = Buffer.from(data);
const hash = crypto.createHash('md5').update(buffer).digest('hex');

resolve({ buffer, hash });
});
archive.pipe(stream);

archive.directory(tmpdir, false, { date: FILE_CREATION_DATE });
// We can't use archive.directory() here because it is not deterministic.
// https:/archiverjs/node-archiver/issues/383#issuecomment-2252938075
const tmpdirFiles = getFilesRecursive(tmpdir);
for (const file of tmpdirFiles) {
archive.file(file, {
name: file.slice(tmpdir.length + 1),
prefix: '',
date: FILE_CREATION_DATE,
});
}

publicFolders.forEach((folder) => {
if (folder === tmpdir) {
// ignore, since this is handled separately
} else if (folder.startsWith(process.cwd())) {
archive.directory(folder.slice(process.cwd().length + 1), false, {
date: FILE_CREATION_DATE,
});
const folderFiles = getFilesRecursive(folder);
for (const file of folderFiles) {
archive.file(file, {
name: file.slice(folder.length + 1),
prefix: '',
date: FILE_CREATION_DATE,
});
}
}
});

Expand Down
10 changes: 7 additions & 3 deletions test/createStaticPackage-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ beforeEach(() => {
'test/integrations/assets', // relative
];
tmpdir = path.resolve(__dirname, 'assets');
subject = () =>
createStaticPackage({
subject = () => createStaticPackage({
tmpdir,
publicFolders,
});
Expand All @@ -28,7 +27,12 @@ it('creates a package', async () => {
});

it('creates deterministic hashes when content has not changed', async () => {
expect((await subject()).hash).toEqual((await subject()).hash);
const first = (await subject()).hash;
const second = (await subject()).hash;

expect(first).not.toBeUndefined();
expect(second).not.toBeUndefined();
expect(first).toEqual(second);
});

it('picks out the right files', async () => {
Expand Down

0 comments on commit 0d88711

Please sign in to comment.