From 0d887114c83378edb93e2bc7e6898d299518a4d3 Mon Sep 17 00:00:00 2001 From: Joe Lencioni Date: Fri, 26 Jul 2024 11:59:29 -0500 Subject: [PATCH] Make static package archives deterministic I noticed that the Jest test that verifies the stability of these hashes was flaking and determined that the output was, in fact, not determinstic. I beleve there are two possibile sources of nondeterminism in these zips: 1. The order of files being globbed may be nondeterministic 2. Concurrency in archiver's stat queue causes nondeterminism In the end, I believe most of the problem is likely caused by 2, but I figured that we might as well address the theoretical glob order at the same time. More info: https://github.com/archiverjs/node-archiver/issues/383#issuecomment-2252938075 --- src/createStaticPackage.js | 70 +++++++++++++++++++++++++++++--- test/createStaticPackage-test.js | 10 +++-- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/createStaticPackage.js b/src/createStaticPackage.js index 1c3b916..90451ee 100644 --- a/src/createStaticPackage.js +++ b/src/createStaticPackage.js @@ -1,5 +1,7 @@ import { Writable } from 'stream'; import crypto from 'crypto'; +import path from 'path'; +import fs from 'fs'; import Archiver from 'archiver'; @@ -21,15 +23,56 @@ const IFRAME_CONTENT = ` `; +/** + * Gets all files in a directory and all of its subdirectories + * + * The returned value is sorted for deterministic output. + * + * @param {string} dir + * @returns {Array} + */ +function getFilesRecursive(dir) { + const files = []; + const dirs = [dir]; + + while (dirs.length > 0) { + const currentDir = dirs.shift(); + // Node 18 adds `recursive` option to `fs.readdirSync`, which would + // make this function simpler. + const dirents = fs.readdirSync(currentDir, { withFileTypes: true }); + + for (const dirent of dirents) { + const res = path.resolve(currentDir, dirent.name); + + if (dirent.isDirectory()) { + // This is a directory, so we are going to add it to the list of directories to recurse into. + dirs.push(res); + files.push(res); + } else { + files.push(res); + } + } + } + + // Sort the files for deterministic output. This is important so that + // the archive hash is the same. + files.sort((a, b) => a.localeCompare(b)); + return files; +} + export default function createStaticPackage({ tmpdir, publicFolders }) { return new Promise((resolve, reject) => { - const archive = new Archiver('zip'); + const archive = new Archiver('zip', { + // Concurrency in the stat queue leads to non-deterministic output. + // https://github.com/archiverjs/node-archiver/issues/383#issuecomment-2253139948 + statConcurrency: 1, + }); const stream = new Writable(); const data = []; // eslint-disable-next-line no-underscore-dangle - stream._write = (chunk, enc, done) => { + stream._write = (chunk, _enc, done) => { data.push(...chunk); done(); }; @@ -43,19 +86,34 @@ export default function createStaticPackage({ tmpdir, publicFolders }) { validateArchive(archive.pointer(), entries); const buffer = Buffer.from(data); const hash = crypto.createHash('md5').update(buffer).digest('hex'); + resolve({ buffer, hash }); }); archive.pipe(stream); - archive.directory(tmpdir, false, { date: FILE_CREATION_DATE }); + // We can't use archive.directory() here because it is not deterministic. + // https://github.com/archiverjs/node-archiver/issues/383#issuecomment-2252938075 + const tmpdirFiles = getFilesRecursive(tmpdir); + for (const file of tmpdirFiles) { + archive.file(file, { + name: file.slice(tmpdir.length + 1), + prefix: '', + date: FILE_CREATION_DATE, + }); + } publicFolders.forEach((folder) => { if (folder === tmpdir) { // ignore, since this is handled separately } else if (folder.startsWith(process.cwd())) { - archive.directory(folder.slice(process.cwd().length + 1), false, { - date: FILE_CREATION_DATE, - }); + const folderFiles = getFilesRecursive(folder); + for (const file of folderFiles) { + archive.file(file, { + name: file.slice(folder.length + 1), + prefix: '', + date: FILE_CREATION_DATE, + }); + } } }); diff --git a/test/createStaticPackage-test.js b/test/createStaticPackage-test.js index 47e6e32..4ac0f41 100644 --- a/test/createStaticPackage-test.js +++ b/test/createStaticPackage-test.js @@ -14,8 +14,7 @@ beforeEach(() => { 'test/integrations/assets', // relative ]; tmpdir = path.resolve(__dirname, 'assets'); - subject = () => - createStaticPackage({ + subject = () => createStaticPackage({ tmpdir, publicFolders, }); @@ -28,7 +27,12 @@ it('creates a package', async () => { }); it('creates deterministic hashes when content has not changed', async () => { - expect((await subject()).hash).toEqual((await subject()).hash); + const first = (await subject()).hash; + const second = (await subject()).hash; + + expect(first).not.toBeUndefined(); + expect(second).not.toBeUndefined(); + expect(first).toEqual(second); }); it('picks out the right files', async () => {