-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make archives deterministic #292
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,7 +59,7 @@ | |
"dependencies": { | ||
"@babel/preset-react": "^7.12.1", | ||
"abort-controller": "^3.0.0", | ||
"archiver": "^5.0.2", | ||
"archiver": "^7.0.1", | ||
"async-retry": "^1.3.1", | ||
"babel-plugin-dynamic-import-node": "^2.1.0", | ||
"commander": "^2.15.1", | ||
|
@@ -108,5 +108,6 @@ | |
"peerDependencies": { | ||
"babel-loader": "^7.0.0 || ^8.0.0 || ^9.0.0", | ||
"webpack": "^3.5.5 || ^4.0.0 || ^5.0.0" | ||
} | ||
}, | ||
"packageManager": "[email protected]" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import { Writable } from 'stream'; | ||
import crypto from 'crypto'; | ||
import path from 'path'; | ||
import fs from 'fs'; | ||
|
||
import Archiver from 'archiver'; | ||
|
||
|
@@ -21,15 +23,56 @@ const IFRAME_CONTENT = ` | |
</html> | ||
`; | ||
|
||
/** | ||
* Gets all files in a directory and all of its subdirectories | ||
* | ||
* The returned value is sorted for deterministic output. | ||
* | ||
* @param {string} dir | ||
* @returns {Array<string>} | ||
*/ | ||
function getFilesRecursive(dir) { | ||
const files = []; | ||
const dirs = [dir]; | ||
|
||
while (dirs.length > 0) { | ||
const currentDir = dirs.shift(); | ||
// Node 18 adds `recursive` option to `fs.readdirSync`, which would | ||
// make this function simpler. | ||
const dirents = fs.readdirSync(currentDir, { withFileTypes: true }); | ||
|
||
for (const dirent of dirents) { | ||
const res = path.resolve(currentDir, dirent.name); | ||
|
||
if (dirent.isDirectory()) { | ||
// This is a directory, so we are going to add it to the list of directories to recurse into. | ||
dirs.push(res); | ||
files.push(res); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I originally didn't include this line, but we have an integration test that asserts on the full contents of the archive, and the directories were previously present as distinct entries. Adding this line fixed that test. I'm not sure how important this is though, but I wanted to preserve existing functionality to be safe. |
||
} else { | ||
files.push(res); | ||
} | ||
} | ||
} | ||
|
||
// Sort the files for deterministic output. This is important so that | ||
// the archive hash is the same. | ||
files.sort((a, b) => a.localeCompare(b)); | ||
return files; | ||
} | ||
|
||
export default function createStaticPackage({ tmpdir, publicFolders }) { | ||
return new Promise((resolve, reject) => { | ||
const archive = new Archiver('zip'); | ||
const archive = new Archiver('zip', { | ||
// Concurrency in the stat queue leads to non-deterministic output. | ||
// https:/archiverjs/node-archiver/issues/383#issuecomment-2253139948 | ||
statConcurrency: 1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I can tell, this is the most important part of the change. The rest of the code changes here address possible ordering inconsistencies in the globbing, which at this point are only theoretical. |
||
}); | ||
|
||
const stream = new Writable(); | ||
const data = []; | ||
|
||
// eslint-disable-next-line no-underscore-dangle | ||
stream._write = (chunk, enc, done) => { | ||
stream._write = (chunk, _enc, done) => { | ||
data.push(...chunk); | ||
done(); | ||
}; | ||
|
@@ -43,19 +86,34 @@ export default function createStaticPackage({ tmpdir, publicFolders }) { | |
validateArchive(archive.pointer(), entries); | ||
const buffer = Buffer.from(data); | ||
const hash = crypto.createHash('md5').update(buffer).digest('hex'); | ||
|
||
resolve({ buffer, hash }); | ||
}); | ||
archive.pipe(stream); | ||
|
||
archive.directory(tmpdir, false, { date: FILE_CREATION_DATE }); | ||
// We can't use archive.directory() here because it is not deterministic. | ||
// https:/archiverjs/node-archiver/issues/383#issuecomment-2252938075 | ||
const tmpdirFiles = getFilesRecursive(tmpdir); | ||
for (const file of tmpdirFiles) { | ||
archive.file(file, { | ||
name: file.slice(tmpdir.length + 1), | ||
prefix: '', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
date: FILE_CREATION_DATE, | ||
}); | ||
} | ||
|
||
publicFolders.forEach((folder) => { | ||
if (folder === tmpdir) { | ||
// ignore, since this is handled separately | ||
} else if (folder.startsWith(process.cwd())) { | ||
archive.directory(folder.slice(process.cwd().length + 1), false, { | ||
date: FILE_CREATION_DATE, | ||
}); | ||
const folderFiles = getFilesRecursive(folder); | ||
for (const file of folderFiles) { | ||
archive.file(file, { | ||
name: file.slice(folder.length + 1), | ||
prefix: '', | ||
date: FILE_CREATION_DATE, | ||
}); | ||
} | ||
} | ||
}); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,11 @@ const FILE_CREATION_DATE = new Date('Fri Feb 08 2019 13:31:55 GMT+0100 (CET)'); | |
|
||
function makePackage({ paths, publicFolders }) { | ||
return new Promise((resolve, reject) => { | ||
const archive = new Archiver('zip'); | ||
const archive = new Archiver('zip', { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found a couple more places we use Archiver, and added statConcurrency 1 to them, which should help more things be deterministic. We should probably add tests for this if we don't have any. Also, I did not bother to do the glob stuff here. We could leave it as is, or remove the glob stuff I did elsewhere, or do the same glob stuff everywhere. @trotzig what is your preference? |
||
// Concurrency in the stat queue leads to non-deterministic output. | ||
// https:/archiverjs/node-archiver/issues/383#issuecomment-2253139948 | ||
statConcurrency: 1, | ||
}); | ||
|
||
const stream = new Writable(); | ||
const data = []; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though I called this "recursive", I decided to use iteration instead of recursion since I think that will be a little more efficient.