From 11885fa35961bb7261c293c64a2df5686f364392 Mon Sep 17 00:00:00 2001
From: Alan Shaw
Date: Thu, 23 Aug 2018 12:15:50 +0100
Subject: [PATCH 1/2] fix: build & export interop with go-ipfs for small file raw leaves

For files smaller than the max chunk size imported with raw leaves enabled,
go-ipfs creates a single node that is a raw buffer. Prior to this PR, js-ipfs
created a unixfs file node whose data was the raw buffer. This resulted in
different hashes for the same file.

This PR changes the builder to do the same thing as go-ipfs and adds a
resolver to the exporter that allows the exporter to export a node that is a
single raw buffer (so that you can `ipfs cat [CID w codec raw]` as you can in
go-ipfs).

License: MIT
Signed-off-by: Alan Shaw
---
 src/builder/reduce.js                   | 52 +++-------------------
 src/exporter/clean-multihash.js         |  7 ---
 src/exporter/dir-flat.js                |  4 +-
 src/exporter/dir-hamt-sharded.js        |  5 +--
 src/exporter/extract-data-from-block.js | 23 ++++++++++
 src/exporter/file.js                    | 27 ++----------
 src/exporter/object.js                  |  2 +-
 src/exporter/raw.js                     | 57 +++++++++++++++++++++++++
 src/exporter/resolve.js                 | 25 +++++++----
 test/builder-dir-sharding.js            |  2 +-
 test/exporter.js                        | 33 +++++++++++++-
 test/importer.js                        | 21 ++++++++-
 12 files changed, 164 insertions(+), 94 deletions(-)
 delete mode 100644 src/exporter/clean-multihash.js
 create mode 100644 src/exporter/extract-data-from-block.js
 create mode 100644 src/exporter/raw.js

diff --git a/src/builder/reduce.js b/src/builder/reduce.js
index 2ba62bc4..483c9fd2 100644
--- a/src/builder/reduce.js
+++ b/src/builder/reduce.js
@@ -13,51 +13,13 @@ module.exports = function reduce (file, ipld, options) {
     if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
       const leaf = leaves[0]
 
-      if (options.leafType === 'file' && !options.rawLeaves) {
-        return callback(null, {
-          path: file.path,
-          multihash: leaf.multihash,
-          size: leaf.size,
-          leafSize: leaf.leafSize,
-          name: leaf.name
-        })
-      }
-
-      // we're using raw leaf nodes so we convert the node into a UnixFS `file` node.
-      return waterfall([
-        (cb) => ipld.get(leaf.cid, cb),
-        (result, cb) => {
-          // If result.value is a buffer, this is a raw leaf otherwise it's a dag-pb node
-          const data = Buffer.isBuffer(result.value) ?
result.value : result.value.data - const fileNode = new UnixFS('file', data) - - DAGNode.create(fileNode.marshal(), [], options.hashAlg, (error, node) => { - cb(error, { DAGNode: node, fileNode: fileNode }) - }) - }, - (result, cb) => { - if (options.onlyHash) { - return cb(null, result) - } - - let cid = new CID(result.DAGNode.multihash) - - if (options.cidVersion === 1) { - cid = cid.toV1() - } - - ipld.put(result.DAGNode, { cid }, (error) => cb(error, result)) - }, - (result, cb) => { - cb(null, { - path: file.path, - multihash: result.DAGNode.multihash, - size: result.DAGNode.size, - leafSize: result.fileNode.fileSize(), - name: leaf.name - }) - } - ], callback) + return callback(null, { + path: file.path, + multihash: leaf.multihash, + size: leaf.size, + leafSize: leaf.leafSize, + name: leaf.name + }) } // create a parent node and add all the leaves diff --git a/src/exporter/clean-multihash.js b/src/exporter/clean-multihash.js deleted file mode 100644 index 6263f8de..00000000 --- a/src/exporter/clean-multihash.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -const CID = require('cids') - -module.exports = (multihash) => { - return new CID(multihash).toBaseEncodedString() -} diff --git a/src/exporter/dir-flat.js b/src/exporter/dir-flat.js index 9a75f6fa..b6c5f0b1 100644 --- a/src/exporter/dir-flat.js +++ b/src/exporter/dir-flat.js @@ -6,14 +6,14 @@ const cat = require('pull-cat') // Logic to export a unixfs directory. module.exports = dirExporter -function dirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) { +function dirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) { const accepts = pathRest[0] const dir = { name: name, depth: depth, path: path, - hash: node.multihash, + hash: cid, size: node.size, type: 'dir' } diff --git a/src/exporter/dir-hamt-sharded.js b/src/exporter/dir-hamt-sharded.js index 6569d2a4..1a091172 100644 --- a/src/exporter/dir-hamt-sharded.js +++ b/src/exporter/dir-hamt-sharded.js @@ -2,19 +2,18 @@ const pull = require('pull-stream') const cat = require('pull-cat') -const cleanHash = require('./clean-multihash') // Logic to export a unixfs directory. 
module.exports = shardedDirExporter -function shardedDirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) { +function shardedDirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) { let dir if (!parent || (parent.path !== path)) { dir = { name: name, depth: depth, path: path, - hash: cleanHash(node.multihash), + hash: cid, size: node.size, type: 'dir' } diff --git a/src/exporter/extract-data-from-block.js b/src/exporter/extract-data-from-block.js new file mode 100644 index 00000000..29b32d70 --- /dev/null +++ b/src/exporter/extract-data-from-block.js @@ -0,0 +1,23 @@ +'use strict' + +module.exports = function extractDataFromBlock (block, streamPosition, begin, end) { + const blockLength = block.length + + if (begin >= streamPosition + blockLength) { + // If begin is after the start of the block, return an empty block + // This can happen when internal nodes contain data + return Buffer.alloc(0) + } + + if (end - streamPosition < blockLength) { + // If the end byte is in the current block, truncate the block to the end byte + block = block.slice(0, end - streamPosition) + } + + if (begin > streamPosition && begin < (streamPosition + blockLength)) { + // If the start byte is in the current block, skip to the start byte + block = block.slice(begin - streamPosition) + } + + return block +} diff --git a/src/exporter/file.js b/src/exporter/file.js index 71418c88..6310d9fe 100644 --- a/src/exporter/file.js +++ b/src/exporter/file.js @@ -5,9 +5,10 @@ const UnixFS = require('ipfs-unixfs') const CID = require('cids') const pull = require('pull-stream') const paramap = require('pull-paramap') +const extractDataFromBlock = require('./extract-data-from-block') // Logic to export a single (possibly chunked) unixfs file. 
-module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => { +module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => { const accepts = pathRest[0] if (accepts !== undefined && accepts !== path) { @@ -48,7 +49,7 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth, content: content, name: name, path: path, - hash: node.multihash, + hash: cid, size: fileSize, type: 'file' }]) @@ -149,25 +150,3 @@ function streamBytes (dag, node, fileSize, offset, length) { pull.filter(Boolean) ) } - -function extractDataFromBlock (block, streamPosition, begin, end) { - const blockLength = block.length - - if (begin >= streamPosition + blockLength) { - // If begin is after the start of the block, return an empty block - // This can happen when internal nodes contain data - return Buffer.alloc(0) - } - - if (end - streamPosition < blockLength) { - // If the end byte is in the current block, truncate the block to the end byte - block = block.slice(0, end - streamPosition) - } - - if (begin > streamPosition && begin < (streamPosition + blockLength)) { - // If the start byte is in the current block, skip to the start byte - block = block.slice(begin - streamPosition) - } - - return block -} diff --git a/src/exporter/object.js b/src/exporter/object.js index f6383eab..1d60f06f 100644 --- a/src/exporter/object.js +++ b/src/exporter/object.js @@ -3,7 +3,7 @@ const CID = require('cids') const pull = require('pull-stream') -module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => { +module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) => { let newNode if (pathRest.length) { const pathElem = pathRest[0] diff --git a/src/exporter/raw.js b/src/exporter/raw.js new file mode 100644 index 00000000..501b81e8 --- /dev/null +++ b/src/exporter/raw.js @@ -0,0 +1,57 @@ +'use strict' + +const pull = require('pull-stream') +const extractDataFromBlock = require('./extract-data-from-block') + +// Logic to export a single raw block +module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => { + const accepts = pathRest[0] + + if (accepts !== undefined && accepts !== path) { + return pull.empty() + } + + size = size || node.length + + if (offset < 0) { + return pull.error(new Error('Offset must be greater than 0')) + } + + if (offset > size) { + return pull.error(new Error('Offset must be less than the file size')) + } + + if (length < 0) { + return pull.error(new Error('Length must be greater than or equal to 0')) + } + + if (length === 0) { + return pull.once({ + depth, + content: pull.once(Buffer.alloc(0)), + hash: cid, + name, + path, + size, + type: 'raw' + }) + } + + if (!offset) { + offset = 0 + } + + if (!length || (offset + length > size)) { + length = size - offset + } + + return pull.once({ + depth, + content: pull.once(extractDataFromBlock(node, 0, offset, offset + length)), + hash: cid, + name, + path, + size, + type: 'raw' + }) +} diff --git a/src/exporter/resolve.js b/src/exporter/resolve.js index 3f40ee06..587e75d2 100644 --- a/src/exporter/resolve.js +++ b/src/exporter/resolve.js @@ -9,7 +9,8 @@ const resolvers = { directory: require('./dir-flat'), 'hamt-sharded-directory': require('./dir-hamt-sharded'), file: require('./file'), - object: require('./object') + object: require('./object'), + raw: require('./raw') } module.exports = Object.assign({ @@ -31,15 +32,19 @@ function createResolver (dag, 
options, depth, parent) { if ((typeof item.depth) !== 'number') { return pull.error(new Error('no depth')) } + if (item.object) { - return cb(null, resolveItem(item.object, item, options.offset, options.length)) + return cb(null, resolveItem(null, item.object, item, options.offset, options.length)) } - dag.get(new CID(item.multihash), (err, node) => { + + const cid = new CID(item.multihash) + + dag.get(cid, (err, node) => { if (err) { return cb(err) } // const name = item.fromPathRest ? item.name : item.path - cb(null, resolveItem(node.value, item, options.offset, options.length)) + cb(null, resolveItem(cid, node.value, item, options.offset, options.length)) }) }), pull.flatten(), @@ -47,23 +52,25 @@ function createResolver (dag, options, depth, parent) { pull.filter((node) => node.depth <= options.maxDepth) ) - function resolveItem (node, item, offset, length) { - return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length) + function resolveItem (cid, node, item, offset, length) { + return resolve(cid, node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length) } - function resolve (node, name, path, pathRest, size, dag, parentNode, depth, offset, length) { + function resolve (cid, node, name, path, pathRest, size, dag, parentNode, depth, offset, length) { const type = typeOf(node) const nodeResolver = resolvers[type] if (!nodeResolver) { return pull.error(new Error('Unkown node type ' + type)) } const resolveDeep = createResolver(dag, options, depth, node) - return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length) + return nodeResolver(cid, node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length) } } function typeOf (node) { - if (Buffer.isBuffer(node.data)) { + if (Buffer.isBuffer(node)) { + return 'raw' + } else if (Buffer.isBuffer(node.data)) { return UnixFS.unmarshal(node.data).type } else { return 'object' diff --git a/test/builder-dir-sharding.js b/test/builder-dir-sharding.js index a7c7a8e0..79a6389e 100644 --- a/test/builder-dir-sharding.js +++ b/test/builder-dir-sharding.js @@ -115,7 +115,7 @@ module.exports = (repo) => { expect(nodes.length).to.be.eql(2) const expectedHash = new CID(shardedHash).toBaseEncodedString() expect(nodes[0].path).to.be.eql(expectedHash) - expect(nodes[0].hash).to.be.eql(expectedHash) + expect(new CID(nodes[0].hash).toBaseEncodedString()).to.be.eql(expectedHash) expect(nodes[1].path).to.be.eql(expectedHash + '/b') expect(nodes[1].size).to.be.eql(21) pull( diff --git a/test/exporter.js b/test/exporter.js index dcfe3255..b7dc92ad 100644 --- a/test/exporter.js +++ b/test/exporter.js @@ -30,6 +30,7 @@ const exporter = unixFSEngine.exporter const importer = unixFSEngine.importer const bigFile = loadFixture('test/fixtures/1.2MiB.txt') +const smallFile = loadFixture('test/fixtures/200Bytes.txt') module.exports = (repo) => { describe('exporter', () => { @@ -420,12 +421,42 @@ module.exports = (repo) => { ) }) - it('exports a large file > 5mb imported with raw leaves', function (done) { + it('exports a small file imported with raw leaves', function (done) { this.timeout(30 * 1000) pull( pull.values([{ path: '200Bytes.txt', + content: pull.values([smallFile]) + }]), + importer(ipld, { + rawLeaves: true + }), + pull.collect(collected) + ) + + function collected (err, files) { + expect(err).to.not.exist() + expect(files.length).to.equal(1) + + pull( + 
exporter(files[0].multihash, ipld), + pull.collect((err, files) => { + expect(err).to.not.exist() + expect(new CID(files[0].hash).toBaseEncodedString()).to.equal('zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q') + + fileEql(files[0], smallFile, done) + }) + ) + } + }) + + it('exports a large file > 1mb imported with raw leaves', function (done) { + this.timeout(30 * 1000) + + pull( + pull.values([{ + path: '1.2MiB.txt', content: pull.values([bigFile]) }]), importer(ipld, { diff --git a/test/importer.js b/test/importer.js index 44601645..abcd4ad1 100644 --- a/test/importer.js +++ b/test/importer.js @@ -219,7 +219,11 @@ module.exports = (repo) => { pam: { multihash: 'QmPAixYTaYnPe795fcWcuRpo6tfwHgRKNiBHpMzoomDVN6', size: 2656553 - } + }, + '200Bytes.txt with raw leaves': extend({}, baseFiles['200Bytes.txt'], { + multihash: 'zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q', + size: 200 + }) }, strategyOverrides[strategy]) const expected = extend({}, defaultResults, strategies[strategy]) @@ -320,6 +324,21 @@ module.exports = (repo) => { ) }) + it('small file (smaller than a chunk) with raw leaves', (done) => { + pull( + pull.values([{ + path: '200Bytes.txt', + content: pull.values([smallFile]) + }]), + importer(ipld, Object.assign({}, options, { rawLeaves: true })), + pull.collect((err, files) => { + expect(err).to.not.exist() + expect(stringifyMh(files)).to.be.eql([expected['200Bytes.txt with raw leaves']]) + done() + }) + ) + }) + it('small file as buffer (smaller than a chunk)', (done) => { pull( pull.values([{ From 444227ccc393eb701d0052055221817b39cba06c Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 23 Aug 2018 16:55:51 +0100 Subject: [PATCH 2/2] test: add tests for offset and length Also fixes an issue with the file resolver and 0 length export. 
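For reviewers, a rough sketch of the read semantics these tests pin down
(illustrative only, not part of the patch; it assumes the exporter accepts an
options object with `offset` and `length`, as the test helpers in
test/exporter.js use, and that `cid` and `ipld` are the hash of an imported
200 byte file and an IPLD resolver, respectively):

    const pull = require('pull-stream')
    const exporter = require('ipfs-unixfs-engine').exporter

    pull(
      // offset defaults to 0 and length is clamped to (file size - offset),
      // so on a 200 byte file this yields the last 100 bytes. A negative
      // offset or length, or an offset past the file size, errors instead,
      // and length 0 yields a single empty buffer.
      exporter(cid, ipld, { offset: 100, length: 200 }),
      pull.collect((err, files) => {
        if (err) throw err
        pull(files[0].content, pull.collect((err2, bufs) => {
          if (err2) throw err2
          console.log(Buffer.concat(bufs).length) // expected: 100
        }))
      })
    )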
License: MIT Signed-off-by: Alan Shaw --- src/exporter/file.js | 12 +++- src/exporter/raw.js | 2 +- test/exporter.js | 154 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 140 insertions(+), 28 deletions(-) diff --git a/src/exporter/file.js b/src/exporter/file.js index 6310d9fe..96392187 100644 --- a/src/exporter/file.js +++ b/src/exporter/file.js @@ -19,7 +19,7 @@ module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, d const fileSize = size || file.fileSize() if (offset < 0) { - return pull.error(new Error('Offset must be greater than 0')) + return pull.error(new Error('Offset must be greater than or equal to 0')) } if (offset > fileSize) { @@ -31,7 +31,15 @@ module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, d } if (length === 0) { - return pull.once(Buffer.alloc(0)) + return pull.once({ + depth: depth, + content: pull.once(Buffer.alloc(0)), + name: name, + path: path, + hash: cid, + size: fileSize, + type: 'file' + }) } if (!offset) { diff --git a/src/exporter/raw.js b/src/exporter/raw.js index 501b81e8..96265835 100644 --- a/src/exporter/raw.js +++ b/src/exporter/raw.js @@ -14,7 +14,7 @@ module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, d size = size || node.length if (offset < 0) { - return pull.error(new Error('Offset must be greater than 0')) + return pull.error(new Error('Offset must be greater than or equal to 0')) } if (offset > size) { diff --git a/test/exporter.js b/test/exporter.js index b7dc92ad..daad1d9c 100644 --- a/test/exporter.js +++ b/test/exporter.js @@ -36,7 +36,7 @@ module.exports = (repo) => { describe('exporter', () => { let ipld - function addTestFile ({file, strategy = 'balanced', path = '/foo', maxChunkSize}, cb) { + function addTestFile ({file, strategy = 'balanced', path = '/foo', maxChunkSize, rawLeaves}, cb) { pull( pull.values([{ path, @@ -44,6 +44,7 @@ module.exports = (repo) => { }]), importer(ipld, { strategy, + rawLeaves, chunkerOptions: { maxChunkSize } @@ -54,8 +55,8 @@ module.exports = (repo) => { ) } - function addAndReadTestFile ({file, offset, length, strategy = 'balanced', path = '/foo', maxChunkSize}, cb) { - addTestFile({file, strategy, path, maxChunkSize}, (error, multihash) => { + function addAndReadTestFile ({file, offset, length, strategy = 'balanced', path = '/foo', maxChunkSize, rawLeaves}, cb) { + addTestFile({file, strategy, path, maxChunkSize, rawLeaves}, (error, multihash) => { if (error) { return cb(error) } @@ -322,6 +323,21 @@ module.exports = (repo) => { ) }) + it('exports a zero length chunk of a large file', function (done) { + this.timeout(30 * 1000) + + addAndReadTestFile({ + file: bigFile, + path: '1.2MiB.txt', + rawLeaves: true, + length: 0 + }, (err, data) => { + expect(err).to.not.exist() + expect(data).to.eql(Buffer.alloc(0)) + done() + }) + }) + it('exports a chunk of a large file > 5mb made from multiple blocks', function (done) { this.timeout(30 * 1000) const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' @@ -424,31 +440,119 @@ module.exports = (repo) => { it('exports a small file imported with raw leaves', function (done) { this.timeout(30 * 1000) - pull( - pull.values([{ - path: '200Bytes.txt', - content: pull.values([smallFile]) - }]), - importer(ipld, { - rawLeaves: true - }), - pull.collect(collected) - ) + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true + }, (err, data) => { + expect(err).to.not.exist() + expect(data).to.eql(smallFile) + done() + }) + }) - function collected 
(err, files) { + it('exports a chunk of a small file imported with raw leaves', function (done) { + this.timeout(30 * 1000) + + const length = 100 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + length + }, (err, data) => { expect(err).to.not.exist() - expect(files.length).to.equal(1) + expect(data).to.eql(smallFile.slice(0, length)) + done() + }) + }) - pull( - exporter(files[0].multihash, ipld), - pull.collect((err, files) => { - expect(err).to.not.exist() - expect(new CID(files[0].hash).toBaseEncodedString()).to.equal('zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q') + it('exports a chunk of a small file imported with raw leaves with length', function (done) { + this.timeout(30 * 1000) - fileEql(files[0], smallFile, done) - }) - ) - } + const offset = 100 + const length = 200 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + offset, + length + }, (err, data) => { + expect(err).to.not.exist() + expect(data).to.eql(smallFile.slice(offset)) + done() + }) + }) + + it('exports a zero length chunk of a small file imported with raw leaves', function (done) { + this.timeout(30 * 1000) + + const length = 0 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + length + }, (err, data) => { + expect(err).to.not.exist() + expect(data).to.eql(Buffer.alloc(0)) + done() + }) + }) + + it('errors when exporting a chunk of a small file imported with raw leaves and negative length', function (done) { + this.timeout(30 * 1000) + + const length = -100 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + length + }, (err, data) => { + expect(err).to.exist() + expect(err.message).to.equal('Length must be greater than or equal to 0') + done() + }) + }) + + it('errors when exporting a chunk of a small file imported with raw leaves and negative offset', function (done) { + this.timeout(30 * 1000) + + const offset = -100 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + offset + }, (err, data) => { + expect(err).to.exist() + expect(err.message).to.equal('Offset must be greater than or equal to 0') + done() + }) + }) + + it('errors when exporting a chunk of a small file imported with raw leaves and offset greater than file size', function (done) { + this.timeout(30 * 1000) + + const offset = 201 + + addAndReadTestFile({ + file: smallFile, + path: '200Bytes.txt', + rawLeaves: true, + offset + }, (err, data) => { + expect(err).to.exist() + expect(err.message).to.equal('Offset must be less than the file size') + done() + }) }) it('exports a large file > 1mb imported with raw leaves', function (done) { @@ -512,7 +616,7 @@ module.exports = (repo) => { offset: -1 }, (error, data) => { expect(error).to.be.ok() - expect(error.message).to.contain('Offset must be greater than 0') + expect(error.message).to.contain('Offset must be greater than or equal to 0') done() })
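Illustrative end-to-end usage of the interop this series enables (a sketch
for reviewers, not part of either patch; it assumes `ipld` is an IPLD
resolver and `smallFile` is a buffer below the max chunk size, mirroring the
fixtures in test/exporter.js):

    const pull = require('pull-stream')
    const { importer, exporter } = require('ipfs-unixfs-engine')

    pull(
      pull.values([{ path: '200Bytes.txt', content: pull.values([smallFile]) }]),
      // With rawLeaves the sub-chunk-size file is stored as a single raw
      // block, so the hash matches what go-ipfs produces (a zb2... CIDv1).
      importer(ipld, { rawLeaves: true }),
      pull.collect((err, imported) => {
        if (err) throw err
        pull(
          // The new raw resolver lets the exporter read that block back out.
          exporter(imported[0].multihash, ipld),
          pull.collect((err2, exported) => {
            if (err2) throw err2
            pull(exported[0].content, pull.collect((err3, bufs) => {
              if (err3) throw err3
              // Buffer.concat(bufs) deep-equals smallFile
            }))
          })
        )
      })
    )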