From 7a893227b334936677165329aa5324e3b1e62a1b Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Thu, 1 Aug 2024 22:12:54 +0200 Subject: [PATCH 1/9] Major rewrite to make things more consistent and extensible **IMPORTANT: Work in progress, not yet functional!** The code in Strudy did the same thing in multiple ways: - The main CLI was linked to the first type of analysis reports, that still get published to `w3c/webref-analysis`. - A few CLIs were there, used mainly for exploration purpose. - The study modules could sometimes be used as CLI, for no apparent reason. - The study modules used different conventions for inputs/outputs - The reporting code was somewhat separated from the rest, whereas it could be useful to extend it. - Preparing flexible reports and amending the "file an issue" workflow was a bit tedious. This rewrite attempts to solve the above by: - Rewriting study modules to expose one similar study function - Adding a generic study module that knows about all anomaly types, dispatches the study to the different study modules depending on the request, and can structure and format the result as needed in markdown to file one or more issues. - Adding a few tests to be more confident that the analyses are doing the right thing! - **(Not done yet)** Updating the entry points to use the new study module - **(Not done yet)** Getting rid of all side CLIs --- src/lib/study-algorithms.js | 27 +- src/lib/study-backrefs.js | 124 ++++----- src/lib/study-dfns.js | 478 ++++++++++++++++++++++++++++++++++ src/lib/study-refs.js | 129 ++++++++-- src/lib/study-webidl.js | 58 ++--- src/lib/study.js | 498 ++++++++++++++++++++++++++++++++++++ test/study-algorithms.js | 37 +++ test/study-backrefs.js | 25 +- test/study-dfns.js | 39 +++ test/study-refs.js | 55 +++- test/study-webidl.js | 11 +- test/study.js | 150 +++++++++++ 12 files changed, 1470 insertions(+), 161 deletions(-) create mode 100644 src/lib/study-dfns.js create mode 100644 src/lib/study.js create mode 100644 test/study-algorithms.js create mode 100644 test/study-dfns.js create mode 100644 test/study.js diff --git a/src/lib/study-algorithms.js b/src/lib/study-algorithms.js index 76f76fd9..2466a282 100644 --- a/src/lib/study-algorithms.js +++ b/src/lib/study-algorithms.js @@ -1,11 +1,4 @@ import { JSDOM } from 'jsdom'; -import { recordCategorizedAnomaly } from './util.js'; - -const possibleAnomalies = [ - 'missingTaskForPromise', - 'missingTaskForEvent' -]; - /** * Normalize whitespaces in string to make analysis easier @@ -57,9 +50,8 @@ function nestParallelSteps(algo) { /** * Main function, study all algorithms */ -function studyAlgorithms(edResults) { +function studyAlgorithms(specs) { const report = []; - const recordAnomaly = recordCategorizedAnomaly(report, 'algorithms', possibleAnomalies); // Return human-friendly markdown that identifies the given algorithm function getAlgoName(algo) { @@ -95,11 +87,19 @@ function studyAlgorithms(edResults) { // https://w3c.github.io/clipboard-apis/#dom-clipboard-read !html.includes('systemClipboardRepresentation') ) { - recordAnomaly(spec, 'missingTaskForPromise', `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`); + report.push({ + name: 'missingTaskForPromise', + message: `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`, + spec + }); return true; } else if (html.match(/fire an?( \w+)? 
event/i)) { - recordAnomaly(spec, 'missingTaskForEvent', `${getAlgoName(algo)} has a parallel step that fires an event directly`); + report.push({ + name: 'missingTaskForEvent', + message: `${getAlgoName(algo)} has a parallel step that fires an event directly`, + spec + }); return true; } } @@ -133,13 +133,10 @@ function studyAlgorithms(edResults) { return anomalyFound; } - // We're only interested in specs that define algorithms - const specs = edResults.filter(spec => !!spec.algorithms); - // Study algorithms in turn. // Note: the root level of each algorithm is its first step. It may say // something like "run these steps in parallel" in particular. - for (const spec of specs) { + for (const spec of specs.filter(spec => !!spec.algorithms)) { for (const algo of spec.algorithms) { nestParallelSteps(algo); studyAlgorithmStep(spec, algo, algo); diff --git a/src/lib/study-backrefs.js b/src/lib/study-backrefs.js index 2be544fa..2d046d82 100644 --- a/src/lib/study-backrefs.js +++ b/src/lib/study-backrefs.js @@ -1,18 +1,3 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; - -const possibleAnomalies = [ - 'brokenLinks', - 'datedUrls', - 'evolvingLinks', - 'frailLinks', - 'nonCanonicalRefs', - 'notDfn', - 'notExported', - 'outdatedSpecs', - 'unknownSpecs' -]; - /** * The backrefs analyzer only checks links to other specs. This function returns * true when a link does target a spec, and false if it targets something else @@ -39,57 +24,91 @@ const matchSpecUrl = url => TODO: DRY Copied from browser-specs/src/compute-shortname.js */ -function computeShortname (url) { - function parseUrl (url) { +function computeShortname(url) { + function parseUrl(url) { // Handle /TR/ URLs - const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^/]+)\/$/); + const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^\/]+)\/$/); if (w3cTr) { return w3cTr[1]; } // Handle WHATWG specs - const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\/?/); + const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\//); if (whatwg) { - return whatwg[1]; + return whatwg[1]; } // Handle TC39 Proposals - const tc39 = url.match(/\/\/tc39\.es\/proposal-([^/]+)\/$/); + const tc39 = url.match(/\/\/tc39\.es\/proposal-([^\/]+)\/$/); if (tc39) { - return 'tc39-' + tc39[1]; + return "tc39-" + tc39[1]; } + // Handle Khronos extensions - const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^/]+)\/$/); + const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^\/]+)\/$/); if (khronos) { - return khronos[1]; + return khronos[1]; } // Handle extension specs defined in the same repo as the main spec // (e.g. 
generate a "gamepad-extensions" name for // https://w3c.github.io/gamepad/extensions.html") - const ext = url.match(/\/.*\.github\.io\/([^/]+)\/(extensions?)\.html$/); + const ext = url.match(/\/.*\.github\.io\/([^\/]+)\/(extensions?)\.html$/); if (ext) { return ext[1] + '-' + ext[2]; } // Handle draft specs on GitHub, excluding the "webappsec-" prefix for // specifications developed by the Web Application Security Working Group - const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^/]+)\//); + const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^\/]+)\//); if (github) { - return github[1]; + return github[1]; } // Handle CSS WG specs - const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^/]+)\//); + const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^\/]+)\//); if (css) { return css[1]; } // Handle SVG drafts - const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^/]+)\//); + const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^\/]+)\//); if (svg) { - return 'svg-' + svg[1]; + return "svg-" + svg[1]; + } + + // Handle IETF RFCs + const rfcs = url.match(/\/www.rfc-editor\.org\/rfc\/(rfc[0-9]+)/); + if (rfcs) { + return rfcs[1]; + } + + // Handle IETF group drafts + const ietfDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-ietf-[^\-]+-([^\/]+)/); + if (ietfDraft) { + return ietfDraft[1]; + } + + // Handle IETF individual drafts, stripping group name + // TODO: retrieve the list of IETF groups to make sure that the group name + // is an actual group name and not the beginning of the shortname: + // https://datatracker.ietf.org/api/v1/group/group/ + // (multiple requests needed due to pagination, "?limit=1000" is the max) + const ietfIndDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-[^\-]+-([^\/]+)/); + if (ietfIndDraft) { + if (ietfIndDraft[1].indexOf('-') !== -1) { + return ietfIndDraft[1].slice(ietfIndDraft[1].indexOf('-') + 1); + } + else { + return ietfIndDraft[1]; + } + } + + // Handle TAG findings + const tag = url.match(/^https?:\/\/(?:www\.)?w3\.org\/2001\/tag\/doc\/([^\/]+)\/?$/); + if (tag) { + return tag[1]; } // Return name when one was given @@ -97,7 +116,7 @@ function computeShortname (url) { return url; } - throw new Error(`Cannot extract meaningful name from ${url}`); + throw `Cannot extract meaningful name from ${url}`; } // Parse the URL to extract the name @@ -107,8 +126,8 @@ function computeShortname (url) { // Latin characters (a-z letters, digits, underscore and "-"), and that it // only contains a dot for fractional levels at the end of the name // (e.g. "blah-1.2" is good but "blah.blah" and "blah-3.1-blah" are not) - if (!name.match(/^[\w-]+((?<=-\d+)\.\d+)?$/)) { - throw new Error(`Specification name contains unexpected characters: ${name} (extracted from ${url})`); + if (!name.match(/^[\w\-]+((?<=\-v?\d+)\.\d+)?$/)) { + throw `Specification name contains unexpected characters: ${name} (extracted from ${url})`; } return name; @@ -234,18 +253,21 @@ const matchAnchor = (url, anchor) => link => { return link === (url + '#' + anchor) || link === (url + '#' + encodeURIComponent(anchor)); }; -function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortnameFilter) { - trResults = trResults || []; +async function studyBackrefs(specs, { crawlResults = null, trResults = [], htmlFragments = null } = {}) { + crawlResults = crawlResults ?? 
specs; const report = []; - edResults.forEach(spec => { - if (shortnameFilter && spec.shortname !== shortnameFilter) return; - studyLinks(spec, spec.links?.rawlinks, report, edResults, trResults, htmlFragments); + // Donwload automatic map of multipages anchors in HTML spec + const fragmentsUrl = 'https://html.spec.whatwg.org/multipage/fragment-links.json'; + htmlFragments = htmlFragments ?? await fetch(fragmentsUrl).then(r => r.json()); + + specs.forEach(spec => { + studyLinks(spec, spec.links?.rawlinks, report, crawlResults, trResults, htmlFragments); // given the current limitation of classification of links for bikeshed // https://github.com/w3c/reffy/issues/1584 // we also check autolinks for bikeshed specs if (spec.generator === "bikeshed") { - studyLinks(spec, spec.links?.autolinks, report, edResults, trResults, htmlFragments); + studyLinks(spec, spec.links?.autolinks, report, crawlResults, trResults, htmlFragments); } }); return report; @@ -254,7 +276,9 @@ function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortname function studyLinks(spec, links, report, edResults, trResults, htmlFragments) { if (!links) return; - const recordAnomaly = recordCategorizedAnomaly(report, 'links', possibleAnomalies); + function recordAnomaly(spec, name, message) { + report.push({ name, message, spec }); + } Object.keys(links) .filter(matchSpecUrl) @@ -421,22 +445,4 @@ function studyLinks(spec, links, report, edResults, trResults, htmlFragments) { }); } -/************************************************** -Export methods for use as module -**************************************************/ -export default studyBackrefs; - -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const crawl = await loadCrawlResults(process.argv[2], process.argv[3]); - let htmlFragments = {}; - try { - console.info('Downloading HTML spec fragments data…'); - htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json()); - console.info('- done'); - } catch (err) { - console.error('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec'); - } - - const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments, process.argv[4] ?? undefined); - console.log(results); -} +export default studyBackrefs; \ No newline at end of file diff --git a/src/lib/study-dfns.js b/src/lib/study-dfns.js new file mode 100644 index 00000000..b779b720 --- /dev/null +++ b/src/lib/study-dfns.js @@ -0,0 +1,478 @@ +/** + * The definitions checker compares CSS, dfns, and IDL extracts created by Reffy + * to detect CSS/IDL terms that do not have a corresponding dfn in the + * specification. + * + * Note: CSS extraction already relies on dfns and reports missing dfns in a + * "warnings" property. This checker simply looks at that list. 
+ * + * @module checker + */ + +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import loadJSON from '../lib/load-json.js'; + + +/** + * List of spec shortnames that, so far, don't follow the dfns data model + */ +const specsWithObsoleteDfnsModel = [ + 'svg-animations', 'svg-markers', 'svg-strokes', 'SVG2', + 'webgl1', 'webgl2', + 'webrtc-identity' +]; + + +/** + * Return true when provided arrays are "equal", meaning that they contain the + * same items + * + * @function + * @private + * @param {Array} a First array to compare + * @param {Array} b Second array to compare + * @return {boolean} True when arrays are equal + */ +function arraysEqual(a, b) { + return Array.isArray(a) && + Array.isArray(b) && + a.length === b.length && + a.every((val, index) => val === b[index]); +} + + +/** + * Return the list of expected definitions from the CSS extract + * + * @function + * @private + * @param {Object} css The root of the object that describes CSS terms in the + * CSS extract + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromCSS(css) { + const expected = (css.warnings ?? []) + .filter(warning => warning.msg === 'Missing definition') + .map(warning => { + return { + linkingText: [warning.name], + type: warning.type, + 'for': warning.for + }; + }); + + return expected; +} + + +/** + * Return true when the given CSS definition matches the expected definition + * + * @function + * @private + * @param {Object} expected Expected definition + * @param {Object} actual Actual definition to check + * @return {Boolean} true when actual definition matches the expected one + */ +function matchCSSDfn(expected, actual) { + return arraysEqual(expected.linkingText, actual.linkingText) && + (!expected.for || arraysEqual(expected.for, actual.for)) && + (!expected.type || (expected.type === actual.type)); +} + + +/** + * Return the list of expected definitions from the IDL extract + * + * @function + * @private + * @param {Object} css The root of the object that describes IDL terms in the + * `idlparsed` extract. + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromIdl(idl = {}) { + // Parse IDL names that the spec defines + const idlNames = Object.values(idl.idlNames || {}); + let expected = idlNames.map(name => getExpectedDfnsFromIdlDesc(name)).flat(); + + // Parse members of IDL names that the spec extends + const idlExtendedNames = Object.values(idl.idlExtendedNames || {}); + expected = expected.concat(idlExtendedNames.map(extended => + extended.map(name => getExpectedDfnsFromIdlDesc(name, { excludeRoot: true }))) + .flat(2)); + return expected; +} + + +/** + * Return true if the given parsed IDL object describes a default toJSON + * operation that references: + * https://heycam.github.io/webidl/#default-tojson-steps + * + * @function + * @private + * @param {Object} desc Parsed IDL object to check + * @return {Boolean} true when object describes a default toJSON operation. + */ +function isDefaultToJSONOperation(desc) { + return (desc.type === 'operation') && + (desc.name === 'toJSON') && + (desc.extAttrs && desc.extAttrs.find(attr => attr.name === "Default")); +} + + +/** + * Return the expected definition for the given parsed IDL structure + * + * @function + * @public + * @param {Object} desc The object that describes the IDL term in the + * `idlparsed` extract. 
+ * @param {Object} parentDesc (optional) The object that describes the parent + * IDL term of the term to parse (used to compute the `for` property). + * @return {Object} The expected definition, or null if no expected definition + * is defined. + */ +function getExpectedDfnFromIdlDesc(idl, parentIdl) { + function serializeArgs(args = []) { + return args + .map(arg => arg.variadic ? `...${arg.name}` : arg.name) + .join(', '); + } + + let expected = { + linkingText: [idl.name], + type: idl.type, + 'for': parentIdl && (parentIdl !== idl) ? [parentIdl.name] : [] + }; + + switch (idl.type) { + case 'attribute': + case 'const': + break; + + case 'constructor': + // Ignore constructors for HTML elements, the spec has a dedicated + // section for them: + // https://html.spec.whatwg.org/multipage/dom.html#html-element-constructors + if (!parentIdl.name.startsWith('HTML')) { + expected.linkingText = [`constructor(${serializeArgs(idl.arguments)})`]; + } + else { + expected = null; + } + break; + + case 'enum': + break; + + case 'enum-value': + // The enumeration could include the empty string as a value. There + // cannot be a matching definition in that case. + // Note: look for the quoted value and the unquoted value + const value = idl.value.replace(/^"(.*)"$/, '$1'); + expected.linkingText = (value !== '') ? [`"${value}"`, value] : [`"${value}"`]; + break; + + case 'field': + expected.type = 'dict-member'; + break; + + case 'callback': + case 'callback interface': + case 'dictionary': + case 'interface': + case 'interface mixin': + case 'namespace': + expected.type = + (idl.type === 'callback interface') ? 'callback' : + (idl.type === 'interface mixin') ? 'interface' : + idl.type; + // Ignore partial definition + if (idl.partial) { + expected = null; + } + break; + + case 'includes': + expected = null; + break; + + case 'iterable': + case 'maplike': + case 'setlike': + // No definition expected for iterable, maplike and setlike members + expected = null; + break; + + case 'operation': + // Stringification behavior is typically defined with a + // "stringification behavior" definition scoped to the interface + if (idl.special === 'stringifier') { + expected.linkingText = ['stringification behavior', 'stringificationbehavior']; + expected.type = 'dfn'; + } + // Ignore special "getter", "setter", "deleter" operations when they don't + // have an identifier. They should link to a definition in the prose, but + // the labels seem arbitrary for now. + // Also ignore default toJSON operations. Steps are defined in WebIDL. + else if ((idl.name || + ((idl.special !== 'getter') && + (idl.special !== 'setter') && + (idl.special !== 'deleter'))) && + !isDefaultToJSONOperation(idl)) { + expected.linkingText = [`${idl.name}(${serializeArgs(idl.arguments)})`]; + expected.type = 'method'; + } + else { + expected = null; + } + break; + + case 'typedef': + break; + + case 'argument': + expected = null; + break; + + default: + console.warn('Unsupported IDL type', idl.type, idl); + expected = null; + break; + } + + return expected; +} + + +/** + * Return the list of expected definitions from a parsed IDL extract entry. + * + * The function is recursive. + * + * @function + * @private + * @param {Object} idl The object that describes the IDL term in the + * `idlparsed` extract. + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) { + const res = []; + const parentIdl = idl; + const idlToProcess = excludeRoot ? 
[] : [idl]; + + switch (idl.type) { + case 'enum': + if (idl.values) { + idlToProcess.push(...idl.values); + } + break; + + case 'callback': + case 'callback interface': + case 'dictionary': + case 'interface': + case 'interface mixin': + case 'namespace': + if (idl.members) { + idlToProcess.push(...idl.members); + } + break; + } + + idlToProcess.forEach(idl => { + const expected = getExpectedDfnFromIdlDesc(idl, parentIdl); + if (expected) { + expected.access = 'public'; + expected.informative = false; + res.push(expected); + } + }); + + return res; +} + + +/** + * Return true when the given IDL definition matches the expected definition. + * + * The function handles overloaded methods, though not properly. That is, it + * will only find the "right" definition for an overloaded method if the number + * and/or the name of the arguments differ between the overloaded definitions. + * Otherwise it will just match the first definition that looks good. + * + * The function works around Respec's issue #3200 for methods and constructors + * that take only optional parameters: + * https://github.com/w3c/respec/issues/3200 + * + * @function + * @private + * @param {Object} expected Expected definition + * @param {Object} actual Actual definition to check + * @param {Object} options Comparison options + * @return {Boolean} true when actual definition matches the expected one + */ +function matchIdlDfn(expected, actual, + {skipArgs, skipFor, skipType} = {skipArgs: false, skipFor: false, skipType: false}) { + const fixedLt = actual.linkingText + .map(lt => lt.replace(/!overload-\d/, '')) + .map(lt => lt.replace(/\(, /, '(')); + let found = expected.linkingText.some(val => fixedLt.includes(val)); + if (!found && skipArgs) { + const names = fixedLt.map(lt => lt.replace(/\(.*\)/, '')); + found = expected.linkingText.some(val => { + const valname = val.replace(/\(.*\)/, ''); + return names.find(name => name === valname); + }); + } + return found && + (expected.for.every(val => actual.for.includes(val)) || skipFor) && + (expected.type === actual.type || skipType); +} + + +/** + * Checks the CSS and IDL extracts against the dfns extract for the given spec + * + * @function + * @public + * @param {Object} spec Crawl result for the spec to parse + * @return {Object} An object with a css and idl property, each of them holding + * an array of missing CSS or IDL definitions. The function returns null when + * there are no missing definitions. + */ +function checkSpecDefinitions(spec) { + if (specsWithObsoleteDfnsModel.includes(spec.shortname)) { + return { obsoleteDfnsModel: true }; + } + + const dfns = spec.dfns ?? []; + const css = spec.css ?? {}; + const idl = spec.idlparsed ?? {}; + + // Make sure that all expected CSS definitions exist in the dfns extract + const expectedCSSDfns = getExpectedDfnsFromCSS(css); + const missingCSSDfns = expectedCSSDfns.map(expected => { + let actual = dfns.find(dfn => matchCSSDfn(expected, dfn)); + if (!actual && !expected.type) { + // Right definition is missing. For valuespaces that define functions, + // look for a function definition without the enclosing "<>" instead + const altText = [expected.linkingText[0].replace(/^<(.*)\(\)>$/, '$1()')]; + actual = dfns.find(dfn => arraysEqual(altText, dfn.linkingText)); + } + if (!actual && expected.value) { + // Still missing? 
For valuespaces that define functions, this may be + // because there is no definition without parameters, try to find the + // actual value instead + actual = dfns.find(dfn => arraysEqual([expected.value], dfn.linkingText)); + } + if (actual) { + // Right definition found + return null; + } + else { + // Right definition is missing, there may be a definition that looks + // like the one we're looking for + const found = dfns.find(dfn => + arraysEqual(dfn.linkingText, expected.linkingText)); + return { expected, found }; + } + }).filter(missing => !!missing); + + // Make sure that all expected IDL definitions exist in the dfns extract + const expectedIdlDfns = getExpectedDfnsFromIdl(idl); + const missingIdlDfns = expectedIdlDfns.map(expected => { + let actual = dfns.find(dfn => matchIdlDfn(expected, dfn)); + if (actual) { + // Right definition found + return null; + } + else { + // Right definition is missing, include the interface's definitions to + // be able to link to it in the report + let parent = null; + if (expected.for && expected.for[0]) { + parent = dfns.find(dfn => + (dfn.linkingText[0] === expected.for[0]) && + ['callback', 'dictionary', 'enum', 'interface', 'namespace'].includes(dfn.type)); + } + + // Look for a definition that seems as close as possible to the one + // we're looking for, in the following order: + // 1. For operations, find a definition without taking arguments into + // account and report possible match with a "warning" flag. + // 2. For terms linked to a parent interface-like object, find a match + // scoped to the same parent without taking the type into account. + // 3. Look for a definition with the same name, neither taking the type + // nor the parent into account. + let found = dfns.find(dfn => matchIdlDfn(expected, dfn, { skipArgs: true })); + if (found) { + return { expected, found, for: parent, warning: true }; + } + found = dfns.find(dfn => matchIdlDfn(expected, dfn, + { skipArgs: true, skipType: true })); + if (found) { + return { expected, found, for: parent }; + } + found = dfns.find(dfn => matchIdlDfn(expected, dfn, + { skipArgs: true, skipType: true, skipFor: true })); + return { expected, found, for: parent }; + } + }).filter(missing => !!missing); + + // Report results + return { + css: missingCSSDfns, + idl: missingIdlDfns + }; +} + + +/** + * Format the anomaly message to report as Markdown + * + * @function + * @private + * @param {Object} missing Object that describes missing dfn + */ +function formatAnomalyMessage(missing) { + const exp = missing.expected; + const found = missing.found; + const foundFor = (found && found.for && found.for.length > 0) ? + ' for ' + found.for.map(f => `\`${f}\``).join(',') : + ''; + return '`' + exp.linkingText[0] + '` ' + + (exp.type ? `with type \`${exp.type}\`` : '') + + (missing.for ? ` for [\`${missing.for.linkingText[0]}\`](${missing.for.href})` : '') + + (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : ''); +} + + +/** + * Checks the CSS and IDL extracts against the dfns extract for all specs in + * the report, and return a list of missing definitions. 
+ * + * @function + * @public + */ +export default function studyDefinitions(specs) { + return specs + .map(spec => { + const missing = checkSpecDefinitions(spec); + const res = []; + for (const type of ['css', 'idl']) { + const anomalies = missing[type]; + for (const anomaly of anomalies) { + res.push({ + name: 'missingDfns', + message: formatAnomalyMessage(anomaly), + spec + }); + } + } + return res; + }) + .flat(); +} diff --git a/src/lib/study-refs.js b/src/lib/study-refs.js index f7542486..19bc99c6 100644 --- a/src/lib/study-refs.js +++ b/src/lib/study-refs.js @@ -1,31 +1,116 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; +import { canonicalizeUrl, canonicalizesTo } from './canonicalize-url.js'; -const possibleAnomalies = [ - 'discontinuedReferences' -]; +/** + * Helper function that returns true when the given URL seems to target a real + * "spec" (as opposed to, say, a Wiki page, or something else) + */ +const matchSpecUrl = url => + url.match(/spec.whatwg.org/) || + url.match(/www.w3.org\/TR\/[a-z0-9]/) || + (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//)); -function studyReferences (edResults) { - const report = []; - const recordAnomaly = recordCategorizedAnomaly(report, 'refs', possibleAnomalies); - edResults.forEach(spec => { - (spec.refs?.normative || []).forEach(ref => { - const referencedSpec = edResults.find(s => s.url === ref.url || s?.nightly?.url === ref.url || s?.nightly?.alternateUrls?.includes(ref.url)); +function studyReferences (specs, { crawlResults = null } = {}) { + crawlResults = crawlResults ?? specs; - if (referencedSpec && referencedSpec.standing === "discontinued") { + // Construct spec equivalence from the crawl report + const specEquivalents = {}; + for (const spec of crawlResults) { + for (const v of (spec.versions ?? [])) { + if (specEquivalents[v]) { + if (Array.isArray(specEquivalents[v])) { + specEquivalents[v].push(spec.url); + } + else { + specEquivalents[v] = [specEquivalents[v], spec.url]; + } + } + else { + specEquivalents[v] = spec.url; + } + } + } + + // Strong canonicalization options to find references + const useEquivalents = { + datedToLatest: true, + equivalents: specEquivalents + }; - const newSpecsLinks = edResults.filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)).map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); - recordAnomaly(spec, 'discontinuedReferences', `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`); + const report = []; + for (const spec of specs) { + for (const ref of spec.refs?.normative ?? []) { + const referencedSpec = crawlResults.find(s => + s.url === ref.url || + s?.nightly?.url === ref.url || + s?.nightly?.alternateUrls?.includes(ref.url)); + if (referencedSpec && referencedSpec.standing === "discontinued") { + const newSpecsLinks = crawlResults + .filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)) + .map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); + report.push({ + name: 'discontinuedReferences', + message: `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? 
`has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`, + spec + }); } - }); - }); + } + + // Detect links to external specifications within the body of the spec + // that do not have a corresponding entry in the list of references + // (all links to external specs should have a companion ref) + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .filter(l => { + // Filter out "good" and "inconsistent" references + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []).concat(spec.refs?.informative ?? []); + return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + }) + .filter(l => + // Ignore links to other versions of "self". There may + // be cases where it would be worth reporting them but + // most of the time they appear in "changelog" sections. + !canonicalizesTo(l, spec.url, useEquivalents) && + !canonicalizesTo(l, spec.versions, useEquivalents) + ) + .forEach(l => { + report.push({ + name: 'missingReferences', + message: l, + spec + }); + }); + + // Detect links to external specifications within the body of the spec + // that have a corresponding entry in the references, but for which the + // reference uses a different URL, e.g., because the link targets the + // Editor's Draft, whereas the reference targets the latest published + // version + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .map(l => { + const canonSimple = canonicalizeUrl(l); + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []) + .concat(spec.refs?.informative ?? []); + + // Filter out "good" references + if (refs.find(r => canonicalizesTo(r.url, canonSimple))) { + return null; + } + const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + return (ref ? { link: l, ref } : null); + }) + .filter(anomaly => !!anomaly) + .forEach(anomaly => { + report.push({ + name: 'inconsistentReferences', + message: `${anomaly.link}, related reference "${anomaly.ref.name}" uses URL ${anomaly.ref.url}`, + spec + }); + }); + } return report; } export default studyReferences; - -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const crawl = await loadCrawlResults(process.argv[2]); - const results = studyReferences(crawl.ed); - console.log(results); -} diff --git a/src/lib/study-webidl.js b/src/lib/study-webidl.js index fc09365f..30e9b5ab 100644 --- a/src/lib/study-webidl.js +++ b/src/lib/study-webidl.js @@ -5,52 +5,26 @@ * object structure: * * { - * "category": "webidl", * "name": "type of anomaly", * "message": "Description of the anomaly", - * "specs": [ - * { spec that contains or triggers the anomaly }, - * { another spec that contains or triggers the anomaly }, - * ... - * ] + * "spec": { spec that contains or triggers the anomaly } * } + * + * Some anomalies may be associated with more than one spec, when the code + * cannot tell which spec needs fixing (e.g., when checking duplicates while + * merging partials). In such cases, the `spec` property is replaced by a + * `specs` property that contains an array of specs. * - * All anomalies will be associated with at least one spec (so specs.length > 0) - * but some of them may be associated with more than one, when the code cannot - * tell which of them needs to be fixed (e.g. when checking duplicates while - * merging partials). - * - * The spec object returned in the "specs" array is the spec object provided in - * the crawl results parameter. 
+ * The spec object returned in the `spec` and `specs` properties is the spec + * object provided in the crawl results parameter. */ -import { recordCategorizedAnomaly } from './util.js'; import * as WebIDL2 from 'webidl2'; const getSpecs = list => [...new Set(list.map(({ spec }) => spec))]; const specName = spec => spec.shortname ?? spec.url; const dfnName = dfn => `${dfn.idl.partial ? 'partial ' : ''}${dfn.idl.type} "${dfn.idl.name}"`; -const possibleAnomalies = [ - 'incompatiblePartialIdlExposure', - 'invalid', - 'noExposure', - 'noOriginalDefinition', - 'overloaded', - 'redefined', - 'redefinedIncludes', - 'redefinedMember', - 'redefinedWithDifferentTypes', - 'singleEnumValue', - 'unexpectedEventHandler', - 'unknownExposure', - 'unknownExtAttr', - 'unknownType', - 'wrongCaseEnumValue', - 'wrongKind', - 'wrongType' -]; - const basicTypes = new Set([ // Types defined by Web IDL itself: 'any', // https://webidl.spec.whatwg.org/#idl-any @@ -192,7 +166,7 @@ function describeMember (member) { return desc; } -function studyWebIdl (edResults, curatedResults) { +function studyWebIdl (specs, { curatedResults = [] } = {}) { const report = []; // List of anomalies to report const dfns = {}; // Index of IDL definitions (save includes) const includesStatements = {}; // Index of "includes" statements @@ -201,7 +175,14 @@ function studyWebIdl (edResults, curatedResults) { const usedExtAttrs = {}; // Index of extended attributes // Record an anomaly for the given spec(s). - const recordAnomaly = recordCategorizedAnomaly(report, 'webidl', possibleAnomalies); + function recordAnomaly (spec, name, message) { + if (Array.isArray(spec)) { + report.push({ name, message, specs: spec }); + } + else { + report.push({ name, message, spec }); + } + } function inheritsFrom (iface, ancestor) { if (!iface.inheritance) return false; @@ -397,7 +378,7 @@ function studyWebIdl (edResults, curatedResults) { } } - edResults + specs // We're only interested in specs that define Web IDL content .filter(spec => !!spec.idl) @@ -666,7 +647,4 @@ function studyWebIdl (edResults, curatedResults) { return report; } -/************************************************** -Export methods for use as module -**************************************************/ export default studyWebIdl; diff --git a/src/lib/study.js b/src/lib/study.js new file mode 100644 index 00000000..20cb96f6 --- /dev/null +++ b/src/lib/study.js @@ -0,0 +1,498 @@ +import studyDfns from './study-dfns.js'; +import studyAlgorithms from './study-algorithms.js'; +import studyBackrefs from './study-backrefs.js'; +import studyRefs from './study-refs.js'; +import studyWebIdl from './study-webidl.js'; +import isInMultiSpecRepository from './is-in-multi-spec-repo.js'; +import { recordCategorizedAnomaly } from './util.js'; + +/** + * List of anomalies, grouped per study function + */ +const anomalyGroups = [ + { + name: 'generic', + title: 'Generic', + description: 'The following errors prevented the spec from being analyzed', + types: [ + { + name: 'error', + title: 'Crawl error', + description: 'The following crawl errors occurred' + } + ], + study: (specs) => specs + .filter(spec => !!spec.error) + .map(spec => Object.assign( + { name: 'error', message: spec.error, spec } + )) + }, + + { + name: 'dfns', + title: 'Problems with definitions', + description: 'The following problems were identified in term definitions', + types: [ + { + name: 'missingDfns', + title: 'Missing definitions', + description: 'The following constructs were found without a definition' + } + ], + study: 
studyDfns + }, + + { + name: 'backrefs', + title: 'Problems with links to other specs', + description: 'The following problems were identified when analyzing links to other specifications', + types: [ + { + name: 'brokenLinks', + title: 'Broken links', + description: 'The following links to other specifications were detected as pointing to non-existing anchors' + }, + { + name: 'datedUrls', + title: 'Links to dated TR URLs', + description: 'The following links target a dated version of a specification' + }, + { + name: 'evolvingLinks', + title: 'Links to now gone anchors', + description: 'The following links in the specification link to anchors that no longer exist in the Editor\'s Draft of the targeted specification' + }, + { name: 'frailLinks', title: 'Unstable link anchors' }, + { + name: 'nonCanonicalRefs', + title: 'Non-canonical links', + description: 'The following links were detected as pointing to outdated URLs' + }, + { + name: 'notDfn', + title: 'Links to unofficial anchors', + description: 'The following links were detected as pointing to anchors that are neither definitions or headings in the targeted specification' + }, + { + name: 'notExported', + title: 'Links to non-exported definitions', + description: 'The following links were detected as pointing to a private definition in the targeted specification' + }, + { + name: 'outdatedSpecs', + title: 'Outdated references', + description: 'The following links were detected as pointing to outdated specifications' + }, + { + name: 'unknownSpecs', + title: 'Links to unknown specs', + description: 'The following links were detected as pointing to documents that are not recognized as specifications' + } + ], + study: studyBackrefs, + studyParams: ['tr'] + }, + + { + name: 'algorithms', + title: 'Problems with algorithms', + description: 'The following problems were identified when analyzing algorithms', + types: [ + { + name: 'missingTaskForPromise', + title: 'Missing tasks in parallel steps to handle a promise', + description: 'The following algorithms resolve or reject a Promise within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task' + }, + { + name: 'missingTaskForEvent', + title: 'Missing tasks in parallel steps to fire an event', + description: 'The following algorithms fire an event within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task' + } + ], + study: studyAlgorithms + }, + + { + name: 'refs', + title: 'Problems with references', + description: 'The following problems were identified when analyzing the list of references', + types: [ + { + name: 'discontinuedReferences', + title: 'Normative references to discontinued specs', + description: 'The following normative references were detected as pointing to discontinued specifications' + }, + { + name: 'missingReferences', + title: 'Missing references', + description: 'The following links target specifications that are not mentioned in the list of references' + }, + { + name: 'inconsistentReferences', + title: 'Inconsistent reference links', + description: 'The following links use a different URL for the targeted specification from the URL defined in the references' + } + ], + study: studyRefs + }, + + { + name: 'webidl', + title: 'Web IDL problems', + description: 'The following Web IDL problems were identified', + types: [ + { name: 'incompatiblePartialIdlExposure', title: 'Incompatible `[Exposed]` attribute in 
partial definitions' }, + { name: 'invalid', title: 'Invalid Web IDL' }, + { name: 'noExposure', title: 'Missing `[Exposed]` attributes' }, + { name: 'noOriginalDefinition', title: 'Missing base interfaces' }, + { name: 'overloaded', title: 'Invalid overloaded operations' }, + { name: 'redefined', title: 'Duplicated IDL names' }, + { name: 'redefinedIncludes', title: 'Duplicated `includes` statements' }, + { name: 'redefinedMember', title: 'Duplicated members' }, + { name: 'redefinedWithDifferentTypes', title: 'Duplicated IDL names with different types' }, + { name: 'singleEnumValue', title: 'Enums with a single value' }, + { name: 'unexpectedEventHandler', title: 'Missing `EventTarget` inheritances' }, + { name: 'unknownExposure', title: 'Unknown globals in `[Exposed]` attribute' }, + { name: 'unknownExtAttr', title: 'Unknown extended attributes' }, + { name: 'unknownType', title: 'Unknown Web IDL type' }, + { name: 'wrongCaseEnumValue', title: 'Enums with wrong casing' }, + { name: 'wrongKind', title: 'Invalid inheritance chains' }, + { name: 'wrongType', title: 'Web IDL names incorrectly used as types' } + ], + study: studyWebIdl, + studyParams: ['curated'] + } +]; + + +/** + * Possible report structures + */ +const reportStructures = [ + 'flat', + 'type+spec', + 'group+spec>type', + 'spec>type', + 'spec>group>type', + 'type>spec', + 'group>type>spec', + 'group>spec>type' +]; + + +// Compute mapping between an anomaly type and its parent group +const anomalyToGroup = {}; +for (const group of anomalyGroups) { + for (const type of group.types) { + anomalyToGroup[type.name] = group; + } +} + +/** + * Return an object that describes the requested anomaly type + */ +function getAnomalyType(name) { + for (const group of anomalyGroups) { + const type = group.types.find(t => t.name === name); + if (type) { + return Object.assign({}, type); + } + } + return null; +} + +/** + * Return an object that describes the requested anomaly group + */ +function getAnomalyGroup(name) { + for (const group of anomalyGroups) { + if (group.name === name) { + return { + name: group.name, + title: group.title + }; + } + } + return null; +} + +/** + * Return an object that describes the requested anomaly group + * from the given anomaly type + */ +function getAnomalyGroupFromType(type) { + const name = anomalyToGroup[type]; + return getAnomalyGroup(name); +} + + +/** + * Structure a flat list of anomalies to the requested structure + */ +function structureResults(structure, anomalies, crawlResults) { + const levels = structure.split('>') + .map(level => level.replace(/\s+/g, '')); + const report = []; + + switch (levels[0]) { + case 'flat': + for (const anomaly of anomalies) { + report.push(anomaly); + } + break; + + case 'type+spec': + for (const anomaly of anomalies) { + const type = getAnomalyType(anomaly.name) + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.type.name === anomaly.name && + entry.spec.shortname === spec.shortname); + if (!entry) { + const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? 
+ `[${spec.shortname}] ` : ''; + entry = { + title: `${titlePrefix}${type.title} in ${spec.title}`, + type, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'group+spec': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.group.name === group.name && + entry.spec.shortname === spec.shortname); + if (!entry) { + const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? + `[${spec.shortname}] ` : ''; + entry = { + title: `${titlePrefix}${group.title} in ${spec.title}`, + group, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'spec': + for (const anomaly of anomalies) { + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.spec.shortname === spec.shortname); + if (!entry) { + entry = { + title: spec.title, + spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'type': + for (const anomaly of anomalies) { + const type = getAnomalyType(anomaly.name); + let entry = report.find(entry => entry.type.name === anomaly.name); + if (!entry) { + entry = { + title: type.title, + type, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + + case 'group': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + let entry = report.find(entry => entry.group.name === group.name); + if (!entry) { + entry = { + title: group.title, + group, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + } + + if (levels.length > 1) { + const itemsStructure = levels.slice(1).join('>'); + for (const entry of report) { + entry.items = structureResults(itemsStructure, entry.anomalies, crawlResults); + delete entry.anomalies; + } + } + return report; +} + + +function makeLowerCase(description) { + return description.charAt(0).toLowerCase() + description.slice(1); +} + +function pad(str, depth) { + while (depth > 1) { + str = ' ' + str; + depth -= 1; + } + return str; +} + +function serializeEntry(entry, depth = 0) { + let res = ''; + if (entry.spec && entry.group) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? entry.group.title)}:`; + } + else if (entry.spec && entry.type) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`; + } + else if (entry.group) { + if (depth === 0) { + res = (entry.group.description ?? entry.group.title) + ':'; + } + else { + res = pad(`* ${entry.group.title}`, depth); + } + } + else if (entry.type) { + if (depth === 0) { + res = (entry.type.description ?? entry.type.title) + ':'; + } + else { + res = pad(`* ${entry.type.title}`, depth); + } + } + else if (entry.spec) { + if (depth === 0) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`; + } + else { + res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth); + } + } + else if (entry.message) { + res = pad(`* ${entry.message}`, depth); + } + + for (const item of entry.items ?? []) { + res += '\n' + serializeEntry(item, depth + 1); + } + for (const anomaly of entry.anomalies ?? 
[]) { + res += `\n` + serializeEntry(anomaly, depth + 1); + } + + return res; +} + + +/** + * Format the structured report as JSON or markdown, or a combination of both + */ +function formatReport(format, report) { + if (format === 'json') { + return report; + } + else if (format === 'issue') { + return report.map(entry => Object.assign({ + title: entry.title, + content: serializeEntry(entry) + })); + } + else if (format === 'full') { + return [ + { + title: 'Study report', + content: report.map(entry => serializeEntry(entry)) + } + ] + } +} + + +/** + * Main function that studies a crawl result and returns a structured + * report. + */ +export default async function study(specs, options) { + options = Object.assign({}, options ?? {}); + const what = options.what ?? ['all']; + const structure = options.structure ?? 'type + spec'; + const format = options.format ?? 'issue'; + + if (!what.includes('all')) { + const validWhat = what.every(name => + group.find(g => g.name === name || g.types.find(t => t.name === name))); + if (!validWhat) { + throw new Error('Invalid `what` option'); + } + } + if (!reportStructures.find(s => structure.replace(/\s+/g, '') === s)) { + throw new Error('Invalid `structure` option'); + } + + // Only keep specs that caller wants to study + // (but note study functions that analyze references need the whole list!) + options.crawlResults = specs; + if (options.specs) { + specs = options.crawlResults.filter(spec => specs.find(s => s.shortname === spec.shortname)); + } + + // Anomalies are studied in groups of related anomalies, let's compute the + // studies that we need to run to answer the request + const groups = anomalyGroups.filter(group => + what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))); + + // Run studies and fill the anomaly report accordingly + let anomalies = []; + for (const group of groups) { + const studyResult = await group.study(specs, options); + const recordAnomaly = recordCategorizedAnomaly( + anomalies, group.name, group.types.map(t => t.name)); + studyResult.map(an => recordAnomaly(an.spec ?? 
an.specs, an.name, an.message)); + } + + // Only keep anomalies whose types we're interested in + anomalies = anomalies.filter(anomaly => + what.includes('all') || + what.includes(anomaly.name) || + what.includes(anomalyToGroup[anomaly.name].name)); + + // Now that we have a flat report of anomalies, + // let's structure and serialize it as requested + const report = structureResults(structure, anomalies, options.crawlResults); + + // And serialize it using the right format + const result = { + type: 'study', + date: (new Date()).toJSON(), + structure, + what, + stats: { + crawled: options.crawlResults.length, + studied: specs.length, + anomalies: anomalies.length + }, + results: formatReport(format, report) + }; + + // Return the structured report + return result; +} \ No newline at end of file diff --git a/test/study-algorithms.js b/test/study-algorithms.js new file mode 100644 index 00000000..c5b19888 --- /dev/null +++ b/test/study-algorithms.js @@ -0,0 +1,37 @@ +import study from '../src/lib/study-algorithms.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The algorithms analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult(algorithms) { + return [{ url: specUrl, algorithms }]; + } + + it('reports no anomaly if there are no algorithms', () => { + const crawlResult = toCrawlResult([]); + const report = study(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports an error when a step resolves a promise in parallel', () => { + const crawlResult = toCrawlResult([ + { + html: 'The encodingInfo() method MUST run the following steps:', + rationale: 'if', + steps: [ + { html: 'Let p be a new promise.' }, + { html: 'In parallel, run the Create a MediaCapabilitiesEncodingInfo algorithm with configuration and resolve p with its result.' }, + { html: 'Return p.' 
} + ] + } + ]); + const report = study(crawlResult); + assertAnomaly(report, 0, { + name: 'missingTaskForPromise', + message: 'The algorithm that starts with "The encodingInfo() method MUST run the following steps:" has a parallel step that resolves/rejects a promise directly', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-backrefs.js b/test/study-backrefs.js index ea755137..8e6e4eac 100644 --- a/test/study-backrefs.js +++ b/test/study-backrefs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyBackrefs from '../src/lib/study-backrefs.js'; +import study from '../src/lib/study-backrefs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -48,28 +48,33 @@ const populateSpec = (url, ids, links, dfns) => { function toCrawlResults (ids, links, trIds = ids) { return { - ed: [populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), - populateSpec(specEdUrl2, [], toLinks(specEdUrl, links))], - tr: [populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), [])] + ed: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), + populateSpec(specEdUrl2, [], toLinks(specEdUrl, links)) + ], + tr: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), []) + ] }; } describe('The links analyser', () => { - it('reports no anomaly if links are valid', () => { + it('reports no anomaly if links are valid', async () => { const ids = ['validid']; const crawlResult = toCrawlResults(ids, ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 0); }); - it('reports a broken link', () => { + it('reports a broken link', async () => { const ids = ['validid']; const crawlResult = toCrawlResults([], ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'links', - message: specEdUrl + '#' + ids[0] + name: 'brokenLinks', + message: specEdUrl + '#' + ids[0], + spec: { url: 'https://www.w3.org/TR/spec2/' } }); }); diff --git a/test/study-dfns.js b/test/study-dfns.js new file mode 100644 index 00000000..b656c52c --- /dev/null +++ b/test/study-dfns.js @@ -0,0 +1,39 @@ +import studyDefinitions from '../src/lib/study-dfns.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The definitions analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult({ css = {}, dfns = [], idlparsed = {} }) { + const crawlResult = [{ + url: specUrl, + css, dfns, idlparsed + }]; + return crawlResult; + } + + it('reports no anomaly if there are no definitions', () => { + const crawlResult = toCrawlResult({}); + const report = studyDefinitions(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports missing definition anomalies from CSS extracts', () => { + const crawlResult = toCrawlResult({ + css: { + warnings: [{ + msg: 'Missing definition', + name: 'no-def', + type: 'value' + }] + } + }); + const report = studyDefinitions(crawlResult); + assertAnomaly(report, 0, { + name: 'missingDfns', + message: '`no-def` with type `value`', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-refs.js b/test/study-refs.js index 11a9db18..3250e183 100644 --- a/test/study-refs.js 
+++ b/test/study-refs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyReferences from '../src/lib/study-refs.js'; +import study from '../src/lib/study-refs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -14,6 +14,9 @@ function toRefs (name, url) { return [ {name, url} ]; } +const toTr = url => url.replace( + 'https://w3c.github.io', + 'https://www.w3.org/TR'); const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { const shortname = url.slice(0, -1).split('/').pop(); @@ -25,6 +28,9 @@ const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { nightly: { url }, + release: { + url: toTr(url) + }, shortname, standing, obsoletedBy @@ -39,31 +45,62 @@ function toEdCrawlResults (standing = "good", replacements) { ]; } -describe('The reference analyser', () => { +describe('The references analyser', () => { it('reports no anomaly if references are not discontinued', () => { const crawlResult = toEdCrawlResults(); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 0); }); it('reports a discontinued reference with a replacement', () => { const crawlResult = toEdCrawlResults("discontinued", ["spec3"]); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /spec3/ + name: 'discontinuedReferences', + message: /spec3/, + spec: { url: specEdUrl } }); }); it('reports a discontinued reference without a replacement', () => { const crawlResult = toEdCrawlResults("discontinued"); - const report = studyReferences(crawlResult); + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'discontinuedReferences', + message: /no known replacement/, + spec: { url: specEdUrl } + }); + }); + + it('reports a missing reference', () => { + const spec = populateSpec(specEdUrl); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const crawlResult = [spec]; + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /no known replacement/ + name: 'missingReferences', + message: specEdUrl2, + spec: { url: specEdUrl } }); }); + it('reports an inconsistent reference', () => { + const spec = populateSpec(specEdUrl, toRefs('spec2', toTr(specEdUrl2))); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const spec2 = populateSpec(specEdUrl2); + spec2.versions = [toTr(specEdUrl2)]; + const crawlResult = [spec, spec2]; + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'inconsistentReferences', + message: `${specEdUrl2}, related reference "spec2" uses URL ${toTr(specEdUrl2)}`, + spec: { url: specEdUrl } + }); + }); }); diff --git a/test/study-webidl.js b/test/study-webidl.js index 4660b0e0..52e13a3e 100644 --- a/test/study-webidl.js +++ b/test/study-webidl.js @@ -4,7 +4,7 @@ */ /* global describe, it */ -import studyWebIdl from '../src/lib/study-webidl.js'; +import study from '../src/lib/study-webidl.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; describe('The Web IDL analyser', () => { @@ -21,7 +21,7 @@ describe('The Web IDL analyser', () => { function analyzeIdl (idl, idlSpec2) { const crawlResult = toCrawlResult(idl, idlSpec2); - return studyWebIdl(crawlResult); + return study(crawlResult); } it('reports no 
anomaly if IDL is valid', () => { @@ -86,12 +86,11 @@ interface Invalid; `); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'webidl', name: 'invalid', message: `Syntax error at line 3, since \`interface Invalid\`: interface Invalid; ^ Bodyless interface`, - specs: [{ url: specUrl }] + spec: { url: specUrl } }); }); @@ -105,11 +104,11 @@ interface Invalid; [Global=Window,Exposed=*] interface Valid: Invalid {}; `); - const curatedResult = toCrawlResult(` + const curatedResults = toCrawlResult(` [Global=Window,Exposed=*] interface Invalid{}; `); - const report = studyWebIdl(crawlResult, curatedResult); + const report = study(crawlResult, { curatedResults }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { name: 'invalid' }); }); diff --git a/test/study.js b/test/study.js new file mode 100644 index 00000000..986813a6 --- /dev/null +++ b/test/study.js @@ -0,0 +1,150 @@ +import study from '../src/lib/study.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +const specUrl = 'https://w3c.github.io/world/'; +const specUrl2 = 'https://w3c.github.io/universe/'; + +function toTr(url) { + return url.replace('https://w3c.github.io', 'https://www.w3.org/TR'); +} + +function populateSpec(url, crawl) { + const shortname = url.slice(0, -1).split('/').pop(); + const spec = Object.assign({ + shortname, + title: `Hello ${shortname} API`, + url: toTr(url), + nightly: { url }, + release: { url: toTr(url) }, + crawled: url + }, crawl); + return spec; +} + +describe('The main study function', function () { + this.slow(5000); + this.timeout(10000); + + it('reports no anomaly when spec is empty', async function() { + const crawlResult = [{ url: specUrl }]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 0); + }); + + it('reports anomalies per type and spec by default', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Crawl error in Hello world API', + content: `While crawling [Hello world API](${specUrl}), the following crawl errors occurred:\n* Boo` + }); + assertAnomaly(report.results, 1, { + title: 'Crawl error in Hello universe API', + content: `While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred:\n* Borked` + }); + }); + + it('reports anomalies per type when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'type>spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Crawl error', + content: `The following crawl errors occurred: +* [Hello world API](https://w3c.github.io/world/) + * Boo +* [Hello universe API](https://w3c.github.io/universe/) + * Borked` + }); + }); + + it('reports anomalies per spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following 
anomalies were identified: +* Crawl error + * Boo` + }); + }); + + it('reports anomalies per spec and groups anomalies when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec>group>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: +* Generic + * Crawl error + * Boo` + }); + }); + + it('reports anomalies per group and spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group+spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Generic in Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed: +* Crawl error + * Boo` + }); + }); + + it('reports anomalies per group, with anomaly type as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group>type>spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: `The following errors prevented the spec from being analyzed: +* Crawl error + * [Hello world API](https://w3c.github.io/world/) + * Boo + * [Hello universe API](https://w3c.github.io/universe/) + * Borked` + }); + }); + + it('reports anomalies per group, with spec as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group>spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: `The following errors prevented the spec from being analyzed: +* [Hello world API](https://w3c.github.io/world/) + * Crawl error + * Boo +* [Hello universe API](https://w3c.github.io/universe/) + * Crawl error + * Borked` + }); + }); +}); \ No newline at end of file From 8f3376172b3af8e35e855bebdd7519f9a3506ab2 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Wed, 21 Aug 2024 14:22:51 +0200 Subject: [PATCH 2/9] Re-write the main strudy CLI Time to rewrite the CLI. The CLI now gets divided into commands. The only available command for now is `inspect`, but a `view` command could perhaps be created afterwards to create views on the crawl report, such as the previous dependencies report (what specs have a normative reference on a given spec). New options got added to the CLI: ``` -f, --format report markdown or json (default: "markdown") -i, --issues report issues as markdown files in the given folder -m, --max maximum number of issue files to create/update (default: 0) --structure report structure (default: "type+spec") --update-mode what issue files to update (default: "new") -w, --what what to analyze (default: ["all"]) ``` Library code updated to fix a few bugs, integrate feedback, and complete the logic where needed. 
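For instance, assuming a Reffy crawl is available under `../webref` (with an `ed/index.json` file), an invocation of the new command might look like:

```
node strudy.js inspect ../webref --what brokenLinks --issues issues --max 10
```

The crawl path and option values above are only meant as an illustration.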
Tests remain fairly minimal for now. --- src/lib/study-backrefs.js | 4 +- src/lib/study-dfns.js | 28 +-- src/lib/study-webidl.js | 20 ++- src/lib/study.js | 160 +++++++++++------ strudy.js | 354 +++++++++++++++++++++++--------------- test/cli.js | 52 ++++-- test/data/empty.json | 3 + test/study.js | 65 ++++--- 8 files changed, 440 insertions(+), 246 deletions(-) create mode 100644 test/data/empty.json diff --git a/src/lib/study-backrefs.js b/src/lib/study-backrefs.js index 2d046d82..0d8612d8 100644 --- a/src/lib/study-backrefs.js +++ b/src/lib/study-backrefs.js @@ -116,7 +116,7 @@ function computeShortname(url) { return url; } - throw `Cannot extract meaningful name from ${url}`; + throw new Error(`Cannot extract meaningful name from ${url}`); } // Parse the URL to extract the name @@ -127,7 +127,7 @@ function computeShortname(url) { // only contains a dot for fractional levels at the end of the name // (e.g. "blah-1.2" is good but "blah.blah" and "blah-3.1-blah" are not) if (!name.match(/^[\w\-]+((?<=\-v?\d+)\.\d+)?$/)) { - throw `Specification name contains unexpected characters: ${name} (extracted from ${url})`; + throw new Error(`Specification name contains unexpected characters: ${name} (extracted from ${url})`); } return name; diff --git a/src/lib/study-dfns.js b/src/lib/study-dfns.js index b779b720..0b00e5f4 100644 --- a/src/lib/study-dfns.js +++ b/src/lib/study-dfns.js @@ -87,7 +87,7 @@ function matchCSSDfn(expected, actual) { * * @function * @private - * @param {Object} css The root of the object that describes IDL terms in the + * @param {Object} idl The root of the object that describes IDL terms in the * `idlparsed` extract. * @return {Array} An array of expected definitions */ @@ -108,7 +108,7 @@ function getExpectedDfnsFromIdl(idl = {}) { /** * Return true if the given parsed IDL object describes a default toJSON * operation that references: - * https://heycam.github.io/webidl/#default-tojson-steps + * https://webidl.spec.whatwg.org/#default-tojson-steps * * @function * @private @@ -127,9 +127,9 @@ function isDefaultToJSONOperation(desc) { * * @function * @public - * @param {Object} desc The object that describes the IDL term in the + * @param {Object} idl The object that describes the IDL term in the * `idlparsed` extract. - * @param {Object} parentDesc (optional) The object that describes the parent + * @param {Object} parentIdl (optional) The object that describes the parent * IDL term of the term to parse (used to compute the `for` property). * @return {Object} The expected definition, or null if no expected definition * is defined. 
@@ -305,7 +305,7 @@ function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) { * * The function works around Respec's issue #3200 for methods and constructors * that take only optional parameters: - * https://github.com/w3c/respec/issues/3200 + * https://github.com/speced/respec/issues/3200 * * @function * @private @@ -462,14 +462,16 @@ export default function studyDefinitions(specs) { .map(spec => { const missing = checkSpecDefinitions(spec); const res = []; - for (const type of ['css', 'idl']) { - const anomalies = missing[type]; - for (const anomaly of anomalies) { - res.push({ - name: 'missingDfns', - message: formatAnomalyMessage(anomaly), - spec - }); + if (!missing.obsoleteDfnsModel) { + for (const type of ['css', 'idl']) { + const anomalies = missing[type]; + for (const anomaly of anomalies) { + res.push({ + name: 'missingDfns', + message: formatAnomalyMessage(anomaly), + spec + }); + } } } return res; diff --git a/src/lib/study-webidl.js b/src/lib/study-webidl.js index 30e9b5ab..f5711326 100644 --- a/src/lib/study-webidl.js +++ b/src/lib/study-webidl.js @@ -166,7 +166,7 @@ function describeMember (member) { return desc; } -function studyWebIdl (specs, { curatedResults = [] } = {}) { +function studyWebIdl (specs, { crawledResults = [], curatedResults = [] } = {}) { const report = []; // List of anomalies to report const dfns = {}; // Index of IDL definitions (save includes) const includesStatements = {}; // Index of "includes" statements @@ -174,13 +174,19 @@ function studyWebIdl (specs, { curatedResults = [] } = {}) { const usedTypes = {}; // Index of types used in the IDL const usedExtAttrs = {}; // Index of extended attributes - // Record an anomaly for the given spec(s). + // Record an anomaly for the given spec(s), + // provided we are indeed interested in the results function recordAnomaly (spec, name, message) { if (Array.isArray(spec)) { - report.push({ name, message, specs: spec }); + const filtered = spec.filter(sp => specs.find(s => s.shortname === sp.shortname)); + if (filtered.length > 0) { + report.push({ name, message, specs: filtered }); + } } else { - report.push({ name, message, spec }); + if (specs.find(s => s.shortname === spec.shortname)) { + report.push({ name, message, spec }); + } } } @@ -378,7 +384,11 @@ function studyWebIdl (specs, { curatedResults = [] } = {}) { } } - specs + // We need to run the analysis on all specs, even if caller is only + // interested in a few of them, because types may be defined in specs that + // the caller is not interested in. + const allSpecs = (crawledResults.length > 0) ? crawledResults : specs; + allSpecs // We're only interested in specs that define Web IDL content .filter(spec => !!spec.idl) diff --git a/src/lib/study.js b/src/lib/study.js index 20cb96f6..23a3dab0 100644 --- a/src/lib/study.js +++ b/src/lib/study.js @@ -62,7 +62,11 @@ const anomalyGroups = [ title: 'Links to now gone anchors', description: 'The following links in the specification link to anchors that no longer exist in the Editor\'s Draft of the targeted specification' }, - { name: 'frailLinks', title: 'Unstable link anchors' }, + { + name: 'frailLinks', + title: 'Unstable link anchors', + description: 'The following links in the specification link to anchors that either have a new name or are inherently brittle' + }, { name: 'nonCanonicalRefs', title: 'Non-canonical links', @@ -166,17 +170,29 @@ const anomalyGroups = [ /** - * Possible report structures + * Possible report structures. + * + * "/" separates levels in the hierarchy. 
+ * "+" combines creates a composed key at a given level. + * + * For example, "group+spec/type" means: first level per + * anomaly group and spec (so one "web-animations-2-webidl" entry if the + * spec "web-animations-2" has "webidl" issues), second level per type. + * + * The list is described in more details in the CLI help. Run: + * npx strudy inspect --help + * ... or check the code in `strudy.js` at the root of the project. */ const reportStructures = [ 'flat', 'type+spec', - 'group+spec>type', - 'spec>type', - 'spec>group>type', - 'type>spec', - 'group>type>spec', - 'group>spec>type' + 'group+spec', + 'group+spec/type', + 'spec/type', + 'spec/group/type', + 'type/spec', + 'group/type/spec', + 'group/spec/type' ]; @@ -230,7 +246,7 @@ function getAnomalyGroupFromType(type) { * Structure a flat list of anomalies to the requested structure */ function structureResults(structure, anomalies, crawlResults) { - const levels = structure.split('>') + const levels = structure.split('/') .map(level => level.replace(/\s+/g, '')); const report = []; @@ -243,7 +259,7 @@ function structureResults(structure, anomalies, crawlResults) { case 'type+spec': for (const anomaly of anomalies) { - const type = getAnomalyType(anomaly.name) + const type = getAnomalyType(anomaly.name); for (const spec of anomaly.specs) { let entry = report.find(entry => entry.type.name === anomaly.name && @@ -252,6 +268,7 @@ function structureResults(structure, anomalies, crawlResults) { const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? `[${spec.shortname}] ` : ''; entry = { + name: `${spec.shortname}-${type.name.toLowerCase()}`, title: `${titlePrefix}${type.title} in ${spec.title}`, type, spec, anomalies: [] }; @@ -273,6 +290,7 @@ function structureResults(structure, anomalies, crawlResults) { const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? `[${spec.shortname}] ` : ''; entry = { + name: `${spec.shortname}-${group.name.toLowerCase()}`, title: `${titlePrefix}${group.title} in ${spec.title}`, group, spec, anomalies: [] }; @@ -290,6 +308,7 @@ function structureResults(structure, anomalies, crawlResults) { entry.spec.shortname === spec.shortname); if (!entry) { entry = { + name: spec.shortname, title: spec.title, spec, anomalies: [] }; @@ -306,6 +325,7 @@ function structureResults(structure, anomalies, crawlResults) { let entry = report.find(entry => entry.type.name === anomaly.name); if (!entry) { entry = { + name: type.name.toLowerCase(), title: type.title, type, anomalies: [] }; @@ -321,6 +341,7 @@ function structureResults(structure, anomalies, crawlResults) { let entry = report.find(entry => entry.group.name === group.name); if (!entry) { entry = { + name: group.name.toLowerCase(), title: group.title, group, anomalies: [] }; @@ -332,7 +353,7 @@ function structureResults(structure, anomalies, crawlResults) { } if (levels.length > 1) { - const itemsStructure = levels.slice(1).join('>'); + const itemsStructure = levels.slice(1).join('/'); for (const entry of report) { entry.items = structureResults(itemsStructure, entry.anomalies, crawlResults); delete entry.anomalies; @@ -354,49 +375,74 @@ function pad(str, depth) { return str; } -function serializeEntry(entry, depth = 0) { - let res = ''; - if (entry.spec && entry.group) { - res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? 
entry.group.title)}:`; - } - else if (entry.spec && entry.type) { - res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`; - } - else if (entry.group) { - if (depth === 0) { - res = (entry.group.description ?? entry.group.title) + ':'; +function serializeEntry(entry, format, depth = 0) { + let res; + if (format === 'json') { + res = Object.assign({}, entry); + if (entry.spec) { + res.spec = { + url: entry.spec.url, + shortname: entry.spec.shortname, + title: entry.spec.title + }; } - else { - res = pad(`* ${entry.group.title}`, depth); + if (entry.specs) { + res.specs = entry.specs.map(spec => Object.assign({ + url: spec.url, + shortname: spec.shortname, + title: spec.title + })); } - } - else if (entry.type) { - if (depth === 0) { - res = (entry.type.description ?? entry.type.title) + ':'; + if (entry.items) { + res.items = entry.items.map(item => serializeEntry(item, format, depth + 1)); } - else { - res = pad(`* ${entry.type.title}`, depth); + if (entry.anomalies) { + res.anomalies = entry.anomalies.map(anomaly => serializeEntry(anomaly, format, depth + 1)); } } - else if (entry.spec) { - if (depth === 0) { - res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`; + else if (format === 'markdown') { + res = ''; + if (entry.spec && entry.group) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? entry.group.title)}:`; } - else { - res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth); + else if (entry.spec && entry.type) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`; + } + else if (entry.group) { + if (depth === 0) { + res = (entry.group.description ?? entry.group.title) + ':'; + } + else { + res = pad(`* ${entry.group.title}`, depth); + } + } + else if (entry.type) { + if (depth === 0) { + res = (entry.type.description ?? entry.type.title) + ':'; + } + else { + res = pad(`* ${entry.type.title}`, depth); + } + } + else if (entry.spec) { + if (depth === 0) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`; + } + else { + res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth); + } + } + else if (entry.message) { + res = pad(`* [ ] ${entry.message}`, depth); } - } - else if (entry.message) { - res = pad(`* ${entry.message}`, depth); - } - for (const item of entry.items ?? []) { - res += '\n' + serializeEntry(item, depth + 1); - } - for (const anomaly of entry.anomalies ?? []) { - res += `\n` + serializeEntry(anomaly, depth + 1); + for (const item of entry.items ?? []) { + res += '\n' + serializeEntry(item, format, depth + 1); + } + for (const anomaly of entry.anomalies ?? 
[]) { + res += `\n` + serializeEntry(anomaly, format, depth + 1); + } } - return res; } @@ -406,19 +452,26 @@ function serializeEntry(entry, depth = 0) { */ function formatReport(format, report) { if (format === 'json') { - return report; + // We'll return the report as is, trimming the information about specs to + // a reasonable minimum (the rest of the information can easily be + // retrieved from the crawl result if needed) + return report.map(entry => serializeEntry(entry, 'json')); } else if (format === 'issue') { return report.map(entry => Object.assign({ + name: entry.name, title: entry.title, - content: serializeEntry(entry) + spec: entry.spec, + content: serializeEntry(entry, 'markdown') })); } else if (format === 'full') { return [ { title: 'Study report', - content: report.map(entry => serializeEntry(entry)) + content: report.map(entry => +`## ${entry.title} +${serializeEntry(entry, 'markdown')}`) } ] } @@ -429,15 +482,18 @@ function formatReport(format, report) { * Main function that studies a crawl result and returns a structured * report. */ -export default async function study(specs, options) { - options = Object.assign({}, options ?? {}); +export default async function study(specs, options = {}) { + // Copy the options object (we're going to add options on our own + // before calling other study methods) + options = Object.assign({}, options); + const what = options.what ?? ['all']; const structure = options.structure ?? 'type + spec'; const format = options.format ?? 'issue'; if (!what.includes('all')) { const validWhat = what.every(name => - group.find(g => g.name === name || g.types.find(t => t.name === name))); + anomalyGroups.find(g => g.name === name || g.types.find(t => t.name === name))); if (!validWhat) { throw new Error('Invalid `what` option'); } @@ -450,7 +506,7 @@ export default async function study(specs, options) { // (but note study functions that analyze references need the whole list!) options.crawlResults = specs; if (options.specs) { - specs = options.crawlResults.filter(spec => specs.find(s => s.shortname === spec.shortname)); + specs = options.crawlResults.filter(spec => options.specs.find(shortname => shortname === spec.shortname)); } // Anomalies are studied in groups of related anomalies, let's compute the diff --git a/strudy.js b/strudy.js index a766d8fb..c74dda9e 100644 --- a/strudy.js +++ b/strudy.js @@ -7,27 +7,25 @@ * Provided Strudy was installed as a global package, the spec analyzer can be * called directly through: * - * `strudy [options] [report]` - * - * Use the `--help` option for usage instructions. 
+ * `strudy --help` * * If Strudy was not installed as a global package, call: * - * `node strudy.js [options] [report]` + * `node strudy.js --help` * * @module crawler */ -import { Command } from 'commander'; +import { Command, InvalidArgumentError } from 'commander'; import { constants as fsConstants } from 'node:fs'; import fs from 'node:fs/promises'; -import pandoc from 'node-pandoc'; import path from 'node:path'; import satisfies from 'semver/functions/satisfies.js'; import packageContents from './package.json' with { type: 'json' }; -import studyCrawl from './src/lib/study-crawl.js'; -import generateReport from './src/lib/generate-report.js'; +import study from './src/lib/study.js'; import loadJSON from './src/lib/load-json.js'; +import { expandCrawlResult } from 'reffy'; +import matter from 'gray-matter'; // Warn if version of Node.js does not satisfy requirements const { version, engines } = packageContents; @@ -48,55 +46,48 @@ async function exists(file) { } } - -async function isStudyReport(file) { - const fd = await fs.open(file, 'r'); - try { - const buff = Buffer.alloc(1024); - await fd.read(buff, 0, 1024); - const str = buff.toString(); - if (str.match(/"type"\s*:\s*"study"/)) { - return true; - } - } - catch { - return false; - } - finally { - await fd.close(); +function myParseInt(value) { + const parsedValue = parseInt(value, 10); + if (isNaN(parsedValue)) { + throw new InvalidArgumentError('Not a number.'); } + return parsedValue; } - const program = new Command(); program .name('strudy') - .description('Analyzes a crawl report generated by Reffy') - .version(version) - .usage('[options] ') - .argument('', 'Path/URL to crawl report or study file') - .option('-f, --format ', 'create a markdown/HTML report from study file') - .option('-d, --diff ', 'create a diff from some reference study') + .description('Analyzes a crawl report generated by Reffy to detect anomalies in specifications') + .version(version); + +program + .command('inspect') + .alias('study') + .argument('', 'Path/URL to crawl report') + .option('-f, --format ', 'report markdown or json', 'markdown') + .option('-i, --issues ', 'report issues as markdown files in the given folder') + .option('-m, --max ', 'maximum number of issue files to create/update', myParseInt, 0) .option('-s, --spec ', 'restrict analysis to given specs') - .option('--dep', 'create a dependencies report') - .option('--onlynew', 'only include new diff in the diff report') - .option('--perissue', 'create a markdown/HTML report per issue') - .option('--tr ', 'Path/URL to crawl report on published specs') + .option('--structure ', 'report structure', 'type+spec') + .option('--tr ', 'path/URL to crawl report on published specs') + .option('--update-mode ', 'what issue files to update', 'new') + .option('-w, --what ', 'what to analyze', ['all']) .showHelpAfterError('(run with --help for usage information)') .addHelpText('after', ` Minimal usage example: To study a crawl report in current folder: - $ strudy . + $ strudy inspect . Description: Analyzes a crawl report generated by Reffy and create a report with potential anomalies in each of the specs contained in the crawl report. - The report is written to the console as a serialized JSON object or as a - markdown or HTML report depending on command options. + Depending on command options, the report is either written to the console as + a serialized JSON object or as a markdown report (see the --format option), + or written to individual issues files in a folder (see the --issues option). 
Argument: - + Path to the crawl report to analyze. If the path leads to a folder, Strudy will look for an "ed/index.json" file under that folder first (if it exists, it will also look for a possible "tr/index.json" file to set the --tr option), @@ -105,51 +96,65 @@ Argument: Usage notes for some of the options: -f, --format Tell Strudy to return a report in the specified format. Format may be one of - "json" (default when option is not set), "markdown" or "html". + "markdown" (default when option is not set) or "json". + + The --format option cannot be set to "json" if the --issues option is set. + +-i, --issues + Tell Strudy to report the anomalies in anomaly files in the given folder. + An anomaly file gets created for and named after keys at the first level of + the report (see --structure option). - When the option is specified to either "markdown" or "html", the report - pointed to by may be a JSON file that contains a Strudy report. + Anomaly files are in markdown. The --format option must be set to "markdown", + or not set at all. --d, --diff - Tell Strudy tool to return a diff from the provided reference Strudy report. - must point to a Strudy report. + Anomaly files start with metadata, used to convert the file to a GitHub issue + and track the resolution of the issue afterwards: "Repo" sets the repository + for the issue, "Title" the title of the issue, and "Tracked" the URL of the + issue, once created. - When the option is specified, the report pointed to by may be a JSON - file that contains a Strudy report. + Existing anomaly files in the folder are preserved by default, set the + --update-mode option to change that behavior. - Diff reports are in markdown and the "--format" option, if specified, must be - "markdown". +-m, --max + Maximum number of issue files to add or update. Defaults to 0, which means + "no limit". - The --diff option and the --dep option cannot both be set. + This setting should only be useful when combined with --issues to create + issue files in batches. It may also be set in the absence of --issues, in + which case it restricts the number of entries at the first level of the + report (see --structure). -s, --spec - Valid spec values may be a shortname, a URL, or a relative path to JSON file - that contains a list of spec URLs and/or shortnames. Shortnames may be the - shortname of the spec series. + Valid spec values may be a shortname, a URL, or a relative path to a JSON + file that contains a list of spec URLs and/or shortnames. Shortnames may be + the shortname of the spec series. Use "all" to include all specs. This is equivalent to not setting the option at all. For instance: - $ strudy . --spec picture-in-picture https://w3c.github.io/mediasession/ - ---dep - Tell Strudy to return a dependencies report. - - When the option is specified, the report pointed to by may be a JSON - file that contains a Strudy report. - - Dependencies reports are in markdown and the "--format" option, if specified, - must be "markdown". - - The --diff option and the --dep option cannot both be set. - ---perissue - Markdown/HTML reports are per spec by default. Set this option to tell Strudy - to generate markdown/HTML reports per issue instead. - - The --diff option must not be set. - The --format option must be set to either "markdown" or "html". + $ strudy inspect . --spec picture-in-picture https://w3c.github.io/mediasession/ + +--structure + Describes the hierarchy in the report(s) that Strudy returns. 
Possible values: + "flat" no level, report anomalies one by one + "type+spec" one level with one entry per type and spec (default) + "group+spec/type" first level per group and spec, second level per type + "spec/type" first level per spec, second level per type + "spec/group/type" first level per spec, second level per group, third level + per type + "type/spec" first level per type, second level per spec + "group/type/spec" first level per group, second level per type, third level + per spec + "group/spec/type" first level per group, second level per spec, third level + per type + + Last level contains the actual list of anomalies. + + Note: an anomaly always has a "type". Related anomaly types are grouped in an + anomaly "group". For example, "brokenLinks" and "datedUrls" both belong to + the "backrefs" group (also see the --what option). --tr Useful for Strudy to refine its broken link analysis when crawl report @@ -159,113 +164,180 @@ Usage notes for some of the options: version lags behind the Editor's Draft may have issues of the form "The term exists in the /TR version but no longer exists in the Editor's Draft". - Note that if is a link to a folder, the tool will automatically look + Note that if is a link to a folder, the tool will automatically look for the TR crawl report in a "tr" subfolder and set itself. + +--update-mode + Tell Strudy what issue files to update when --issues is set and an issue file + already exists for the issue at hand. Possible values are: + "new" (default) preserve existing files + "untracked" update existing files that do not have a "Tracked" URL + "tracked" update existing files that have a "Tracked" URL + "all" update all existing files + + Strudy will always create new issue files, the mode only changes the behavior + for existing issue files. + + The --issues option must be set. + +-w, --what + Tell Strudy which anomalies to analyze. Values can be the names of anomaly + types or the name of anomaly groups. The value "all" (default) tells Strudy + to analyze and report on all possible anomalies. + + The list of anomaly types and groups will likely evolve over time, see actual + list in src/lib/study.js. + + Examples: + "-w algorithms -w backrefs" to study algorithms and references to other specs + "-w unknownSpecs" to study links to unknown specs `) .action(async (report, options) => { - if (options.format && !['json', 'markdown', 'html'].includes(options.format)) { + // Check options + if (options.format && !['json', 'markdown'].includes(options.format)) { console.error(`Unsupported --format option "${options.format}". -Format must be one of "json", "markdown" or "html".`) +Format must be one of "json" or "markdown".`) process.exit(2); } - if (options.diff && options.format && (options.format !== 'markdown')) { - console.error(`Diff reports are always in markdown. 
-The --format option can only be set to "markdown" when --diff is used.`); + if (options.format !== 'markdown' && options.issues) { + console.error(`The --format option can only be set to "markdown" when --issues is used.`); process.exit(2); } - if (options.diff && options.perissue) { - console.error('The --diff and --perissue options cannot both be set.'); + if (options.updateMode && !['new', 'untracked', 'tracked', 'all'].includes(options.updateMode)) { + console.error(`Unsupported --update-mode option "${options.updateMode}"`); process.exit(2); } - if (options.perissue && !['markdown', 'html'].includes(options.format)) { - console.error('The --format option must be "markdown" or "html" when --perissue is set.') + if (options.updateMode !== 'new' && !options.issues) { + console.error('The --update-mode option can only be set when --issues is set'); process.exit(2); } - if (options.dep && options.diff) { - console.error('The --dep and --diff options cannot both be set.'); + if (options.issues && !await exists(options.issues)) { + console.error(`Could not find/access the folder to store anomalies: ${options.issues}`) process.exit(2); } - let edReport = report; - let trReport = options.tr; + // Load (and expand) the crawl results + let edReportFile = report; + let trReportFile = options.tr; if (!report.endsWith('.json')) { if (await exists(path.join(report, 'ed'))) { - edReport = path.join(report, 'ed'); - if (!trReport && await exists(path.join(report, 'tr'))) { - trReport = path.join(report, 'tr'); + edReportFile = path.join(report, 'ed'); + if (!trReportFile && await exists(path.join(report, 'tr'))) { + trReportFile = path.join(report, 'tr'); } } - edReport = path.join(edReport, 'index.json'); + edReportFile = path.join(edReportFile, 'index.json'); } - if (!await exists(edReport)) { + if (!await exists(edReportFile)) { console.error(`Could not find/access crawl/study report: ${report}`); process.exit(2); } - if (trReport) { - if (!trReport.endsWith('.json')) { - trReport = path.join(trReport, 'index.json'); + if (trReportFile) { + if (!trReportFile.endsWith('.json')) { + trReportFile = path.join(trReportFile, 'index.json'); } - if (!await exists(trReport)) { + if (!await exists(trReportFile)) { console.error(`Could not find/access TR crawl report: ${options.tr}`); process.exit(2); } } - // Specified report may already be the study report - // To find out, we'll do a bit of content sniffing to avoid loading the - // report twice (report file may be somewhat large). - let study = null; - const isStudy = await isStudyReport(edReport); - if (isStudy) { - study = await loadJSON(edReport); + let edReport = await loadJSON(edReportFile); + edReport = await expandCrawlResult(edReport, path.dirname(edReportFile)); + + let trReport; + if (trReportFile) { + trReport = await loadJSON(trReportFile); + trReport = await expandCrawlResult(trReport, path.dirname(trReportFile)); } - if (!study) { - const studyOptions = { - include: options.spec ?? null, - trResults: trReport + // Create a structured anomaly report out of the crawl report + const anomaliesReport = await study(edReport.results, { + what: options.what, + structure: options.structure, + format: options.format === 'json' ? + 'json' : + (options.issues ? 'issue' : 'full'), + trResults: trReport?.results ?? [], + specs: options.spec + }); + + // Output the structured anomaly report + if (options.format === 'json') { + // Caller wants a JSON report. 
We'll just trim the number of anomalies + // in the first level to the requested maximum as needed + if (options.max > 0) { + anomaliesReport.results = anomaliesReport.results.slice(0, options.max); } - study = await studyCrawl(edReport, studyOptions); + console.log(JSON.stringify(anomaliesReport, null, 2)); } + else if (options.issues) { + // Caller wants to add/update issue files in the provided folder. + // Issue files are formatted with the gray-matter library to save useful + // metadata as front matter in the file. + let reported = 0; + for (const entry of anomaliesReport.results) { + const filename = path.join(options.issues, `${entry.name}.md`); + let existingReport; + let tracked = 'N/A'; + if (await exists(filename)) { + if (options.updateMode === 'new') { + console.warn(`- skip ${filename}, file already exists`); + continue; + } + existingReport = matter(await fs.readFile(filename, 'utf-8')); + tracked = existingReport.data.Tracked ?? 'N/A'; + if ((options.updateMode === 'tracked' && tracked === 'N/A') || + (options.updateMode === 'untracked' && tracked !== 'N/A')) { + console.warn(`- skip ${filename}, file already exists, with Tracked="${tracked}"`); + continue; + } + } - let res = null; - if (options.diff || options.dep) { - // Generate diff/dependencies report - res = await generateReport(study, { - depReport: options.dep, - diffReport: !!options.diff, - refStudyFile: options.diff, - onlyNew: options.onlynew - }); - } - else if (options.format && options.format !== 'json') { - // Generate markdown report and possibly an HTML report - const generateOptions = { perSpec: !options.perissue }; - const markdown = await generateReport(study, generateOptions); - - if (options.format === 'html') { - const template = path.join(__dirname, 'src', 'templates', - `report${options.perissue ? '-perissue' : ''}-template.html`); - const promise = new Promise((resolve, reject) => { - let args = [ - '-f', 'markdown', '-t', 'html5', '--section-divs', '-s', - '--template', template - ]; - pandoc(markdown, args, (err, result) => - err ? reject(err) : resolve(result)); - }); - res = await promise; - } - else { - res = markdown; + const content = ` +${entry.content} + +This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`; + if (existingReport?.content === content) { + console.warn(`- skip ${filename}, file already exists, no change`); + continue; + } + + const issueReport = matter(content); + issueReport.data = { + Title: entry.title, + Tracked: tracked + }; + if (entry.spec) { + const spec = edReport.results.find(spec => spec.url === entry.spec.url); + if (spec.nightly?.repository) { + issueReport.data.Repo = spec.nightly.repository; + } + } + console.warn(`- ${existingReport ? 'update' : 'add'} ${filename}`); + const filecontent = issueReport.stringify(); + await fs.writeFile(filename, filecontent, 'utf-8'); + reported += 1; + if (options.max > 0 && reported >= options.max) { + break; + } } } else { - // Output the study report to the console - res = JSON.stringify(study, null, 2); + // Caller wants a markdown report written to the console. + // The anomalies report should already be a "full" one (so only one + // result item at the first level). 
+ const content = anomaliesReport.results[0].content; + let reported = 0; + for (const entry of content) { + console.log(entry); + console.log(); + reported += 1; + if (options.max > 0 && reported >= options.max) { + break; + } + } } - - console.log(res); }); program.parseAsync(process.argv); diff --git a/test/cli.js b/test/cli.js index a48b3c2d..4c6ac976 100644 --- a/test/cli.js +++ b/test/cli.js @@ -26,19 +26,51 @@ describe(`Strudy's CLI`, function () { it('reports usage help when asked', async function () { const { stdout, stderr } = await strudy(`--help`); - assert.match(stdout, /^Usage: strudy \[options\] /); + assert.match(stdout, /^Usage: strudy \[options\] \[command\]/); assert.deepEqual(stderr, ''); }); - it('expects a report argument', async function () { - const { stdout, stderr } = await strudy(``); - assert.match(stderr, /error: missing required argument 'report'/); - assert.deepEqual(stdout, ''); - }); + describe(`The "inspect" command`, function () { + it('expects a crawl report as argument', async function () { + const { stdout, stderr } = await strudy(`inspect`); + assert.match(stderr, /error: missing required argument 'crawl'/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when provided crawl report does not exist', async function () { + const { stdout, stderr } = await strudy(`inspect notareport`); + assert.match(stderr, /Could not find/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when provided issues folder does not exist', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues notafolder`); + assert.match(stderr, /Could not find\/access the folder to store anomalies/); + assert.deepEqual(stdout, ''); + }); + + it('refuses formats other than "json" or "markdown"', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format html`); + assert.match(stderr, /Unsupported --format option/); + assert.deepEqual(stdout, ''); + }); + + it('rejects incompatible format and issues options', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format json --issues issues`); + assert.match(stderr, /The --format option can only be set to "markdown" when --issues is used/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when update-mode is set but not the issues option', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --update-mode all`); + assert.match(stderr, /The --update-mode option can only be set when --issues is set/); + assert.deepEqual(stdout, ''); + }); - it('reports an error when provided report does not exist', async function () { - const { stdout, stderr } = await strudy(`notareport`); - assert.match(stderr, /Could not find/); - assert.deepEqual(stdout, ''); + it('reports an error when update-mode is set to some unknown mode', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues issues --update-mode notamode`); + assert.match(stderr, /Unsupported --update-mode option/); + assert.deepEqual(stdout, ''); + }) }); }); \ No newline at end of file diff --git a/test/data/empty.json b/test/data/empty.json new file mode 100644 index 00000000..914332ed --- /dev/null +++ b/test/data/empty.json @@ -0,0 +1,3 @@ +{ + "results": [] +} \ No newline at end of file diff --git a/test/study.js b/test/study.js index 986813a6..30c09386 100644 --- a/test/study.js +++ b/test/study.js @@ -40,11 +40,15 @@ describe('The main study 
function', function () { assertNbAnomalies(report.results, 2); assertAnomaly(report.results, 0, { title: 'Crawl error in Hello world API', - content: `While crawling [Hello world API](${specUrl}), the following crawl errors occurred:\n* Boo` + content: +`While crawling [Hello world API](${specUrl}), the following crawl errors occurred: +* [ ] Boo` }); assertAnomaly(report.results, 1, { title: 'Crawl error in Hello universe API', - content: `While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred:\n* Borked` + content: +`While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred: +* [ ] Borked` }); }); @@ -53,15 +57,16 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'type>spec', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'type/spec', htmlFragments: {} }); assertNbAnomalies(report.results, 1); assertAnomaly(report.results, 0, { title: 'Crawl error', - content: `The following crawl errors occurred: + content: +`The following crawl errors occurred: * [Hello world API](https://w3c.github.io/world/) - * Boo + * [ ] Boo * [Hello universe API](https://w3c.github.io/universe/) - * Borked` + * [ ] Borked` }); }); @@ -70,13 +75,14 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'spec>type', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'spec/type', htmlFragments: {} }); assertNbAnomalies(report.results, 2); assertAnomaly(report.results, 0, { title: 'Hello world API', - content: `While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: + content: +`While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: * Crawl error - * Boo` + * [ ] Boo` }); }); @@ -85,14 +91,15 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'spec>group>type', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'spec/group/type', htmlFragments: {} }); assertNbAnomalies(report.results, 2); assertAnomaly(report.results, 0, { title: 'Hello world API', - content: `While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: + content: +`While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: * Generic * Crawl error - * Boo` + * [ ] Boo` }); }); @@ -101,13 +108,14 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'group+spec>type', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'group+spec/type', htmlFragments: {} }); assertNbAnomalies(report.results, 2); assertAnomaly(report.results, 0, { title: 'Generic in Hello world API', - content: `While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed: + content: +`While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed: * 
Crawl error - * Boo` + * [ ] Boo` }); }); @@ -116,16 +124,17 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'group>type>spec', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'group/type/spec', htmlFragments: {} }); assertNbAnomalies(report.results, 1); assertAnomaly(report.results, 0, { title: 'Generic', - content: `The following errors prevented the spec from being analyzed: + content: +`The following errors prevented the spec from being analyzed: * Crawl error * [Hello world API](https://w3c.github.io/world/) - * Boo + * [ ] Boo * [Hello universe API](https://w3c.github.io/universe/) - * Borked` + * [ ] Borked` }); }); @@ -134,17 +143,27 @@ describe('The main study function', function () { populateSpec(specUrl, { error: 'Boo' }), populateSpec(specUrl2, { error: 'Borked' }) ]; - const report = await study(crawlResult, { structure: 'group>spec>type', htmlFragments: {} }); + const report = await study(crawlResult, { structure: 'group/spec/type', htmlFragments: {} }); assertNbAnomalies(report.results, 1); assertAnomaly(report.results, 0, { title: 'Generic', - content: `The following errors prevented the spec from being analyzed: + content: +`The following errors prevented the spec from being analyzed: * [Hello world API](https://w3c.github.io/world/) * Crawl error - * Boo + * [ ] Boo * [Hello universe API](https://w3c.github.io/universe/) * Crawl error - * Borked` + * [ ] Borked` }); }); + + it('only reports anomalies for requested specs', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { specs: ['universe'], htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + }); }); \ No newline at end of file From 2b4982d0d111620d218130f726a227250387bd82 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Wed, 21 Aug 2024 16:56:28 +0200 Subject: [PATCH 3/9] Handle old issue files that can now be deleted The `--update-mode` option now also accepts an `old` value that preserves existing issue files by default, except when the analysis no longer detects any error for them. --- src/lib/study.js | 66 +++++++++++++++++++++++++++++++++++++++++++++++- strudy.js | 23 ++++++++++++++--- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/src/lib/study.js b/src/lib/study.js index 23a3dab0..7f6264ea 100644 --- a/src/lib/study.js +++ b/src/lib/study.js @@ -478,6 +478,69 @@ ${serializeEntry(entry, 'markdown')}`) } +/** + * The report includes a set of anomalies. It can also be useful to know + * what things looked fine, in other words what other anomalies could have + * been reported in theory. This can typically be used to identify issue files + * created in the past and that now need to be deleted. + * + * Note: Some anomalies may hide others. For example, a WebIDL update can make + * the Web IDL invalid... and hide other WebIDL issues that may still exist in + * the spec. This function may return false negatives as a result. 
+ */ +function getNamesOfNonReportedEntries(report, specs, what, structure) { + const groups = []; + anomalyGroups.filter(group => + what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))); + const types = []; + for (const group of anomalyGroups) { + if (what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))) { + groups.push(group); + for (const type of group.types) { + if (what.includes('all') || + what.includes(group.name) || + what.includes(type)) { + types.push(type); + } + } + } + } + + const levels = structure.split('/') + .map(level => level.replace(/\s+/g, '')); + let allNames; + switch (levels[0]) { + case 'flat': + // Not much we can say there + break; + case 'type+spec': + allNames = specs + .map(spec => types.map(type => `${spec.shortname}-${type.name.toLowerCase()}`)) + .flat(); + break; + case 'group+spec': + allNames = specs + .map(spec => groups.map(group => `${spec.shortname}-${group.name.toLowerCase()}`)) + .flat(); + break; + case 'spec': + allNames = specs.map(spec => spec.shortname); + break; + case 'type': + allNames = types.map(type => type.name); + break; + case 'group': + allNames = groups.map(group => group.name); + break; + } + return allNames.filter(name => !report.find(entry => entry.name === name)); +} + + /** * Main function that studies a crawl result and returns a structured * report. @@ -546,7 +609,8 @@ export default async function study(specs, options = {}) { studied: specs.length, anomalies: anomalies.length }, - results: formatReport(format, report) + results: formatReport(format, report), + looksGood: getNamesOfNonReportedEntries(report, specs, what, structure) }; // Return the structured report diff --git a/strudy.js b/strudy.js index c74dda9e..f012089f 100644 --- a/strudy.js +++ b/strudy.js @@ -171,9 +171,11 @@ Usage notes for some of the options: Tell Strudy what issue files to update when --issues is set and an issue file already exists for the issue at hand. Possible values are: "new" (default) preserve existing files + "old" preserve existing files but get rid of old ones for which + study reveals no more issue "untracked" update existing files that do not have a "Tracked" URL "tracked" update existing files that have a "Tracked" URL - "all" update all existing files + "all" update all existing files, deleting them when needed Strudy will always create new issue files, the mode only changes the behavior for existing issue files. 
@@ -203,7 +205,7 @@ Format must be one of "json" or "markdown".`) console.error(`The --format option can only be set to "markdown" when --issues is used.`); process.exit(2); } - if (options.updateMode && !['new', 'untracked', 'tracked', 'all'].includes(options.updateMode)) { + if (options.updateMode && !['new', 'old', 'untracked', 'tracked', 'all'].includes(options.updateMode)) { console.error(`Unsupported --update-mode option "${options.updateMode}"`); process.exit(2); } @@ -281,7 +283,8 @@ Format must be one of "json" or "markdown".`) let existingReport; let tracked = 'N/A'; if (await exists(filename)) { - if (options.updateMode === 'new') { + if (options.updateMode === 'new' || + options.updateMode === 'old') { console.warn(`- skip ${filename}, file already exists`); continue; } @@ -298,7 +301,7 @@ Format must be one of "json" or "markdown".`) ${entry.content} This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`; - if (existingReport?.content === content) { + if (existingReport?.content.trim() === content.trim()) { console.warn(`- skip ${filename}, file already exists, no change`); continue; } @@ -322,6 +325,18 @@ ${entry.content} break; } } + + if (options.updateMode === 'old' || + options.updateMode === 'all') { + const reportFiles = await fs.readdir(options.issues); + const todelete = reportFiles.filter(file => + anomaliesReport.looksGood.find(name => file === `${name}.md`)); + for (const file of todelete) { + const filename = path.join(options.issues, file); + console.warn(`- delete ${filename}, no more anomalies detected`); + await fs.unlink(filename); + } + } } else { // Caller wants a markdown report written to the console. From fd2ccd5afca0479f6b200f87bab05768891863c6 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Thu, 22 Aug 2024 13:55:35 +0200 Subject: [PATCH 4/9] Rewrite file-issue-for-review logic The script used to run the study *and* handle Git and GitHub commands. It now expects that some analysis got run with the main strudy CLI, and only handles Git and GitHub commands. One update is that the script can also create pull requests for issue files that got deleted during the analysis (because study did not reveal any issue with the underlying spec anymore). 
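In practice, anomaly detection and issue filing now happen in two separate steps, mirroring the updated GitHub Actions workflow (paths and option values below are illustrative):

```
# 1. Detect anomalies and add/update/delete issue files in the issues folder
node strudy.js inspect ../webref --issues issues --what brokenLinks discontinuedReferences --update-mode old

# 2. Submit uncommitted issue file changes as pull requests (at most 10 per run)
node src/reporting/file-issue-for-review.js --max 10
```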
--- .github/workflows/file-issue-for-review.yml | 7 +- src/reporting/file-issue-for-review.js | 386 +++++++------------- strudy.js | 2 +- 3 files changed, 145 insertions(+), 250 deletions(-) diff --git a/.github/workflows/file-issue-for-review.yml b/.github/workflows/file-issue-for-review.yml index b04dfb41..8607d846 100644 --- a/.github/workflows/file-issue-for-review.yml +++ b/.github/workflows/file-issue-for-review.yml @@ -28,10 +28,13 @@ jobs: git config user.email "<>" git remote set-url --push origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY working-directory: strudy + - name: Run Strudy to detect new anomalies + working-directory: strudy + run: node strudy.js inspect ../webref --issues issues --what brokenLinks discontinuedReferences --update-mode old - name: Run issue filer script working-directory: strudy - run: node src/reporting/file-issue-for-review.js ../webref/ed/ ../webref/tr/ brokenLinks + run: node src/reporting/file-issue-for-review.js --max 10 env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.ISSUE_REPORT_GH_TOKEN }} diff --git a/src/reporting/file-issue-for-review.js b/src/reporting/file-issue-for-review.js index 0d797fbe..b04a0a00 100644 --- a/src/reporting/file-issue-for-review.js +++ b/src/reporting/file-issue-for-review.js @@ -1,280 +1,172 @@ -/* Takes a report of anomalies produced by Strudy, - creates a draft of an issue per spec and per anomaly type - and submits as a pull request in this repo if no existing one matches +/** + * Looks at draft issue files produced by the Strudy CLI in the issues folder + * and submits new/updated/deleted ones as pull requests in this repo if there + * is no pending pull request already. */ -import { loadCrawlResults } from '../lib/util.js'; -import studyBackrefs from '../lib/study-backrefs.js'; -import studyReferences from '../lib/study-refs.js'; -import isInMultiSpecRepository from '../lib/is-in-multi-spec-repo.js'; -import loadJSON from '../lib/load-json.js'; import path from 'node:path'; import fs from 'node:fs/promises'; +import { fileURLToPath } from "node:url"; import { execSync } from 'node:child_process'; -import Octokit from '../lib/octokit.js'; import matter from 'gray-matter'; +import { Command, InvalidArgumentError } from 'commander'; -const config = await loadJSON("config.json"); -const GH_TOKEN = config?.GH_TOKEN ?? process.env.GH_TOKEN; +/** + * Command-line execution parameters for calls to `execSync` + */ +const scriptPath = path.dirname(fileURLToPath(import.meta.url)); +const execParams = { + cwd: path.join(scriptPath, '..', '..'), + encoding: 'utf8' +}; -const MAX_PR_BY_RUN = 10; -const repoOwner = 'w3c'; -const repoName = 'strudy'; +/** + * Wrap "matter" issue report to create a suitable PR body + */ +function prWrapper(action, issueReport) { + if (action === 'add') { + return `This pull request was automatically created by Strudy upon detecting errors in ${issueReport.data.Title}. -const octokit = new Octokit({ - auth: GH_TOKEN - // log: console -}); - -function issueWrapper (spec, anomalies, anomalyType, crawl) { - const titlePrefix = isInMultiSpecRepository(spec, crawl.ed) ? 
`[${spec.shortname}] ` : ''; - let anomalyReport = ''; let title = ''; - switch (anomalyType) { - case 'brokenLinks': - title = `Broken references in ${spec.title}`; - anomalyReport = 'the following links to other specifications were detected as pointing to non-existing anchors'; - break; - case 'outdatedSpecs': - title = `Outdated references in ${spec.title}`; - anomalyReport = 'the following links were detected as pointing to outdated specifications'; - break; - case 'nonCanonicalRefs': - title = `Non-canonical references in ${spec.title}`; - anomalyReport = 'the following links were detected as pointing to outdated URLs'; - break; - case 'discontinuedReferences': - title = `Normative references to discontinued specs in ${spec.title}`; - anomalyReport = 'the following normative referenced were detected as pointing to discontinued specifications'; - break; - } - return { - title: titlePrefix + title, - content: ` -While crawling [${spec.title}](${spec.crawled}), ${anomalyReport}: -${anomalies.map(anomaly => `* [ ] ${anomaly.message}`).join('\n')} - -This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).` - }; -} - -function prWrapper (title, uri, repo, issueReport) { - return `This pull request was automatically created by Strudy upon detecting errors in ${title}. - -Please check that these errors were correctly detected, and that they have not already been reported in ${repo}. +Please check that these errors were correctly detected, and that they have not already been reported in ${issueReport.data.Repo}. If everything is OK, you can merge this pull request which will report the issue below to the repo, and update the underlying report file with a link to the said issue. -${issueReport} +${issueReport.stringify()} `; -} - - -const knownAnomalyTypes = ['brokenLinks', 'outdatedSpecs', 'nonCanonicalRefs', 'discontinuedReferences']; + } + else { + return `This pull request was automatically created by Strudy while analyzing ${issueReport.data.Title}. -let edCrawlResultsPath = process.argv[2]; -let trCrawlResultsPath = process.argv[3]; -const anomalyFilter = process.argv.slice(4).filter(p => !p.startsWith('--')); -const unknownAnomalyType = anomalyFilter.find(p => !knownAnomalyTypes.includes(p)); -if (unknownAnomalyType) { - console.error(`Unknown report type ${unknownAnomalyType} - known types are ${knownAnomalyTypes.join(', ')}`); - process.exit(1); -} -const anomalyTypes = anomalyFilter.length ? anomalyFilter : knownAnomalyTypes; -const updateMode = process.argv.includes('--update') ? 'update-untracked' : (process.argv.includes('--update-tracked') ? 'update-tracked' : false); -const dryRun = process.argv.includes('--dry-run'); -const noGit = dryRun || updateMode || process.argv.includes('--no-git'); +Please check that past errors listed below have indeed been corrected, and that the related issue in ${issueReport.data.Repo} has been closed accordingly. -if (!noGit && !GH_TOKEN) { - console.error('GH_TOKEN must be set to some personal access token as an env variable or in a config.json file'); - process.exit(1); -} +If everything looks OK, you can merge this pull request to delete the issue file. 
-// Target the index file if needed -if (!edCrawlResultsPath.endsWith('index.json')) { - edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json'); -} -if (!trCrawlResultsPath.endsWith('index.json')) { - trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json'); +${issueReport.stringify()} +`; + } } -let existingReports = []; -if (updateMode) { - console.log('Compiling list of relevant existing issue reports…'); - // List all existing reports to serve as a comparison point - // to detect if any report can be deleted - // if the anomalies are no longer reported - const reportFiles = (await fs.readdir('issues')).map(p => 'issues/' + p); - for (const anomalyType of anomalyTypes) { - existingReports = existingReports.concat(reportFiles.filter(p => p.endsWith(`-${anomalyType.toLowerCase()}.md`))); +/** + * Parse the maximum number of pull requests option as integer + */ +function myParseInt(value) { + const parsedValue = parseInt(value, 10); + if (isNaN(parsedValue)) { + throw new InvalidArgumentError('Not a number.'); } - console.log('- done'); + return parsedValue; } -const nolongerRelevantReports = new Set(existingReports); -// Donwload automatic map of multipages anchors in HTML spec -let htmlFragments = {}; -try { - console.log('Downloading HTML spec fragments data…'); - htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json()); - console.log('- done'); -} catch (err) { - console.log('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec'); -} +const program = new Command(); +program + .description('File added/updated/deleted issue files as individual GitHub pull requests') + .option('--dry-run', 'run the script without creating any actual pull request') + .option('-m, --max ', 'maximum number of pull requests to create/update', myParseInt, 10) + .showHelpAfterError('(run with --help for usage information)') + .addHelpText('after', ` +Minimal usage example: + To create up to 10 pull requests from local issue files, run: + $ node file-issue-for-review.js + +Description: + The command looks into the \`issues\` folder to find files that have been + added, updated or deleted, and that have not yet been committed to the + repository. For each of them, it creates a pull request on GitHub, unless one + already exists. + + The \`gh\` and \`git\` CLI commands must be available and functional. The + command will push Git updates to the \`origin\` remote, which must exist. + +Usage notes for some of the options: +--dry-run + Run the script without committing anything, and without creating any actual + pull request. The option is meant for debugging. + +-m, --max + Maximum number of pull requests to create. Defaults to 10. + + You may set the option to 0 to create as many pull requests as needed. You + may want to check that there aren't too many pull requests to create first, + though! +`) + .action(async (options) => { + function execOrLog(cmd) { + options.dryRun ? 
console.log(cmd) : execSync(cmd, execParams); + } -console.log(`Opening crawl results ${edCrawlResultsPath} and ${trCrawlResultsPath}…`); -const crawl = await loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath); -console.log('- done'); -console.log('Running references analysis…'); -// TODO: if we're not running all the reports, this could run only the -// relevant study function -const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments).concat(studyReferences(crawl.ed)); -console.log('- done'); -const currentBranch = noGit || execSync('git branch --show-current', { encoding: 'utf8' }).trim(); -const needsPush = {}; -for (const anomalyType of anomalyTypes) { - const anomalies = results.filter(r => r.name === anomalyType); - const specs = [...new Set(anomalies.map(a => a.specs.map(s => s.url)).flat())]; - for (const url of specs) { - const specAnomalies = anomalies.filter(a => a.specs[0].url === url); - const spec = specAnomalies[0].specs[0]; - console.log(`Compiling ${anomalyType} report for ${spec.title}…`); - // if we don't know the repo, we can't file an issue - if (!spec.nightly?.repository) { - console.log(`No known repo for ${spec.title}, skipping`); - continue; + if (options.dryRun) { + console.log('DRY RUN!'); + console.log('The command won\'t make any actual change.'); } - if (spec.standing === "discontinued") { - console.log(`${spec.title} is discontinued, skipping`); - continue; + console.log('How many pull requests can we use to change the world?'); + console.log(`- nb pull requests that we may create: ${options.max}`); + + console.log('On which Git branch are we?'); + const currentBranch = execSync('git branch --show-current', execParams).trim(); + console.log(`- current branch: ${currentBranch}`); + + console.log('How many issue files ought to be reported?'); + const toadd = execSync('git diff --name-only --diff-filter=d issues', execParams).trim().split('\n'); + console.log(`- nb issue files to add/update: ${toadd.length}`); + const todelete = execSync('git diff --name-only --diff-filter=D issues', execParams).trim().split('\n'); + console.log(`- nb issue files to delete: ${todelete.length}`); + const toreport = toadd.map(name => { return { action: 'add', filename: name }; }) + .concat(todelete.map(name => { return { action: 'delete', filename: name }; })) + .sort((e1, e2) => e1.filename.localeCompare(e2.filename)); + + if (toreport.length === 0) { + console.log('No issue files to report'); } - const issueMoniker = `${spec.shortname}-${anomalyType.toLowerCase()}`; - // is there already a file with that moniker? 
- const issueFilename = path.join('issues/', issueMoniker + '.md'); - let tracked = 'N/A'; - let existingReportContent; + + let reported = 0; try { - if (!(await fs.stat(issueFilename)).isFile()) { - console.error(`${issueFilename} already exists but is not a file`); - continue; - } else { - if (!updateMode) { - console.log(`${issueFilename} already exists, bailing`); + console.log('Create pull requests as needed...'); + for (const entry of toreport) { + // Look for a related PR that may still be pending + const issueMoniker = entry.filename.match(/^issues\/(.*)\.md$/)[1]; + const pendingPRStr = execSync(`gh pr list --head ${issueMoniker} --json number,headRefName`, execParams); + const pendingPR = JSON.parse(pendingPRStr)[0]; + if (pendingPR) { + console.log(`- skip ${entry.filename}, a pending PR already exists (#${pendingPR.number}`); continue; - } else { - nolongerRelevantReports.delete(issueFilename); - try { - const existingReport = matter(await fs.readFile(issueFilename, 'utf-8')); - tracked = existingReport.data.Tracked; - existingReportContent = existingReport.content; - // only update tracked or untracked reports based on - // CLI parameter - if ((updateMode === 'update-untracked' && tracked !== 'N/A') || (updateMode === 'update-tracked' && tracked === 'N/A')) { - continue; - } - } catch (e) { - console.error('Failed to parse existing content', e); - continue; - } } - } - } catch (err) { - // Intentionally blank - } - // if not, we create the file, add it in a branch - // and submit it as a pull request to the repo - const { title, content: issueReportContent } = issueWrapper(spec, specAnomalies, anomalyType, crawl); - if (updateMode) { - if (existingReportContent) { - const existingAnomalies = existingReportContent.split('\n').filter(l => l.startsWith('* [ ] ')).map(l => l.slice(6)); - if (existingAnomalies.every((a, i) => specAnomalies[i] === a) && existingAnomalies.length === specAnomalies.length) { - // no substantial change, skip - console.log(`Skipping ${title}, no change`); - continue; + + let issueReport; + if (entry.action === 'add') { + issueReport = matter(await fs.readFile(entry.filename, 'utf-8')); } - } else { - // in update mode, we only care about existing reports - continue; - } - } - const issueReportData = matter(issueReportContent); - issueReportData.data = { - Repo: spec.nightly.repository, - Tracked: tracked, - Title: title - }; - let issueReport; - try { - issueReport = issueReportData.stringify(); - } catch (err) { - console.error(`Failed to stringify report of ${anomalyType} for ${title}: ${err}`, issueReportContent); - continue; - } - if (dryRun) { - console.log(`Would add ${issueFilename} with`); - console.log(issueReport); - console.log(); - } else { - await fs.writeFile(issueFilename, issueReport, 'utf-8'); - try { - if (!noGit) { - console.log(`Committing issue report as ${issueFilename} in branch ${issueMoniker}…`); - execSync(`git checkout -b ${issueMoniker}`); - execSync(`git add ${issueFilename}`); - execSync(`git commit -m "File report on ${issueReportData.data.Title}"`); - needsPush[issueMoniker] = { title: issueReportData.data.Title, report: issueReport, repo: spec.nightly.repository, specTitle: spec.title, uri: spec.crawled }; - console.log('- done'); - execSync(`git checkout ${currentBranch}`); + else { + // File was deleted, retrieve its previous content from the HEAD + issueReport = matter(await execSync(`git show HEAD:${entry.filename}`, execParams)); + } + + console.log(`- create PR for ${entry.filename}`); + execOrLog(`git checkout -b 
${issueMoniker}`); + execOrLog(`git add ${entry.filename}`); + execOrLog(`git commit -m "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title}"`); + execOrLog(`git push origin ${issueMoniker}`); + + const prBodyFile = path.join(execParams.cwd, '__pr.md') + const prBody = prWrapper(entry.action, issueReport); + await fs.writeFile(prBodyFile, prBody, 'utf8'); + try { + execOrLog(`gh pr create --body-file __pr.md --title "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title.replace(/"/g, '')}"`); + } + finally { + await fs.rm(prBodyFile, { force: true }); + } + + reported += 1; + if (options.max > 0 && reported > options.max) { + break; } - } catch (err) { - console.error(`Failed to commit error report for ${spec.title}`, err); - await fs.unlink(issueFilename); - execSync(`git checkout ${currentBranch}`); } } - } -} -if (nolongerRelevantReports.size) { - console.log('The following reports are no longer relevant, deleting them', [...nolongerRelevantReports]); - for (const issueFilename of nolongerRelevantReports) { - await fs.unlink(issueFilename); - } -} -if (Object.keys(needsPush).length) { - let counter = 0; - for (const branch in needsPush) { - if (counter > MAX_PR_BY_RUN) { - delete needsPush[branch]; - continue; + finally { + console.log(`- get back to the initial Git branch ${currentBranch}`); + execOrLog(`git checkout ${currentBranch}`, execParams); + console.log(`- nb PR ${options.dryRun ? 'that would be ' : ''}created: ${reported}`); } + }); - // is there already a pull request targetting that branch? - const { data: pullrequests } = (await octokit.rest.pulls.list({ - owner: repoOwner, - repo: repoName, - head: `${repoOwner}:${branch}` - })); - if (pullrequests.length > 0) { - console.log(`A pull request from branch ${branch} already exists, bailing`); - delete needsPush[branch]; - } - counter++; - } -} -if (Object.keys(needsPush).length) { - console.log(`Pushing new branches ${Object.keys(needsPush).join(' ')}…`); - execSync(`git push origin ${Object.keys(needsPush).join(' ')}`); - console.log('- done'); - for (const branch in needsPush) { - const { title, specTitle, uri, repo, report } = needsPush[branch]; - console.log(`Creating pull request from branch ${branch}…`); - await octokit.rest.pulls.create({ - owner: repoOwner, - repo: repoName, - title, - body: prWrapper(specTitle, uri, repo, report), - head: `${repoOwner}:${branch}`, - base: 'main' - }); - console.log('- done'); - } -} +program.parseAsync(process.argv); \ No newline at end of file diff --git a/strudy.js b/strudy.js index f012089f..6ddb1629 100644 --- a/strudy.js +++ b/strudy.js @@ -334,7 +334,7 @@ ${entry.content} for (const file of todelete) { const filename = path.join(options.issues, file); console.warn(`- delete ${filename}, no more anomalies detected`); - await fs.unlink(filename); + await fs.rm(filename, { force: true }); } } } From 5831dd32589b42a5d575d71fd35193a7d7792456 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Thu, 22 Aug 2024 14:20:12 +0200 Subject: [PATCH 5/9] Delete previous CLIs and previous "study crawl" logic Delete all the things! The previous CLIs should no longer be needed as the main Strudy CLI now should take care of everything. The `study-crawl` CLI (and companion `generate-report`) is a sort of exception to the rule: all anomalies it reported are now covered by the CLI, but the CLI could also output a dependencies report, compute a diff between two reports, and generate an HTML report. New Strudy CLI no longer can do any of that. 
This update drops the files anyway. We could perhaps keep them around, but we haven't maintained the code, and `w3c/webref-analysis` is, I think, the only project that uses it and I propose to shelve that project now that we have, at least theoretically, a more flexible way to create issue reports. The NPM package would only export the main `study` function, because there shouldn't be any need to export the individual study functions such as `studyWebIdl` anymore. We'll have to update Webref tests accordingly. Project dependencies adjusted to drop `node-pandoc`... and `node-fetch` which was no longer being used in any case. --- .github/workflows/file-issue-for-review.yml | 2 +- index.js | 7 +- package.json | 2 - src/cli/check-missing-dfns.js | 588 ----------- src/cli/study-algorithms.js | 59 -- src/cli/study-backrefs.js | 127 --- src/cli/study-webidl.js | 44 - src/lib/generate-report.js | 1020 ------------------- src/lib/study-crawl.js | 412 -------- 9 files changed, 4 insertions(+), 2257 deletions(-) delete mode 100644 src/cli/check-missing-dfns.js delete mode 100644 src/cli/study-algorithms.js delete mode 100644 src/cli/study-backrefs.js delete mode 100644 src/cli/study-webidl.js delete mode 100644 src/lib/generate-report.js delete mode 100644 src/lib/study-crawl.js diff --git a/.github/workflows/file-issue-for-review.yml b/.github/workflows/file-issue-for-review.yml index 8607d846..0a4d5ea7 100644 --- a/.github/workflows/file-issue-for-review.yml +++ b/.github/workflows/file-issue-for-review.yml @@ -26,7 +26,7 @@ jobs: run: | git config user.name "strudy-bot" git config user.email "<>" - git remote set-url --push origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY + git remote set-url --push origin https://x-access-token:${{ secrets.ISSUE_REPORT_GH_TOKEN }}@github.com/$GITHUB_REPOSITORY working-directory: strudy - name: Run Strudy to detect new anomalies working-directory: strudy diff --git a/index.js b/index.js index 42252811..058ea8c0 100644 --- a/index.js +++ b/index.js @@ -1,8 +1,7 @@ -import studyCrawl from './src/lib/study-crawl.js'; +import study from './src/lib/study.js'; import studyWebIdl from './src/lib/study-webidl.js'; -import generateReport from './src/lib/generate-report.js'; -export { studyCrawl, studyWebIdl, generateReport }; +export { study, studyWebIdl }; -const strudy = { studyCrawl, studyWebIdl, generateReport }; +const strudy = { study, studyWebIdl }; export default strudy; diff --git a/package.json b/package.json index 53a82979..e59bd6d0 100644 --- a/package.json +++ b/package.json @@ -41,8 +41,6 @@ "commander": "12.1.0", "gray-matter": "^4.0.3", "jsdom": "^24.1.1", - "node-fetch": "^2.6.5", - "node-pandoc": "0.3.0", "reffy": "^17.1.1", "semver": "^7.3.5", "webidl2": "^24.2.2" diff --git a/src/cli/check-missing-dfns.js b/src/cli/check-missing-dfns.js deleted file mode 100644 index be298a75..00000000 --- a/src/cli/check-missing-dfns.js +++ /dev/null @@ -1,588 +0,0 @@ -#!/usr/bin/env node -/** - * The definitions checker compares CSS, dfns, and IDL extracts created by Reffy - * to detect CSS/IDL terms that do not have a corresponding dfn in the - * specification. - * - * The definitions checker can be called directly through: - * - * `node check-missing-dfns.js [crawl report] [spec] [format]` - * - * where: - * - `crawl report` is the local path to the root folder that contains the - * `index.json` and the extracts (e.g. 
`reports/ed`) - * - `spec` is the optional shortname of the specification on which to focus or - * `all` (default) to check all specs - * - `format` is the optional output format. Either `json` or `markdown` with - * `markdown` being the default. - * - * Note: CSS extraction already relies on dfns and reports missing dfns in a - * "warnings" property. This checker simply looks at that list. - * - * @module checker - */ - -import path from 'node:path'; -import { fileURLToPath } from 'node:url'; -import loadJSON from '../lib/load-json.js'; - - -/** - * List of spec shortnames that, so far, don't follow the dfns data model - */ -const specsWithObsoleteDfnsModel = [ - 'svg-animations', 'svg-markers', 'svg-strokes', 'SVG2', - 'webgl1', 'webgl2', - 'webrtc-identity' -]; - - -/** - * Return true when provided arrays are "equal", meaning that they contain the - * same items - * - * @function - * @private - * @param {Array} a First array to compare - * @param {Array} b Second array to compare - * @return {boolean} True when arrays are equal - */ -function arraysEqual(a, b) { - return Array.isArray(a) && - Array.isArray(b) && - a.length === b.length && - a.every((val, index) => val === b[index]); -} - - -/** - * Return the list of expected definitions from the CSS extract - * - * @function - * @private - * @param {Object} css The root of the object that describes CSS terms in the - * CSS extract - * @return {Array} An array of expected definitions - */ -function getExpectedDfnsFromCSS(css) { - const expected = (css.warnings ?? []) - .filter(warning => warning.msg === 'Missing definition') - .map(warning => { - return { - linkingText: [warning.name], - type: warning.type, - 'for': warning.for - }; - }); - - return expected; -} - - -/** - * Return true when the given CSS definition matches the expected definition - * - * @function - * @private - * @param {Object} expected Expected definition - * @param {Object} actual Actual definition to check - * @return {Boolean} true when actual definition matches the expected one - */ -function matchCSSDfn(expected, actual) { - return arraysEqual(expected.linkingText, actual.linkingText) && - (!expected.for || arraysEqual(expected.for, actual.for)) && - (!expected.type || (expected.type === actual.type)); -} - - -/** - * Return the list of expected definitions from the IDL extract - * - * @function - * @private - * @param {Object} css The root of the object that describes IDL terms in the - * `idlparsed` extract. - * @return {Array} An array of expected definitions - */ -function getExpectedDfnsFromIdl(idl = {}) { - // Parse IDL names that the spec defines - const idlNames = Object.values(idl.idlNames || {}); - let expected = idlNames.map(name => getExpectedDfnsFromIdlDesc(name)).flat(); - - // Parse members of IDL names that the spec extends - const idlExtendedNames = Object.values(idl.idlExtendedNames || {}); - expected = expected.concat(idlExtendedNames.map(extended => - extended.map(name => getExpectedDfnsFromIdlDesc(name, { excludeRoot: true }))) - .flat(2)); - return expected; -} - - -/** - * Return true if the given parsed IDL object describes a default toJSON - * operation that references: - * https://heycam.github.io/webidl/#default-tojson-steps - * - * @function - * @private - * @param {Object} desc Parsed IDL object to check - * @return {Boolean} true when object describes a default toJSON operation. 
- */ -function isDefaultToJSONOperation(desc) { - return (desc.type === 'operation') && - (desc.name === 'toJSON') && - (desc.extAttrs && desc.extAttrs.find(attr => attr.name === "Default")); -} - - -/** - * Return the expected definition for the given parsed IDL structure - * - * @function - * @public - * @param {Object} desc The object that describes the IDL term in the - * `idlparsed` extract. - * @param {Object} parentDesc (optional) The object that describes the parent - * IDL term of the term to parse (used to compute the `for` property). - * @return {Object} The expected definition, or null if no expected definition - * is defined. - */ -function getExpectedDfnFromIdlDesc(idl, parentIdl) { - function serializeArgs(args = []) { - return args - .map(arg => arg.variadic ? `...${arg.name}` : arg.name) - .join(', '); - } - - let expected = { - linkingText: [idl.name], - type: idl.type, - 'for': parentIdl && (parentIdl !== idl) ? [parentIdl.name] : [] - }; - - switch (idl.type) { - case 'attribute': - case 'const': - break; - - case 'constructor': - // Ignore constructors for HTML elements, the spec has a dedicated - // section for them: - // https://html.spec.whatwg.org/multipage/dom.html#html-element-constructors - if (!parentIdl.name.startsWith('HTML')) { - expected.linkingText = [`constructor(${serializeArgs(idl.arguments)})`]; - } - else { - expected = null; - } - break; - - case 'enum': - break; - - case 'enum-value': - // The enumeration could include the empty string as a value. There - // cannot be a matching definition in that case. - // Note: look for the quoted value and the unquoted value - const value = idl.value.replace(/^"(.*)"$/, '$1'); - expected.linkingText = (value !== '') ? [`"${value}"`, value] : [`"${value}"`]; - break; - - case 'field': - expected.type = 'dict-member'; - break; - - case 'callback': - case 'callback interface': - case 'dictionary': - case 'interface': - case 'interface mixin': - case 'namespace': - expected.type = - (idl.type === 'callback interface') ? 'callback' : - (idl.type === 'interface mixin') ? 'interface' : - idl.type; - // Ignore partial definition - if (idl.partial) { - expected = null; - } - break; - - case 'includes': - expected = null; - break; - - case 'iterable': - case 'maplike': - case 'setlike': - // No definition expected for iterable, maplike and setlike members - expected = null; - break; - - case 'operation': - // Stringification behavior is typically defined with a - // "stringification behavior" definition scoped to the interface - if (idl.special === 'stringifier') { - expected.linkingText = ['stringification behavior', 'stringificationbehavior']; - expected.type = 'dfn'; - } - // Ignore special "getter", "setter", "deleter" operations when they don't - // have an identifier. They should link to a definition in the prose, but - // the labels seem arbitrary for now. - // Also ignore default toJSON operations. Steps are defined in WebIDL. - else if ((idl.name || - ((idl.special !== 'getter') && - (idl.special !== 'setter') && - (idl.special !== 'deleter'))) && - !isDefaultToJSONOperation(idl)) { - expected.linkingText = [`${idl.name}(${serializeArgs(idl.arguments)})`]; - expected.type = 'method'; - } - else { - expected = null; - } - break; - - case 'typedef': - break; - - case 'argument': - expected = null; - break; - - default: - console.warn('Unsupported IDL type', idl.type, idl); - expected = null; - break; - } - - return expected; -} - - -/** - * Return the list of expected definitions from a parsed IDL extract entry. 
- * - * The function is recursive. - * - * @function - * @private - * @param {Object} idl The object that describes the IDL term in the - * `idlparsed` extract. - * @return {Array} An array of expected definitions - */ -function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) { - const res = []; - const parentIdl = idl; - const idlToProcess = excludeRoot ? [] : [idl]; - - switch (idl.type) { - case 'enum': - if (idl.values) { - idlToProcess.push(...idl.values); - } - break; - - case 'callback': - case 'callback interface': - case 'dictionary': - case 'interface': - case 'interface mixin': - case 'namespace': - if (idl.members) { - idlToProcess.push(...idl.members); - } - break; - } - - idlToProcess.forEach(idl => { - const expected = getExpectedDfnFromIdlDesc(idl, parentIdl); - if (expected) { - expected.access = 'public'; - expected.informative = false; - res.push(expected); - } - }); - - return res; -} - - -/** - * Return true when the given IDL definition matches the expected definition. - * - * The function handles overloaded methods, though not properly. That is, it - * will only find the "right" definition for an overloaded method if the number - * and/or the name of the arguments differ between the overloaded definitions. - * Otherwise it will just match the first definition that looks good. - * - * The function works around Respec's issue #3200 for methods and constructors - * that take only optional parameters: - * https://github.com/w3c/respec/issues/3200 - * - * @function - * @private - * @param {Object} expected Expected definition - * @param {Object} actual Actual definition to check - * @param {Object} options Comparison options - * @return {Boolean} true when actual definition matches the expected one - */ -function matchIdlDfn(expected, actual, - {skipArgs, skipFor, skipType} = {skipArgs: false, skipFor: false, skipType: false}) { - const fixedLt = actual.linkingText - .map(lt => lt.replace(/!overload-\d/, '')) - .map(lt => lt.replace(/\(, /, '(')); - let found = expected.linkingText.some(val => fixedLt.includes(val)); - if (!found && skipArgs) { - const names = fixedLt.map(lt => lt.replace(/\(.*\)/, '')); - found = expected.linkingText.some(val => { - const valname = val.replace(/\(.*\)/, ''); - return names.find(name => name === valname); - }); - } - return found && - (expected.for.every(val => actual.for.includes(val)) || skipFor) && - (expected.type === actual.type || skipType); -} - - -/** - * Checks the CSS and IDL extracts against the dfns extract for the given spec - * - * @function - * @public - * @param {Object} spec Crawl result for the spec to parse - * @param {String} options Check options. Set the rootFolder property to the - * root folder against which to resolve relative paths to load CSS/IDL - * extracts (only needed if the extracts have not yet been loaded and attached - * to the spec object). Set the includeObsolete property to true to include - * detailed results about specs that use an obsolete dfns data model. - * @return {Object} An object with a css and idl property, each of them holding - * an array of missing CSS or IDL definitions. The function returns null when - * there are no missing definitions. - */ -async function checkSpecDefinitions(spec, options = {}) { - if (!options.includeObsolete && specsWithObsoleteDfnsModel.includes(spec.shortname)) { - return { obsoleteDfnsModel: true }; - } - - const dfns = (typeof spec.dfns === "string") ? 
- (await loadJSON(path.resolve(options.rootFolder, spec.dfns))).dfns : - (spec.dfns || []); - const css = (typeof spec.css === "string") ? - (await loadJSON(path.resolve(options.rootFolder, spec.css))) : - (spec.css || {}); - const idl = (typeof spec.idlparsed === "string") ? - (await loadJSON(path.resolve(options.rootFolder, spec.idlparsed))).idlparsed : - spec.idlparsed; - - // Make sure that all expected CSS definitions exist in the dfns extract - const expectedCSSDfns = getExpectedDfnsFromCSS(css); - const missingCSSDfns = expectedCSSDfns.map(expected => { - let actual = dfns.find(dfn => matchCSSDfn(expected, dfn)); - if (!actual && !expected.type) { - // Right definition is missing. For valuespaces that define functions, - // look for a function definition without the enclosing "<>" instead - const altText = [expected.linkingText[0].replace(/^<(.*)\(\)>$/, '$1()')]; - actual = dfns.find(dfn => arraysEqual(altText, dfn.linkingText)); - } - if (!actual && expected.value) { - // Still missing? For valuespaces that define functions, this may be - // because there is no definition without parameters, try to find the - // actual value instead - actual = dfns.find(dfn => arraysEqual([expected.value], dfn.linkingText)); - } - if (actual) { - // Right definition found - return null; - } - else { - // Right definition is missing, there may be a definition that looks - // like the one we're looking for - const found = dfns.find(dfn => - arraysEqual(dfn.linkingText, expected.linkingText)); - return { expected, found }; - } - }).filter(missing => !!missing); - - // Make sure that all expected IDL definitions exist in the dfns extract - const expectedIdlDfns = getExpectedDfnsFromIdl(idl); - const missingIdlDfns = expectedIdlDfns.map(expected => { - let actual = dfns.find(dfn => matchIdlDfn(expected, dfn)); - if (actual) { - // Right definition found - return null; - } - else { - // Right definition is missing, include the interface's definitions to - // be able to link to it in the report - let parent = null; - if (expected.for && expected.for[0]) { - parent = dfns.find(dfn => - (dfn.linkingText[0] === expected.for[0]) && - ['callback', 'dictionary', 'enum', 'interface', 'namespace'].includes(dfn.type)); - } - - // Look for a definition that seems as close as possible to the one - // we're looking for, in the following order: - // 1. For operations, find a definition without taking arguments into - // account and report possible match with a "warning" flag. - // 2. For terms linked to a parent interface-like object, find a match - // scoped to the same parent without taking the type into account. - // 3. Look for a definition with the same name, neither taking the type - // nor the parent into account. - let found = dfns.find(dfn => matchIdlDfn(expected, dfn, { skipArgs: true })); - if (found) { - return { expected, found, for: parent, warning: true }; - } - found = dfns.find(dfn => matchIdlDfn(expected, dfn, - { skipArgs: true, skipType: true })); - if (found) { - return { expected, found, for: parent }; - } - found = dfns.find(dfn => matchIdlDfn(expected, dfn, - { skipArgs: true, skipType: true, skipFor: true })); - return { expected, found, for: parent }; - } - }).filter(missing => !!missing); - - // Report results - return { - css: missingCSSDfns, - idl: missingIdlDfns - }; -} - - -/** - * Checks the CSS and IDL extracts against the dfns extract for all specs in - * the report. 
- * - * @function - * @public - * @param {String} pathToReport Path to the root folder that contains the - * `index.json` report file and the extracts subfolders. - * @param {Object} options Check options. Set the "shortname" property to a - * spec's shortname to only check that spec. - * @return {Array} The list of specifications along with dfn problems that have - * been identified. Each entry has `url`, 'crawled`, `shortname` properties to - * identify the specification, and a `missing` property that is an object that - * may have `css` and `idl` properties which list missing CSS/IDL definitions. - */ -async function checkDefinitions(pathToReport, options = {}) { - const rootFolder = path.resolve(process.cwd(), pathToReport); - const index = (await loadJSON(path.resolve(rootFolder, 'index.json'))).results; - - // Check all dfns against CSS and IDL extracts - const checkOptions = { - rootFolder, - includeObsolete: !!options.shortname - }; - const missing = await Promise.all( - index - .filter(spec => !options.shortname || spec.shortname === options.shortname) - .map(async spec => { - const res = { - url: spec.url, - crawled: spec.crawled, - shortname: spec.shortname, - }; - if (!spec.dfns) { - return res; - } - res.missing = await checkSpecDefinitions(spec, checkOptions); - return res; - }) - ); - - return missing; -} - - -/** - * Report missing dfn to the console as Markdown - * - * @function - * @private - * @param {Object} missing Object that describes missing dfn - */ -function reportMissing(missing) { - const exp = missing.expected; - const found = missing.found; - const foundFor = (found && found.for && found.for.length > 0) ? - ' for ' + found.for.map(f => `\`${f}\``).join(',') : - ''; - console.log(`- \`${exp.linkingText[0]}\` ${exp.type ? `with type \`${exp.type}\`` : ''}` + - (missing.for ? ` for [\`${missing.for.linkingText[0]}\`](${missing.for.href})` : '') + - (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : '')); -} - - -/************************************************** -Export methods for use as module -**************************************************/ -export { - checkSpecDefinitions, - checkDefinitions, - - // "Inner" functions that the IDL names generator uses to link IDL terms with - // their definition (see generate-idlnames.js) - getExpectedDfnFromIdlDesc, - matchIdlDfn -} - - -/************************************************** -Code run if the code is run as a stand-alone module -**************************************************/ -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const pathToReport = process.argv[2]; - const shortname = process.argv[3] || 'all'; - const format = process.argv[4] || 'markdown'; - - const options = (shortname === 'all') ? undefined : { shortname }; - let res = await checkDefinitions(pathToReport, options); - if (shortname === 'all') { - res = res - .filter(result => result.missing && - !result.missing.obsoleteDfnsModel && - ((result.missing.css.length > 0) || (result.missing.idl.length > 0))); - } - - if (format === 'json') { - console.log(JSON.stringify(res, null, 2)); - } - else { - res.forEach(result => { - const missing = result.missing || {css: [], idl: []}; - const errors = ['css', 'idl'] - .map(type => result.missing[type].filter(missing => !missing.warning)) - .flat(); - const warnings = ['css', 'idl'] - .map(type => result.missing[type].filter(missing => missing.warning)) - .flat(); - console.log('
'); - console.log(`${result.shortname} (${errors.length} errors, ${warnings.length} warnings)`); - console.log(); - if (errors.length === 0 && warnings.length === 0) { - console.log('All good!'); - } - if (errors.length > 0) { - console.log('
'); - console.log(`Errors (${errors.length})`); - console.log(); - errors.forEach(reportMissing); - console.log('
'); - } - if (warnings.length > 0) { - console.log('
'); - console.log(`Warnings (${warnings.length})`); - console.log(); - warnings.forEach(reportMissing); - console.log('
'); - } - console.log('
'); - console.log(); - }) - } -} \ No newline at end of file diff --git a/src/cli/study-algorithms.js b/src/cli/study-algorithms.js deleted file mode 100644 index 39844990..00000000 --- a/src/cli/study-algorithms.js +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env node - -import { loadCrawlResults } from '../lib/util.js'; -import studyAlgorithms from '../lib/study-algorithms.js'; -import loadJSON from '../lib/load-json.js'; -import { expandCrawlResult } from 'reffy'; -import path from 'node:path'; - -function reportToConsole(results) { - const toreport = []; - for (const anomaly of results) { - const spec = anomaly.specs[0]; - let entry = toreport.find(entry => entry.spec.shortname === spec.shortname); - if (!entry) { - entry = { spec, anomalies: [] }; - toreport.push(entry); - } - entry.anomalies.push(anomaly); - } - toreport.sort((entry1, entry2) => { - return entry1.spec.title.localeCompare(entry2.spec.title); - }); - for (const entry of toreport) { - const spec = entry.spec; - console.log(`- [${spec.title}](${spec.nightly?.url ?? spec.url})`); - for (const anomaly of entry.anomalies) { - console.log(` - ${anomaly.message}`); - } - } -} - -async function main(crawlPath, anomalyType) { - // Target the index file if needed - if (!crawlPath.endsWith('index.json')) { - crawlPath = path.join(crawlPath, 'index.json'); - } - - const crawl = await loadJSON(crawlPath); - if (!crawl) { - throw new Error("Impossible to read " + crawlPath); - } - - const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), ['algorithms']); - const report = studyAlgorithms(expanded.results); - reportToConsole(report); -} - -/************************************************** -Main loop -**************************************************/ -const crawlPath = process.argv[2]; -if (!crawlPath) { - console.error('Web IDL analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} -main(crawlPath).catch(e => { - console.error(e); - process.exit(3); -}); diff --git a/src/cli/study-backrefs.js b/src/cli/study-backrefs.js deleted file mode 100644 index ac985ae1..00000000 --- a/src/cli/study-backrefs.js +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env node -/** - * The backrefs analyzer takes links to a ED crawl folder and a TR crawl folder, - * and creates a report that lists, for each spec: - * - * - Links to anchors that do not exist - * - Links to anchors that no longer exist in the ED of the target spec - * - Links to anchors that are not definitions or headings - * - Links to definitions that are not exported - * - Links to dated TR URLs - * - Links to specs that should no longer be referenced - * - Links to documents that look like specs but are unknown in Reffy - * (likely not an anomaly per se) - * - * It also flags links that look like specs but that do not appear in the crawl - * (most of these should be false positives). - * - * The backrefs analyzer can be called directly through: - * - * `node study-backrefs.js [root crawl folder]` - * - * where `root crawl folder` is the path to the root folder that contains `ed` - * and `tr` subfolders. Alternatively, the analyzer may be called with two - * arguments, one being the path to the ED crawl folder, another being the path - * to the TR crawl folder. 
- * - * @module backrefs - */ - -import { loadCrawlResults } from '../lib/util.js'; -import studyBackrefs from '../lib/study-backrefs.js'; -import path from 'node:path'; - -function reportToConsole(results) { - for (const anomaly of results) { - anomaly.specs = anomaly.specs.map(spec => { - return { shortname: spec.shortname, url: spec.url, title: spec.title }; - }); - } - const perSpec = {}; - for (const anomaly of results) { - for (const spec of anomaly.specs) { - if (!perSpec[spec.url]) { - perSpec[spec.url] = { spec, anomalies: [] }; - } - perSpec[spec.url].anomalies.push(anomaly); - } - } - - const anomalyTypes = [ - { name: 'brokenLinks', title: 'Links to anchors that do not exist' }, - { name: 'evolvingLinks', title: 'Links to anchors that no longer exist in the editor draft of the target spec' }, - { name: 'notDfn', title: 'Links to anchors that are not definitions or headings' }, - { name: 'notExported', title: 'Links to definitions that are not exported' }, - { name: 'datedUrls', title: 'Links to dated TR URLs' }, - { name: 'outdatedSpecs', title: 'Links to specs that should no longer be referenced' }, - { name: 'unknownSpecs', title: 'Links to documents that are not recognized as specs' } - ]; - let report = ''; - Object.keys(perSpec) - .sort((url1, url2) => perSpec[url1].spec.title.localeCompare(perSpec[url2].spec.title)) - .forEach(url => { - const spec = perSpec[url].spec; - const anomalies = perSpec[url].anomalies; - report += `
${spec.title}\n\n`; - for (const type of anomalyTypes) { - const links = anomalies - .filter(anomaly => anomaly.name === type.name) - .map(anomaly => anomaly.message); - if (links.length > 0) { - report += `${type.title}:\n`; - for (const link of links) { - report += `* ${link}\n`; - } - report += '\n\n'; - } - } - report += '
\n'; - }); - console.log(report); -} - - -/************************************************** -Main loop -**************************************************/ -let edCrawlResultsPath = process.argv[2]; -let trCrawlResultsPath = process.argv[3]; - -if (!edCrawlResultsPath) { - console.error('Backrefs analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} - -// If only one argument is provided, consider that it is the path to the -// root folder of a crawl results, with "ed" and "tr" subfolders -if (!trCrawlResultsPath) { - trCrawlResultsPath = path.join(edCrawlResultsPath, 'tr'); - edCrawlResultsPath = path.join(edCrawlResultsPath, 'ed'); -} - -// Target the index file if needed -if (!edCrawlResultsPath.endsWith('index.json')) { - edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json'); -} -if (!trCrawlResultsPath.endsWith('index.json')) { - trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json'); -} - -// Analyze the crawl results -loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath) - .then(async crawl => { - // Donwload automatic map of multipages anchors in HTML spec - let htmlFragments = {}; - try { - htmlFragments = await fetch("https://html.spec.whatwg.org/multipage/fragment-links.json").then(r => r.json()); - } catch (err) { - console.warn("Could not fetch HTML fragments data, may report false positive broken links on HTML spec", err); - } - return { crawl, htmlFragments }; - }) - .then(({ crawl, htmlFragments }) => studyBackrefs(crawl.ed, crawl.tr, htmlFragments)) - .then(reportToConsole) - .catch(e => { - console.error(e); - process.exit(3); - }); diff --git a/src/cli/study-webidl.js b/src/cli/study-webidl.js deleted file mode 100644 index 7d396085..00000000 --- a/src/cli/study-webidl.js +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env node - -import { loadCrawlResults } from '../lib/util.js'; -import studyWebIdl from '../lib/study-webidl.js'; -import loadJSON from '../lib/load-json.js'; -import { expandCrawlResult } from 'reffy'; -import path from 'node:path'; - - -function reportToConsole(results) { - results.forEach(anomaly => anomaly.specs = anomaly.specs.map(spec => { - return { shortname: spec.shortname, url: spec.url }; - })); - console.log(JSON.stringify(results, null, 2)); -} - -async function main(crawlPath) { - // Target the index file if needed - if (!crawlPath.endsWith('index.json')) { - crawlPath = path.join(crawlPath, 'index.json'); - } - - const crawl = await loadJSON(crawlPath); - if (!crawl) { - throw new Error("Impossible to read " + crawlPath); - } - - const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), 'idl'); - const report = studyWebIdl(expanded.results); - reportToConsole(report); -} - -/************************************************** -Main loop -**************************************************/ -const crawlPath = process.argv[2]; -if (!crawlPath) { - console.error('Web IDL analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} -main(crawlPath).catch(e => { - console.error(e); - process.exit(3); -}); diff --git a/src/lib/generate-report.js b/src/lib/generate-report.js deleted file mode 100644 index 349a5f6c..00000000 --- a/src/lib/generate-report.js +++ /dev/null @@ -1,1020 +0,0 @@ -/** - * The Markdown report generator takes an anomalies report as input and - * generates a human-readable report in Markdown out of it. 
Depending on - * parameters, the generated report may be a report per spec, a report per - * issue, a dependencies report, or a diff report. - * - * @module markdownGenerator - */ - -import loadJSON from './load-json.js'; - - -/** - * Compares specs for ordering by title - */ -const byTitle = (a, b) => a.title.toUpperCase().localeCompare(b.title.toUpperCase()); - -/** - * Returns true when two arrays are equal - */ -const arrayEquals = (a, b, prop) => - (a.length === b.length) && - a.every(item => !!(prop ? b.find(i => i[prop] === item[prop]) : b.find(i => i === item))); - -/** - * Options for date formatting - */ -const dateOptions = { - day: '2-digit', - month: 'long', - year: 'numeric' -}; - -const toSlug = name => name.replace(/([A-Z])/g, s => s.toLowerCase()) - .replace(/[^a-z0-9]/g, '_') - .replace(/_+/g, '_'); - -/** - * Helper function that outputs main crawl info about a spec - * - * @function - */ -function writeCrawlInfo(spec, withHeader, w) { - let wres = ''; - w = w || (msg => wres += (msg || '') + '\n'); - - if (withHeader) { - w('#### Spec info {.info}'); - } - else { - w('Spec info:'); - } - w(); - - let crawledUrl = spec.crawled || spec.latest; - w('- Initial URL: [' + spec.url + '](' + spec.url + ')'); - w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')'); - if (spec.date) { - w('- Crawled version: ' + spec.date); - } - if (spec.nightly) { - w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')'); - } - if (spec.release) { - w('- Latest published version: [' + spec.release.url + '](' + spec.release.url + ')'); - } - if (spec.repository) { - let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/); - let repositoryName = spec.repository; - if (githubcom) { - repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2]; - } - w('- Repository: [' + repositoryName + '](' + spec.repository + ')'); - } - w('- Shortname: ' + (spec.shortname || 'no shortname')); - return wres; -} - - -function writeDependenciesInfo(spec, results, withHeader, w) { - let wres = ''; - w = w || (msg => wres += (msg || '') + '\n'); - - if (withHeader) { - w('#### Known dependencies on this specification {.dependencies}'); - w(); - } - - if (spec.report.referencedBy.normative.length > 0) { - w('Normative references to this spec from:'); - w(); - spec.report.referencedBy.normative.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - } - else { - w('No normative reference to this spec from other specs.'); - } - w(); - - // Check the list of specifications that should normatively reference - // this specification because they use IDL content it defines. - let shouldBeReferencedBy = results.filter(s => - s.report.missingWebIdlRef && - s.report.missingWebIdlRef.find(i => - i.refs.find(ref => (ref.url === spec.url)))); - if (shouldBeReferencedBy.length > 0) { - w('Although they do not, the following specs should also normatively' + - ' reference this spec because they use IDL terms it defines:'); - w(); - shouldBeReferencedBy.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - w(); - } - - if (spec.report.referencedBy.informative.length > 0) { - w('Informative references to this spec from:'); - w(); - spec.report.referencedBy.informative.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - } - else { - w('No informative reference to this spec from other specs.'); - } - return wres; -} - -/** - * Outputs a human-readable Markdown anomaly report from a crawl report, - * with one entry per spec. 
- * - * The function spits the report to the console. - * - * @function - */ -function generateReportPerSpec(study) { - var count = 0; - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - const results = study.results; - - w('% ' + (study.title || 'Web specs analysis')); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - const specReport = spec => { - // Prepare anomaly flags - let flags = ['spec']; - if (spec.report.error) { - flags.push('error'); - } - else { - if (!spec.report.ok) { - flags.push('anomaly'); - } - flags = flags.concat(Object.keys(spec.report) - .filter(anomaly => (anomaly !== 'referencedBy')) - .filter(anomaly => (Array.isArray(spec.report[anomaly]) ? - (spec.report[anomaly].length > 0) : - !!spec.report[anomaly]))); - } - let attr = flags.reduce((res, anomaly) => - res + (res ? ' ' : '') + 'data-' + anomaly + '=true', ''); - - w('### ' + spec.title + ' {' + attr + '}'); - w(); - writeCrawlInfo(spec, true, w); - w(); - - const report = spec.report; - w('#### Potential issue(s) {.anomalies}'); - w(); - if (report.ok) { - w('This specification looks good!'); - } - else if (report.error) { - w('The following network or parsing error occurred:'); - w('`' + report.error + '`'); - w(); - w('Reffy could not render this specification as a DOM tree and' + - ' cannot say anything about it as a result. In particular,' + - ' it cannot include content defined in this specification' + - ' in the analysis of other specifications crawled in this' + - ' report.'); - } - else { - if (report.noNormativeRefs) { - w('- No normative references found'); - } - if (report.hasInvalidIdl) { - w('- Invalid WebIDL content found'); - } - if (report.hasObsoleteIdl) { - w('- Obsolete WebIDL constructs found'); - } - if (report.noRefToWebIDL) { - w('- Spec uses WebIDL but does not reference it normatively'); - } - if (report.unknownExposedNames && - (report.unknownExposedNames.length > 0)) { - w('- Unknown [Exposed] names used: ' + - report.unknownExposedNames.map(name => '`' + name + '`').join(', ')); - } - if (report.unknownIdlNames && - (report.unknownIdlNames.length > 0)) { - w('- Unknown WebIDL names used: ' + - report.unknownIdlNames.map(name => '`' + name + '`').join(', ')); - } - if (report.redefinedIdlNames && - (report.redefinedIdlNames.length > 0)) { - w('- WebIDL names also defined elsewhere: '); - report.redefinedIdlNames.map(i => { - w(' * `' + i.name + '` also defined in ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and ')); - }); - } - if (report.missingWebIdlRef && - (report.missingWebIdlRef.length > 0)) { - w('- Missing references for WebIDL names: '); - report.missingWebIdlRef.map(i => { - w(' * `' + i.name + '` defined in ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - }); - } - [ - {prop: 'css', warning: false, title: 'No definition for CSS constructs'}, - {prop: 'idl', warning: false, title: 'No definition for IDL constructs'}, - {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'}, - {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'} - ].forEach(type => { - if (report.missingDfns && report.missingDfns[type.prop] && - (report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) { - w('- ' + type.title + ': '); - report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => { - const exp = missing.expected; - const found = missing.found; - 
const foundFor = (found && found.for && found.for.length > 0) ? - ' for ' + found.for.map(f => '`' + f + '`').join(',') : - ''; - w(' * `' + exp.linkingText[0] + '`' + - (exp.type ? ' with type `' + exp.type + '`' : '') + - (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') + - (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : '')); - }); - } - }); - if (report.missingLinkRef && - (report.missingLinkRef.length > 0)) { - w('- Missing references for links: '); - report.missingLinkRef.map(l => { - w(' * [`' + l + '`](' + l + ')'); - }); - } - if (report.inconsistentRef && - (report.inconsistentRef.length > 0)) { - w('- Inconsistent references for links: '); - report.inconsistentRef.map(l => { - w(' * [`' + l.link + '`](' + l.link + '), related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - }); - } - if (report.xrefs) { - [ - { prop: 'notExported', title: 'External links to private terms' }, - { prop: 'notDfn', title: 'External links that neither target definitions nor headings' }, - { prop: 'brokenLinks', title: 'Broken external links' }, - { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' }, - { prop: 'outdatedSpecs', title: 'External links to outdated specs' }, - { prop: 'datedUrls', title: 'External links that use a dated URL' } - ].forEach(type => { - if (report.xrefs[type.prop] && (report.xrefs[type.prop].length > 0)) { - w('- ' + type.title + ':'); - report.xrefs[type.prop].map(l => { - w(' * [`' + l + '`](' + l + ')'); - }) - } - }); - } - } - w(); - writeDependenciesInfo(spec, results, true, w); - w(); - w(); - }; - - - const orgs = [...new Set(study.results.map(r => r.organization))].sort(); - for (let org of orgs) { - w(`# ${org} {#org-${toSlug(org)}}`); - w(); - const groups = [...new Set(study.results.filter(r => r.organization === org).map(r => r.groups.map(g => g.name)).flat())].sort(); - for (let group of groups) { - w(`## ${group} {#group-${toSlug(group)}}`); - w(); - study.results - .filter(r => r.organization === org && r.groups.find(g => g.name === group)) - .forEach(specReport); - } - } - - w(); - w(); - - return wres; -} - - -/** - * Outputs a human-readable Markdown anomaly report from a crawl report, - * sorted by type of anomaly. - * - * The function spits the report to the console. - * - * @function - */ -function generateReportPerIssue(study) { - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - let count = 0; - let results = study.results; - - w('% ' + (study.title || 'Web specs analysis')); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - count = results.length; - w('' + count + ' specification' + ((count > 1) ? 's' : '') + ' were crawled in this report.'); - w(); - w(); - - let parsingErrors = results.filter(spec => spec.report.error); - if (parsingErrors.length > 0) { - w('## Specifications that could not be rendered'); - w(); - w('Reffy could not fetch or render these specifications for some reason.' + - ' This may happen when a network error occurred or when a specification' + - ' uses an old version of ReSpec.'); - w(); - count = 0; - parsingErrors.forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + '): `' + spec.report.error + '`'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 
's' : '') + ' found'); - w(); - w(); - - // Remove specs that could not be parsed from the rest of the report - results = results.filter(spec => !spec.report.error); - } - - - count = 0; - w('## Specifications without normative dependencies'); - w(); - results - .filter(spec => spec.report.noNormativeRefs) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('Basically all specifications have normative dependencies on some other' + - ' specification. Reffy could not find any normative dependencies for the' + - ' specifications mentioned above, which seems strange.'); - } - w(); - w(); - - count = 0; - w('## List of specifications with invalid WebIDL content'); - w(); - results - .filter(spec => spec.report.hasInvalidIdl) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('WebIDL continues to evolve. Strudy may incorrectly report as invalid' + - ' perfectly valid WebIDL content if the specification uses bleeding-edge' + - ' WebIDL features'); - } - w(); - w(); - - count = 0; - w('## List of specifications with obsolete WebIDL constructs'); - w(); - results - .filter(spec => spec.report.hasObsoleteIdl) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('A typical example is the use of `[]` instead of `FrozenArray`.'); - } - w(); - w(); - - count = 0; - w('## Specifications that use WebIDL but do not reference the WebIDL spec'); - w(); - results.forEach(spec => { - if (spec.report.noRefToWebIDL) { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - } - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - ('All specifications that define WebIDL content should have a ' + - ' **normative** reference to the WebIDL specification. ' + - ' Some specifications listed here may reference the WebIDL' + - ' specification informatively, but that is not enough!'); - } - w(); - w(); - - - count = 0; - w('## List of [Exposed] names not defined in the specifications crawled'); - w(); - var idlNames = {}; - results.forEach(spec => { - if (!spec.report.unknownExposedNames || - (spec.report.unknownExposedNames.length === 0)) { - return; - } - spec.report.unknownExposedNames.forEach(name => { - if (!idlNames[name]) { - idlNames[name] = []; - } - idlNames[name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` used in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', ')); - }); - w(); - w('=> ' + count + ' [Exposed] name' + ((count > 1) ? 
's' : '') + ' found'); - if (count > 0) { - w(); - w('Please keep in mind that Strudy only knows about IDL terms defined in the' + - ' specifications that were crawled **and** that do not have invalid IDL content.'); - } - w(); - w(); - - - count = 0; - w('## List of WebIDL names not defined in the specifications crawled'); - w(); - idlNames = {}; - results.forEach(spec => { - if (!spec.report.unknownIdlNames || - (spec.report.unknownIdlNames.length === 0)) { - return; - } - spec.report.unknownIdlNames.forEach(name => { - if (!idlNames[name]) { - idlNames[name] = []; - } - idlNames[name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` used in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', ')); - }); - w(); - w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('Some of them may be type errors in specs (e.g. "int" does not exist, "Array" cannot be used on its own, etc.)'); - w('Also, please keep in mind that Strudy only knows about IDL terms defined in the' + - ' specifications that were crawled **and** that do not have invalid IDL content.'); - } - w(); - w(); - - count = 0; - w('## List of WebIDL names defined in more than one spec'); - w(); - idlNames = {}; - results.forEach(spec => { - if (!spec.report.redefinedIdlNames || - (spec.report.redefinedIdlNames.length === 0)) { - return; - } - spec.report.redefinedIdlNames.forEach(i => { - if (!idlNames[i.name]) { - idlNames[i.name] = []; - } - idlNames[i.name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` defined in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and ')); - }); - w(); - w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('"There can be only one"...'); - } - w(); - w(); - - count = 0; - var countrefs = 0; - w('## Missing references for WebIDL names'); - w(); - results.forEach(spec => { - if (spec.report.missingWebIdlRef && - (spec.report.missingWebIdlRef.length > 0)) { - count += 1; - if (spec.report.missingWebIdlRef.length === 1) { - countrefs += 1; - let i = spec.report.missingWebIdlRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' uses `' + i.name + '` but does not reference ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') uses:'); - spec.report.missingWebIdlRef.map(i => { - countrefs += 1; - w(' * `' + i.name + '` but does not reference ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') + - ' for IDL definitions found in ' + count + ' specification' + - ((count > 1) ? 
's' : '')); - w(); - w(); - - [ - {prop: 'css', warning: false, title: 'No definition for CSS constructs'}, - {prop: 'idl', warning: false, title: 'No definition for IDL constructs'}, - {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'}, - {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'} - ].forEach(type => { - count = 0; - countrefs = 0; - w('## ' + type.title); - w(); - - results.forEach(spec => { - if (spec.report.missingDfns && - spec.report.missingDfns[type.prop] && - (spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) { - count += 1; - - w('- [' + spec.title + '](' + spec.crawled + '):'); - spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => { - countrefs += 1; - const exp = missing.expected; - const found = missing.found; - const foundFor = (found && found.for && found.for.length > 0) ? - ' for ' + found.for.map(f => '`' + f + '`').join(',') : - ''; - w(' * `' + exp.linkingText[0] + '`' + - (exp.type ? ' with type `' + exp.type + '`' : '') + - (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') + - (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : '')); - }); - } - }); - - w(); - w('=> ' + countrefs + ' construct' + ((countrefs > 1) ? 's' : '') + - ' without definition found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - w(); - w(); - }); - - - count = 0; - countrefs = 0; - w('## Missing references based on document links'); - w(); - results.forEach(spec => { - if (spec.report.missingLinkRef && - (spec.report.missingLinkRef.length > 0)) { - count += 1; - if (spec.report.missingLinkRef.length === 1) { - countrefs += 1; - let l = spec.report.missingLinkRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' links to [`' + l + '`](' + l + ') but does not list it' + - ' in its references'); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') links to:'); - spec.report.missingLinkRef.forEach(l => { - countrefs++; - w(' * [`' + l + '`](' + l + ') but does not list it ' + - 'in its references'); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') + - ' for links found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - if (count > 0) { - w(); - w('Any link to an external document from within a specification should' + - ' trigger the creation of a corresponding entry in the references' + - ' section.'); - w(); - w('Note Strudy only reports on links to "well-known" specs and ignores' + - ' links to non-usual specs (e.g. PDF documents, etc.) 
for now.'); - } - w(); - w(); - - count = 0; - countrefs = 0; - w('## Reference URL is inconsistent with URL used in document links'); - w(); - results.forEach(spec => { - if (spec.report.inconsistentRef && - (spec.report.inconsistentRef.length > 0)) { - count += 1; - if (spec.report.inconsistentRef.length === 1) { - countrefs += 1; - let l = spec.report.inconsistentRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' links to [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') links to:'); - spec.report.inconsistentRef.forEach(l => { - countrefs++; - w(' * [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' inconsistent reference' + ((countrefs > 1) ? 's' : '') + - ' for links found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - if (count > 0) { - w(); - w('Links in the body of a specification should be to the same document' + - ' as that pointed to by the related reference in the References section.' + - ' The specifications reported here use a different URL. For instance,' + - ' they may use a link to the Editor\'s Draft but target the latest' + - ' published version in the References section.' + - ' There should be some consistency across the specification.'); - } - w(); - w(); - - [ - { prop: 'notExported', title: 'External links to private terms' }, - { prop: 'notDfn', title: 'External links that neither target definitions nor headings' }, - { prop: 'brokenLinks', title: 'Broken external links' }, - { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' }, - { prop: 'outdatedSpecs', title: 'External links to outdated specs' }, - { prop: 'datedUrls', title: 'External links that use a dated URL' } - ].forEach(type => { - count = 0; - countrefs = 0; - w('## ' + type.title); - w(); - - results.forEach(spec => { - if (spec.report.xrefs && - spec.report.xrefs[type.prop] && - (spec.report.xrefs[type.prop].length > 0)) { - count += 1; - - w('- [' + spec.title + '](' + spec.crawled + '):'); - spec.report.xrefs[type.prop].map(l => { - countrefs += 1; - w(' * [`' + l + '`](' + l + ')'); - }); - } - }); - - w(); - w('=> ' + countrefs + ' problematic external link' + ((countrefs > 1) ? 's' : '') + - ' found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - w(); - w(); - }); - - - return wres; -} - - -/** - * Outputs a human-readable Markdown dependencies report from a crawl report, - * one entry per spec. - * - * The function spits the report to the console. - * - * @function - */ -function generateDependenciesReport(study) { - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - let count = 0; - const results = study.results; - - w('# Web specs dependencies report'); - w(); - w('Strudy is an analysis tool for Web spec crawl reports created by Reffy.' + - ' It studies extracts created during the crawl.'); - w(); - w('The report below lists incoming links for each specification, in other words the list' + - ' of specifications that normatively or informatively reference a given specification.'); - w(); - w('By definition, Strudy only knows about incoming links from specifications that have been' + - ' crawled and that could successfully be parsed. 
Other specifications that Strudy does' + - ' not know anything about may reference specifications listed here.'); - w(); - results.forEach(spec => { - w('## ' + spec.title); - w(); - writeCrawlInfo(spec, false, w); - w(); - writeDependenciesInfo(spec, results, false, w); - w(); - w(); - }); - - return wres; -} - - -/** - * Outputs a human-readable diff between two crawl reports, one entry per spec. - * - * The function spits the report to the console. - * - * @function - */ -function generateDiffReport(study, refStudy, options) { - options = options || {}; - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - const results = study.results; - const resultsRef = refStudy.results; - - // Compute diff for all specs - // (note we're only interested in specs that are part in the new crawl, - // and won't report on specs that were there before and got dropped) - let resultsDiff = results.map(spec => { - let ref = resultsRef.find(s => s.url === spec.url) || { - missing: true, - report: { - unknownExposedNames: [], - unknownIdlNames: [], - redefinedIdlNames: [], - missingWebIdlRef: [], - missingLinkRef: [], - inconsistentRef: [] - } - }; - - const report = spec.report; - const reportRef = ref.report; - - const getSimpleDiff = prop => (report[prop] !== reportRef[prop]) ? - { - ins: (typeof report[prop] !== 'undefined') ? report[prop] : null, - del: (typeof reportRef[prop] !== 'undefined') ? reportRef[prop] : null - } : - null; - const getArrayDiff = (prop, key) => - (!arrayEquals(report[prop], reportRef[prop], key) && - (!options.onlyNew || report[prop].find(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))))) ? - { - ins: report[prop].filter(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))), - del: reportRef[prop].filter(item => !report[prop].find(i => (key ? i[key] === item[key] : i === item))) - } : - null; - - // Compute diff between new and ref report for that spec - const diff = { - title: (spec.title !== ref.title) ? { - ins: (typeof spec.title !== 'undefined') ? spec.title : null, - del: (typeof ref.title !== 'undefined') ? 
ref.title : null - } : null, - ok: getSimpleDiff('ok'), - error: getSimpleDiff('error'), - noNormativeRefs: getSimpleDiff('noNormativeRefs'), - noRefToWebIDL: getSimpleDiff('noRefToWebIDL'), - hasInvalidIdl: getSimpleDiff('hasInvalidIdl'), - hasObsoleteIdl: getSimpleDiff('hasObsoleteIdl'), - unknownExposedNames: getArrayDiff('unknownExposedNames'), - unknownIdlNames: getArrayDiff('unknownIdlNames'), - redefinedIdlNames: getArrayDiff('redefinedIdlNames', 'name'), - missingWebIdlRef: getArrayDiff('missingWebIdlRef', 'name'), - missingLinkRef: getArrayDiff('missingLinkRef'), - inconsistentRef: getArrayDiff('inconsistentRef', 'link') - }; - - return { - title: spec.title, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - repository: spec.repository, - isNewSpec: ref.missing, - hasDiff: Object.keys(diff).some(key => diff[key] !== null), - diff - }; - }); - - if (!options.onlyNew) { - resultsDiff = resultsDiff.concat(resultsRef - .map(spec => { - let ref = results.find(s => s.url === spec.url); - if (ref) return null; - return { - title: spec.title, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - crawled: spec.crawled, - repository: spec.repository, - isUnknownSpec: true, - hasDiff: true - }; - }) - .filter(spec => !!spec)); - resultsDiff.sort(byTitle); - } - - w('% Diff between report from "' + - (new Date(study.date)).toLocaleDateString('en-US', dateOptions) + - '" and reference report from "' + - (new Date(refStudy.date)).toLocaleDateString('en-US', dateOptions) + - '"'); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - resultsDiff.forEach(spec => { - // Nothing to report if crawl result is the same - if (!spec.hasDiff) { - return; - } - - w('## ' + spec.title); - w(); - - let crawledUrl = spec.crawled || spec.latest; - w('- Initial URL: [' + spec.url + '](' + spec.url + ')'); - w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')'); - if (spec.nightly && (spec.nightly.url !== crawledUrl)) { - w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')'); - } - if (spec.repository) { - let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/); - let repositoryName = spec.repository; - if (githubcom) { - repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2]; - } - w('- Repository: [' + repositoryName + '](' + spec.repository + ')'); - } - - if (spec.isNewSpec) { - w('- This specification was not in the reference crawl report.'); - w(); - w(); - return; - } - - if (spec.isUnknownSpec) { - w('- This specification is not in the new crawl report.'); - w(); - w(); - return; - } - - const diff = spec.diff; - const simpleDiff = prop => - ((diff[prop].ins !== null) ? '*INS* ' + diff[prop].ins : '') + - (((diff[prop].ins !== null) && (diff[prop].del !== null)) ? ' / ' : '') + - ((diff[prop].del !== null) ? '*DEL* ' + diff[prop].del : ''); - const arrayDiff = (prop, key) => - ((diff[prop].ins.length > 0) ? '*INS* ' + diff[prop].ins.map(i => (key ? i[key] : i)).join(', ') : '') + - (((diff[prop].ins.length > 0) && (diff[prop].del.length > 0)) ? ' / ' : '') + - ((diff[prop].del.length > 0) ? '*DEL* ' + diff[prop].del.map(i => (key ? 
i[key] : i)).join(', ') : ''); - - [ - { title: 'Spec title', prop: 'title', diff: 'simple' }, - { title: 'Spec is OK', prop: 'ok', diff: 'simple' }, - { title: 'Spec could not be rendered', prop: 'error', diff: 'simple' }, - { title: 'No normative references found', prop: 'noNormativeRefs', diff: 'simple' }, - { title: 'Invalid WebIDL content found', prop: 'hasInvalidIdl', diff: 'simple' }, - { title: 'Obsolete WebIDL constructs found', prop: 'hasObsoleteIdl', diff: 'simple' }, - { title: 'Spec does not reference WebIDL normatively', prop: 'noRefToWebIDL', diff: 'simple' }, - { title: 'Unknown [Exposed] names used', prop: 'unknownExposedNames', diff: 'array' }, - { title: 'Unknown WebIDL names used', prop: 'unknownIdlNames', diff: 'array' }, - { title: 'WebIDL names also defined elsewhere', prop: 'redefinedIdlNames', diff: 'array', key: 'name' }, - { title: 'Missing references for WebIDL names', prop: 'missingWebIdlRef', diff: 'array', key: 'name' }, - { title: 'Missing references for links', prop: 'missingLinkRef', diff: 'array' }, - { title: 'Inconsistent references for links', prop: 'inconsistentRef', diff: 'array', key: 'link' } - ].forEach(item => { - // Only report actual changes, and don't report other changes when - // the spec could not be rendered in one of the crawl reports - if (diff[item.prop] && ((item.prop === 'error') || (item.prop === 'title') || (item.prop === 'latest') || !diff.error)) { - w('- ' + item.title + ': ' + ((item.diff === 'simple') ? - simpleDiff(item.prop) : - arrayDiff(item.prop, item.key))); - } - }); - w(); - w(); - }); - - return wres; -} - - -/** - * Main function that generates a Markdown report from a study file. - * - * @function - * @param {String} studyFile Path to the study file to parse, or study report - * @param {Object} options Type of report to generate and other options - * @return {String} The generated report - */ -async function generateReport(studyFile, options) { - options = options || {}; - if (!studyFile) { - throw new Error('Required filename parameter missing'); - } - if (options.diffReport && !options.refStudyFile) { - throw new Error('Required filename to reference crawl for diff missing'); - } - - const study = typeof studyFile === 'string' ? 
- (await loadJSON(studyFile)) : - studyFile; - if (!study) { - throw new Error('Impossible to read ' + studyFile); - } - - let refStudy = {}; - if (options.diffReport) { - if (options.refStudyFile.startsWith('http')) { - try { - let response = await fetch(options.refStudyFile, { nolog: true }); - refStudy = await response.json(); - } - catch (e) { - throw new Error('Impossible to fetch ' + options.refStudyFile + ': ' + e); - } - return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew }); - } - else { - refStudy = await loadJSON(options.refStudyFile); - if (!refStudy) { - throw new Error('Impossible to read ' + options.refStudyFile); - } - return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew }); - } - } - else if (options.depReport) { - return generateDependenciesReport(study); - } - else if (options.perSpec) { - return generateReportPerSpec(study); - } - else { - return generateReportPerIssue(study); - } - return report; -} - - -/************************************************** -Export methods for use as module -**************************************************/ -export default generateReport; diff --git a/src/lib/study-crawl.js b/src/lib/study-crawl.js deleted file mode 100644 index be210dc4..00000000 --- a/src/lib/study-crawl.js +++ /dev/null @@ -1,412 +0,0 @@ -/** - * The crawl analyzer takes a crawl report as input and creates a report that - * contains, for each spec, a list of potential anomalies, such as: - * - * 1. specs that do not seem to reference any other spec normatively; - * 2. specs that define WebIDL terms but do not normatively reference the WebIDL - * spec; - * 3. specs that contain invalid WebIDL terms definitions; - * 4. specs that use obsolete WebIDL constructs (e.g. `[]` instead of - * `FrozenArray`); - * 5. specs that define WebIDL terms that are *also* defined in another spec; - * 6. specs that use WebIDL terms defined in another spec without referencing - * that spec normatively; - * 7. specs that use WebIDL terms for which the crawler could not find any - * definition in any of the specs it studied; - * 8. specs that link to another spec but do not include a reference to that - * other spec; - * 9. specs that link to another spec inconsistently in the body of the document - * and in the list of references (e.g. because the body of the document - * references the Editor's draft while the reference is to the latest published - * version). - * 10. W3C specs that do not have a known Editor's Draft - * - * @module analyzer - */ - -import fs from 'node:fs'; -import path from 'node:path'; -import { expandCrawlResult, isLatestLevelThatPasses } from 'reffy'; -import studyBackrefs from './study-backrefs.js'; -import { checkSpecDefinitions } from '../cli/check-missing-dfns.js'; -import { canonicalizeUrl, canonicalizesTo } from "./canonicalize-url.js"; -import loadJSON from './load-json.js'; - -const array_concat = (a,b) => a.concat(b); -const uniqueFilter = (item, idx, arr) => arr.indexOf(item) === idx; - -/** - * Helper function that returns true when the given URL seems to target a real - * "spec" (as opposed to, say, a Wiki page, or something else) - */ -const matchSpecUrl = url => - url.match(/spec.whatwg.org/) || - url.match(/www.w3.org\/TR\/[a-z0-9]/) || - (url.match(/w3c.github.io/) && ! 
url.match(/w3c.github.io\/test-results\//)); - - -/** - * Compares specs for ordering by title - */ -const byTitle = (a, b) => - (a.title || '').toUpperCase().localeCompare((b.title || '').toUpperCase()); - - -/** - * Returns true when the given error array is not set or does not contain any - * error. - */ -function isOK(errors) { - return !errors || (errors.length === 0); -} - - -/** - * Filter out spec info parameters that are not needed when the spec is to - * appear as a reference in the final report, to keep the JSON report somewhat - * readable. - * - * @function - * @param {Object} spec The spec info to filter, typically the spec object - * contained in the results of a crawl. - * @return {Object} A new spec object that only contains the URL, title, the - * URL that was crawled. - */ -function filterSpecInfo(spec) { - return { - url: spec.url, - title: spec.title, - crawled: spec.crawled - }; -} - - -/** - * Analyze the result of a crawl and produce a report that can easily be - * converted without more processing to a human readable version. - * - * @function - * @param {Array(Object)} A crawl result, one entry per spec - * @param {Array(Object)} An optional list of specs to include in the report. - * All specs are included by default. - * @return {Array(Object)} A report, one entry per spec, each spec will have - * a "report" property with "interesting" properties, see code comments inline - * for details - */ -async function studyCrawlResults(results, options = {}) { - const knownIdlNames = results - .map(r => r.idlparsed?.idlNames ? Object.keys(r.idlparsed.idlNames) : [], []) - .reduce(array_concat) - .filter(uniqueFilter); - const knownGlobalNames = results - .map(r => r.idlparsed?.globals ? Object.keys(r.idlparsed.globals) : [], []) - .reduce(array_concat) - .filter(uniqueFilter); - const idlNamesIndex = {}; - knownIdlNames.forEach(name => - idlNamesIndex[name] = results.filter(spec => - isLatestLevelThatPasses(spec, results, s => - s.idlparsed?.idlNames?.[name]))); - - // WebIDL-1 only kept for historical reasons to process old crawl results - const WebIDLSpec = results.find(spec => - spec.shortname === 'webidl' || spec.shortname === 'WebIDL-1') || {}; - - const sortedResults = results.sort(byTitle); - - // Construct spec equivalence from the crawl report, which should be more - // complete than the initial equivalence list. 
- const specEquivalents = {}; - sortedResults.forEach(spec => - spec.versions.forEach(v => { - if (specEquivalents[v]) { - if (Array.isArray(specEquivalents[v])) { - specEquivalents[v].push(spec.url); - } - else { - specEquivalents[v] = [specEquivalents[v], spec.url]; - } - } - else { - specEquivalents[v] = spec.url; - } - } - )); - - // Strong canonicalization options to find references - var useEquivalents = { - datedToLatest: true, - equivalents: specEquivalents - }; - - const xrefsReport = studyBackrefs(sortedResults, options.trResults); - - const specsToInclude = options.include; - return Promise.all(sortedResults - .filter(spec => !specsToInclude || - (specsToInclude.length === 0) || - specsToInclude.some(toInclude => - toInclude === spec.shortname || - toInclude === spec.series?.shortname || - toInclude === spec.url || - toInclude === spec.crawled || - toInclude === spec.nightly?.url || - toInclude.shortname === spec.shortname || - toInclude.shortname === spec.series?.shortname || - (toInclude.url && toInclude.url === spec.url) || - (toInclude.url && toInclude.url === spec.crawled) || - (toInclude.url && toInclude.url === spec.nightly?.url) || - (toInclude.html && toInclude.html === spec.html))) - .map(async spec => { - spec.idlparsed = spec.idlparsed || {}; - spec.css = spec.css || {}; - spec.refs = spec.refs || {}; - spec.links = spec.links || {}; - const idlDfns = spec.idlparsed.idlNames ? - Object.keys(spec.idlparsed.idlNames) : []; - const idlExtendedDfns = spec.idlparsed.idlExtendedNames ? - Object.keys(spec.idlparsed.idlExtendedNames) : []; - const idlDeps = spec.idlparsed.externalDependencies ? - spec.idlparsed.externalDependencies : []; - const exposed = spec.idlparsed.exposed ? Object.keys(spec.idlparsed.exposed) : []; - - const xrefs = xrefsReport[spec.url]; - if (xrefs) { - // The backrefs analysis tool includes the spec's title in its - // report, which we already have at the top level. - delete xrefs.title; - - // The backrefs analysis tool also includes a list of documents - // that look like specs but that are not crawled. That is not - // an anomaly with the spec but rather a list of potential specs - // to be included in browser-specs. They should be treated - // separately. 
- delete xrefs.unknownSpecs; - } - - const report = { - // An error at this level means the spec could not be parsed at all - error: spec.error, - - // Whether the crawler found normative references - // (most specs should have) - noNormativeRefs: !spec.refs.normative || - (spec.refs.normative.length === 0), - - // Whether the spec normatively references the WebIDL spec - // (all specs that define IDL content should) - noRefToWebIDL: (spec !== WebIDLSpec) && - (spec.idlparsed.bareMessage || (idlDfns.length > 0) || (idlExtendedDfns.length > 0)) && - (!spec.refs.normative || !spec.refs.normative.find(ref => - ref.name.match(/^WebIDL/i) || - (ref.url === WebIDLSpec.url) || - (WebIDLSpec.nightly && (ref.url === WebIDLSpec.nightly.url)))), - - // Whether the spec has invalid IDL content - // (the crawler cannot do much when IDL content is invalid, it - // cannot tell what IDL definitions and references the spec - // contains in particular) - hasInvalidIdl: !!(!spec.idlparsed.idlNames && spec.idlparsed.bareMessage), - - // Whether the spec uses IDL constructs that were valid in - // WebIDL Level 1 but no longer are, typically "[]" instead of - // "FrozenArray" - hasObsoleteIdl: spec.idlparsed.hasObsoleteIdl, - - // List of Exposed names used in the spec that we know nothing - // about because we cannot find a matching "Global" name in - // any other spec - unknownExposedNames: exposed - .filter(name => !knownGlobalNames.includes(name) && name !== "*") - .sort(), - - // List of IDL names used in the spec that we know nothing about - // (for instance because of some typo or because the term is - // defined in a spec that has not been crawled or that could - // not be parsed) - unknownIdlNames: idlDeps - .filter(name => knownIdlNames.indexOf(name) === -1) - .sort(), - - // List of IDL definitions that are already defined in some - // other crawled spec - // (this should not happen, ideally) - redefinedIdlNames: idlDfns - .filter(name => (idlNamesIndex[name].length > 1)) - .map(name => { - return { - name, - refs: idlNamesIndex[name].filter(ref => (ref.url !== spec.url)).map(filterSpecInfo) - }; - }), - - // List of IDL names used in the spec that are defined in some - // other spec, and which do not seem to appear in the list of - // normative references - // (There should always be an entry in the normative list of - // references that links to that other spec) - // NB: "Exposed=Window", which would in theory trigger the need - // to add a normative reference to HTML, is considered to be - // an exception to the rule, and ignored. - missingWebIdlRef: idlDeps - .filter(name => knownIdlNames.indexOf(name) !== -1) - .map(name => { - const refs = idlNamesIndex[name].map(filterSpecInfo); - let ref = null; - if (spec.refs && spec.refs.normative) { - ref = refs.find(s => { - const canon = canonicalizeUrl(s.url, useEquivalents); - return !!spec.refs.normative.find(r => - canonicalizesTo(r.url, canon, useEquivalents)); - }); - } - return (ref ? 
null : { name, refs }); - }) - .filter(i => !!i), - - // CSS/IDL terms that do not have a corresponding dfn in the - // specification - missingDfns: await checkSpecDefinitions(spec), - - // Links to external specifications within the body of the spec - // that do not have a corresponding entry in the references - // (all links to external specs should have a companion ref) - missingLinkRef: Object.keys(spec.links.rawlinks || {}) - .filter(matchSpecUrl) - .filter(l => { - // Filter out "good" and "inconsistent" references - const canon = canonicalizeUrl(l, useEquivalents); - const refs = (spec.refs.normative || []).concat(spec.refs.informative || []); - return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); - }) - .filter(l => - // Ignore links to other versions of "self". There may - // be cases where it would be worth reporting them but - // most of the time they appear in "changelog" sections. - !canonicalizesTo(l, spec.url, useEquivalents) && - !canonicalizesTo(l, spec.versions, useEquivalents) - ), - - // Links to external specifications within the body of the spec - // that have a corresponding entry in the references, but for - // which the reference uses a different URL, e.g. because the - // link targets the Editor's Draft, whereas the reference - // targets the latest published version - inconsistentRef: Object.keys(spec.links.rawlinks || {}) - .filter(matchSpecUrl) - .map(l => { - const canonSimple = canonicalizeUrl(l); - const canon = canonicalizeUrl(l, useEquivalents); - const refs = (spec.refs.normative || []).concat(spec.refs.informative || []); - - // Filter out "good" references - if (refs.find(r => canonicalizesTo(r.url, canonSimple))) { - return null; - } - const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); - return (ref ? { link: l, ref } : null); - }) - .filter(l => !!l), - - // Lists of specs present in the crawl report that reference - // the current spec, either normatively or informatively - // (used to produce the dependencies report) - referencedBy: { - normative: sortedResults - .filter(s => - s.refs && s.refs.normative && - s.refs.normative.find(r => - canonicalizesTo(r.url, spec.url, useEquivalents) || - canonicalizesTo(r.url, spec.versions, useEquivalents))) - .map(filterSpecInfo), - informative: sortedResults - .filter(s => - s.refs && s.refs.informative && - s.refs.informative.find(r => - canonicalizesTo(r.url, spec.url, useEquivalents) || - canonicalizesTo(r.url, spec.versions, useEquivalents))) - .map(filterSpecInfo) - }, - - // Analysis of cross-references to other specs - xrefs: xrefsReport[spec.url] - }; - - // A spec is OK if it does not contain anything "suspicious". 
- report.ok = !report.error && - !report.noNormativeRefs && - !report.hasInvalidIdl && - !report.hasObsoleteIdl && - !report.noRefToWebIDL && - !report.missingDfns.obsoleteDfnsModel && - isOK(report.unknownIdlNames) && - isOK(report.redefinedIdlNames) && - isOK(report.missingWebIdlRef) && - isOK(report.missingDfns.css.filter(r => !r.warning)) && - isOK(report.missingDfns.idl.filter(r => !r.warning)) && - isOK(report.missingLinkRef) && - isOK(report.inconsistentRef) && - (!report.xrefs || ( - isOK(report.xrefs.notExported) && - isOK(report.xrefs.notDfn) && - isOK(report.xrefs.brokenLinks) && - isOK(report.xrefs.evolvingLinks) && - isOK(report.xrefs.outdatedSpecs) && - isOK(report.xrefs.datedUrls))); - - const res = { - title: spec.title || spec.url, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - crawled: spec.crawled, - organization: spec.organization, - groups: spec.groups, - report - }; - return res; - })); -} - -async function studyCrawl(crawlResults, options = {}) { - if (typeof crawlResults === 'string') { - const crawlResultsPath = crawlResults; - crawlResults = await loadJSON(crawlResults); - crawlResults = await expandCrawlResult(crawlResults, path.dirname(crawlResultsPath)); - } - else { - crawlResults = crawlResults || {}; - } - crawlResults.results = crawlResults.results || []; - crawlResults.stats = crawlResults.stats || {}; - - if (typeof options.trResults === 'string') { - const crawlResultsPath = options.trResults; - options.trResults = await loadJSON(options.trResults); - options.trResults = await expandCrawlResult(options.trResults, path.dirname(crawlResultsPath)); - options.trResults = options.trResults.results; - } - - const results = await studyCrawlResults(crawlResults.results, options); - - return { - type: 'study', - title: crawlResults.title || 'Web specs analysis', - description: crawlResults.description || '', - date: crawlResults.date || (new Date()).toJSON(), - stats: { - crawled: crawlResults.stats.crawled || crawlResults.results.length, - errors: crawlResults.stats.errors || crawlResults.results.filter(spec => !!spec.error).length, - studied: results.length || crawlResults.stats.crawled - }, - results: results - }; -} - - -/************************************************** -Export methods for use as module -**************************************************/ -export default studyCrawl; From 4ee50687a1f4eee1780561f40fd45bf7a0ca124a Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Thu, 22 Aug 2024 15:10:11 +0200 Subject: [PATCH 6/9] Adjust boilerplate text in existing issue files This refreshes the boilerplate text in existing issue files so that the CLI can more easily detect whether a change is warranted, and stops reporting that the files need to be updated in particular. 
--- issues/DOM-Parsing-brokenlinks.md | 2 +- issues/FileAPI-brokenlinks.md | 2 +- issues/background-fetch-brokenlinks.md | 2 +- issues/background-sync-brokenlinks.md | 2 +- issues/change-password-url-discontinuedreferences.md | 2 +- issues/clear-site-data-discontinuedreferences.md | 2 +- issues/clipboard-apis-brokenlinks.md | 2 +- issues/content-index-brokenlinks.md | 2 +- issues/csp-embedded-enforcement-brokenlinks.md | 2 +- issues/css-line-grid-1-brokenlinks.md | 2 +- issues/css-nav-1-brokenlinks.md | 2 +- issues/filter-effects-1-brokenlinks.md | 2 +- issues/get-installed-related-apps-brokenlinks.md | 2 +- issues/html-aam-1.0-brokenlinks.md | 2 +- issues/html-discontinuedreferences.md | 2 +- issues/intersection-observer-brokenlinks.md | 2 +- issues/json-ld11-discontinuedreferences.md | 2 +- issues/keyboard-lock-brokenlinks.md | 2 +- issues/keyboard-map-brokenlinks.md | 2 +- issues/layout-instability-brokenlinks.md | 2 +- issues/media-feeds-discontinuedreferences.md | 2 +- issues/nav-tracking-mitigations-discontinuedreferences.md | 2 +- issues/netinfo-discontinuedreferences.md | 2 +- issues/periodic-background-sync-brokenlinks.md | 2 +- issues/permissions-request-brokenlinks.md | 2 +- issues/portals-brokenlinks.md | 2 +- issues/raw-camera-access-brokenlinks.md | 2 +- issues/reporting-1-brokenlinks.md | 2 +- issues/savedata-discontinuedreferences.md | 2 +- issues/service-workers-brokenlinks.md | 2 +- issues/svg-aam-1.0-brokenlinks.md | 2 +- issues/upgrade-insecure-requests-brokenlinks.md | 2 +- issues/web-otp-brokenlinks.md | 2 +- issues/webpackage-discontinuedreferences.md | 2 +- issues/webrtc-identity-brokenlinks.md | 2 +- issues/webxr-depth-sensing-1-brokenlinks.md | 2 +- issues/webxr-hit-test-1-brokenlinks.md | 2 +- issues/webxr-lighting-estimation-1-brokenlinks.md | 2 +- issues/webxrlayers-1-brokenlinks.md | 2 +- strudy.js | 6 +++++- 40 files changed, 44 insertions(+), 40 deletions(-) diff --git a/issues/DOM-Parsing-brokenlinks.md b/issues/DOM-Parsing-brokenlinks.md index 7557c48e..b3bc5762 100644 --- a/issues/DOM-Parsing-brokenlinks.md +++ b/issues/DOM-Parsing-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/DOM-Parsing/issues/74' Title: Broken references in DOM Parsing and Serialization --- -While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/dom/#case-sensitive * [ ] https://www.w3.org/TR/dom/#ascii-case-insensitive * [ ] https://www.w3.org/TR/dom/#domexception diff --git a/issues/FileAPI-brokenlinks.md b/issues/FileAPI-brokenlinks.md index 5ce77ed3..90390d58 100644 --- a/issues/FileAPI-brokenlinks.md +++ b/issues/FileAPI-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/FileAPI/issues/185' Title: Broken references in File API --- -While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://mimesniff.spec.whatwg.org/#parsable-mime-type This issue was detected and reported semi-automatically by 
[Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/background-fetch-brokenlinks.md b/issues/background-fetch-brokenlinks.md index 84c33a46..c244682c 100644 --- a/issues/background-fetch-brokenlinks.md +++ b/issues/background-fetch-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-fetch/issues/167' Title: Broken references in Background Fetch --- -While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://fetch.spec.whatwg.org/#concept-fetch-terminate * [ ] https://w3c.github.io/permissions/#permission-state * [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor diff --git a/issues/background-sync-brokenlinks.md b/issues/background-sync-brokenlinks.md index 283a238a..df84bf1a 100644 --- a/issues/background-sync-brokenlinks.md +++ b/issues/background-sync-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-sync/issues/186' Title: Broken references in Web Background Synchronization --- -While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://notifications.spec.whatwg.org/#permission-model * [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-registration-interface * [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-global-scope-interface diff --git a/issues/change-password-url-discontinuedreferences.md b/issues/change-password-url-discontinuedreferences.md index 39a51ce2..cf63ce19 100644 --- a/issues/change-password-url-discontinuedreferences.md +++ b/issues/change-password-url-discontinuedreferences.md @@ -6,7 +6,7 @@ Title: >- Passwords --- -While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative references were detected as pointing to discontinued specifications: * [ ] [HTTP-SEMANTICS](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/clear-site-data-discontinuedreferences.md b/issues/clear-site-data-discontinuedreferences.md index c9bceea1..1f9f1b2c 100644 --- a/issues/clear-site-data-discontinuedreferences.md +++ b/issues/clear-site-data-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-clear-site-data/issues/79' Title: Normative references to discontinued specs in Clear Site Data --- -While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html) * [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html) diff --git a/issues/clipboard-apis-brokenlinks.md b/issues/clipboard-apis-brokenlinks.md index 450da512..bd879dd4 100644 --- a/issues/clipboard-apis-brokenlinks.md +++ b/issues/clipboard-apis-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/clipboard-apis/issues/187' Title: Broken references in Clipboard API and events --- -While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-kind * [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-type-string diff --git a/issues/content-index-brokenlinks.md b/issues/content-index-brokenlinks.md index 60d22d24..1e646ec7 100644 --- a/issues/content-index-brokenlinks.md +++ b/issues/content-index-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/content-index/issues/33' Title: Broken references in Content Index --- -While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://dom.spec.whatwg.org/#context-object This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/csp-embedded-enforcement-brokenlinks.md b/issues/csp-embedded-enforcement-brokenlinks.md index 5a7e8925..4c564240 100644 --- a/issues/csp-embedded-enforcement-brokenlinks.md +++ b/issues/csp-embedded-enforcement-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-cspee/issues/27' Title: 'Broken references in Content Security Policy: Embedded Enforcement' --- -While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://fetch.spec.whatwg.org/#concept-response-csp-list * [ ] https://w3c.github.io/webappsec-csp/#port-part-match diff --git a/issues/css-line-grid-1-brokenlinks.md b/issues/css-line-grid-1-brokenlinks.md index fbeb4e2d..c610c59f 100644 --- a/issues/css-line-grid-1-brokenlinks.md +++ b/issues/css-line-grid-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8080' Title: '[css-line-grid] Broken references in CSS Line Grid Module Level 1' --- -While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://drafts.csswg.org/css-inline/#central This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/css-nav-1-brokenlinks.md b/issues/css-nav-1-brokenlinks.md index e62e0937..5cd163e7 100644 --- a/issues/css-nav-1-brokenlinks.md +++ b/issues/css-nav-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8081' Title: '[css-nav-1] Broken references in CSS Spatial Navigation Level 1' --- -While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://drafts.csswg.org/css2/box.html#x14 * [ ] https://html.spec.whatwg.org/multipage/infrastructure.html#nodes-are-removed * [ ] https://html.spec.whatwg.org/multipage/interaction.html#expressly-inert diff --git a/issues/filter-effects-1-brokenlinks.md b/issues/filter-effects-1-brokenlinks.md index 2a059ffb..414512f1 100644 --- a/issues/filter-effects-1-brokenlinks.md +++ b/issues/filter-effects-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/fxtf-drafts/issues/482' Title: '[filter-effects-1] Broken references in Filter Effects Module Level 1' --- -While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://drafts.csswg.org/css-transitions/#animtype-length * [ ] https://drafts.csswg.org/css-transitions/#animtype-number * [ ] https://drafts.csswg.org/css-transitions/#animtype-shadow-list diff --git a/issues/get-installed-related-apps-brokenlinks.md b/issues/get-installed-related-apps-brokenlinks.md index 766fed98..402045af 100644 --- a/issues/get-installed-related-apps-brokenlinks.md +++ b/issues/get-installed-related-apps-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/get-installed-related-apps/issues/35' Title: Broken references in Get Installed Related Apps API --- -While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://dom.spec.whatwg.org/#context-object * [ ] https://www.w3.org/TR/appmanifest/#dom-fingerprint * [ ] https://www.w3.org/TR/appmanifest/#dom-externalapplicationresource diff --git a/issues/html-aam-1.0-brokenlinks.md b/issues/html-aam-1.0-brokenlinks.md index 6dee9b94..212eaaf1 100644 --- a/issues/html-aam-1.0-brokenlinks.md +++ b/issues/html-aam-1.0-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/html-aam/issues/447' Title: Broken references in HTML Accessibility API Mappings 1.0 --- -While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the 
following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://html.spec.whatwg.org/multipage/iframe-embed-object.html#attr-param-name * [ ] https://html.spec.whatwg.org/multipage/microdata.html#attr-itemprop * [ ] https://html.spec.whatwg.org/multipage/sections.html#sectioning-root diff --git a/issues/html-discontinuedreferences.md b/issues/html-discontinuedreferences.md index 26c2131b..17b4a9f4 100644 --- a/issues/html-discontinuedreferences.md +++ b/issues/html-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/whatwg/html/issues/9981' Title: Normative references to discontinued specs in HTML Standard --- -While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative references were detected as pointing to discontinued specifications: * [ ] [HTTP](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/intersection-observer-brokenlinks.md b/issues/intersection-observer-brokenlinks.md index f2a55e28..ec12aee0 100644 --- a/issues/intersection-observer-brokenlinks.md +++ b/issues/intersection-observer-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/IntersectionObserver/issues/506' Title: Broken references in Intersection Observer --- -While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/hr-time/#domhighrestimestamp * [ ] http://www.w3.org/TR/hr-time/#time-origin * [ ] https://drafts.csswg.org/css-box/#containing-block diff --git a/issues/json-ld11-discontinuedreferences.md b/issues/json-ld11-discontinuedreferences.md index 5a00c830..cc8d9a3a 100644 --- a/issues/json-ld11-discontinuedreferences.md +++ b/issues/json-ld11-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/json-ld-syntax/issues/423' Title: Normative references to discontinued specs in JSON-LD 1.1 --- -While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7231](https://tools.ietf.org/html/rfc7231) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/keyboard-lock-brokenlinks.md b/issues/keyboard-lock-brokenlinks.md index ec76a0d7..aa6a0b4a 100644 --- a/issues/keyboard-lock-brokenlinks.md +++ b/issues/keyboard-lock-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-lock/issues/68' Title: Broken references in Keyboard Lock --- -While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/uievents-code/#code-keyw * [ ] http://www.w3.org/TR/uievents-code/#code-keya * [ ] http://www.w3.org/TR/uievents-code/#code-keys diff --git a/issues/keyboard-map-brokenlinks.md b/issues/keyboard-map-brokenlinks.md index a5e96f45..b2af1f0d 100644 --- a/issues/keyboard-map-brokenlinks.md +++ b/issues/keyboard-map-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-map/issues/43' Title: Broken references in Keyboard Map --- -While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/uievents-code/#code-quote This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/layout-instability-brokenlinks.md b/issues/layout-instability-brokenlinks.md index 8ce34650..42c1ebc4 100644 --- a/issues/layout-instability-brokenlinks.md +++ b/issues/layout-instability-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/layout-instability/issues/116' Title: Broken references in Layout Instability API --- -While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/resource-timing/#sec-privacy-security * [ ] https://www.w3.org/TR/css-values-4/#pixel-unit diff --git a/issues/media-feeds-discontinuedreferences.md b/issues/media-feeds-discontinuedreferences.md index b2d7b9b3..fff2bf50 100644 --- a/issues/media-feeds-discontinuedreferences.md +++ b/issues/media-feeds-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/media-feeds/issues/60' Title: Normative references to discontinued specs in Media Feeds --- -While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative references were detected as pointing to discontinued specifications: * [ ] [rfc7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in 
[webref](https://github.com/w3c/webref/). diff --git a/issues/nav-tracking-mitigations-discontinuedreferences.md b/issues/nav-tracking-mitigations-discontinuedreferences.md index 3e4583fd..3380c148 100644 --- a/issues/nav-tracking-mitigations-discontinuedreferences.md +++ b/issues/nav-tracking-mitigations-discontinuedreferences.md @@ -6,7 +6,7 @@ Title: >- Mitigations --- -While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/netinfo-discontinuedreferences.md b/issues/netinfo-discontinuedreferences.md index 0a930781..72f8df5c 100644 --- a/issues/netinfo-discontinuedreferences.md +++ b/issues/netinfo-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/netinfo/issues/97' Title: Normative references to discontinued specs in Network Information API --- -While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/periodic-background-sync-brokenlinks.md b/issues/periodic-background-sync-brokenlinks.md
index b8010d2d..f725b6ad 100644
--- a/issues/periodic-background-sync-brokenlinks.md
+++ b/issues/periodic-background-sync-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/periodic-background-sync/issues/11'
 Title: Broken references in Web Periodic Background Synchronization
 ---

-While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://dom.spec.whatwg.org/#context-object
 * [ ] https://w3c.github.io/permissions/#enumdef-permissionstate
 * [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor
diff --git a/issues/permissions-request-brokenlinks.md b/issues/permissions-request-brokenlinks.md
index 160c7a8b..a3a1c2bb 100644
--- a/issues/permissions-request-brokenlinks.md
+++ b/issues/permissions-request-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/permissions-request/issues/8'
 Title: Broken references in Requesting Permissions
 ---

-While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://w3c.github.io/permissions/#permission-registry

 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/portals-brokenlinks.md b/issues/portals-brokenlinks.md
index fa539cf0..5ddbf5f4 100644
--- a/issues/portals-brokenlinks.md
+++ b/issues/portals-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/portals/issues/285'
 Title: Broken references in Portals
 ---

-While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#postmessageoptions
 * [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#dom-postmessageoptions-transfer

diff --git a/issues/raw-camera-access-brokenlinks.md b/issues/raw-camera-access-brokenlinks.md
index 0ce3d54d..2239d382 100644
--- a/issues/raw-camera-access-brokenlinks.md
+++ b/issues/raw-camera-access-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/raw-camera-access/issues/17'
 Title: Broken references in WebXR Raw Camera Access Module
 ---

-While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/webxr/#set-of-granted-features
 * [ ] https://www.w3.org/TR/webxr/#requestanimationframe

diff --git a/issues/reporting-1-brokenlinks.md b/issues/reporting-1-brokenlinks.md
index a98946c7..e8e8a0ca 100644
--- a/issues/reporting-1-brokenlinks.md
+++ b/issues/reporting-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/reporting/issues/261'
 Title: Broken references in Reporting API
 ---

-While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://fetch.spec.whatwg.org/#concept-response-https-state
 * [ ] https://fetch.spec.whatwg.org/#wait-for-a-response
 * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#creation-url
diff --git a/issues/savedata-discontinuedreferences.md b/issues/savedata-discontinuedreferences.md
index dd02eded..263ce812 100644
--- a/issues/savedata-discontinuedreferences.md
+++ b/issues/savedata-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/savedata/issues/13'
 Title: Normative references to discontinued specs in Save Data API
 ---

-While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative references were detected as pointing to discontinued specifications:
 * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)

 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/service-workers-brokenlinks.md b/issues/service-workers-brokenlinks.md
index 6198ae76..4deb97c1 100644
--- a/issues/service-workers-brokenlinks.md
+++ b/issues/service-workers-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/ServiceWorker/issues/1669'
 Title: Broken references in Service Workers Nightly
 ---

-While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://w3c.github.io/push-api/#h-the-push-event
 * [ ] https://w3c.github.io/push-api/#dfn-fire-the-push-event
 * [ ] https://wicg.github.io/BackgroundSync/spec/#sync
diff --git a/issues/svg-aam-1.0-brokenlinks.md b/issues/svg-aam-1.0-brokenlinks.md
index 9157463d..95b969f2 100644
--- a/issues/svg-aam-1.0-brokenlinks.md
+++ b/issues/svg-aam-1.0-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/svg-aam/issues/23'
 Title: Broken references in SVG Accessibility API Mappings
 ---

-While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://w3c.github.io/core-aam/#keyboard-focus
 * [ ] https://w3c.github.io/core-aam/#exclude_elements2
 * [ ] https://w3c.github.io/core-aam/#include_elements
diff --git a/issues/upgrade-insecure-requests-brokenlinks.md b/issues/upgrade-insecure-requests-brokenlinks.md
index 20df641b..fc31c585 100644
--- a/issues/upgrade-insecure-requests-brokenlinks.md
+++ b/issues/upgrade-insecure-requests-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-upgrade-insecure-requests/issues/33'
 Title: Broken references in Upgrade Insecure Requests
 ---

-While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://html.spec.whatwg.org/multipage/browsers.html#create-a-document-object
 * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-document
 * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-browsing-context
diff --git a/issues/web-otp-brokenlinks.md b/issues/web-otp-brokenlinks.md
index f0640b69..d8eb7325 100644
--- a/issues/web-otp-brokenlinks.md
+++ b/issues/web-otp-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/web-otp/issues/59'
 Title: Broken references in WebOTP API
 ---

-While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://dom.spec.whatwg.org/#abortsignal-aborted-flag
 * [ ] https://w3c.github.io/webappsec-credential-management/#collectfromcredentialstore-origin-options-sameoriginwithancestors

diff --git a/issues/webpackage-discontinuedreferences.md b/issues/webpackage-discontinuedreferences.md
index f5c537b5..2ff69f0a 100644
--- a/issues/webpackage-discontinuedreferences.md
+++ b/issues/webpackage-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/webpackage/issues/885'
 Title: Normative references to discontinued specs in Loading Signed Exchanges
 ---

-While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative references were detected as pointing to discontinued specifications:
 * [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html)
 * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)

diff --git a/issues/webrtc-identity-brokenlinks.md b/issues/webrtc-identity-brokenlinks.md
index e09829a9..66cd662a 100644
--- a/issues/webrtc-identity-brokenlinks.md
+++ b/issues/webrtc-identity-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webrtc-identity/issues/40'
 Title: Broken references in Identity for WebRTC 1.0
 ---

-While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-offer
 * [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-answer
 * [ ] https://www.w3.org/TR/webrtc/#dom-rtcconfiguration-peeridentity
diff --git a/issues/webxr-depth-sensing-1-brokenlinks.md b/issues/webxr-depth-sensing-1-brokenlinks.md
index ada71907..56352e7b 100644
--- a/issues/webxr-depth-sensing-1-brokenlinks.md
+++ b/issues/webxr-depth-sensing-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/depth-sensing/issues/40'
 Title: Broken references in WebXR Depth Sensing Module
 ---

-While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/webxr/#feature-policy
 * [ ] https://www.w3.org/TR/webxr/#xr-device-list-of-enabled-features

diff --git a/issues/webxr-hit-test-1-brokenlinks.md b/issues/webxr-hit-test-1-brokenlinks.md
index 69aeb211..b8104e68 100644
--- a/issues/webxr-hit-test-1-brokenlinks.md
+++ b/issues/webxr-hit-test-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/hit-test/issues/114'
 Title: Broken references in WebXR Hit Test Module
 ---

-While crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/webxr/#feature-policy
 * [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features

diff --git a/issues/webxr-lighting-estimation-1-brokenlinks.md b/issues/webxr-lighting-estimation-1-brokenlinks.md
index bc9f2505..4c7e129c 100644
--- a/issues/webxr-lighting-estimation-1-brokenlinks.md
+++ b/issues/webxr-lighting-estimation-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/lighting-estimation/issues/58'
 Title: Broken references in WebXR Lighting Estimation API Level 1
 ---

-While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features
 * [ ] https://www.w3.org/TR/webxrlayers-1/#session

diff --git a/issues/webxrlayers-1-brokenlinks.md b/issues/webxrlayers-1-brokenlinks.md
index f5010de1..b08e9e4e 100644
--- a/issues/webxrlayers-1-brokenlinks.md
+++ b/issues/webxrlayers-1-brokenlinks.md
@@ -5,7 +5,7 @@
 Title: Broken references in WebXR Layers API Level 1
 ---

-While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/webxr/#animationframe

 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/strudy.js b/strudy.js
index 6ddb1629..bbd4bdd2 100644
--- a/strudy.js
+++ b/strudy.js
@@ -301,7 +301,11 @@ Format must be one of "json" or "markdown".`)
 ${entry.content}

 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`;
-    if (existingReport?.content.trim() === content.trim()) {
+    // Note from @tidoust: One day, I'll understand how to set up Git and
+    // code so that all line endings end up being "\n" even on Windows
+    // machines. In the meantime, note that local issue files may well
+    // contain "\r\n" on Windows machines.
+    if (existingReport?.content.replace(/\r\n/g, '\n').trim() === content.trim()) {
       console.warn(`- skip ${filename}, file already exists, no change`);
       continue;
     }

From 5f9271b78e1311da64e6bfff2788bbb979cb9e40 Mon Sep 17 00:00:00 2001
From: Francois Daoust
Date: Thu, 22 Aug 2024 15:37:11 +0200
Subject: [PATCH 7/9] Stop exporting studyWebIdl

Forgotten in a previous commit
---
 index.js | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/index.js b/index.js
index 058ea8c0..6e550f67 100644
--- a/index.js
+++ b/index.js
@@ -1,7 +1,6 @@
 import study from './src/lib/study.js';
-import studyWebIdl from './src/lib/study-webidl.js';

-export { study, studyWebIdl };
+export { study };

-const strudy = { study, studyWebIdl };
+const strudy = { study };
 export default strudy;

From adc0eeb9a6dee1808dd69d09cb2832b6f3b4fb94 Mon Sep 17 00:00:00 2001
From: Francois Daoust
Date: Thu, 22 Aug 2024 18:09:47 +0200
Subject: [PATCH 8/9] Restrict lookup to ".md" files in issues folder

---
 src/reporting/file-issue-for-review.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reporting/file-issue-for-review.js b/src/reporting/file-issue-for-review.js
index b04a0a00..2eeaa768 100644
--- a/src/reporting/file-issue-for-review.js
+++ b/src/reporting/file-issue-for-review.js
@@ -106,9 +106,9 @@ Usage notes for some of the options:
   console.log(`- current branch: ${currentBranch}`);

   console.log('How many issue files ought to be reported?');
-  const toadd = execSync('git diff --name-only --diff-filter=d issues', execParams).trim().split('\n');
+  const toadd = execSync('git diff --name-only --diff-filter=d issues/*.md', execParams).trim().split('\n');
   console.log(`- nb issue files to add/update: ${toadd.length}`);
-  const todelete = execSync('git diff --name-only --diff-filter=D issues', execParams).trim().split('\n');
+  const todelete = execSync('git diff --name-only --diff-filter=D issues/*.md', execParams).trim().split('\n');
   console.log(`- nb issue files to delete: ${todelete.length}`);
   const toreport = toadd.map(name => { return { action: 'add', filename: name }; })
     .concat(todelete.map(name => { return { action: 'delete', filename: name }; }))

From 18f08e6434cb7d793f25917a3a345abffa59cbea Mon Sep 17 00:00:00 2001
From: Francois Daoust
Date: Thu, 22 Aug 2024 18:36:33 +0200
Subject: [PATCH 9/9] Filter out empty strings (and add reminder)

---
 src/reporting/file-issue-for-review.js | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/reporting/file-issue-for-review.js b/src/reporting/file-issue-for-review.js
index 2eeaa768..f5cea3f8 100644
--- a/src/reporting/file-issue-for-review.js
+++ b/src/reporting/file-issue-for-review.js
@@ -105,10 +105,14 @@ Usage notes for some of the options:
   const currentBranch = execSync('git branch --show-current', execParams).trim();
   console.log(`- current branch: ${currentBranch}`);

+  // Possibly useful reminder about calls to `filter` below:
+  // `split` on an empty string does not return an empty array!
   console.log('How many issue files ought to be reported?');
-  const toadd = execSync('git diff --name-only --diff-filter=d issues/*.md', execParams).trim().split('\n');
+  const toadd = execSync('git diff --name-only --diff-filter=d issues/*.md', execParams)
+    .trim().split('\n').filter(x => !!x);
   console.log(`- nb issue files to add/update: ${toadd.length}`);
-  const todelete = execSync('git diff --name-only --diff-filter=D issues/*.md', execParams).trim().split('\n');
+  const todelete = execSync('git diff --name-only --diff-filter=D issues/*.md', execParams)
+    .trim().split('\n').filter(x => !!x);
   console.log(`- nb issue files to delete: ${todelete.length}`);
   const toreport = toadd.map(name => { return { action: 'add', filename: name }; })
     .concat(todelete.map(name => { return { action: 'delete', filename: name }; }))
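
A quick aside on the `.filter(x => !!x)` calls added by the last patch: in JavaScript, splitting an empty string does not yield an empty array, so a `git diff` run that reports no changed issue files would otherwise be counted as one file. A minimal standalone sketch, with a made-up command output (illustration only, not part of the patch series):

    // Illustration only: why the extra `.filter(x => !!x)` is needed.
    const emptyOutput = '';                        // e.g. no issue files changed
    const lines = emptyOutput.trim().split('\n');
    console.log(lines);                            // [ '' ] -> length 1, not 0!
    console.log(lines.filter(x => !!x).length);    // 0, the expected count

    const someOutput = 'issues/a.md\nissues/b.md\n';  // hypothetical file names
    console.log(someOutput.trim().split('\n').filter(x => !!x));
    // [ 'issues/a.md', 'issues/b.md' ]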
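
Similarly, the `replace(/\r\n/g, '\n')` tweak to `strudy.js` earlier in this series exists because a Windows checkout may store issue files with CRLF line endings, which would make the string comparison fail and the files look perpetually changed. A small sketch of the comparison, with made-up file contents (illustration only):

    // Illustration only: generated content uses "\n"; the local file may use "\r\n".
    const generated = '---\nTitle: Example\n---\n';
    const onDisk = '---\r\nTitle: Example\r\n---\r\n';

    console.log(onDisk.trim() === generated.trim());                         // false
    console.log(onDisk.replace(/\r\n/g, '\n').trim() === generated.trim());  // true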