diff --git a/.github/workflows/file-issue-for-review.yml b/.github/workflows/file-issue-for-review.yml
index b04dfb41..0a4d5ea7 100644
--- a/.github/workflows/file-issue-for-review.yml
+++ b/.github/workflows/file-issue-for-review.yml
@@ -26,12 +26,15 @@ jobs:
         run: |
           git config user.name "strudy-bot"
           git config user.email "<>"
-          git remote set-url --push origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+          git remote set-url --push origin https://x-access-token:${{ secrets.ISSUE_REPORT_GH_TOKEN }}@github.com/$GITHUB_REPOSITORY
         working-directory: strudy
+      - name: Run Strudy to detect new anomalies
+        working-directory: strudy
+        run: node strudy.js inspect ../webref --issues issues --what brokenLinks discontinuedReferences --update-mode old
       - name: Run issue filer script
         working-directory: strudy
-        run: node src/reporting/file-issue-for-review.js ../webref/ed/ ../webref/tr/ brokenLinks
+        run: node src/reporting/file-issue-for-review.js --max 10
         env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.ISSUE_REPORT_GH_TOKEN }}
diff --git a/index.js b/index.js
index 42252811..6e550f67 100644
--- a/index.js
+++ b/index.js
@@ -1,8 +1,6 @@
-import studyCrawl from './src/lib/study-crawl.js';
-import studyWebIdl from './src/lib/study-webidl.js';
-import generateReport from './src/lib/generate-report.js';
+import study from './src/lib/study.js';
 
-export { studyCrawl, studyWebIdl, generateReport };
+export { study };
 
-const strudy = { studyCrawl, studyWebIdl, generateReport };
+const strudy = { study };
 export default strudy;
diff --git a/issues/DOM-Parsing-brokenlinks.md b/issues/DOM-Parsing-brokenlinks.md
index 7557c48e..b3bc5762 100644
--- a/issues/DOM-Parsing-brokenlinks.md
+++ b/issues/DOM-Parsing-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/DOM-Parsing/issues/74'
 Title: Broken references in DOM Parsing and Serialization
 ---
 
-While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://www.w3.org/TR/dom/#case-sensitive
 * [ ] https://www.w3.org/TR/dom/#ascii-case-insensitive
 * [ ] https://www.w3.org/TR/dom/#domexception
diff --git a/issues/FileAPI-brokenlinks.md b/issues/FileAPI-brokenlinks.md
index 5ce77ed3..90390d58 100644
--- a/issues/FileAPI-brokenlinks.md
+++ b/issues/FileAPI-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/FileAPI/issues/185'
 Title: Broken references in File API
 ---
 
-While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://mimesniff.spec.whatwg.org/#parsable-mime-type
 
 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
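Note on the two new workflow steps above: they can be reproduced locally for testing. The sketch below is a rough local equivalent, not part of the change itself; it assumes webref and strudy are checked out side by side (matching the `../webref` path used by the workflow) and that a personal access token is supplied in place of the `ISSUE_REPORT_GH_TOKEN` secret. The `strudy.js inspect` and `file-issue-for-review.js` invocations are copied verbatim from the workflow.

    # Assumed layout (hypothetical local setup): sibling checkouts of strudy and webref
    git clone https://github.com/w3c/webref.git
    git clone https://github.com/w3c/strudy.git
    cd strudy && npm install

    # Step 1, as in the workflow: regenerate issue files for the selected anomaly types
    node strudy.js inspect ../webref --issues issues --what brokenLinks discontinuedReferences --update-mode old

    # Step 2, as in the workflow: file pending issue reports on GitHub (capped here by --max)
    GITHUB_TOKEN=<token> node src/reporting/file-issue-for-review.js --max 10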
diff --git a/issues/background-fetch-brokenlinks.md b/issues/background-fetch-brokenlinks.md
index 84c33a46..c244682c 100644
--- a/issues/background-fetch-brokenlinks.md
+++ b/issues/background-fetch-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-fetch/issues/167'
 Title: Broken references in Background Fetch
 ---
 
-While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://fetch.spec.whatwg.org/#concept-fetch-terminate
 * [ ] https://w3c.github.io/permissions/#permission-state
 * [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor
diff --git a/issues/background-sync-brokenlinks.md b/issues/background-sync-brokenlinks.md
index 283a238a..df84bf1a 100644
--- a/issues/background-sync-brokenlinks.md
+++ b/issues/background-sync-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-sync/issues/186'
 Title: Broken references in Web Background Synchronization
 ---
 
-While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://notifications.spec.whatwg.org/#permission-model
 * [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-registration-interface
 * [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-global-scope-interface
diff --git a/issues/change-password-url-discontinuedreferences.md b/issues/change-password-url-discontinuedreferences.md
index 39a51ce2..cf63ce19 100644
--- a/issues/change-password-url-discontinuedreferences.md
+++ b/issues/change-password-url-discontinuedreferences.md
@@ -6,7 +6,7 @@ Title: >-
   Passwords
 ---
 
-While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative references were detected as pointing to discontinued specifications:
 * [ ] [HTTP-SEMANTICS](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
 
 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/clear-site-data-discontinuedreferences.md b/issues/clear-site-data-discontinuedreferences.md
index c9bceea1..1f9f1b2c 100644
--- a/issues/clear-site-data-discontinuedreferences.md
+++ b/issues/clear-site-data-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-clear-site-data/issues/79'
 Title: Normative references to discontinued specs in Clear Site Data
 ---
 
-While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative references were detected as pointing to discontinued specifications:
 * [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html)
 * [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html)
 
diff --git a/issues/clipboard-apis-brokenlinks.md b/issues/clipboard-apis-brokenlinks.md
index 450da512..bd879dd4 100644
--- a/issues/clipboard-apis-brokenlinks.md
+++ b/issues/clipboard-apis-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/clipboard-apis/issues/187'
 Title: Broken references in Clipboard API and events
 ---
 
-While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-kind
 * [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-type-string
 
diff --git a/issues/content-index-brokenlinks.md b/issues/content-index-brokenlinks.md
index 60d22d24..1e646ec7 100644
--- a/issues/content-index-brokenlinks.md
+++ b/issues/content-index-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/content-index/issues/33'
 Title: Broken references in Content Index
 ---
 
-While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://dom.spec.whatwg.org/#context-object
 
 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/csp-embedded-enforcement-brokenlinks.md b/issues/csp-embedded-enforcement-brokenlinks.md
index 5a7e8925..4c564240 100644
--- a/issues/csp-embedded-enforcement-brokenlinks.md
+++ b/issues/csp-embedded-enforcement-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-cspee/issues/27'
 Title: 'Broken references in Content Security Policy: Embedded Enforcement'
 ---
 
-While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://fetch.spec.whatwg.org/#concept-response-csp-list
 * [ ] https://w3c.github.io/webappsec-csp/#port-part-match
 
diff --git a/issues/css-line-grid-1-brokenlinks.md b/issues/css-line-grid-1-brokenlinks.md
index fbeb4e2d..c610c59f 100644
--- a/issues/css-line-grid-1-brokenlinks.md
+++ b/issues/css-line-grid-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8080'
 Title: '[css-line-grid] Broken references in CSS Line Grid Module Level 1'
 ---
 
-While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors:
 * [ ] https://drafts.csswg.org/css-inline/#central
 
 This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/css-nav-1-brokenlinks.md b/issues/css-nav-1-brokenlinks.md index e62e0937..5cd163e7 100644 --- a/issues/css-nav-1-brokenlinks.md +++ b/issues/css-nav-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8081' Title: '[css-nav-1] Broken references in CSS Spatial Navigation Level 1' --- -While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://drafts.csswg.org/css2/box.html#x14 * [ ] https://html.spec.whatwg.org/multipage/infrastructure.html#nodes-are-removed * [ ] https://html.spec.whatwg.org/multipage/interaction.html#expressly-inert diff --git a/issues/filter-effects-1-brokenlinks.md b/issues/filter-effects-1-brokenlinks.md index 2a059ffb..414512f1 100644 --- a/issues/filter-effects-1-brokenlinks.md +++ b/issues/filter-effects-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/fxtf-drafts/issues/482' Title: '[filter-effects-1] Broken references in Filter Effects Module Level 1' --- -While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://drafts.csswg.org/css-transitions/#animtype-length * [ ] https://drafts.csswg.org/css-transitions/#animtype-number * [ ] https://drafts.csswg.org/css-transitions/#animtype-shadow-list diff --git a/issues/get-installed-related-apps-brokenlinks.md b/issues/get-installed-related-apps-brokenlinks.md index 766fed98..402045af 100644 --- a/issues/get-installed-related-apps-brokenlinks.md +++ b/issues/get-installed-related-apps-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/get-installed-related-apps/issues/35' Title: Broken references in Get Installed Related Apps API --- -While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://dom.spec.whatwg.org/#context-object * [ ] https://www.w3.org/TR/appmanifest/#dom-fingerprint * [ ] https://www.w3.org/TR/appmanifest/#dom-externalapplicationresource diff --git a/issues/html-aam-1.0-brokenlinks.md b/issues/html-aam-1.0-brokenlinks.md index 6dee9b94..212eaaf1 100644 --- a/issues/html-aam-1.0-brokenlinks.md +++ b/issues/html-aam-1.0-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/html-aam/issues/447' Title: Broken references in HTML Accessibility API Mappings 1.0 --- -While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the 
following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://html.spec.whatwg.org/multipage/iframe-embed-object.html#attr-param-name * [ ] https://html.spec.whatwg.org/multipage/microdata.html#attr-itemprop * [ ] https://html.spec.whatwg.org/multipage/sections.html#sectioning-root diff --git a/issues/html-discontinuedreferences.md b/issues/html-discontinuedreferences.md index 26c2131b..17b4a9f4 100644 --- a/issues/html-discontinuedreferences.md +++ b/issues/html-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/whatwg/html/issues/9981' Title: Normative references to discontinued specs in HTML Standard --- -While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative references were detected as pointing to discontinued specifications: * [ ] [HTTP](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/intersection-observer-brokenlinks.md b/issues/intersection-observer-brokenlinks.md index f2a55e28..ec12aee0 100644 --- a/issues/intersection-observer-brokenlinks.md +++ b/issues/intersection-observer-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/IntersectionObserver/issues/506' Title: Broken references in Intersection Observer --- -While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/hr-time/#domhighrestimestamp * [ ] http://www.w3.org/TR/hr-time/#time-origin * [ ] https://drafts.csswg.org/css-box/#containing-block diff --git a/issues/json-ld11-discontinuedreferences.md b/issues/json-ld11-discontinuedreferences.md index 5a00c830..cc8d9a3a 100644 --- a/issues/json-ld11-discontinuedreferences.md +++ b/issues/json-ld11-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/json-ld-syntax/issues/423' Title: Normative references to discontinued specs in JSON-LD 1.1 --- -While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7231](https://tools.ietf.org/html/rfc7231) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/keyboard-lock-brokenlinks.md b/issues/keyboard-lock-brokenlinks.md index ec76a0d7..aa6a0b4a 100644 --- a/issues/keyboard-lock-brokenlinks.md +++ b/issues/keyboard-lock-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-lock/issues/68' Title: Broken references in Keyboard Lock --- -While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/uievents-code/#code-keyw * [ ] http://www.w3.org/TR/uievents-code/#code-keya * [ ] http://www.w3.org/TR/uievents-code/#code-keys diff --git a/issues/keyboard-map-brokenlinks.md b/issues/keyboard-map-brokenlinks.md index a5e96f45..b2af1f0d 100644 --- a/issues/keyboard-map-brokenlinks.md +++ b/issues/keyboard-map-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-map/issues/43' Title: Broken references in Keyboard Map --- -While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] http://www.w3.org/TR/uievents-code/#code-quote This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/layout-instability-brokenlinks.md b/issues/layout-instability-brokenlinks.md index 8ce34650..42c1ebc4 100644 --- a/issues/layout-instability-brokenlinks.md +++ b/issues/layout-instability-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/layout-instability/issues/116' Title: Broken references in Layout Instability API --- -While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/resource-timing/#sec-privacy-security * [ ] https://www.w3.org/TR/css-values-4/#pixel-unit diff --git a/issues/media-feeds-discontinuedreferences.md b/issues/media-feeds-discontinuedreferences.md index b2d7b9b3..fff2bf50 100644 --- a/issues/media-feeds-discontinuedreferences.md +++ b/issues/media-feeds-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/media-feeds/issues/60' Title: Normative references to discontinued specs in Media Feeds --- -While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative references were detected as pointing to discontinued specifications: * [ ] [rfc7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in 
[webref](https://github.com/w3c/webref/). diff --git a/issues/nav-tracking-mitigations-discontinuedreferences.md b/issues/nav-tracking-mitigations-discontinuedreferences.md index 3e4583fd..3380c148 100644 --- a/issues/nav-tracking-mitigations-discontinuedreferences.md +++ b/issues/nav-tracking-mitigations-discontinuedreferences.md @@ -6,7 +6,7 @@ Title: >- Mitigations --- -While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/issues/netinfo-discontinuedreferences.md b/issues/netinfo-discontinuedreferences.md index 0a930781..72f8df5c 100644 --- a/issues/netinfo-discontinuedreferences.md +++ b/issues/netinfo-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/netinfo/issues/97' Title: Normative references to discontinued specs in Network Information API --- -While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/periodic-background-sync-brokenlinks.md b/issues/periodic-background-sync-brokenlinks.md index b8010d2d..f725b6ad 100644 --- a/issues/periodic-background-sync-brokenlinks.md +++ b/issues/periodic-background-sync-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/periodic-background-sync/issues/11' Title: Broken references in Web Periodic Background Synchronization --- -While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://dom.spec.whatwg.org/#context-object * [ ] https://w3c.github.io/permissions/#enumdef-permissionstate * [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor diff --git a/issues/permissions-request-brokenlinks.md b/issues/permissions-request-brokenlinks.md index 160c7a8b..a3a1c2bb 100644 --- a/issues/permissions-request-brokenlinks.md +++ b/issues/permissions-request-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/permissions-request/issues/8' Title: Broken references in Requesting Permissions --- -While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/permissions/#permission-registry This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/portals-brokenlinks.md b/issues/portals-brokenlinks.md index fa539cf0..5ddbf5f4 100644 --- a/issues/portals-brokenlinks.md +++ b/issues/portals-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/portals/issues/285' Title: Broken references in Portals --- -While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#postmessageoptions * [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#dom-postmessageoptions-transfer diff --git a/issues/raw-camera-access-brokenlinks.md b/issues/raw-camera-access-brokenlinks.md index 0ce3d54d..2239d382 100644 --- a/issues/raw-camera-access-brokenlinks.md +++ b/issues/raw-camera-access-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/raw-camera-access/issues/17' Title: Broken references in WebXR Raw Camera Access Module --- -While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/webxr/#set-of-granted-features * [ ] https://www.w3.org/TR/webxr/#requestanimationframe diff --git a/issues/reporting-1-brokenlinks.md b/issues/reporting-1-brokenlinks.md index a98946c7..e8e8a0ca 100644 --- a/issues/reporting-1-brokenlinks.md +++ b/issues/reporting-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/reporting/issues/261' Title: Broken references in Reporting API --- -While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://fetch.spec.whatwg.org/#concept-response-https-state * [ ] https://fetch.spec.whatwg.org/#wait-for-a-response * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#creation-url diff --git a/issues/savedata-discontinuedreferences.md b/issues/savedata-discontinuedreferences.md index dd02eded..263ce812 100644 --- a/issues/savedata-discontinuedreferences.md +++ b/issues/savedata-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/savedata/issues/13' Title: Normative references to discontinued specs in Save Data API --- -While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). 
diff --git a/issues/service-workers-brokenlinks.md b/issues/service-workers-brokenlinks.md index 6198ae76..4deb97c1 100644 --- a/issues/service-workers-brokenlinks.md +++ b/issues/service-workers-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/ServiceWorker/issues/1669' Title: Broken references in Service Workers Nightly --- -While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/push-api/#h-the-push-event * [ ] https://w3c.github.io/push-api/#dfn-fire-the-push-event * [ ] https://wicg.github.io/BackgroundSync/spec/#sync diff --git a/issues/svg-aam-1.0-brokenlinks.md b/issues/svg-aam-1.0-brokenlinks.md index 9157463d..95b969f2 100644 --- a/issues/svg-aam-1.0-brokenlinks.md +++ b/issues/svg-aam-1.0-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/svg-aam/issues/23' Title: Broken references in SVG Accessibility API Mappings --- -While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/core-aam/#keyboard-focus * [ ] https://w3c.github.io/core-aam/#exclude_elements2 * [ ] https://w3c.github.io/core-aam/#include_elements diff --git a/issues/upgrade-insecure-requests-brokenlinks.md b/issues/upgrade-insecure-requests-brokenlinks.md index 20df641b..fc31c585 100644 --- a/issues/upgrade-insecure-requests-brokenlinks.md +++ b/issues/upgrade-insecure-requests-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-upgrade-insecure-requests/issues/33' Title: Broken references in Upgrade Insecure Requests --- -While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://html.spec.whatwg.org/multipage/browsers.html#create-a-document-object * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-document * [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-browsing-context diff --git a/issues/web-otp-brokenlinks.md b/issues/web-otp-brokenlinks.md index f0640b69..d8eb7325 100644 --- a/issues/web-otp-brokenlinks.md +++ b/issues/web-otp-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/web-otp/issues/59' Title: Broken references in WebOTP API --- -While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://dom.spec.whatwg.org/#abortsignal-aborted-flag * [ ] 
https://w3c.github.io/webappsec-credential-management/#collectfromcredentialstore-origin-options-sameoriginwithancestors diff --git a/issues/webpackage-discontinuedreferences.md b/issues/webpackage-discontinuedreferences.md index f5c537b5..2ff69f0a 100644 --- a/issues/webpackage-discontinuedreferences.md +++ b/issues/webpackage-discontinuedreferences.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/webpackage/issues/885' Title: Normative references to discontinued specs in Loading Signed Exchanges --- -While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative referenced were detected as pointing to discontinued specifications: +While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative references were detected as pointing to discontinued specifications: * [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html) * [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html) diff --git a/issues/webrtc-identity-brokenlinks.md b/issues/webrtc-identity-brokenlinks.md index e09829a9..66cd662a 100644 --- a/issues/webrtc-identity-brokenlinks.md +++ b/issues/webrtc-identity-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webrtc-identity/issues/40' Title: Broken references in Identity for WebRTC 1.0 --- -While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-offer * [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-answer * [ ] https://www.w3.org/TR/webrtc/#dom-rtcconfiguration-peeridentity diff --git a/issues/webxr-depth-sensing-1-brokenlinks.md b/issues/webxr-depth-sensing-1-brokenlinks.md index ada71907..56352e7b 100644 --- a/issues/webxr-depth-sensing-1-brokenlinks.md +++ b/issues/webxr-depth-sensing-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/depth-sensing/issues/40' Title: Broken references in WebXR Depth Sensing Module --- -While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/webxr/#feature-policy * [ ] https://www.w3.org/TR/webxr/#xr-device-list-of-enabled-features diff --git a/issues/webxr-hit-test-1-brokenlinks.md b/issues/webxr-hit-test-1-brokenlinks.md index 69aeb211..b8104e68 100644 --- a/issues/webxr-hit-test-1-brokenlinks.md +++ b/issues/webxr-hit-test-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/hit-test/issues/114' Title: Broken references in WebXR Hit Test Module --- -While crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While 
crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/webxr/#feature-policy * [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features diff --git a/issues/webxr-lighting-estimation-1-brokenlinks.md b/issues/webxr-lighting-estimation-1-brokenlinks.md index bc9f2505..4c7e129c 100644 --- a/issues/webxr-lighting-estimation-1-brokenlinks.md +++ b/issues/webxr-lighting-estimation-1-brokenlinks.md @@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/lighting-estimation/issues/58' Title: Broken references in WebXR Lighting Estimation API Level 1 --- -While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features * [ ] https://www.w3.org/TR/webxrlayers-1/#session diff --git a/issues/webxrlayers-1-brokenlinks.md b/issues/webxrlayers-1-brokenlinks.md index f5010de1..b08e9e4e 100644 --- a/issues/webxrlayers-1-brokenlinks.md +++ b/issues/webxrlayers-1-brokenlinks.md @@ -5,7 +5,7 @@ Title: Broken references in WebXR Layers API Level 1 --- -While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed: +While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors: * [ ] https://www.w3.org/TR/webxr/#animationframe This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/). diff --git a/package.json b/package.json index 53a82979..e59bd6d0 100644 --- a/package.json +++ b/package.json @@ -41,8 +41,6 @@ "commander": "12.1.0", "gray-matter": "^4.0.3", "jsdom": "^24.1.1", - "node-fetch": "^2.6.5", - "node-pandoc": "0.3.0", "reffy": "^17.1.1", "semver": "^7.3.5", "webidl2": "^24.2.2" diff --git a/src/cli/study-algorithms.js b/src/cli/study-algorithms.js deleted file mode 100644 index 39844990..00000000 --- a/src/cli/study-algorithms.js +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env node - -import { loadCrawlResults } from '../lib/util.js'; -import studyAlgorithms from '../lib/study-algorithms.js'; -import loadJSON from '../lib/load-json.js'; -import { expandCrawlResult } from 'reffy'; -import path from 'node:path'; - -function reportToConsole(results) { - const toreport = []; - for (const anomaly of results) { - const spec = anomaly.specs[0]; - let entry = toreport.find(entry => entry.spec.shortname === spec.shortname); - if (!entry) { - entry = { spec, anomalies: [] }; - toreport.push(entry); - } - entry.anomalies.push(anomaly); - } - toreport.sort((entry1, entry2) => { - return entry1.spec.title.localeCompare(entry2.spec.title); - }); - for (const entry of toreport) { - const spec = entry.spec; - console.log(`- [${spec.title}](${spec.nightly?.url ?? 
spec.url})`); - for (const anomaly of entry.anomalies) { - console.log(` - ${anomaly.message}`); - } - } -} - -async function main(crawlPath, anomalyType) { - // Target the index file if needed - if (!crawlPath.endsWith('index.json')) { - crawlPath = path.join(crawlPath, 'index.json'); - } - - const crawl = await loadJSON(crawlPath); - if (!crawl) { - throw new Error("Impossible to read " + crawlPath); - } - - const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), ['algorithms']); - const report = studyAlgorithms(expanded.results); - reportToConsole(report); -} - -/************************************************** -Main loop -**************************************************/ -const crawlPath = process.argv[2]; -if (!crawlPath) { - console.error('Web IDL analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} -main(crawlPath).catch(e => { - console.error(e); - process.exit(3); -}); diff --git a/src/cli/study-backrefs.js b/src/cli/study-backrefs.js deleted file mode 100644 index ac985ae1..00000000 --- a/src/cli/study-backrefs.js +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env node -/** - * The backrefs analyzer takes links to a ED crawl folder and a TR crawl folder, - * and creates a report that lists, for each spec: - * - * - Links to anchors that do not exist - * - Links to anchors that no longer exist in the ED of the target spec - * - Links to anchors that are not definitions or headings - * - Links to definitions that are not exported - * - Links to dated TR URLs - * - Links to specs that should no longer be referenced - * - Links to documents that look like specs but are unknown in Reffy - * (likely not an anomaly per se) - * - * It also flags links that look like specs but that do not appear in the crawl - * (most of these should be false positives). - * - * The backrefs analyzer can be called directly through: - * - * `node study-backrefs.js [root crawl folder]` - * - * where `root crawl folder` is the path to the root folder that contains `ed` - * and `tr` subfolders. Alternatively, the analyzer may be called with two - * arguments, one being the path to the ED crawl folder, another being the path - * to the TR crawl folder. 
- * - * @module backrefs - */ - -import { loadCrawlResults } from '../lib/util.js'; -import studyBackrefs from '../lib/study-backrefs.js'; -import path from 'node:path'; - -function reportToConsole(results) { - for (const anomaly of results) { - anomaly.specs = anomaly.specs.map(spec => { - return { shortname: spec.shortname, url: spec.url, title: spec.title }; - }); - } - const perSpec = {}; - for (const anomaly of results) { - for (const spec of anomaly.specs) { - if (!perSpec[spec.url]) { - perSpec[spec.url] = { spec, anomalies: [] }; - } - perSpec[spec.url].anomalies.push(anomaly); - } - } - - const anomalyTypes = [ - { name: 'brokenLinks', title: 'Links to anchors that do not exist' }, - { name: 'evolvingLinks', title: 'Links to anchors that no longer exist in the editor draft of the target spec' }, - { name: 'notDfn', title: 'Links to anchors that are not definitions or headings' }, - { name: 'notExported', title: 'Links to definitions that are not exported' }, - { name: 'datedUrls', title: 'Links to dated TR URLs' }, - { name: 'outdatedSpecs', title: 'Links to specs that should no longer be referenced' }, - { name: 'unknownSpecs', title: 'Links to documents that are not recognized as specs' } - ]; - let report = ''; - Object.keys(perSpec) - .sort((url1, url2) => perSpec[url1].spec.title.localeCompare(perSpec[url2].spec.title)) - .forEach(url => { - const spec = perSpec[url].spec; - const anomalies = perSpec[url].anomalies; - report += `
${spec.title}\n\n`; - for (const type of anomalyTypes) { - const links = anomalies - .filter(anomaly => anomaly.name === type.name) - .map(anomaly => anomaly.message); - if (links.length > 0) { - report += `${type.title}:\n`; - for (const link of links) { - report += `* ${link}\n`; - } - report += '\n\n'; - } - } - report += '
\n'; - }); - console.log(report); -} - - -/************************************************** -Main loop -**************************************************/ -let edCrawlResultsPath = process.argv[2]; -let trCrawlResultsPath = process.argv[3]; - -if (!edCrawlResultsPath) { - console.error('Backrefs analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} - -// If only one argument is provided, consider that it is the path to the -// root folder of a crawl results, with "ed" and "tr" subfolders -if (!trCrawlResultsPath) { - trCrawlResultsPath = path.join(edCrawlResultsPath, 'tr'); - edCrawlResultsPath = path.join(edCrawlResultsPath, 'ed'); -} - -// Target the index file if needed -if (!edCrawlResultsPath.endsWith('index.json')) { - edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json'); -} -if (!trCrawlResultsPath.endsWith('index.json')) { - trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json'); -} - -// Analyze the crawl results -loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath) - .then(async crawl => { - // Donwload automatic map of multipages anchors in HTML spec - let htmlFragments = {}; - try { - htmlFragments = await fetch("https://html.spec.whatwg.org/multipage/fragment-links.json").then(r => r.json()); - } catch (err) { - console.warn("Could not fetch HTML fragments data, may report false positive broken links on HTML spec", err); - } - return { crawl, htmlFragments }; - }) - .then(({ crawl, htmlFragments }) => studyBackrefs(crawl.ed, crawl.tr, htmlFragments)) - .then(reportToConsole) - .catch(e => { - console.error(e); - process.exit(3); - }); diff --git a/src/cli/study-webidl.js b/src/cli/study-webidl.js deleted file mode 100644 index 7d396085..00000000 --- a/src/cli/study-webidl.js +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env node - -import { loadCrawlResults } from '../lib/util.js'; -import studyWebIdl from '../lib/study-webidl.js'; -import loadJSON from '../lib/load-json.js'; -import { expandCrawlResult } from 'reffy'; -import path from 'node:path'; - - -function reportToConsole(results) { - results.forEach(anomaly => anomaly.specs = anomaly.specs.map(spec => { - return { shortname: spec.shortname, url: spec.url }; - })); - console.log(JSON.stringify(results, null, 2)); -} - -async function main(crawlPath) { - // Target the index file if needed - if (!crawlPath.endsWith('index.json')) { - crawlPath = path.join(crawlPath, 'index.json'); - } - - const crawl = await loadJSON(crawlPath); - if (!crawl) { - throw new Error("Impossible to read " + crawlPath); - } - - const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), 'idl'); - const report = studyWebIdl(expanded.results); - reportToConsole(report); -} - -/************************************************** -Main loop -**************************************************/ -const crawlPath = process.argv[2]; -if (!crawlPath) { - console.error('Web IDL analyzer must be called with a paths to crawl results as first parameter'); - process.exit(2); -} -main(crawlPath).catch(e => { - console.error(e); - process.exit(3); -}); diff --git a/src/lib/generate-report.js b/src/lib/generate-report.js deleted file mode 100644 index 349a5f6c..00000000 --- a/src/lib/generate-report.js +++ /dev/null @@ -1,1020 +0,0 @@ -/** - * The Markdown report generator takes an anomalies report as input and - * generates a human-readable report in Markdown out of it. 
Depending on - * parameters, the generated report may be a report per spec, a report per - * issue, a dependencies report, or a diff report. - * - * @module markdownGenerator - */ - -import loadJSON from './load-json.js'; - - -/** - * Compares specs for ordering by title - */ -const byTitle = (a, b) => a.title.toUpperCase().localeCompare(b.title.toUpperCase()); - -/** - * Returns true when two arrays are equal - */ -const arrayEquals = (a, b, prop) => - (a.length === b.length) && - a.every(item => !!(prop ? b.find(i => i[prop] === item[prop]) : b.find(i => i === item))); - -/** - * Options for date formatting - */ -const dateOptions = { - day: '2-digit', - month: 'long', - year: 'numeric' -}; - -const toSlug = name => name.replace(/([A-Z])/g, s => s.toLowerCase()) - .replace(/[^a-z0-9]/g, '_') - .replace(/_+/g, '_'); - -/** - * Helper function that outputs main crawl info about a spec - * - * @function - */ -function writeCrawlInfo(spec, withHeader, w) { - let wres = ''; - w = w || (msg => wres += (msg || '') + '\n'); - - if (withHeader) { - w('#### Spec info {.info}'); - } - else { - w('Spec info:'); - } - w(); - - let crawledUrl = spec.crawled || spec.latest; - w('- Initial URL: [' + spec.url + '](' + spec.url + ')'); - w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')'); - if (spec.date) { - w('- Crawled version: ' + spec.date); - } - if (spec.nightly) { - w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')'); - } - if (spec.release) { - w('- Latest published version: [' + spec.release.url + '](' + spec.release.url + ')'); - } - if (spec.repository) { - let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/); - let repositoryName = spec.repository; - if (githubcom) { - repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2]; - } - w('- Repository: [' + repositoryName + '](' + spec.repository + ')'); - } - w('- Shortname: ' + (spec.shortname || 'no shortname')); - return wres; -} - - -function writeDependenciesInfo(spec, results, withHeader, w) { - let wres = ''; - w = w || (msg => wres += (msg || '') + '\n'); - - if (withHeader) { - w('#### Known dependencies on this specification {.dependencies}'); - w(); - } - - if (spec.report.referencedBy.normative.length > 0) { - w('Normative references to this spec from:'); - w(); - spec.report.referencedBy.normative.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - } - else { - w('No normative reference to this spec from other specs.'); - } - w(); - - // Check the list of specifications that should normatively reference - // this specification because they use IDL content it defines. - let shouldBeReferencedBy = results.filter(s => - s.report.missingWebIdlRef && - s.report.missingWebIdlRef.find(i => - i.refs.find(ref => (ref.url === spec.url)))); - if (shouldBeReferencedBy.length > 0) { - w('Although they do not, the following specs should also normatively' + - ' reference this spec because they use IDL terms it defines:'); - w(); - shouldBeReferencedBy.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - w(); - } - - if (spec.report.referencedBy.informative.length > 0) { - w('Informative references to this spec from:'); - w(); - spec.report.referencedBy.informative.forEach(s => { - w('- [' + s.title + '](' + s.crawled + ')'); - }); - } - else { - w('No informative reference to this spec from other specs.'); - } - return wres; -} - -/** - * Outputs a human-readable Markdown anomaly report from a crawl report, - * with one entry per spec. 
- * - * The function spits the report to the console. - * - * @function - */ -function generateReportPerSpec(study) { - var count = 0; - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - const results = study.results; - - w('% ' + (study.title || 'Web specs analysis')); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - const specReport = spec => { - // Prepare anomaly flags - let flags = ['spec']; - if (spec.report.error) { - flags.push('error'); - } - else { - if (!spec.report.ok) { - flags.push('anomaly'); - } - flags = flags.concat(Object.keys(spec.report) - .filter(anomaly => (anomaly !== 'referencedBy')) - .filter(anomaly => (Array.isArray(spec.report[anomaly]) ? - (spec.report[anomaly].length > 0) : - !!spec.report[anomaly]))); - } - let attr = flags.reduce((res, anomaly) => - res + (res ? ' ' : '') + 'data-' + anomaly + '=true', ''); - - w('### ' + spec.title + ' {' + attr + '}'); - w(); - writeCrawlInfo(spec, true, w); - w(); - - const report = spec.report; - w('#### Potential issue(s) {.anomalies}'); - w(); - if (report.ok) { - w('This specification looks good!'); - } - else if (report.error) { - w('The following network or parsing error occurred:'); - w('`' + report.error + '`'); - w(); - w('Reffy could not render this specification as a DOM tree and' + - ' cannot say anything about it as a result. In particular,' + - ' it cannot include content defined in this specification' + - ' in the analysis of other specifications crawled in this' + - ' report.'); - } - else { - if (report.noNormativeRefs) { - w('- No normative references found'); - } - if (report.hasInvalidIdl) { - w('- Invalid WebIDL content found'); - } - if (report.hasObsoleteIdl) { - w('- Obsolete WebIDL constructs found'); - } - if (report.noRefToWebIDL) { - w('- Spec uses WebIDL but does not reference it normatively'); - } - if (report.unknownExposedNames && - (report.unknownExposedNames.length > 0)) { - w('- Unknown [Exposed] names used: ' + - report.unknownExposedNames.map(name => '`' + name + '`').join(', ')); - } - if (report.unknownIdlNames && - (report.unknownIdlNames.length > 0)) { - w('- Unknown WebIDL names used: ' + - report.unknownIdlNames.map(name => '`' + name + '`').join(', ')); - } - if (report.redefinedIdlNames && - (report.redefinedIdlNames.length > 0)) { - w('- WebIDL names also defined elsewhere: '); - report.redefinedIdlNames.map(i => { - w(' * `' + i.name + '` also defined in ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and ')); - }); - } - if (report.missingWebIdlRef && - (report.missingWebIdlRef.length > 0)) { - w('- Missing references for WebIDL names: '); - report.missingWebIdlRef.map(i => { - w(' * `' + i.name + '` defined in ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - }); - } - [ - {prop: 'css', warning: false, title: 'No definition for CSS constructs'}, - {prop: 'idl', warning: false, title: 'No definition for IDL constructs'}, - {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'}, - {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'} - ].forEach(type => { - if (report.missingDfns && report.missingDfns[type.prop] && - (report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) { - w('- ' + type.title + ': '); - report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => { - const exp = missing.expected; - const found = missing.found; - 
const foundFor = (found && found.for && found.for.length > 0) ? - ' for ' + found.for.map(f => '`' + f + '`').join(',') : - ''; - w(' * `' + exp.linkingText[0] + '`' + - (exp.type ? ' with type `' + exp.type + '`' : '') + - (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') + - (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : '')); - }); - } - }); - if (report.missingLinkRef && - (report.missingLinkRef.length > 0)) { - w('- Missing references for links: '); - report.missingLinkRef.map(l => { - w(' * [`' + l + '`](' + l + ')'); - }); - } - if (report.inconsistentRef && - (report.inconsistentRef.length > 0)) { - w('- Inconsistent references for links: '); - report.inconsistentRef.map(l => { - w(' * [`' + l.link + '`](' + l.link + '), related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - }); - } - if (report.xrefs) { - [ - { prop: 'notExported', title: 'External links to private terms' }, - { prop: 'notDfn', title: 'External links that neither target definitions nor headings' }, - { prop: 'brokenLinks', title: 'Broken external links' }, - { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' }, - { prop: 'outdatedSpecs', title: 'External links to outdated specs' }, - { prop: 'datedUrls', title: 'External links that use a dated URL' } - ].forEach(type => { - if (report.xrefs[type.prop] && (report.xrefs[type.prop].length > 0)) { - w('- ' + type.title + ':'); - report.xrefs[type.prop].map(l => { - w(' * [`' + l + '`](' + l + ')'); - }) - } - }); - } - } - w(); - writeDependenciesInfo(spec, results, true, w); - w(); - w(); - }; - - - const orgs = [...new Set(study.results.map(r => r.organization))].sort(); - for (let org of orgs) { - w(`# ${org} {#org-${toSlug(org)}}`); - w(); - const groups = [...new Set(study.results.filter(r => r.organization === org).map(r => r.groups.map(g => g.name)).flat())].sort(); - for (let group of groups) { - w(`## ${group} {#group-${toSlug(group)}}`); - w(); - study.results - .filter(r => r.organization === org && r.groups.find(g => g.name === group)) - .forEach(specReport); - } - } - - w(); - w(); - - return wres; -} - - -/** - * Outputs a human-readable Markdown anomaly report from a crawl report, - * sorted by type of anomaly. - * - * The function spits the report to the console. - * - * @function - */ -function generateReportPerIssue(study) { - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - let count = 0; - let results = study.results; - - w('% ' + (study.title || 'Web specs analysis')); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - count = results.length; - w('' + count + ' specification' + ((count > 1) ? 's' : '') + ' were crawled in this report.'); - w(); - w(); - - let parsingErrors = results.filter(spec => spec.report.error); - if (parsingErrors.length > 0) { - w('## Specifications that could not be rendered'); - w(); - w('Reffy could not fetch or render these specifications for some reason.' + - ' This may happen when a network error occurred or when a specification' + - ' uses an old version of ReSpec.'); - w(); - count = 0; - parsingErrors.forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + '): `' + spec.report.error + '`'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 
's' : '') + ' found'); - w(); - w(); - - // Remove specs that could not be parsed from the rest of the report - results = results.filter(spec => !spec.report.error); - } - - - count = 0; - w('## Specifications without normative dependencies'); - w(); - results - .filter(spec => spec.report.noNormativeRefs) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('Basically all specifications have normative dependencies on some other' + - ' specification. Reffy could not find any normative dependencies for the' + - ' specifications mentioned above, which seems strange.'); - } - w(); - w(); - - count = 0; - w('## List of specifications with invalid WebIDL content'); - w(); - results - .filter(spec => spec.report.hasInvalidIdl) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('WebIDL continues to evolve. Strudy may incorrectly report as invalid' + - ' perfectly valid WebIDL content if the specification uses bleeding-edge' + - ' WebIDL features'); - } - w(); - w(); - - count = 0; - w('## List of specifications with obsolete WebIDL constructs'); - w(); - results - .filter(spec => spec.report.hasObsoleteIdl) - .forEach(spec => { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('A typical example is the use of `[]` instead of `FrozenArray`.'); - } - w(); - w(); - - count = 0; - w('## Specifications that use WebIDL but do not reference the WebIDL spec'); - w(); - results.forEach(spec => { - if (spec.report.noRefToWebIDL) { - count += 1; - w('- [' + spec.title + '](' + spec.crawled + ')'); - } - }); - w(); - w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - ('All specifications that define WebIDL content should have a ' + - ' **normative** reference to the WebIDL specification. ' + - ' Some specifications listed here may reference the WebIDL' + - ' specification informatively, but that is not enough!'); - } - w(); - w(); - - - count = 0; - w('## List of [Exposed] names not defined in the specifications crawled'); - w(); - var idlNames = {}; - results.forEach(spec => { - if (!spec.report.unknownExposedNames || - (spec.report.unknownExposedNames.length === 0)) { - return; - } - spec.report.unknownExposedNames.forEach(name => { - if (!idlNames[name]) { - idlNames[name] = []; - } - idlNames[name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` used in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', ')); - }); - w(); - w('=> ' + count + ' [Exposed] name' + ((count > 1) ? 
's' : '') + ' found'); - if (count > 0) { - w(); - w('Please keep in mind that Strudy only knows about IDL terms defined in the' + - ' specifications that were crawled **and** that do not have invalid IDL content.'); - } - w(); - w(); - - - count = 0; - w('## List of WebIDL names not defined in the specifications crawled'); - w(); - idlNames = {}; - results.forEach(spec => { - if (!spec.report.unknownIdlNames || - (spec.report.unknownIdlNames.length === 0)) { - return; - } - spec.report.unknownIdlNames.forEach(name => { - if (!idlNames[name]) { - idlNames[name] = []; - } - idlNames[name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` used in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', ')); - }); - w(); - w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('Some of them may be type errors in specs (e.g. "int" does not exist, "Array" cannot be used on its own, etc.)'); - w('Also, please keep in mind that Strudy only knows about IDL terms defined in the' + - ' specifications that were crawled **and** that do not have invalid IDL content.'); - } - w(); - w(); - - count = 0; - w('## List of WebIDL names defined in more than one spec'); - w(); - idlNames = {}; - results.forEach(spec => { - if (!spec.report.redefinedIdlNames || - (spec.report.redefinedIdlNames.length === 0)) { - return; - } - spec.report.redefinedIdlNames.forEach(i => { - if (!idlNames[i.name]) { - idlNames[i.name] = []; - } - idlNames[i.name].push(spec); - }); - }); - Object.keys(idlNames).sort().forEach(name => { - count += 1; - w('- `' + name + '` defined in ' + - idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and ')); - }); - w(); - w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found'); - if (count > 0) { - w(); - w('"There can be only one"...'); - } - w(); - w(); - - count = 0; - var countrefs = 0; - w('## Missing references for WebIDL names'); - w(); - results.forEach(spec => { - if (spec.report.missingWebIdlRef && - (spec.report.missingWebIdlRef.length > 0)) { - count += 1; - if (spec.report.missingWebIdlRef.length === 1) { - countrefs += 1; - let i = spec.report.missingWebIdlRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' uses `' + i.name + '` but does not reference ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') uses:'); - spec.report.missingWebIdlRef.map(i => { - countrefs += 1; - w(' * `' + i.name + '` but does not reference ' + - i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or ')); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') + - ' for IDL definitions found in ' + count + ' specification' + - ((count > 1) ? 
's' : '')); - w(); - w(); - - [ - {prop: 'css', warning: false, title: 'No definition for CSS constructs'}, - {prop: 'idl', warning: false, title: 'No definition for IDL constructs'}, - {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'}, - {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'} - ].forEach(type => { - count = 0; - countrefs = 0; - w('## ' + type.title); - w(); - - results.forEach(spec => { - if (spec.report.missingDfns && - spec.report.missingDfns[type.prop] && - (spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) { - count += 1; - - w('- [' + spec.title + '](' + spec.crawled + '):'); - spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => { - countrefs += 1; - const exp = missing.expected; - const found = missing.found; - const foundFor = (found && found.for && found.for.length > 0) ? - ' for ' + found.for.map(f => '`' + f + '`').join(',') : - ''; - w(' * `' + exp.linkingText[0] + '`' + - (exp.type ? ' with type `' + exp.type + '`' : '') + - (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') + - (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : '')); - }); - } - }); - - w(); - w('=> ' + countrefs + ' construct' + ((countrefs > 1) ? 's' : '') + - ' without definition found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - w(); - w(); - }); - - - count = 0; - countrefs = 0; - w('## Missing references based on document links'); - w(); - results.forEach(spec => { - if (spec.report.missingLinkRef && - (spec.report.missingLinkRef.length > 0)) { - count += 1; - if (spec.report.missingLinkRef.length === 1) { - countrefs += 1; - let l = spec.report.missingLinkRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' links to [`' + l + '`](' + l + ') but does not list it' + - ' in its references'); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') links to:'); - spec.report.missingLinkRef.forEach(l => { - countrefs++; - w(' * [`' + l + '`](' + l + ') but does not list it ' + - 'in its references'); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') + - ' for links found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - if (count > 0) { - w(); - w('Any link to an external document from within a specification should' + - ' trigger the creation of a corresponding entry in the references' + - ' section.'); - w(); - w('Note Strudy only reports on links to "well-known" specs and ignores' + - ' links to non-usual specs (e.g. PDF documents, etc.) 
for now.'); - } - w(); - w(); - - count = 0; - countrefs = 0; - w('## Reference URL is inconsistent with URL used in document links'); - w(); - results.forEach(spec => { - if (spec.report.inconsistentRef && - (spec.report.inconsistentRef.length > 0)) { - count += 1; - if (spec.report.inconsistentRef.length === 1) { - countrefs += 1; - let l = spec.report.inconsistentRef[0]; - w('- [' + spec.title + '](' + spec.crawled + ')' + - ' links to [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - } - else { - w('- [' + spec.title + '](' + spec.crawled + ') links to:'); - spec.report.inconsistentRef.forEach(l => { - countrefs++; - w(' * [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')'); - }); - } - } - }); - w(); - w('=> ' + countrefs + ' inconsistent reference' + ((countrefs > 1) ? 's' : '') + - ' for links found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - if (count > 0) { - w(); - w('Links in the body of a specification should be to the same document' + - ' as that pointed to by the related reference in the References section.' + - ' The specifications reported here use a different URL. For instance,' + - ' they may use a link to the Editor\'s Draft but target the latest' + - ' published version in the References section.' + - ' There should be some consistency across the specification.'); - } - w(); - w(); - - [ - { prop: 'notExported', title: 'External links to private terms' }, - { prop: 'notDfn', title: 'External links that neither target definitions nor headings' }, - { prop: 'brokenLinks', title: 'Broken external links' }, - { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' }, - { prop: 'outdatedSpecs', title: 'External links to outdated specs' }, - { prop: 'datedUrls', title: 'External links that use a dated URL' } - ].forEach(type => { - count = 0; - countrefs = 0; - w('## ' + type.title); - w(); - - results.forEach(spec => { - if (spec.report.xrefs && - spec.report.xrefs[type.prop] && - (spec.report.xrefs[type.prop].length > 0)) { - count += 1; - - w('- [' + spec.title + '](' + spec.crawled + '):'); - spec.report.xrefs[type.prop].map(l => { - countrefs += 1; - w(' * [`' + l + '`](' + l + ')'); - }); - } - }); - - w(); - w('=> ' + countrefs + ' problematic external link' + ((countrefs > 1) ? 's' : '') + - ' found in ' + count + ' specification' + - ((count > 1) ? 's' : '')); - w(); - w(); - }); - - - return wres; -} - - -/** - * Outputs a human-readable Markdown dependencies report from a crawl report, - * one entry per spec. - * - * The function spits the report to the console. - * - * @function - */ -function generateDependenciesReport(study) { - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - let count = 0; - const results = study.results; - - w('# Web specs dependencies report'); - w(); - w('Strudy is an analysis tool for Web spec crawl reports created by Reffy.' + - ' It studies extracts created during the crawl.'); - w(); - w('The report below lists incoming links for each specification, in other words the list' + - ' of specifications that normatively or informatively reference a given specification.'); - w(); - w('By definition, Strudy only knows about incoming links from specifications that have been' + - ' crawled and that could successfully be parsed. 
Other specifications that Strudy does' + - ' not know anything about may reference specifications listed here.'); - w(); - results.forEach(spec => { - w('## ' + spec.title); - w(); - writeCrawlInfo(spec, false, w); - w(); - writeDependenciesInfo(spec, results, false, w); - w(); - w(); - }); - - return wres; -} - - -/** - * Outputs a human-readable diff between two crawl reports, one entry per spec. - * - * The function spits the report to the console. - * - * @function - */ -function generateDiffReport(study, refStudy, options) { - options = options || {}; - let wres = ''; - const w = msg => wres += (msg || '') + '\n'; - - const results = study.results; - const resultsRef = refStudy.results; - - // Compute diff for all specs - // (note we're only interested in specs that are part in the new crawl, - // and won't report on specs that were there before and got dropped) - let resultsDiff = results.map(spec => { - let ref = resultsRef.find(s => s.url === spec.url) || { - missing: true, - report: { - unknownExposedNames: [], - unknownIdlNames: [], - redefinedIdlNames: [], - missingWebIdlRef: [], - missingLinkRef: [], - inconsistentRef: [] - } - }; - - const report = spec.report; - const reportRef = ref.report; - - const getSimpleDiff = prop => (report[prop] !== reportRef[prop]) ? - { - ins: (typeof report[prop] !== 'undefined') ? report[prop] : null, - del: (typeof reportRef[prop] !== 'undefined') ? reportRef[prop] : null - } : - null; - const getArrayDiff = (prop, key) => - (!arrayEquals(report[prop], reportRef[prop], key) && - (!options.onlyNew || report[prop].find(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))))) ? - { - ins: report[prop].filter(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))), - del: reportRef[prop].filter(item => !report[prop].find(i => (key ? i[key] === item[key] : i === item))) - } : - null; - - // Compute diff between new and ref report for that spec - const diff = { - title: (spec.title !== ref.title) ? { - ins: (typeof spec.title !== 'undefined') ? spec.title : null, - del: (typeof ref.title !== 'undefined') ? 
ref.title : null - } : null, - ok: getSimpleDiff('ok'), - error: getSimpleDiff('error'), - noNormativeRefs: getSimpleDiff('noNormativeRefs'), - noRefToWebIDL: getSimpleDiff('noRefToWebIDL'), - hasInvalidIdl: getSimpleDiff('hasInvalidIdl'), - hasObsoleteIdl: getSimpleDiff('hasObsoleteIdl'), - unknownExposedNames: getArrayDiff('unknownExposedNames'), - unknownIdlNames: getArrayDiff('unknownIdlNames'), - redefinedIdlNames: getArrayDiff('redefinedIdlNames', 'name'), - missingWebIdlRef: getArrayDiff('missingWebIdlRef', 'name'), - missingLinkRef: getArrayDiff('missingLinkRef'), - inconsistentRef: getArrayDiff('inconsistentRef', 'link') - }; - - return { - title: spec.title, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - repository: spec.repository, - isNewSpec: ref.missing, - hasDiff: Object.keys(diff).some(key => diff[key] !== null), - diff - }; - }); - - if (!options.onlyNew) { - resultsDiff = resultsDiff.concat(resultsRef - .map(spec => { - let ref = results.find(s => s.url === spec.url); - if (ref) return null; - return { - title: spec.title, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - crawled: spec.crawled, - repository: spec.repository, - isUnknownSpec: true, - hasDiff: true - }; - }) - .filter(spec => !!spec)); - resultsDiff.sort(byTitle); - } - - w('% Diff between report from "' + - (new Date(study.date)).toLocaleDateString('en-US', dateOptions) + - '" and reference report from "' + - (new Date(refStudy.date)).toLocaleDateString('en-US', dateOptions) + - '"'); - w('% Strudy'); - w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions)); - w(); - - resultsDiff.forEach(spec => { - // Nothing to report if crawl result is the same - if (!spec.hasDiff) { - return; - } - - w('## ' + spec.title); - w(); - - let crawledUrl = spec.crawled || spec.latest; - w('- Initial URL: [' + spec.url + '](' + spec.url + ')'); - w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')'); - if (spec.nightly && (spec.nightly.url !== crawledUrl)) { - w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')'); - } - if (spec.repository) { - let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/); - let repositoryName = spec.repository; - if (githubcom) { - repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2]; - } - w('- Repository: [' + repositoryName + '](' + spec.repository + ')'); - } - - if (spec.isNewSpec) { - w('- This specification was not in the reference crawl report.'); - w(); - w(); - return; - } - - if (spec.isUnknownSpec) { - w('- This specification is not in the new crawl report.'); - w(); - w(); - return; - } - - const diff = spec.diff; - const simpleDiff = prop => - ((diff[prop].ins !== null) ? '*INS* ' + diff[prop].ins : '') + - (((diff[prop].ins !== null) && (diff[prop].del !== null)) ? ' / ' : '') + - ((diff[prop].del !== null) ? '*DEL* ' + diff[prop].del : ''); - const arrayDiff = (prop, key) => - ((diff[prop].ins.length > 0) ? '*INS* ' + diff[prop].ins.map(i => (key ? i[key] : i)).join(', ') : '') + - (((diff[prop].ins.length > 0) && (diff[prop].del.length > 0)) ? ' / ' : '') + - ((diff[prop].del.length > 0) ? '*DEL* ' + diff[prop].del.map(i => (key ? 
i[key] : i)).join(', ') : ''); - - [ - { title: 'Spec title', prop: 'title', diff: 'simple' }, - { title: 'Spec is OK', prop: 'ok', diff: 'simple' }, - { title: 'Spec could not be rendered', prop: 'error', diff: 'simple' }, - { title: 'No normative references found', prop: 'noNormativeRefs', diff: 'simple' }, - { title: 'Invalid WebIDL content found', prop: 'hasInvalidIdl', diff: 'simple' }, - { title: 'Obsolete WebIDL constructs found', prop: 'hasObsoleteIdl', diff: 'simple' }, - { title: 'Spec does not reference WebIDL normatively', prop: 'noRefToWebIDL', diff: 'simple' }, - { title: 'Unknown [Exposed] names used', prop: 'unknownExposedNames', diff: 'array' }, - { title: 'Unknown WebIDL names used', prop: 'unknownIdlNames', diff: 'array' }, - { title: 'WebIDL names also defined elsewhere', prop: 'redefinedIdlNames', diff: 'array', key: 'name' }, - { title: 'Missing references for WebIDL names', prop: 'missingWebIdlRef', diff: 'array', key: 'name' }, - { title: 'Missing references for links', prop: 'missingLinkRef', diff: 'array' }, - { title: 'Inconsistent references for links', prop: 'inconsistentRef', diff: 'array', key: 'link' } - ].forEach(item => { - // Only report actual changes, and don't report other changes when - // the spec could not be rendered in one of the crawl reports - if (diff[item.prop] && ((item.prop === 'error') || (item.prop === 'title') || (item.prop === 'latest') || !diff.error)) { - w('- ' + item.title + ': ' + ((item.diff === 'simple') ? - simpleDiff(item.prop) : - arrayDiff(item.prop, item.key))); - } - }); - w(); - w(); - }); - - return wres; -} - - -/** - * Main function that generates a Markdown report from a study file. - * - * @function - * @param {String} studyFile Path to the study file to parse, or study report - * @param {Object} options Type of report to generate and other options - * @return {String} The generated report - */ -async function generateReport(studyFile, options) { - options = options || {}; - if (!studyFile) { - throw new Error('Required filename parameter missing'); - } - if (options.diffReport && !options.refStudyFile) { - throw new Error('Required filename to reference crawl for diff missing'); - } - - const study = typeof studyFile === 'string' ? 
- (await loadJSON(studyFile)) : - studyFile; - if (!study) { - throw new Error('Impossible to read ' + studyFile); - } - - let refStudy = {}; - if (options.diffReport) { - if (options.refStudyFile.startsWith('http')) { - try { - let response = await fetch(options.refStudyFile, { nolog: true }); - refStudy = await response.json(); - } - catch (e) { - throw new Error('Impossible to fetch ' + options.refStudyFile + ': ' + e); - } - return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew }); - } - else { - refStudy = await loadJSON(options.refStudyFile); - if (!refStudy) { - throw new Error('Impossible to read ' + options.refStudyFile); - } - return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew }); - } - } - else if (options.depReport) { - return generateDependenciesReport(study); - } - else if (options.perSpec) { - return generateReportPerSpec(study); - } - else { - return generateReportPerIssue(study); - } - return report; -} - - -/************************************************** -Export methods for use as module -**************************************************/ -export default generateReport; diff --git a/src/lib/study-algorithms.js b/src/lib/study-algorithms.js index 76f76fd9..2466a282 100644 --- a/src/lib/study-algorithms.js +++ b/src/lib/study-algorithms.js @@ -1,11 +1,4 @@ import { JSDOM } from 'jsdom'; -import { recordCategorizedAnomaly } from './util.js'; - -const possibleAnomalies = [ - 'missingTaskForPromise', - 'missingTaskForEvent' -]; - /** * Normalize whitespaces in string to make analysis easier @@ -57,9 +50,8 @@ function nestParallelSteps(algo) { /** * Main function, study all algorithms */ -function studyAlgorithms(edResults) { +function studyAlgorithms(specs) { const report = []; - const recordAnomaly = recordCategorizedAnomaly(report, 'algorithms', possibleAnomalies); // Return human-friendly markdown that identifies the given algorithm function getAlgoName(algo) { @@ -95,11 +87,19 @@ function studyAlgorithms(edResults) { // https://w3c.github.io/clipboard-apis/#dom-clipboard-read !html.includes('systemClipboardRepresentation') ) { - recordAnomaly(spec, 'missingTaskForPromise', `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`); + report.push({ + name: 'missingTaskForPromise', + message: `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`, + spec + }); return true; } else if (html.match(/fire an?( \w+)? event/i)) { - recordAnomaly(spec, 'missingTaskForEvent', `${getAlgoName(algo)} has a parallel step that fires an event directly`); + report.push({ + name: 'missingTaskForEvent', + message: `${getAlgoName(algo)} has a parallel step that fires an event directly`, + spec + }); return true; } } @@ -133,13 +133,10 @@ function studyAlgorithms(edResults) { return anomalyFound; } - // We're only interested in specs that define algorithms - const specs = edResults.filter(spec => !!spec.algorithms); - // Study algorithms in turn. // Note: the root level of each algorithm is its first step. It may say // something like "run these steps in parallel" in particular. 
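// For orientation, a short usage sketch of the refactored function (not part of
// this diff), assuming the module keeps its default export and that the caller
// already holds Reffy crawl results: studyAlgorithms() now takes the crawled spec
// objects directly and returns a flat list of anomaly entries. The sample spec
// below is a made-up placeholder with an empty `algorithms` extract.
import studyAlgorithms from './study-algorithms.js';

const sampleSpecs = [{ shortname: 'sample-spec', url: 'https://example.org/sample/', algorithms: [] }];
const anomalies = studyAlgorithms(sampleSpecs);   // [] here; real crawls may yield entries
for (const { name, message, spec } of anomalies) {
  // `name` is 'missingTaskForPromise' or 'missingTaskForEvent'
  console.warn(`[${name}] ${spec.shortname}: ${message}`);
}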
- for (const spec of specs) { + for (const spec of specs.filter(spec => !!spec.algorithms)) { for (const algo of spec.algorithms) { nestParallelSteps(algo); studyAlgorithmStep(spec, algo, algo); diff --git a/src/lib/study-backrefs.js b/src/lib/study-backrefs.js index 2be544fa..0d8612d8 100644 --- a/src/lib/study-backrefs.js +++ b/src/lib/study-backrefs.js @@ -1,18 +1,3 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; - -const possibleAnomalies = [ - 'brokenLinks', - 'datedUrls', - 'evolvingLinks', - 'frailLinks', - 'nonCanonicalRefs', - 'notDfn', - 'notExported', - 'outdatedSpecs', - 'unknownSpecs' -]; - /** * The backrefs analyzer only checks links to other specs. This function returns * true when a link does target a spec, and false if it targets something else @@ -39,57 +24,91 @@ const matchSpecUrl = url => TODO: DRY Copied from browser-specs/src/compute-shortname.js */ -function computeShortname (url) { - function parseUrl (url) { +function computeShortname(url) { + function parseUrl(url) { // Handle /TR/ URLs - const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^/]+)\/$/); + const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^\/]+)\/$/); if (w3cTr) { return w3cTr[1]; } // Handle WHATWG specs - const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\/?/); + const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\//); if (whatwg) { - return whatwg[1]; + return whatwg[1]; } // Handle TC39 Proposals - const tc39 = url.match(/\/\/tc39\.es\/proposal-([^/]+)\/$/); + const tc39 = url.match(/\/\/tc39\.es\/proposal-([^\/]+)\/$/); if (tc39) { - return 'tc39-' + tc39[1]; + return "tc39-" + tc39[1]; } + // Handle Khronos extensions - const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^/]+)\/$/); + const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^\/]+)\/$/); if (khronos) { - return khronos[1]; + return khronos[1]; } // Handle extension specs defined in the same repo as the main spec // (e.g. 
generate a "gamepad-extensions" name for // https://w3c.github.io/gamepad/extensions.html") - const ext = url.match(/\/.*\.github\.io\/([^/]+)\/(extensions?)\.html$/); + const ext = url.match(/\/.*\.github\.io\/([^\/]+)\/(extensions?)\.html$/); if (ext) { return ext[1] + '-' + ext[2]; } // Handle draft specs on GitHub, excluding the "webappsec-" prefix for // specifications developed by the Web Application Security Working Group - const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^/]+)\//); + const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^\/]+)\//); if (github) { - return github[1]; + return github[1]; } // Handle CSS WG specs - const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^/]+)\//); + const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^\/]+)\//); if (css) { return css[1]; } // Handle SVG drafts - const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^/]+)\//); + const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^\/]+)\//); if (svg) { - return 'svg-' + svg[1]; + return "svg-" + svg[1]; + } + + // Handle IETF RFCs + const rfcs = url.match(/\/www.rfc-editor\.org\/rfc\/(rfc[0-9]+)/); + if (rfcs) { + return rfcs[1]; + } + + // Handle IETF group drafts + const ietfDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-ietf-[^\-]+-([^\/]+)/); + if (ietfDraft) { + return ietfDraft[1]; + } + + // Handle IETF individual drafts, stripping group name + // TODO: retrieve the list of IETF groups to make sure that the group name + // is an actual group name and not the beginning of the shortname: + // https://datatracker.ietf.org/api/v1/group/group/ + // (multiple requests needed due to pagination, "?limit=1000" is the max) + const ietfIndDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-[^\-]+-([^\/]+)/); + if (ietfIndDraft) { + if (ietfIndDraft[1].indexOf('-') !== -1) { + return ietfIndDraft[1].slice(ietfIndDraft[1].indexOf('-') + 1); + } + else { + return ietfIndDraft[1]; + } + } + + // Handle TAG findings + const tag = url.match(/^https?:\/\/(?:www\.)?w3\.org\/2001\/tag\/doc\/([^\/]+)\/?$/); + if (tag) { + return tag[1]; } // Return name when one was given @@ -107,7 +126,7 @@ function computeShortname (url) { // Latin characters (a-z letters, digits, underscore and "-"), and that it // only contains a dot for fractional levels at the end of the name // (e.g. "blah-1.2" is good but "blah.blah" and "blah-3.1-blah" are not) - if (!name.match(/^[\w-]+((?<=-\d+)\.\d+)?$/)) { + if (!name.match(/^[\w\-]+((?<=\-v?\d+)\.\d+)?$/)) { throw new Error(`Specification name contains unexpected characters: ${name} (extracted from ${url})`); } @@ -234,18 +253,21 @@ const matchAnchor = (url, anchor) => link => { return link === (url + '#' + anchor) || link === (url + '#' + encodeURIComponent(anchor)); }; -function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortnameFilter) { - trResults = trResults || []; +async function studyBackrefs(specs, { crawlResults = null, trResults = [], htmlFragments = null } = {}) { + crawlResults = crawlResults ?? specs; const report = []; - edResults.forEach(spec => { - if (shortnameFilter && spec.shortname !== shortnameFilter) return; - studyLinks(spec, spec.links?.rawlinks, report, edResults, trResults, htmlFragments); + // Donwload automatic map of multipages anchors in HTML spec + const fragmentsUrl = 'https://html.spec.whatwg.org/multipage/fragment-links.json'; + htmlFragments = htmlFragments ?? 
await fetch(fragmentsUrl).then(r => r.json()); + + specs.forEach(spec => { + studyLinks(spec, spec.links?.rawlinks, report, crawlResults, trResults, htmlFragments); // given the current limitation of classification of links for bikeshed // https://github.com/w3c/reffy/issues/1584 // we also check autolinks for bikeshed specs if (spec.generator === "bikeshed") { - studyLinks(spec, spec.links?.autolinks, report, edResults, trResults, htmlFragments); + studyLinks(spec, spec.links?.autolinks, report, crawlResults, trResults, htmlFragments); } }); return report; @@ -254,7 +276,9 @@ function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortname function studyLinks(spec, links, report, edResults, trResults, htmlFragments) { if (!links) return; - const recordAnomaly = recordCategorizedAnomaly(report, 'links', possibleAnomalies); + function recordAnomaly(spec, name, message) { + report.push({ name, message, spec }); + } Object.keys(links) .filter(matchSpecUrl) @@ -421,22 +445,4 @@ function studyLinks(spec, links, report, edResults, trResults, htmlFragments) { }); } -/************************************************** -Export methods for use as module -**************************************************/ -export default studyBackrefs; - -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const crawl = await loadCrawlResults(process.argv[2], process.argv[3]); - let htmlFragments = {}; - try { - console.info('Downloading HTML spec fragments data…'); - htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json()); - console.info('- done'); - } catch (err) { - console.error('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec'); - } - - const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments, process.argv[4] ?? undefined); - console.log(results); -} +export default studyBackrefs; \ No newline at end of file diff --git a/src/lib/study-crawl.js b/src/lib/study-crawl.js deleted file mode 100644 index be210dc4..00000000 --- a/src/lib/study-crawl.js +++ /dev/null @@ -1,412 +0,0 @@ -/** - * The crawl analyzer takes a crawl report as input and creates a report that - * contains, for each spec, a list of potential anomalies, such as: - * - * 1. specs that do not seem to reference any other spec normatively; - * 2. specs that define WebIDL terms but do not normatively reference the WebIDL - * spec; - * 3. specs that contain invalid WebIDL terms definitions; - * 4. specs that use obsolete WebIDL constructs (e.g. `[]` instead of - * `FrozenArray`); - * 5. specs that define WebIDL terms that are *also* defined in another spec; - * 6. specs that use WebIDL terms defined in another spec without referencing - * that spec normatively; - * 7. specs that use WebIDL terms for which the crawler could not find any - * definition in any of the specs it studied; - * 8. specs that link to another spec but do not include a reference to that - * other spec; - * 9. specs that link to another spec inconsistently in the body of the document - * and in the list of references (e.g. because the body of the document - * references the Editor's draft while the reference is to the latest published - * version). - * 10. 
W3C specs that do not have a known Editor's Draft - * - * @module analyzer - */ - -import fs from 'node:fs'; -import path from 'node:path'; -import { expandCrawlResult, isLatestLevelThatPasses } from 'reffy'; -import studyBackrefs from './study-backrefs.js'; -import { checkSpecDefinitions } from '../cli/check-missing-dfns.js'; -import { canonicalizeUrl, canonicalizesTo } from "./canonicalize-url.js"; -import loadJSON from './load-json.js'; - -const array_concat = (a,b) => a.concat(b); -const uniqueFilter = (item, idx, arr) => arr.indexOf(item) === idx; - -/** - * Helper function that returns true when the given URL seems to target a real - * "spec" (as opposed to, say, a Wiki page, or something else) - */ -const matchSpecUrl = url => - url.match(/spec.whatwg.org/) || - url.match(/www.w3.org\/TR\/[a-z0-9]/) || - (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//)); - - -/** - * Compares specs for ordering by title - */ -const byTitle = (a, b) => - (a.title || '').toUpperCase().localeCompare((b.title || '').toUpperCase()); - - -/** - * Returns true when the given error array is not set or does not contain any - * error. - */ -function isOK(errors) { - return !errors || (errors.length === 0); -} - - -/** - * Filter out spec info parameters that are not needed when the spec is to - * appear as a reference in the final report, to keep the JSON report somewhat - * readable. - * - * @function - * @param {Object} spec The spec info to filter, typically the spec object - * contained in the results of a crawl. - * @return {Object} A new spec object that only contains the URL, title, the - * URL that was crawled. - */ -function filterSpecInfo(spec) { - return { - url: spec.url, - title: spec.title, - crawled: spec.crawled - }; -} - - -/** - * Analyze the result of a crawl and produce a report that can easily be - * converted without more processing to a human readable version. - * - * @function - * @param {Array(Object)} A crawl result, one entry per spec - * @param {Array(Object)} An optional list of specs to include in the report. - * All specs are included by default. - * @return {Array(Object)} A report, one entry per spec, each spec will have - * a "report" property with "interesting" properties, see code comments inline - * for details - */ -async function studyCrawlResults(results, options = {}) { - const knownIdlNames = results - .map(r => r.idlparsed?.idlNames ? Object.keys(r.idlparsed.idlNames) : [], []) - .reduce(array_concat) - .filter(uniqueFilter); - const knownGlobalNames = results - .map(r => r.idlparsed?.globals ? Object.keys(r.idlparsed.globals) : [], []) - .reduce(array_concat) - .filter(uniqueFilter); - const idlNamesIndex = {}; - knownIdlNames.forEach(name => - idlNamesIndex[name] = results.filter(spec => - isLatestLevelThatPasses(spec, results, s => - s.idlparsed?.idlNames?.[name]))); - - // WebIDL-1 only kept for historical reasons to process old crawl results - const WebIDLSpec = results.find(spec => - spec.shortname === 'webidl' || spec.shortname === 'WebIDL-1') || {}; - - const sortedResults = results.sort(byTitle); - - // Construct spec equivalence from the crawl report, which should be more - // complete than the initial equivalence list. 
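// Illustrative shape of the equivalence map built just below (the same construction
// reappears in the new src/lib/study-refs.js): each URL listed in a spec's `versions`
// array maps back to the spec's `url`, or to an array of spec URLs when several
// crawled specs share a version URL. The URLs are made-up examples.
const exampleSpecEquivalents = {
  'https://www.w3.org/TR/example-1/': 'https://www.w3.org/TR/example/',
  'https://w3c.github.io/example/': 'https://www.w3.org/TR/example/',
  'https://www.w3.org/TR/shared-old-version/': [
    'https://www.w3.org/TR/example/',
    'https://www.w3.org/TR/other-example/'
  ]
};
// The map is then handed to the canonicalization helpers (as the `equivalents`
// option) so that any of these URLs resolves to the same spec when matching
// references and links.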
- const specEquivalents = {}; - sortedResults.forEach(spec => - spec.versions.forEach(v => { - if (specEquivalents[v]) { - if (Array.isArray(specEquivalents[v])) { - specEquivalents[v].push(spec.url); - } - else { - specEquivalents[v] = [specEquivalents[v], spec.url]; - } - } - else { - specEquivalents[v] = spec.url; - } - } - )); - - // Strong canonicalization options to find references - var useEquivalents = { - datedToLatest: true, - equivalents: specEquivalents - }; - - const xrefsReport = studyBackrefs(sortedResults, options.trResults); - - const specsToInclude = options.include; - return Promise.all(sortedResults - .filter(spec => !specsToInclude || - (specsToInclude.length === 0) || - specsToInclude.some(toInclude => - toInclude === spec.shortname || - toInclude === spec.series?.shortname || - toInclude === spec.url || - toInclude === spec.crawled || - toInclude === spec.nightly?.url || - toInclude.shortname === spec.shortname || - toInclude.shortname === spec.series?.shortname || - (toInclude.url && toInclude.url === spec.url) || - (toInclude.url && toInclude.url === spec.crawled) || - (toInclude.url && toInclude.url === spec.nightly?.url) || - (toInclude.html && toInclude.html === spec.html))) - .map(async spec => { - spec.idlparsed = spec.idlparsed || {}; - spec.css = spec.css || {}; - spec.refs = spec.refs || {}; - spec.links = spec.links || {}; - const idlDfns = spec.idlparsed.idlNames ? - Object.keys(spec.idlparsed.idlNames) : []; - const idlExtendedDfns = spec.idlparsed.idlExtendedNames ? - Object.keys(spec.idlparsed.idlExtendedNames) : []; - const idlDeps = spec.idlparsed.externalDependencies ? - spec.idlparsed.externalDependencies : []; - const exposed = spec.idlparsed.exposed ? Object.keys(spec.idlparsed.exposed) : []; - - const xrefs = xrefsReport[spec.url]; - if (xrefs) { - // The backrefs analysis tool includes the spec's title in its - // report, which we already have at the top level. - delete xrefs.title; - - // The backrefs analysis tool also includes a list of documents - // that look like specs but that are not crawled. That is not - // an anomaly with the spec but rather a list of potential specs - // to be included in browser-specs. They should be treated - // separately. 
- delete xrefs.unknownSpecs; - } - - const report = { - // An error at this level means the spec could not be parsed at all - error: spec.error, - - // Whether the crawler found normative references - // (most specs should have) - noNormativeRefs: !spec.refs.normative || - (spec.refs.normative.length === 0), - - // Whether the spec normatively references the WebIDL spec - // (all specs that define IDL content should) - noRefToWebIDL: (spec !== WebIDLSpec) && - (spec.idlparsed.bareMessage || (idlDfns.length > 0) || (idlExtendedDfns.length > 0)) && - (!spec.refs.normative || !spec.refs.normative.find(ref => - ref.name.match(/^WebIDL/i) || - (ref.url === WebIDLSpec.url) || - (WebIDLSpec.nightly && (ref.url === WebIDLSpec.nightly.url)))), - - // Whether the spec has invalid IDL content - // (the crawler cannot do much when IDL content is invalid, it - // cannot tell what IDL definitions and references the spec - // contains in particular) - hasInvalidIdl: !!(!spec.idlparsed.idlNames && spec.idlparsed.bareMessage), - - // Whether the spec uses IDL constructs that were valid in - // WebIDL Level 1 but no longer are, typically "[]" instead of - // "FrozenArray" - hasObsoleteIdl: spec.idlparsed.hasObsoleteIdl, - - // List of Exposed names used in the spec that we know nothing - // about because we cannot find a matching "Global" name in - // any other spec - unknownExposedNames: exposed - .filter(name => !knownGlobalNames.includes(name) && name !== "*") - .sort(), - - // List of IDL names used in the spec that we know nothing about - // (for instance because of some typo or because the term is - // defined in a spec that has not been crawled or that could - // not be parsed) - unknownIdlNames: idlDeps - .filter(name => knownIdlNames.indexOf(name) === -1) - .sort(), - - // List of IDL definitions that are already defined in some - // other crawled spec - // (this should not happen, ideally) - redefinedIdlNames: idlDfns - .filter(name => (idlNamesIndex[name].length > 1)) - .map(name => { - return { - name, - refs: idlNamesIndex[name].filter(ref => (ref.url !== spec.url)).map(filterSpecInfo) - }; - }), - - // List of IDL names used in the spec that are defined in some - // other spec, and which do not seem to appear in the list of - // normative references - // (There should always be an entry in the normative list of - // references that links to that other spec) - // NB: "Exposed=Window", which would in theory trigger the need - // to add a normative reference to HTML, is considered to be - // an exception to the rule, and ignored. - missingWebIdlRef: idlDeps - .filter(name => knownIdlNames.indexOf(name) !== -1) - .map(name => { - const refs = idlNamesIndex[name].map(filterSpecInfo); - let ref = null; - if (spec.refs && spec.refs.normative) { - ref = refs.find(s => { - const canon = canonicalizeUrl(s.url, useEquivalents); - return !!spec.refs.normative.find(r => - canonicalizesTo(r.url, canon, useEquivalents)); - }); - } - return (ref ? 
null : { name, refs }); - }) - .filter(i => !!i), - - // CSS/IDL terms that do not have a corresponding dfn in the - // specification - missingDfns: await checkSpecDefinitions(spec), - - // Links to external specifications within the body of the spec - // that do not have a corresponding entry in the references - // (all links to external specs should have a companion ref) - missingLinkRef: Object.keys(spec.links.rawlinks || {}) - .filter(matchSpecUrl) - .filter(l => { - // Filter out "good" and "inconsistent" references - const canon = canonicalizeUrl(l, useEquivalents); - const refs = (spec.refs.normative || []).concat(spec.refs.informative || []); - return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); - }) - .filter(l => - // Ignore links to other versions of "self". There may - // be cases where it would be worth reporting them but - // most of the time they appear in "changelog" sections. - !canonicalizesTo(l, spec.url, useEquivalents) && - !canonicalizesTo(l, spec.versions, useEquivalents) - ), - - // Links to external specifications within the body of the spec - // that have a corresponding entry in the references, but for - // which the reference uses a different URL, e.g. because the - // link targets the Editor's Draft, whereas the reference - // targets the latest published version - inconsistentRef: Object.keys(spec.links.rawlinks || {}) - .filter(matchSpecUrl) - .map(l => { - const canonSimple = canonicalizeUrl(l); - const canon = canonicalizeUrl(l, useEquivalents); - const refs = (spec.refs.normative || []).concat(spec.refs.informative || []); - - // Filter out "good" references - if (refs.find(r => canonicalizesTo(r.url, canonSimple))) { - return null; - } - const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); - return (ref ? { link: l, ref } : null); - }) - .filter(l => !!l), - - // Lists of specs present in the crawl report that reference - // the current spec, either normatively or informatively - // (used to produce the dependencies report) - referencedBy: { - normative: sortedResults - .filter(s => - s.refs && s.refs.normative && - s.refs.normative.find(r => - canonicalizesTo(r.url, spec.url, useEquivalents) || - canonicalizesTo(r.url, spec.versions, useEquivalents))) - .map(filterSpecInfo), - informative: sortedResults - .filter(s => - s.refs && s.refs.informative && - s.refs.informative.find(r => - canonicalizesTo(r.url, spec.url, useEquivalents) || - canonicalizesTo(r.url, spec.versions, useEquivalents))) - .map(filterSpecInfo) - }, - - // Analysis of cross-references to other specs - xrefs: xrefsReport[spec.url] - }; - - // A spec is OK if it does not contain anything "suspicious". 
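// For reference, the `ok` roll-up below leans on the isOK() helper defined earlier
// in this file: an absent or empty anomaly list counts as "nothing to report".
// A few worked examples:
//   isOK(undefined)          // true  – the check did not record anything
//   isOK([])                 // true  – the check ran and found nothing
//   isOK(['some anomaly'])   // false – at least one anomaly was recorded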
- report.ok = !report.error && - !report.noNormativeRefs && - !report.hasInvalidIdl && - !report.hasObsoleteIdl && - !report.noRefToWebIDL && - !report.missingDfns.obsoleteDfnsModel && - isOK(report.unknownIdlNames) && - isOK(report.redefinedIdlNames) && - isOK(report.missingWebIdlRef) && - isOK(report.missingDfns.css.filter(r => !r.warning)) && - isOK(report.missingDfns.idl.filter(r => !r.warning)) && - isOK(report.missingLinkRef) && - isOK(report.inconsistentRef) && - (!report.xrefs || ( - isOK(report.xrefs.notExported) && - isOK(report.xrefs.notDfn) && - isOK(report.xrefs.brokenLinks) && - isOK(report.xrefs.evolvingLinks) && - isOK(report.xrefs.outdatedSpecs) && - isOK(report.xrefs.datedUrls))); - - const res = { - title: spec.title || spec.url, - shortname: spec.shortname, - date: spec.date, - url: spec.url, - release: spec.release, - nightly: spec.nightly, - crawled: spec.crawled, - organization: spec.organization, - groups: spec.groups, - report - }; - return res; - })); -} - -async function studyCrawl(crawlResults, options = {}) { - if (typeof crawlResults === 'string') { - const crawlResultsPath = crawlResults; - crawlResults = await loadJSON(crawlResults); - crawlResults = await expandCrawlResult(crawlResults, path.dirname(crawlResultsPath)); - } - else { - crawlResults = crawlResults || {}; - } - crawlResults.results = crawlResults.results || []; - crawlResults.stats = crawlResults.stats || {}; - - if (typeof options.trResults === 'string') { - const crawlResultsPath = options.trResults; - options.trResults = await loadJSON(options.trResults); - options.trResults = await expandCrawlResult(options.trResults, path.dirname(crawlResultsPath)); - options.trResults = options.trResults.results; - } - - const results = await studyCrawlResults(crawlResults.results, options); - - return { - type: 'study', - title: crawlResults.title || 'Web specs analysis', - description: crawlResults.description || '', - date: crawlResults.date || (new Date()).toJSON(), - stats: { - crawled: crawlResults.stats.crawled || crawlResults.results.length, - errors: crawlResults.stats.errors || crawlResults.results.filter(spec => !!spec.error).length, - studied: results.length || crawlResults.stats.crawled - }, - results: results - }; -} - - -/************************************************** -Export methods for use as module -**************************************************/ -export default studyCrawl; diff --git a/src/cli/check-missing-dfns.js b/src/lib/study-dfns.js similarity index 69% rename from src/cli/check-missing-dfns.js rename to src/lib/study-dfns.js index be298a75..0b00e5f4 100644 --- a/src/cli/check-missing-dfns.js +++ b/src/lib/study-dfns.js @@ -1,21 +1,8 @@ -#!/usr/bin/env node /** * The definitions checker compares CSS, dfns, and IDL extracts created by Reffy * to detect CSS/IDL terms that do not have a corresponding dfn in the * specification. * - * The definitions checker can be called directly through: - * - * `node check-missing-dfns.js [crawl report] [spec] [format]` - * - * where: - * - `crawl report` is the local path to the root folder that contains the - * `index.json` and the extracts (e.g. `reports/ed`) - * - `spec` is the optional shortname of the specification on which to focus or - * `all` (default) to check all specs - * - `format` is the optional output format. Either `json` or `markdown` with - * `markdown` being the default. - * * Note: CSS extraction already relies on dfns and reports missing dfns in a * "warnings" property. This checker simply looks at that list. 
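// A minimal usage sketch of the renamed module (not part of this diff), assuming the
// default export added further down and a `specs` array of crawled spec objects:
//
//   import studyDefinitions from './study-dfns.js';
//   const dfnAnomalies = studyDefinitions(specs);
//   // => entries of the form
//   // { name: 'missingDfns', message: '`term` with type `dfn`, …', spec: {…} }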
* @@ -100,7 +87,7 @@ function matchCSSDfn(expected, actual) { * * @function * @private - * @param {Object} css The root of the object that describes IDL terms in the + * @param {Object} idl The root of the object that describes IDL terms in the * `idlparsed` extract. * @return {Array} An array of expected definitions */ @@ -121,7 +108,7 @@ function getExpectedDfnsFromIdl(idl = {}) { /** * Return true if the given parsed IDL object describes a default toJSON * operation that references: - * https://heycam.github.io/webidl/#default-tojson-steps + * https://webidl.spec.whatwg.org/#default-tojson-steps * * @function * @private @@ -140,9 +127,9 @@ function isDefaultToJSONOperation(desc) { * * @function * @public - * @param {Object} desc The object that describes the IDL term in the + * @param {Object} idl The object that describes the IDL term in the * `idlparsed` extract. - * @param {Object} parentDesc (optional) The object that describes the parent + * @param {Object} parentIdl (optional) The object that describes the parent * IDL term of the term to parse (used to compute the `for` property). * @return {Object} The expected definition, or null if no expected definition * is defined. @@ -318,7 +305,7 @@ function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) { * * The function works around Respec's issue #3200 for methods and constructors * that take only optional parameters: - * https://github.com/w3c/respec/issues/3200 + * https://github.com/speced/respec/issues/3200 * * @function * @private @@ -352,29 +339,18 @@ function matchIdlDfn(expected, actual, * @function * @public * @param {Object} spec Crawl result for the spec to parse - * @param {String} options Check options. Set the rootFolder property to the - * root folder against which to resolve relative paths to load CSS/IDL - * extracts (only needed if the extracts have not yet been loaded and attached - * to the spec object). Set the includeObsolete property to true to include - * detailed results about specs that use an obsolete dfns data model. * @return {Object} An object with a css and idl property, each of them holding * an array of missing CSS or IDL definitions. The function returns null when * there are no missing definitions. */ -async function checkSpecDefinitions(spec, options = {}) { - if (!options.includeObsolete && specsWithObsoleteDfnsModel.includes(spec.shortname)) { +function checkSpecDefinitions(spec) { + if (specsWithObsoleteDfnsModel.includes(spec.shortname)) { return { obsoleteDfnsModel: true }; } - const dfns = (typeof spec.dfns === "string") ? - (await loadJSON(path.resolve(options.rootFolder, spec.dfns))).dfns : - (spec.dfns || []); - const css = (typeof spec.css === "string") ? - (await loadJSON(path.resolve(options.rootFolder, spec.css))) : - (spec.css || {}); - const idl = (typeof spec.idlparsed === "string") ? - (await loadJSON(path.resolve(options.rootFolder, spec.idlparsed))).idlparsed : - spec.idlparsed; + const dfns = spec.dfns ?? []; + const css = spec.css ?? {}; + const idl = spec.idlparsed ?? {}; // Make sure that all expected CSS definitions exist in the dfns extract const expectedCSSDfns = getExpectedDfnsFromCSS(css); @@ -455,134 +431,50 @@ async function checkSpecDefinitions(spec, options = {}) { /** - * Checks the CSS and IDL extracts against the dfns extract for all specs in - * the report. - * - * @function - * @public - * @param {String} pathToReport Path to the root folder that contains the - * `index.json` report file and the extracts subfolders. 
- * @param {Object} options Check options. Set the "shortname" property to a - * spec's shortname to only check that spec. - * @return {Array} The list of specifications along with dfn problems that have - * been identified. Each entry has `url`, 'crawled`, `shortname` properties to - * identify the specification, and a `missing` property that is an object that - * may have `css` and `idl` properties which list missing CSS/IDL definitions. - */ -async function checkDefinitions(pathToReport, options = {}) { - const rootFolder = path.resolve(process.cwd(), pathToReport); - const index = (await loadJSON(path.resolve(rootFolder, 'index.json'))).results; - - // Check all dfns against CSS and IDL extracts - const checkOptions = { - rootFolder, - includeObsolete: !!options.shortname - }; - const missing = await Promise.all( - index - .filter(spec => !options.shortname || spec.shortname === options.shortname) - .map(async spec => { - const res = { - url: spec.url, - crawled: spec.crawled, - shortname: spec.shortname, - }; - if (!spec.dfns) { - return res; - } - res.missing = await checkSpecDefinitions(spec, checkOptions); - return res; - }) - ); - - return missing; -} - - -/** - * Report missing dfn to the console as Markdown + * Format the anomaly message to report as Markdown * * @function * @private * @param {Object} missing Object that describes missing dfn */ -function reportMissing(missing) { +function formatAnomalyMessage(missing) { const exp = missing.expected; const found = missing.found; const foundFor = (found && found.for && found.for.length > 0) ? ' for ' + found.for.map(f => `\`${f}\``).join(',') : ''; - console.log(`- \`${exp.linkingText[0]}\` ${exp.type ? `with type \`${exp.type}\`` : ''}` + + return '`' + exp.linkingText[0] + '` ' + + (exp.type ? `with type \`${exp.type}\`` : '') + (missing.for ? ` for [\`${missing.for.linkingText[0]}\`](${missing.for.href})` : '') + - (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : '')); + (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : ''); } -/************************************************** -Export methods for use as module -**************************************************/ -export { - checkSpecDefinitions, - checkDefinitions, - - // "Inner" functions that the IDL names generator uses to link IDL terms with - // their definition (see generate-idlnames.js) - getExpectedDfnFromIdlDesc, - matchIdlDfn -} - - -/************************************************** -Code run if the code is run as a stand-alone module -**************************************************/ -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const pathToReport = process.argv[2]; - const shortname = process.argv[3] || 'all'; - const format = process.argv[4] || 'markdown'; - - const options = (shortname === 'all') ? 
undefined : { shortname }; - let res = await checkDefinitions(pathToReport, options); - if (shortname === 'all') { - res = res - .filter(result => result.missing && - !result.missing.obsoleteDfnsModel && - ((result.missing.css.length > 0) || (result.missing.idl.length > 0))); - } - - if (format === 'json') { - console.log(JSON.stringify(res, null, 2)); - } - else { - res.forEach(result => { - const missing = result.missing || {css: [], idl: []}; - const errors = ['css', 'idl'] - .map(type => result.missing[type].filter(missing => !missing.warning)) - .flat(); - const warnings = ['css', 'idl'] - .map(type => result.missing[type].filter(missing => missing.warning)) - .flat(); - console.log('
'); - console.log(`${result.shortname} (${errors.length} errors, ${warnings.length} warnings)`); - console.log(); - if (errors.length === 0 && warnings.length === 0) { - console.log('All good!'); - } - if (errors.length > 0) { - console.log('
'); - console.log(`Errors (${errors.length})`); - console.log(); - errors.forEach(reportMissing); - console.log('
'); - } - if (warnings.length > 0) { - console.log('
'); - console.log(`Warnings (${warnings.length})`); - console.log(); - warnings.forEach(reportMissing); - console.log('
'); +/** + * Checks the CSS and IDL extracts against the dfns extract for all specs in + * the report, and return a list of missing definitions. + * + * @function + * @public + */ +export default function studyDefinitions(specs) { + return specs + .map(spec => { + const missing = checkSpecDefinitions(spec); + const res = []; + if (!missing.obsoleteDfnsModel) { + for (const type of ['css', 'idl']) { + const anomalies = missing[type]; + for (const anomaly of anomalies) { + res.push({ + name: 'missingDfns', + message: formatAnomalyMessage(anomaly), + spec + }); + } } - console.log('
'); - console.log(); - }) - } -} \ No newline at end of file + } + return res; + }) + .flat(); +} diff --git a/src/lib/study-refs.js b/src/lib/study-refs.js index f7542486..19bc99c6 100644 --- a/src/lib/study-refs.js +++ b/src/lib/study-refs.js @@ -1,31 +1,116 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; +import { canonicalizeUrl, canonicalizesTo } from './canonicalize-url.js'; -const possibleAnomalies = [ - 'discontinuedReferences' -]; +/** + * Helper function that returns true when the given URL seems to target a real + * "spec" (as opposed to, say, a Wiki page, or something else) + */ +const matchSpecUrl = url => + url.match(/spec.whatwg.org/) || + url.match(/www.w3.org\/TR\/[a-z0-9]/) || + (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//)); -function studyReferences (edResults) { - const report = []; - const recordAnomaly = recordCategorizedAnomaly(report, 'refs', possibleAnomalies); - edResults.forEach(spec => { - (spec.refs?.normative || []).forEach(ref => { - const referencedSpec = edResults.find(s => s.url === ref.url || s?.nightly?.url === ref.url || s?.nightly?.alternateUrls?.includes(ref.url)); +function studyReferences (specs, { crawlResults = null } = {}) { + crawlResults = crawlResults ?? specs; - if (referencedSpec && referencedSpec.standing === "discontinued") { + // Construct spec equivalence from the crawl report + const specEquivalents = {}; + for (const spec of crawlResults) { + for (const v of (spec.versions ?? [])) { + if (specEquivalents[v]) { + if (Array.isArray(specEquivalents[v])) { + specEquivalents[v].push(spec.url); + } + else { + specEquivalents[v] = [specEquivalents[v], spec.url]; + } + } + else { + specEquivalents[v] = spec.url; + } + } + } + + // Strong canonicalization options to find references + const useEquivalents = { + datedToLatest: true, + equivalents: specEquivalents + }; - const newSpecsLinks = edResults.filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)).map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); - recordAnomaly(spec, 'discontinuedReferences', `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`); + const report = []; + for (const spec of specs) { + for (const ref of spec.refs?.normative ?? []) { + const referencedSpec = crawlResults.find(s => + s.url === ref.url || + s?.nightly?.url === ref.url || + s?.nightly?.alternateUrls?.includes(ref.url)); + if (referencedSpec && referencedSpec.standing === "discontinued") { + const newSpecsLinks = crawlResults + .filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)) + .map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); + report.push({ + name: 'discontinuedReferences', + message: `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`, + spec + }); } - }); - }); + } + + // Detect links to external specifications within the body of the spec + // that do not have a corresponding entry in the list of references + // (all links to external specs should have a companion ref) + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .filter(l => { + // Filter out "good" and "inconsistent" references + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []).concat(spec.refs?.informative ?? 
[]); + return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + }) + .filter(l => + // Ignore links to other versions of "self". There may + // be cases where it would be worth reporting them but + // most of the time they appear in "changelog" sections. + !canonicalizesTo(l, spec.url, useEquivalents) && + !canonicalizesTo(l, spec.versions, useEquivalents) + ) + .forEach(l => { + report.push({ + name: 'missingReferences', + message: l, + spec + }); + }); + + // Detect links to external specifications within the body of the spec + // that have a corresponding entry in the references, but for which the + // reference uses a different URL, e.g., because the link targets the + // Editor's Draft, whereas the reference targets the latest published + // version + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .map(l => { + const canonSimple = canonicalizeUrl(l); + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []) + .concat(spec.refs?.informative ?? []); + + // Filter out "good" references + if (refs.find(r => canonicalizesTo(r.url, canonSimple))) { + return null; + } + const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + return (ref ? { link: l, ref } : null); + }) + .filter(anomaly => !!anomaly) + .forEach(anomaly => { + report.push({ + name: 'inconsistentReferences', + message: `${anomaly.link}, related reference "${anomaly.ref.name}" uses URL ${anomaly.ref.url}`, + spec + }); + }); + } return report; } export default studyReferences; - -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const crawl = await loadCrawlResults(process.argv[2]); - const results = studyReferences(crawl.ed); - console.log(results); -} diff --git a/src/lib/study-webidl.js b/src/lib/study-webidl.js index fc09365f..f5711326 100644 --- a/src/lib/study-webidl.js +++ b/src/lib/study-webidl.js @@ -5,52 +5,26 @@ * object structure: * * { - * "category": "webidl", * "name": "type of anomaly", * "message": "Description of the anomaly", - * "specs": [ - * { spec that contains or triggers the anomaly }, - * { another spec that contains or triggers the anomaly }, - * ... - * ] + * "spec": { spec that contains or triggers the anomaly } * } + * + * Some anomalies may be associated with more than one spec, when the code + * cannot tell which spec needs fixing (e.g., when checking duplicates while + * merging partials). In such cases, the `spec` property is replaced by a + * `specs` property that contains an array of specs. * - * All anomalies will be associated with at least one spec (so specs.length > 0) - * but some of them may be associated with more than one, when the code cannot - * tell which of them needs to be fixed (e.g. when checking duplicates while - * merging partials). - * - * The spec object returned in the "specs" array is the spec object provided in - * the crawl results parameter. + * The spec object returned in the `spec` and `specs` properties is the spec + * object provided in the crawl results parameter. */ -import { recordCategorizedAnomaly } from './util.js'; import * as WebIDL2 from 'webidl2'; const getSpecs = list => [...new Set(list.map(({ spec }) => spec))]; const specName = spec => spec.shortname ?? spec.url; const dfnName = dfn => `${dfn.idl.partial ? 
'partial ' : ''}${dfn.idl.type} "${dfn.idl.name}"`; -const possibleAnomalies = [ - 'incompatiblePartialIdlExposure', - 'invalid', - 'noExposure', - 'noOriginalDefinition', - 'overloaded', - 'redefined', - 'redefinedIncludes', - 'redefinedMember', - 'redefinedWithDifferentTypes', - 'singleEnumValue', - 'unexpectedEventHandler', - 'unknownExposure', - 'unknownExtAttr', - 'unknownType', - 'wrongCaseEnumValue', - 'wrongKind', - 'wrongType' -]; - const basicTypes = new Set([ // Types defined by Web IDL itself: 'any', // https://webidl.spec.whatwg.org/#idl-any @@ -192,7 +166,7 @@ function describeMember (member) { return desc; } -function studyWebIdl (edResults, curatedResults) { +function studyWebIdl (specs, { crawledResults = [], curatedResults = [] } = {}) { const report = []; // List of anomalies to report const dfns = {}; // Index of IDL definitions (save includes) const includesStatements = {}; // Index of "includes" statements @@ -200,8 +174,21 @@ function studyWebIdl (edResults, curatedResults) { const usedTypes = {}; // Index of types used in the IDL const usedExtAttrs = {}; // Index of extended attributes - // Record an anomaly for the given spec(s). - const recordAnomaly = recordCategorizedAnomaly(report, 'webidl', possibleAnomalies); + // Record an anomaly for the given spec(s), + // provided we are indeed interested in the results + function recordAnomaly (spec, name, message) { + if (Array.isArray(spec)) { + const filtered = spec.filter(sp => specs.find(s => s.shortname === sp.shortname)); + if (filtered.length > 0) { + report.push({ name, message, specs: filtered }); + } + } + else { + if (specs.find(s => s.shortname === spec.shortname)) { + report.push({ name, message, spec }); + } + } + } function inheritsFrom (iface, ancestor) { if (!iface.inheritance) return false; @@ -397,7 +384,11 @@ function studyWebIdl (edResults, curatedResults) { } } - edResults + // We need to run the analysis on all specs, even if caller is only + // interested in a few of them, because types may be defined in specs that + // the caller is not interested in. + const allSpecs = (crawledResults.length > 0) ? 
crawledResults : specs; + allSpecs // We're only interested in specs that define Web IDL content .filter(spec => !!spec.idl) @@ -666,7 +657,4 @@ function studyWebIdl (edResults, curatedResults) { return report; } -/************************************************** -Export methods for use as module -**************************************************/ export default studyWebIdl; diff --git a/src/lib/study.js b/src/lib/study.js new file mode 100644 index 00000000..7f6264ea --- /dev/null +++ b/src/lib/study.js @@ -0,0 +1,618 @@ +import studyDfns from './study-dfns.js'; +import studyAlgorithms from './study-algorithms.js'; +import studyBackrefs from './study-backrefs.js'; +import studyRefs from './study-refs.js'; +import studyWebIdl from './study-webidl.js'; +import isInMultiSpecRepository from './is-in-multi-spec-repo.js'; +import { recordCategorizedAnomaly } from './util.js'; + +/** + * List of anomalies, grouped per study function + */ +const anomalyGroups = [ + { + name: 'generic', + title: 'Generic', + description: 'The following errors prevented the spec from being analyzed', + types: [ + { + name: 'error', + title: 'Crawl error', + description: 'The following crawl errors occurred' + } + ], + study: (specs) => specs + .filter(spec => !!spec.error) + .map(spec => Object.assign( + { name: 'error', message: spec.error, spec } + )) + }, + + { + name: 'dfns', + title: 'Problems with definitions', + description: 'The following problems were identified in term definitions', + types: [ + { + name: 'missingDfns', + title: 'Missing definitions', + description: 'The following constructs were found without a definition' + } + ], + study: studyDfns + }, + + { + name: 'backrefs', + title: 'Problems with links to other specs', + description: 'The following problems were identified when analyzing links to other specifications', + types: [ + { + name: 'brokenLinks', + title: 'Broken links', + description: 'The following links to other specifications were detected as pointing to non-existing anchors' + }, + { + name: 'datedUrls', + title: 'Links to dated TR URLs', + description: 'The following links target a dated version of a specification' + }, + { + name: 'evolvingLinks', + title: 'Links to now gone anchors', + description: 'The following links in the specification link to anchors that no longer exist in the Editor\'s Draft of the targeted specification' + }, + { + name: 'frailLinks', + title: 'Unstable link anchors', + description: 'The following links in the specification link to anchors that either have a new name or are inherently brittle' + }, + { + name: 'nonCanonicalRefs', + title: 'Non-canonical links', + description: 'The following links were detected as pointing to outdated URLs' + }, + { + name: 'notDfn', + title: 'Links to unofficial anchors', + description: 'The following links were detected as pointing to anchors that are neither definitions nor headings in the targeted specification' + }, + { + name: 'notExported', + title: 'Links to non-exported definitions', + description: 'The following links were detected as pointing to a private definition in the targeted specification' + }, + { + name: 'outdatedSpecs', + title: 'Outdated references', + description: 'The following links were detected as pointing to outdated specifications' + }, + { + name: 'unknownSpecs', + title: 'Links to unknown specs', + description: 'The following links were detected as pointing to documents that are not recognized as specifications' + } + ], + study: studyBackrefs, + studyParams: ['tr'] + }, + + { + name:
'algorithms', + title: 'Problems with algorithms', + description: 'The following problems were identified when analyzing algorithms', + types: [ + { + name: 'missingTaskForPromise', + title: 'Missing tasks in parallel steps to handle a promise', + description: 'The following algorithms resolve or reject a Promise within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task' + }, + { + name: 'missingTaskForEvent', + title: 'Missing tasks in parallel steps to fire an event', + description: 'The following algorithms fire an event within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task' + } + ], + study: studyAlgorithms + }, + + { + name: 'refs', + title: 'Problems with references', + description: 'The following problems were identified when analyzing the list of references', + types: [ + { + name: 'discontinuedReferences', + title: 'Normative references to discontinued specs', + description: 'The following normative references were detected as pointing to discontinued specifications' + }, + { + name: 'missingReferences', + title: 'Missing references', + description: 'The following links target specifications that are not mentioned in the list of references' + }, + { + name: 'inconsistentReferences', + title: 'Inconsistent reference links', + description: 'The following links use a different URL for the targeted specification from the URL defined in the references' + } + ], + study: studyRefs + }, + + { + name: 'webidl', + title: 'Web IDL problems', + description: 'The following Web IDL problems were identified', + types: [ + { name: 'incompatiblePartialIdlExposure', title: 'Incompatible `[Exposed]` attribute in partial definitions' }, + { name: 'invalid', title: 'Invalid Web IDL' }, + { name: 'noExposure', title: 'Missing `[Exposed]` attributes' }, + { name: 'noOriginalDefinition', title: 'Missing base interfaces' }, + { name: 'overloaded', title: 'Invalid overloaded operations' }, + { name: 'redefined', title: 'Duplicated IDL names' }, + { name: 'redefinedIncludes', title: 'Duplicated `includes` statements' }, + { name: 'redefinedMember', title: 'Duplicated members' }, + { name: 'redefinedWithDifferentTypes', title: 'Duplicated IDL names with different types' }, + { name: 'singleEnumValue', title: 'Enums with a single value' }, + { name: 'unexpectedEventHandler', title: 'Missing `EventTarget` inheritances' }, + { name: 'unknownExposure', title: 'Unknown globals in `[Exposed]` attribute' }, + { name: 'unknownExtAttr', title: 'Unknown extended attributes' }, + { name: 'unknownType', title: 'Unknown Web IDL type' }, + { name: 'wrongCaseEnumValue', title: 'Enums with wrong casing' }, + { name: 'wrongKind', title: 'Invalid inheritance chains' }, + { name: 'wrongType', title: 'Web IDL names incorrectly used as types' } + ], + study: studyWebIdl, + studyParams: ['curated'] + } +]; + + +/** + * Possible report structures. + * + * "/" separates levels in the hierarchy. + * "+" creates a composed key at a given level. + * + * For example, "group+spec/type" means: first level per + * anomaly group and spec (so one "web-animations-2-webidl" entry if the + * spec "web-animations-2" has "webidl" issues), second level per type. + * + * The list is described in more detail in the CLI help. Run: + *   npx strudy inspect --help + * ... or check the code in `strudy.js` at the root of the project.
+ */ +const reportStructures = [ + 'flat', + 'type+spec', + 'group+spec', + 'group+spec/type', + 'spec/type', + 'spec/group/type', + 'type/spec', + 'group/type/spec', + 'group/spec/type' +]; + + +// Compute mapping between an anomaly type and its parent group +const anomalyToGroup = {}; +for (const group of anomalyGroups) { + for (const type of group.types) { + anomalyToGroup[type.name] = group; + } +} + +/** + * Return an object that describes the requested anomaly type + */ +function getAnomalyType(name) { + for (const group of anomalyGroups) { + const type = group.types.find(t => t.name === name); + if (type) { + return Object.assign({}, type); + } + } + return null; +} + +/** + * Return an object that describes the requested anomaly group + */ +function getAnomalyGroup(name) { + for (const group of anomalyGroups) { + if (group.name === name) { + return { + name: group.name, + title: group.title + }; + } + } + return null; +} + +/** + * Return an object that describes the requested anomaly group + * from the given anomaly type + */ +function getAnomalyGroupFromType(type) { + const name = anomalyToGroup[type]; + return getAnomalyGroup(name); +} + + +/** + * Structure a flat list of anomalies to the requested structure + */ +function structureResults(structure, anomalies, crawlResults) { + const levels = structure.split('/') + .map(level => level.replace(/\s+/g, '')); + const report = []; + + switch (levels[0]) { + case 'flat': + for (const anomaly of anomalies) { + report.push(anomaly); + } + break; + + case 'type+spec': + for (const anomaly of anomalies) { + const type = getAnomalyType(anomaly.name); + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.type.name === anomaly.name && + entry.spec.shortname === spec.shortname); + if (!entry) { + const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? + `[${spec.shortname}] ` : ''; + entry = { + name: `${spec.shortname}-${type.name.toLowerCase()}`, + title: `${titlePrefix}${type.title} in ${spec.title}`, + type, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'group+spec': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.group.name === group.name && + entry.spec.shortname === spec.shortname); + if (!entry) { + const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? 
+ `[${spec.shortname}] ` : ''; + entry = { + name: `${spec.shortname}-${group.name.toLowerCase()}`, + title: `${titlePrefix}${group.title} in ${spec.title}`, + group, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'spec': + for (const anomaly of anomalies) { + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.spec.shortname === spec.shortname); + if (!entry) { + entry = { + name: spec.shortname, + title: spec.title, + spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'type': + for (const anomaly of anomalies) { + const type = getAnomalyType(anomaly.name); + let entry = report.find(entry => entry.type.name === anomaly.name); + if (!entry) { + entry = { + name: type.name.toLowerCase(), + title: type.title, + type, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + + case 'group': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + let entry = report.find(entry => entry.group.name === group.name); + if (!entry) { + entry = { + name: group.name.toLowerCase(), + title: group.title, + group, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + } + + if (levels.length > 1) { + const itemsStructure = levels.slice(1).join('/'); + for (const entry of report) { + entry.items = structureResults(itemsStructure, entry.anomalies, crawlResults); + delete entry.anomalies; + } + } + return report; +} + + +function makeLowerCase(description) { + return description.charAt(0).toLowerCase() + description.slice(1); +} + +function pad(str, depth) { + while (depth > 1) { + str = ' ' + str; + depth -= 1; + } + return str; +} + +function serializeEntry(entry, format, depth = 0) { + let res; + if (format === 'json') { + res = Object.assign({}, entry); + if (entry.spec) { + res.spec = { + url: entry.spec.url, + shortname: entry.spec.shortname, + title: entry.spec.title + }; + } + if (entry.specs) { + res.specs = entry.specs.map(spec => Object.assign({ + url: spec.url, + shortname: spec.shortname, + title: spec.title + })); + } + if (entry.items) { + res.items = entry.items.map(item => serializeEntry(item, format, depth + 1)); + } + if (entry.anomalies) { + res.anomalies = entry.anomalies.map(anomaly => serializeEntry(anomaly, format, depth + 1)); + } + } + else if (format === 'markdown') { + res = ''; + if (entry.spec && entry.group) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? entry.group.title)}:`; + } + else if (entry.spec && entry.type) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`; + } + else if (entry.group) { + if (depth === 0) { + res = (entry.group.description ?? entry.group.title) + ':'; + } + else { + res = pad(`* ${entry.group.title}`, depth); + } + } + else if (entry.type) { + if (depth === 0) { + res = (entry.type.description ?? entry.type.title) + ':'; + } + else { + res = pad(`* ${entry.type.title}`, depth); + } + } + else if (entry.spec) { + if (depth === 0) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`; + } + else { + res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth); + } + } + else if (entry.message) { + res = pad(`* [ ] ${entry.message}`, depth); + } + + for (const item of entry.items ?? 
[]) { + res += '\n' + serializeEntry(item, format, depth + 1); + } + for (const anomaly of entry.anomalies ?? []) { + res += `\n` + serializeEntry(anomaly, format, depth + 1); + } + } + return res; +} + + +/** + * Format the structured report as JSON or markdown, or a combination of both + */ +function formatReport(format, report) { + if (format === 'json') { + // We'll return the report as is, trimming the information about specs to + // a reasonable minimum (the rest of the information can easily be + // retrieved from the crawl result if needed) + return report.map(entry => serializeEntry(entry, 'json')); + } + else if (format === 'issue') { + return report.map(entry => Object.assign({ + name: entry.name, + title: entry.title, + spec: entry.spec, + content: serializeEntry(entry, 'markdown') + })); + } + else if (format === 'full') { + return [ + { + title: 'Study report', + content: report.map(entry => +`## ${entry.title} +${serializeEntry(entry, 'markdown')}`) + } + ] + } +} + + +/** + * The report includes a set of anomalies. It can also be useful to know + * what things looked fine, in other words what other anomalies could have + * been reported in theory. This can typically be used to identify issue files + * created in the past and that now need to be deleted. + * + * Note: Some anomalies may hide others. For example, a WebIDL update can make + * the Web IDL invalid... and hide other WebIDL issues that may still exist in + * the spec. This function may return false negatives as a result. + */ +function getNamesOfNonReportedEntries(report, specs, what, structure) { + const groups = []; + anomalyGroups.filter(group => + what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))); + const types = []; + for (const group of anomalyGroups) { + if (what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))) { + groups.push(group); + for (const type of group.types) { + if (what.includes('all') || + what.includes(group.name) || + what.includes(type)) { + types.push(type); + } + } + } + } + + const levels = structure.split('/') + .map(level => level.replace(/\s+/g, '')); + let allNames; + switch (levels[0]) { + case 'flat': + // Not much we can say there + break; + case 'type+spec': + allNames = specs + .map(spec => types.map(type => `${spec.shortname}-${type.name.toLowerCase()}`)) + .flat(); + break; + case 'group+spec': + allNames = specs + .map(spec => groups.map(group => `${spec.shortname}-${group.name.toLowerCase()}`)) + .flat(); + break; + case 'spec': + allNames = specs.map(spec => spec.shortname); + break; + case 'type': + allNames = types.map(type => type.name); + break; + case 'group': + allNames = groups.map(group => group.name); + break; + } + return allNames.filter(name => !report.find(entry => entry.name === name)); +} + + +/** + * Main function that studies a crawl result and returns a structured + * report. + */ +export default async function study(specs, options = {}) { + // Copy the options object (we're going to add options on our own + // before calling other study methods) + options = Object.assign({}, options); + + const what = options.what ?? ['all']; + const structure = options.structure ?? 'type + spec'; + const format = options.format ?? 
'issue'; + + if (!what.includes('all')) { + const validWhat = what.every(name => + anomalyGroups.find(g => g.name === name || g.types.find(t => t.name === name))); + if (!validWhat) { + throw new Error('Invalid `what` option'); + } + } + if (!reportStructures.find(s => structure.replace(/\s+/g, '') === s)) { + throw new Error('Invalid `structure` option'); + } + + // Only keep specs that caller wants to study + // (but note study functions that analyze references need the whole list!) + options.crawlResults = specs; + if (options.specs) { + specs = options.crawlResults.filter(spec => options.specs.find(shortname => shortname === spec.shortname)); + } + + // Anomalies are studied in groups of related anomalies, let's compute the + // studies that we need to run to answer the request + const groups = anomalyGroups.filter(group => + what.includes('all') || + what.includes(group.name) || + group.types.find(type => what.includes(type.name))); + + // Run studies and fill the anomaly report accordingly + let anomalies = []; + for (const group of groups) { + const studyResult = await group.study(specs, options); + const recordAnomaly = recordCategorizedAnomaly( + anomalies, group.name, group.types.map(t => t.name)); + studyResult.map(an => recordAnomaly(an.spec ?? an.specs, an.name, an.message)); + } + + // Only keep anomalies whose types we're interested in + anomalies = anomalies.filter(anomaly => + what.includes('all') || + what.includes(anomaly.name) || + what.includes(anomalyToGroup[anomaly.name].name)); + + // Now that we have a flat report of anomalies, + // let's structure and serialize it as requested + const report = structureResults(structure, anomalies, options.crawlResults); + + // And serialize it using the right format + const result = { + type: 'study', + date: (new Date()).toJSON(), + structure, + what, + stats: { + crawled: options.crawlResults.length, + studied: specs.length, + anomalies: anomalies.length + }, + results: formatReport(format, report), + looksGood: getNamesOfNonReportedEntries(report, specs, what, structure) + }; + + // Return the structured report + return result; +} \ No newline at end of file diff --git a/src/reporting/file-issue-for-review.js b/src/reporting/file-issue-for-review.js index 0d797fbe..f5cea3f8 100644 --- a/src/reporting/file-issue-for-review.js +++ b/src/reporting/file-issue-for-review.js @@ -1,280 +1,176 @@ -/* Takes a report of anomalies produced by Strudy, - creates a draft of an issue per spec and per anomaly type - and submits as a pull request in this repo if no existing one matches +/** + * Looks at draft issue files produced by the Strudy CLI in the issues folder + * and submits new/updated/deleted ones as pull requests in this repo if there + * is no pending pull request already. */ -import { loadCrawlResults } from '../lib/util.js'; -import studyBackrefs from '../lib/study-backrefs.js'; -import studyReferences from '../lib/study-refs.js'; -import isInMultiSpecRepository from '../lib/is-in-multi-spec-repo.js'; -import loadJSON from '../lib/load-json.js'; import path from 'node:path'; import fs from 'node:fs/promises'; +import { fileURLToPath } from "node:url"; import { execSync } from 'node:child_process'; -import Octokit from '../lib/octokit.js'; import matter from 'gray-matter'; +import { Command, InvalidArgumentError } from 'commander'; -const config = await loadJSON("config.json"); -const GH_TOKEN = config?.GH_TOKEN ?? 
process.env.GH_TOKEN; +/** + * Command-line execution parameters for calls to `execSync` + */ +const scriptPath = path.dirname(fileURLToPath(import.meta.url)); +const execParams = { + cwd: path.join(scriptPath, '..', '..'), + encoding: 'utf8' +}; -const MAX_PR_BY_RUN = 10; -const repoOwner = 'w3c'; -const repoName = 'strudy'; +/** + * Wrap "matter" issue report to create a suitable PR body + */ +function prWrapper(action, issueReport) { + if (action === 'add') { + return `This pull request was automatically created by Strudy upon detecting errors in ${issueReport.data.Title}. -const octokit = new Octokit({ - auth: GH_TOKEN - // log: console -}); - -function issueWrapper (spec, anomalies, anomalyType, crawl) { - const titlePrefix = isInMultiSpecRepository(spec, crawl.ed) ? `[${spec.shortname}] ` : ''; - let anomalyReport = ''; let title = ''; - switch (anomalyType) { - case 'brokenLinks': - title = `Broken references in ${spec.title}`; - anomalyReport = 'the following links to other specifications were detected as pointing to non-existing anchors'; - break; - case 'outdatedSpecs': - title = `Outdated references in ${spec.title}`; - anomalyReport = 'the following links were detected as pointing to outdated specifications'; - break; - case 'nonCanonicalRefs': - title = `Non-canonical references in ${spec.title}`; - anomalyReport = 'the following links were detected as pointing to outdated URLs'; - break; - case 'discontinuedReferences': - title = `Normative references to discontinued specs in ${spec.title}`; - anomalyReport = 'the following normative referenced were detected as pointing to discontinued specifications'; - break; - } - return { - title: titlePrefix + title, - content: ` -While crawling [${spec.title}](${spec.crawled}), ${anomalyReport}: -${anomalies.map(anomaly => `* [ ] ${anomaly.message}`).join('\n')} - -This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).` - }; -} - -function prWrapper (title, uri, repo, issueReport) { - return `This pull request was automatically created by Strudy upon detecting errors in ${title}. - -Please check that these errors were correctly detected, and that they have not already been reported in ${repo}. +Please check that these errors were correctly detected, and that they have not already been reported in ${issueReport.data.Repo}. If everything is OK, you can merge this pull request which will report the issue below to the repo, and update the underlying report file with a link to the said issue. -${issueReport} +${issueReport.stringify()} `; -} - - -const knownAnomalyTypes = ['brokenLinks', 'outdatedSpecs', 'nonCanonicalRefs', 'discontinuedReferences']; + } + else { + return `This pull request was automatically created by Strudy while analyzing ${issueReport.data.Title}. -let edCrawlResultsPath = process.argv[2]; -let trCrawlResultsPath = process.argv[3]; -const anomalyFilter = process.argv.slice(4).filter(p => !p.startsWith('--')); -const unknownAnomalyType = anomalyFilter.find(p => !knownAnomalyTypes.includes(p)); -if (unknownAnomalyType) { - console.error(`Unknown report type ${unknownAnomalyType} - known types are ${knownAnomalyTypes.join(', ')}`); - process.exit(1); -} -const anomalyTypes = anomalyFilter.length ? anomalyFilter : knownAnomalyTypes; -const updateMode = process.argv.includes('--update') ? 'update-untracked' : (process.argv.includes('--update-tracked') ? 
'update-tracked' : false); -const dryRun = process.argv.includes('--dry-run'); -const noGit = dryRun || updateMode || process.argv.includes('--no-git'); +Please check that past errors listed below have indeed been corrected, and that the related issue in ${issueReport.data.Repo} has been closed accordingly. -if (!noGit && !GH_TOKEN) { - console.error('GH_TOKEN must be set to some personal access token as an env variable or in a config.json file'); - process.exit(1); -} +If everything looks OK, you can merge this pull request to delete the issue file. -// Target the index file if needed -if (!edCrawlResultsPath.endsWith('index.json')) { - edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json'); -} -if (!trCrawlResultsPath.endsWith('index.json')) { - trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json'); +${issueReport.stringify()} +`; + } } -let existingReports = []; -if (updateMode) { - console.log('Compiling list of relevant existing issue reports…'); - // List all existing reports to serve as a comparison point - // to detect if any report can be deleted - // if the anomalies are no longer reported - const reportFiles = (await fs.readdir('issues')).map(p => 'issues/' + p); - for (const anomalyType of anomalyTypes) { - existingReports = existingReports.concat(reportFiles.filter(p => p.endsWith(`-${anomalyType.toLowerCase()}.md`))); +/** + * Parse the maximum number of pull requests option as integer + */ +function myParseInt(value) { + const parsedValue = parseInt(value, 10); + if (isNaN(parsedValue)) { + throw new InvalidArgumentError('Not a number.'); } - console.log('- done'); + return parsedValue; } -const nolongerRelevantReports = new Set(existingReports); -// Donwload automatic map of multipages anchors in HTML spec -let htmlFragments = {}; -try { - console.log('Downloading HTML spec fragments data…'); - htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json()); - console.log('- done'); -} catch (err) { - console.log('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec'); -} +const program = new Command(); +program + .description('File added/updated/deleted issue files as individual GitHub pull requests') + .option('--dry-run', 'run the script without creating any actual pull request') + .option('-m, --max ', 'maximum number of pull requests to create/update', myParseInt, 10) + .showHelpAfterError('(run with --help for usage information)') + .addHelpText('after', ` +Minimal usage example: + To create up to 10 pull requests from local issue files, run: + $ node file-issue-for-review.js + +Description: + The command looks into the \`issues\` folder to find files that have been + added, updated or deleted, and that have not yet been committed to the + repository. For each of them, it creates a pull request on GitHub, unless one + already exists. + + The \`gh\` and \`git\` CLI commands must be available and functional. The + command will push Git updates to the \`origin\` remote, which must exist. + +Usage notes for some of the options: +--dry-run + Run the script without committing anything, and without creating any actual + pull request. The option is meant for debugging. + +-m, --max + Maximum number of pull requests to create. Defaults to 10. + + You may set the option to 0 to create as many pull requests as needed. You + may want to check that there aren't too many pull requests to create first, + though! 
+`) + .action(async (options) => { + function execOrLog(cmd) { + options.dryRun ? console.log(cmd) : execSync(cmd, execParams); + } -console.log(`Opening crawl results ${edCrawlResultsPath} and ${trCrawlResultsPath}…`); -const crawl = await loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath); -console.log('- done'); -console.log('Running references analysis…'); -// TODO: if we're not running all the reports, this could run only the -// relevant study function -const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments).concat(studyReferences(crawl.ed)); -console.log('- done'); -const currentBranch = noGit || execSync('git branch --show-current', { encoding: 'utf8' }).trim(); -const needsPush = {}; -for (const anomalyType of anomalyTypes) { - const anomalies = results.filter(r => r.name === anomalyType); - const specs = [...new Set(anomalies.map(a => a.specs.map(s => s.url)).flat())]; - for (const url of specs) { - const specAnomalies = anomalies.filter(a => a.specs[0].url === url); - const spec = specAnomalies[0].specs[0]; - console.log(`Compiling ${anomalyType} report for ${spec.title}…`); - // if we don't know the repo, we can't file an issue - if (!spec.nightly?.repository) { - console.log(`No known repo for ${spec.title}, skipping`); - continue; + if (options.dryRun) { + console.log('DRY RUN!'); + console.log('The command won\'t make any actual change.'); } - if (spec.standing === "discontinued") { - console.log(`${spec.title} is discontinued, skipping`); - continue; + console.log('How many pull requests can we use to change the world?'); + console.log(`- nb pull requests that we may create: ${options.max}`); + + console.log('On which Git branch are we?'); + const currentBranch = execSync('git branch --show-current', execParams).trim(); + console.log(`- current branch: ${currentBranch}`); + + // Possibly useful reminder about calls to `filter` below: + // `split` on an empty string does not return an empty array! + console.log('How many issue files ought to be reported?'); + const toadd = execSync('git diff --name-only --diff-filter=d issues/*.md', execParams) + .trim().split('\n').filter(x => !!x); + console.log(`- nb issue files to add/update: ${toadd.length}`); + const todelete = execSync('git diff --name-only --diff-filter=D issues/*.md', execParams) + .trim().split('\n').filter(x => !!x); + console.log(`- nb issue files to delete: ${todelete.length}`); + const toreport = toadd.map(name => { return { action: 'add', filename: name }; }) + .concat(todelete.map(name => { return { action: 'delete', filename: name }; })) + .sort((e1, e2) => e1.filename.localeCompare(e2.filename)); + + if (toreport.length === 0) { + console.log('No issue files to report'); } - const issueMoniker = `${spec.shortname}-${anomalyType.toLowerCase()}`; - // is there already a file with that moniker? 
- const issueFilename = path.join('issues/', issueMoniker + '.md'); - let tracked = 'N/A'; - let existingReportContent; + + let reported = 0; try { - if (!(await fs.stat(issueFilename)).isFile()) { - console.error(`${issueFilename} already exists but is not a file`); - continue; - } else { - if (!updateMode) { - console.log(`${issueFilename} already exists, bailing`); + console.log('Create pull requests as needed...'); + for (const entry of toreport) { + // Look for a related PR that may still be pending + const issueMoniker = entry.filename.match(/^issues\/(.*)\.md$/)[1]; + const pendingPRStr = execSync(`gh pr list --head ${issueMoniker} --json number,headRefName`, execParams); + const pendingPR = JSON.parse(pendingPRStr)[0]; + if (pendingPR) { + console.log(`- skip ${entry.filename}, a pending PR already exists (#${pendingPR.number}`); continue; - } else { - nolongerRelevantReports.delete(issueFilename); - try { - const existingReport = matter(await fs.readFile(issueFilename, 'utf-8')); - tracked = existingReport.data.Tracked; - existingReportContent = existingReport.content; - // only update tracked or untracked reports based on - // CLI parameter - if ((updateMode === 'update-untracked' && tracked !== 'N/A') || (updateMode === 'update-tracked' && tracked === 'N/A')) { - continue; - } - } catch (e) { - console.error('Failed to parse existing content', e); - continue; - } } - } - } catch (err) { - // Intentionally blank - } - // if not, we create the file, add it in a branch - // and submit it as a pull request to the repo - const { title, content: issueReportContent } = issueWrapper(spec, specAnomalies, anomalyType, crawl); - if (updateMode) { - if (existingReportContent) { - const existingAnomalies = existingReportContent.split('\n').filter(l => l.startsWith('* [ ] ')).map(l => l.slice(6)); - if (existingAnomalies.every((a, i) => specAnomalies[i] === a) && existingAnomalies.length === specAnomalies.length) { - // no substantial change, skip - console.log(`Skipping ${title}, no change`); - continue; + + let issueReport; + if (entry.action === 'add') { + issueReport = matter(await fs.readFile(entry.filename, 'utf-8')); } - } else { - // in update mode, we only care about existing reports - continue; - } - } - const issueReportData = matter(issueReportContent); - issueReportData.data = { - Repo: spec.nightly.repository, - Tracked: tracked, - Title: title - }; - let issueReport; - try { - issueReport = issueReportData.stringify(); - } catch (err) { - console.error(`Failed to stringify report of ${anomalyType} for ${title}: ${err}`, issueReportContent); - continue; - } - if (dryRun) { - console.log(`Would add ${issueFilename} with`); - console.log(issueReport); - console.log(); - } else { - await fs.writeFile(issueFilename, issueReport, 'utf-8'); - try { - if (!noGit) { - console.log(`Committing issue report as ${issueFilename} in branch ${issueMoniker}…`); - execSync(`git checkout -b ${issueMoniker}`); - execSync(`git add ${issueFilename}`); - execSync(`git commit -m "File report on ${issueReportData.data.Title}"`); - needsPush[issueMoniker] = { title: issueReportData.data.Title, report: issueReport, repo: spec.nightly.repository, specTitle: spec.title, uri: spec.crawled }; - console.log('- done'); - execSync(`git checkout ${currentBranch}`); + else { + // File was deleted, retrieve its previous content from the HEAD + issueReport = matter(await execSync(`git show HEAD:${entry.filename}`, execParams)); + } + + console.log(`- create PR for ${entry.filename}`); + execOrLog(`git checkout -b 
${issueMoniker}`); + execOrLog(`git add ${entry.filename}`); + execOrLog(`git commit -m "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title}"`); + execOrLog(`git push origin ${issueMoniker}`); + + const prBodyFile = path.join(execParams.cwd, '__pr.md') + const prBody = prWrapper(entry.action, issueReport); + await fs.writeFile(prBodyFile, prBody, 'utf8'); + try { + execOrLog(`gh pr create --body-file __pr.md --title "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title.replace(/"/g, '')}"`); + } + finally { + await fs.rm(prBodyFile, { force: true }); + } + + reported += 1; + if (options.max > 0 && reported > options.max) { + break; } - } catch (err) { - console.error(`Failed to commit error report for ${spec.title}`, err); - await fs.unlink(issueFilename); - execSync(`git checkout ${currentBranch}`); } } - } -} -if (nolongerRelevantReports.size) { - console.log('The following reports are no longer relevant, deleting them', [...nolongerRelevantReports]); - for (const issueFilename of nolongerRelevantReports) { - await fs.unlink(issueFilename); - } -} -if (Object.keys(needsPush).length) { - let counter = 0; - for (const branch in needsPush) { - if (counter > MAX_PR_BY_RUN) { - delete needsPush[branch]; - continue; + finally { + console.log(`- get back to the initial Git branch ${currentBranch}`); + execOrLog(`git checkout ${currentBranch}`, execParams); + console.log(`- nb PR ${options.dryRun ? 'that would be ' : ''}created: ${reported}`); } + }); - // is there already a pull request targetting that branch? - const { data: pullrequests } = (await octokit.rest.pulls.list({ - owner: repoOwner, - repo: repoName, - head: `${repoOwner}:${branch}` - })); - if (pullrequests.length > 0) { - console.log(`A pull request from branch ${branch} already exists, bailing`); - delete needsPush[branch]; - } - counter++; - } -} -if (Object.keys(needsPush).length) { - console.log(`Pushing new branches ${Object.keys(needsPush).join(' ')}…`); - execSync(`git push origin ${Object.keys(needsPush).join(' ')}`); - console.log('- done'); - for (const branch in needsPush) { - const { title, specTitle, uri, repo, report } = needsPush[branch]; - console.log(`Creating pull request from branch ${branch}…`); - await octokit.rest.pulls.create({ - owner: repoOwner, - repo: repoName, - title, - body: prWrapper(specTitle, uri, repo, report), - head: `${repoOwner}:${branch}`, - base: 'main' - }); - console.log('- done'); - } -} +program.parseAsync(process.argv); \ No newline at end of file diff --git a/strudy.js b/strudy.js index a766d8fb..bbd4bdd2 100644 --- a/strudy.js +++ b/strudy.js @@ -7,27 +7,25 @@ * Provided Strudy was installed as a global package, the spec analyzer can be * called directly through: * - * `strudy [options] [report]` - * - * Use the `--help` option for usage instructions. 
+ * `strudy --help` * * If Strudy was not installed as a global package, call: * - * `node strudy.js [options] [report]` + * `node strudy.js --help` * * @module crawler */ -import { Command } from 'commander'; +import { Command, InvalidArgumentError } from 'commander'; import { constants as fsConstants } from 'node:fs'; import fs from 'node:fs/promises'; -import pandoc from 'node-pandoc'; import path from 'node:path'; import satisfies from 'semver/functions/satisfies.js'; import packageContents from './package.json' with { type: 'json' }; -import studyCrawl from './src/lib/study-crawl.js'; -import generateReport from './src/lib/generate-report.js'; +import study from './src/lib/study.js'; import loadJSON from './src/lib/load-json.js'; +import { expandCrawlResult } from 'reffy'; +import matter from 'gray-matter'; // Warn if version of Node.js does not satisfy requirements const { version, engines } = packageContents; @@ -48,55 +46,48 @@ async function exists(file) { } } - -async function isStudyReport(file) { - const fd = await fs.open(file, 'r'); - try { - const buff = Buffer.alloc(1024); - await fd.read(buff, 0, 1024); - const str = buff.toString(); - if (str.match(/"type"\s*:\s*"study"/)) { - return true; - } - } - catch { - return false; - } - finally { - await fd.close(); +function myParseInt(value) { + const parsedValue = parseInt(value, 10); + if (isNaN(parsedValue)) { + throw new InvalidArgumentError('Not a number.'); } + return parsedValue; } - const program = new Command(); program .name('strudy') - .description('Analyzes a crawl report generated by Reffy') - .version(version) - .usage('[options] ') - .argument('', 'Path/URL to crawl report or study file') - .option('-f, --format ', 'create a markdown/HTML report from study file') - .option('-d, --diff ', 'create a diff from some reference study') + .description('Analyzes a crawl report generated by Reffy to detect anomalies in specifications') + .version(version); + +program + .command('inspect') + .alias('study') + .argument('', 'Path/URL to crawl report') + .option('-f, --format ', 'report markdown or json', 'markdown') + .option('-i, --issues ', 'report issues as markdown files in the given folder') + .option('-m, --max ', 'maximum number of issue files to create/update', myParseInt, 0) .option('-s, --spec ', 'restrict analysis to given specs') - .option('--dep', 'create a dependencies report') - .option('--onlynew', 'only include new diff in the diff report') - .option('--perissue', 'create a markdown/HTML report per issue') - .option('--tr ', 'Path/URL to crawl report on published specs') + .option('--structure ', 'report structure', 'type+spec') + .option('--tr ', 'path/URL to crawl report on published specs') + .option('--update-mode ', 'what issue files to update', 'new') + .option('-w, --what ', 'what to analyze', ['all']) .showHelpAfterError('(run with --help for usage information)') .addHelpText('after', ` Minimal usage example: To study a crawl report in current folder: - $ strudy . + $ strudy inspect . Description: Analyzes a crawl report generated by Reffy and create a report with potential anomalies in each of the specs contained in the crawl report. - The report is written to the console as a serialized JSON object or as a - markdown or HTML report depending on command options. + Depending on command options, the report is either written to the console as + a serialized JSON object or as a markdown report (see the --format option), + or written to individual issues files in a folder (see the --issues option). 
Argument: - + Path to the crawl report to analyze. If the path leads to a folder, Strudy will look for an "ed/index.json" file under that folder first (if it exists, it will also look for a possible "tr/index.json" file to set the --tr option), @@ -105,51 +96,65 @@ Argument: Usage notes for some of the options: -f, --format Tell Strudy to return a report in the specified format. Format may be one of - "json" (default when option is not set), "markdown" or "html". + "markdown" (default when option is not set) or "json". + + The --format option cannot be set to "json" if the --issues option is set. + +-i, --issues + Tell Strudy to report the anomalies in anomaly files in the given folder. + An anomaly file gets created for and named after keys at the first level of + the report (see --structure option). - When the option is specified to either "markdown" or "html", the report - pointed to by may be a JSON file that contains a Strudy report. + Anomaly files are in markdown. The --format option must be set to "markdown", + or not set at all. --d, --diff - Tell Strudy tool to return a diff from the provided reference Strudy report. - must point to a Strudy report. + Anomaly files start with metadata, used to convert the file to a GitHub issue + and track the resolution of the issue afterwards: "Repo" sets the repository + for the issue, "Title" the title of the issue, and "Tracked" the URL of the + issue, once created. - When the option is specified, the report pointed to by may be a JSON - file that contains a Strudy report. + Existing anomaly files in the folder are preserved by default, set the + --update-mode option to change that behavior. - Diff reports are in markdown and the "--format" option, if specified, must be - "markdown". +-m, --max + Maximum number of issue files to add or update. Defaults to 0, which means + "no limit". - The --diff option and the --dep option cannot both be set. + This setting should only be useful when combined with --issues to create + issue files in batches. It may also be set in the absence of --issues, in + which case it restricts the number of entries at the first level of the + report (see --structure). -s, --spec - Valid spec values may be a shortname, a URL, or a relative path to JSON file - that contains a list of spec URLs and/or shortnames. Shortnames may be the - shortname of the spec series. + Valid spec values may be a shortname, a URL, or a relative path to a JSON + file that contains a list of spec URLs and/or shortnames. Shortnames may be + the shortname of the spec series. Use "all" to include all specs. This is equivalent to not setting the option at all. For instance: - $ strudy . --spec picture-in-picture https://w3c.github.io/mediasession/ - ---dep - Tell Strudy to return a dependencies report. - - When the option is specified, the report pointed to by may be a JSON - file that contains a Strudy report. - - Dependencies reports are in markdown and the "--format" option, if specified, - must be "markdown". - - The --diff option and the --dep option cannot both be set. - ---perissue - Markdown/HTML reports are per spec by default. Set this option to tell Strudy - to generate markdown/HTML reports per issue instead. - - The --diff option must not be set. - The --format option must be set to either "markdown" or "html". + $ strudy inspect . --spec picture-in-picture https://w3c.github.io/mediasession/ + +--structure + Describes the hierarchy in the report(s) that Strudy returns. 
Possible values: + "flat" no level, report anomalies one by one + "type+spec" one level with one entry per type and spec (default) + "group+spec/type" first level per group and spec, second level per type + "spec/type" first level per spec, second level per type + "spec/group/type" first level per spec, second level per group, third level + per type + "type/spec" first level per type, second level per spec + "group/type/spec" first level per group, second level per type, third level + per spec + "group/spec/type" first level per group, second level per spec, third level + per type + + Last level contains the actual list of anomalies. + + Note: an anomaly always has a "type". Related anomaly types are grouped in an + anomaly "group". For example, "brokenLinks" and "datedUrls" both belong to + the "backrefs" group (also see the --what option). --tr Useful for Strudy to refine its broken link analysis when crawl report @@ -159,113 +164,199 @@ Usage notes for some of the options: version lags behind the Editor's Draft may have issues of the form "The term exists in the /TR version but no longer exists in the Editor's Draft". - Note that if is a link to a folder, the tool will automatically look + Note that if is a link to a folder, the tool will automatically look for the TR crawl report in a "tr" subfolder and set itself. + +--update-mode + Tell Strudy what issue files to update when --issues is set and an issue file + already exists for the issue at hand. Possible values are: + "new" (default) preserve existing files + "old" preserve existing files but get rid of old ones for which + study reveals no more issue + "untracked" update existing files that do not have a "Tracked" URL + "tracked" update existing files that have a "Tracked" URL + "all" update all existing files, deleting them when needed + + Strudy will always create new issue files, the mode only changes the behavior + for existing issue files. + + The --issues option must be set. + +-w, --what + Tell Strudy which anomalies to analyze. Values can be the names of anomaly + types or the name of anomaly groups. The value "all" (default) tells Strudy + to analyze and report on all possible anomalies. + + The list of anomaly types and groups will likely evolve over time, see actual + list in src/lib/study.js. + + Examples: + "-w algorithms -w backrefs" to study algorithms and references to other specs + "-w unknownSpecs" to study links to unknown specs `) .action(async (report, options) => { - if (options.format && !['json', 'markdown', 'html'].includes(options.format)) { + // Check options + if (options.format && !['json', 'markdown'].includes(options.format)) { console.error(`Unsupported --format option "${options.format}". -Format must be one of "json", "markdown" or "html".`) +Format must be one of "json" or "markdown".`) process.exit(2); } - if (options.diff && options.format && (options.format !== 'markdown')) { - console.error(`Diff reports are always in markdown. 
-The --format option can only be set to "markdown" when --diff is used.`); + if (options.format !== 'markdown' && options.issues) { + console.error(`The --format option can only be set to "markdown" when --issues is used.`); process.exit(2); } - if (options.diff && options.perissue) { - console.error('The --diff and --perissue options cannot both be set.'); + if (options.updateMode && !['new', 'old', 'untracked', 'tracked', 'all'].includes(options.updateMode)) { + console.error(`Unsupported --update-mode option "${options.updateMode}"`); process.exit(2); } - if (options.perissue && !['markdown', 'html'].includes(options.format)) { - console.error('The --format option must be "markdown" or "html" when --perissue is set.') + if (options.updateMode !== 'new' && !options.issues) { + console.error('The --update-mode option can only be set when --issues is set'); process.exit(2); } - if (options.dep && options.diff) { - console.error('The --dep and --diff options cannot both be set.'); + if (options.issues && !await exists(options.issues)) { + console.error(`Could not find/access the folder to store anomalies: ${options.issues}`) process.exit(2); } - let edReport = report; - let trReport = options.tr; + // Load (and expand) the crawl results + let edReportFile = report; + let trReportFile = options.tr; if (!report.endsWith('.json')) { if (await exists(path.join(report, 'ed'))) { - edReport = path.join(report, 'ed'); - if (!trReport && await exists(path.join(report, 'tr'))) { - trReport = path.join(report, 'tr'); + edReportFile = path.join(report, 'ed'); + if (!trReportFile && await exists(path.join(report, 'tr'))) { + trReportFile = path.join(report, 'tr'); } } - edReport = path.join(edReport, 'index.json'); + edReportFile = path.join(edReportFile, 'index.json'); } - if (!await exists(edReport)) { + if (!await exists(edReportFile)) { console.error(`Could not find/access crawl/study report: ${report}`); process.exit(2); } - if (trReport) { - if (!trReport.endsWith('.json')) { - trReport = path.join(trReport, 'index.json'); + if (trReportFile) { + if (!trReportFile.endsWith('.json')) { + trReportFile = path.join(trReportFile, 'index.json'); } - if (!await exists(trReport)) { + if (!await exists(trReportFile)) { console.error(`Could not find/access TR crawl report: ${options.tr}`); process.exit(2); } } - // Specified report may already be the study report - // To find out, we'll do a bit of content sniffing to avoid loading the - // report twice (report file may be somewhat large). - let study = null; - const isStudy = await isStudyReport(edReport); - if (isStudy) { - study = await loadJSON(edReport); + let edReport = await loadJSON(edReportFile); + edReport = await expandCrawlResult(edReport, path.dirname(edReportFile)); + + let trReport; + if (trReportFile) { + trReport = await loadJSON(trReportFile); + trReport = await expandCrawlResult(trReport, path.dirname(trReportFile)); } - if (!study) { - const studyOptions = { - include: options.spec ?? null, - trResults: trReport + // Create a structured anomaly report out of the crawl report + const anomaliesReport = await study(edReport.results, { + what: options.what, + structure: options.structure, + format: options.format === 'json' ? + 'json' : + (options.issues ? 'issue' : 'full'), + trResults: trReport?.results ?? [], + specs: options.spec + }); + + // Output the structured anomaly report + if (options.format === 'json') { + // Caller wants a JSON report. 
We'll just trim the number of anomalies + // in the first level to the requested maximum as needed + if (options.max > 0) { + anomaliesReport.results = anomaliesReport.results.slice(0, options.max); } - study = await studyCrawl(edReport, studyOptions); + console.log(JSON.stringify(anomaliesReport, null, 2)); } + else if (options.issues) { + // Caller wants to add/update issue files in the provided folder. + // Issue files are formatted with the gray-matter library to save useful + // metadata as front matter in the file. + let reported = 0; + for (const entry of anomaliesReport.results) { + const filename = path.join(options.issues, `${entry.name}.md`); + let existingReport; + let tracked = 'N/A'; + if (await exists(filename)) { + if (options.updateMode === 'new' || + options.updateMode === 'old') { + console.warn(`- skip ${filename}, file already exists`); + continue; + } + existingReport = matter(await fs.readFile(filename, 'utf-8')); + tracked = existingReport.data.Tracked ?? 'N/A'; + if ((options.updateMode === 'tracked' && tracked === 'N/A') || + (options.updateMode === 'untracked' && tracked !== 'N/A')) { + console.warn(`- skip ${filename}, file already exists, with Tracked="${tracked}"`); + continue; + } + } - let res = null; - if (options.diff || options.dep) { - // Generate diff/dependencies report - res = await generateReport(study, { - depReport: options.dep, - diffReport: !!options.diff, - refStudyFile: options.diff, - onlyNew: options.onlynew - }); - } - else if (options.format && options.format !== 'json') { - // Generate markdown report and possibly an HTML report - const generateOptions = { perSpec: !options.perissue }; - const markdown = await generateReport(study, generateOptions); - - if (options.format === 'html') { - const template = path.join(__dirname, 'src', 'templates', - `report${options.perissue ? '-perissue' : ''}-template.html`); - const promise = new Promise((resolve, reject) => { - let args = [ - '-f', 'markdown', '-t', 'html5', '--section-divs', '-s', - '--template', template - ]; - pandoc(markdown, args, (err, result) => - err ? reject(err) : resolve(result)); - }); - res = await promise; + const content = ` +${entry.content} + +This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`; + // Note from @tidoust: One day, I'll understand how to set up Git and + // code so that all line endings end up being "\n" even on Windows + // machines. In the meantime, note that local issue files may well + // contain "\r\n" on Windows machines. + if (existingReport?.content.replace(/\r\n/g, '\n').trim() === content.trim()) { + console.warn(`- skip ${filename}, file already exists, no change`); + continue; + } + + const issueReport = matter(content); + issueReport.data = { + Title: entry.title, + Tracked: tracked + }; + if (entry.spec) { + const spec = edReport.results.find(spec => spec.url === entry.spec.url); + if (spec.nightly?.repository) { + issueReport.data.Repo = spec.nightly.repository; + } + } + console.warn(`- ${existingReport ? 
'update' : 'add'} ${filename}`); + const filecontent = issueReport.stringify(); + await fs.writeFile(filename, filecontent, 'utf-8'); + reported += 1; + if (options.max > 0 && reported >= options.max) { + break; + } } - else { - res = markdown; + + if (options.updateMode === 'old' || + options.updateMode === 'all') { + const reportFiles = await fs.readdir(options.issues); + const todelete = reportFiles.filter(file => + anomaliesReport.looksGood.find(name => file === `${name}.md`)); + for (const file of todelete) { + const filename = path.join(options.issues, file); + console.warn(`- delete ${filename}, no more anomalies detected`); + await fs.rm(filename, { force: true }); + } } } else { - // Output the study report to the console - res = JSON.stringify(study, null, 2); + // Caller wants a markdown report written to the console. + // The anomalies report should already be a "full" one (so only one + // result item at the first level). + const content = anomaliesReport.results[0].content; + let reported = 0; + for (const entry of content) { + console.log(entry); + console.log(); + reported += 1; + if (options.max > 0 && reported >= options.max) { + break; + } + } } - - console.log(res); }); program.parseAsync(process.argv); diff --git a/test/cli.js b/test/cli.js index a48b3c2d..4c6ac976 100644 --- a/test/cli.js +++ b/test/cli.js @@ -26,19 +26,51 @@ describe(`Strudy's CLI`, function () { it('reports usage help when asked', async function () { const { stdout, stderr } = await strudy(`--help`); - assert.match(stdout, /^Usage: strudy \[options\] /); + assert.match(stdout, /^Usage: strudy \[options\] \[command\]/); assert.deepEqual(stderr, ''); }); - it('expects a report argument', async function () { - const { stdout, stderr } = await strudy(``); - assert.match(stderr, /error: missing required argument 'report'/); - assert.deepEqual(stdout, ''); - }); + describe(`The "inspect" command`, function () { + it('expects a crawl report as argument', async function () { + const { stdout, stderr } = await strudy(`inspect`); + assert.match(stderr, /error: missing required argument 'crawl'/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when provided crawl report does not exist', async function () { + const { stdout, stderr } = await strudy(`inspect notareport`); + assert.match(stderr, /Could not find/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when provided issues folder does not exist', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues notafolder`); + assert.match(stderr, /Could not find\/access the folder to store anomalies/); + assert.deepEqual(stdout, ''); + }); + + it('refuses formats other than "json" or "markdown"', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format html`); + assert.match(stderr, /Unsupported --format option/); + assert.deepEqual(stdout, ''); + }); + + it('rejects incompatible format and issues options', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format json --issues issues`); + assert.match(stderr, /The --format option can only be set to "markdown" when --issues is used/); + assert.deepEqual(stdout, ''); + }); + + it('reports an error when update-mode is set but not the issues option', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --update-mode all`); + assert.match(stderr, /The --update-mode option can only be set when --issues is set/); + 
assert.deepEqual(stdout, ''); + }); - it('reports an error when provided report does not exist', async function () { - const { stdout, stderr } = await strudy(`notareport`); - assert.match(stderr, /Could not find/); - assert.deepEqual(stdout, ''); + it('reports an error when update-mode is set to some unknown mode', async function () { + const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues issues --update-mode notamode`); + assert.match(stderr, /Unsupported --update-mode option/); + assert.deepEqual(stdout, ''); + }) }); }); \ No newline at end of file diff --git a/test/data/empty.json b/test/data/empty.json new file mode 100644 index 00000000..914332ed --- /dev/null +++ b/test/data/empty.json @@ -0,0 +1,3 @@ +{ + "results": [] +} \ No newline at end of file diff --git a/test/study-algorithms.js b/test/study-algorithms.js new file mode 100644 index 00000000..c5b19888 --- /dev/null +++ b/test/study-algorithms.js @@ -0,0 +1,37 @@ +import study from '../src/lib/study-algorithms.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The algorithms analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult(algorithms) { + return [{ url: specUrl, algorithms }]; + } + + it('reports no anomaly if there are no algorithms', () => { + const crawlResult = toCrawlResult([]); + const report = study(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports an error when a step resolves a promise in parallel', () => { + const crawlResult = toCrawlResult([ + { + html: 'The encodingInfo() method MUST run the following steps:', + rationale: 'if', + steps: [ + { html: 'Let p be a new promise.' }, + { html: 'In parallel, run the Create a MediaCapabilitiesEncodingInfo algorithm with configuration and resolve p with its result.' }, + { html: 'Return p.' 
} + ] + } + ]); + const report = study(crawlResult); + assertAnomaly(report, 0, { + name: 'missingTaskForPromise', + message: 'The algorithm that starts with "The encodingInfo() method MUST run the following steps:" has a parallel step that resolves/rejects a promise directly', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-backrefs.js b/test/study-backrefs.js index ea755137..8e6e4eac 100644 --- a/test/study-backrefs.js +++ b/test/study-backrefs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyBackrefs from '../src/lib/study-backrefs.js'; +import study from '../src/lib/study-backrefs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -48,28 +48,33 @@ const populateSpec = (url, ids, links, dfns) => { function toCrawlResults (ids, links, trIds = ids) { return { - ed: [populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), - populateSpec(specEdUrl2, [], toLinks(specEdUrl, links))], - tr: [populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), [])] + ed: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), + populateSpec(specEdUrl2, [], toLinks(specEdUrl, links)) + ], + tr: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), []) + ] }; } describe('The links analyser', () => { - it('reports no anomaly if links are valid', () => { + it('reports no anomaly if links are valid', async () => { const ids = ['validid']; const crawlResult = toCrawlResults(ids, ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 0); }); - it('reports a broken link', () => { + it('reports a broken link', async () => { const ids = ['validid']; const crawlResult = toCrawlResults([], ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'links', - message: specEdUrl + '#' + ids[0] + name: 'brokenLinks', + message: specEdUrl + '#' + ids[0], + spec: { url: 'https://www.w3.org/TR/spec2/' } }); }); diff --git a/test/study-dfns.js b/test/study-dfns.js new file mode 100644 index 00000000..b656c52c --- /dev/null +++ b/test/study-dfns.js @@ -0,0 +1,39 @@ +import studyDefinitions from '../src/lib/study-dfns.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The definitions analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult({ css = {}, dfns = [], idlparsed = {} }) { + const crawlResult = [{ + url: specUrl, + css, dfns, idlparsed + }]; + return crawlResult; + } + + it('reports no anomaly if there are no definitions', () => { + const crawlResult = toCrawlResult({}); + const report = studyDefinitions(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports missing definition anomalies from CSS extracts', () => { + const crawlResult = toCrawlResult({ + css: { + warnings: [{ + msg: 'Missing definition', + name: 'no-def', + type: 'value' + }] + } + }); + const report = studyDefinitions(crawlResult); + assertAnomaly(report, 0, { + name: 'missingDfns', + message: '`no-def` with type `value`', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-refs.js b/test/study-refs.js index 11a9db18..3250e183 100644 --- a/test/study-refs.js 
+++ b/test/study-refs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyReferences from '../src/lib/study-refs.js'; +import study from '../src/lib/study-refs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -14,6 +14,9 @@ function toRefs (name, url) { return [ {name, url} ]; } +const toTr = url => url.replace( + 'https://w3c.github.io', + 'https://www.w3.org/TR'); const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { const shortname = url.slice(0, -1).split('/').pop(); @@ -25,6 +28,9 @@ const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { nightly: { url }, + release: { + url: toTr(url) + }, shortname, standing, obsoletedBy @@ -39,31 +45,62 @@ function toEdCrawlResults (standing = "good", replacements) { ]; } -describe('The reference analyser', () => { +describe('The references analyser', () => { it('reports no anomaly if references are not discontinued', () => { const crawlResult = toEdCrawlResults(); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 0); }); it('reports a discontinued reference with a replacement', () => { const crawlResult = toEdCrawlResults("discontinued", ["spec3"]); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /spec3/ + name: 'discontinuedReferences', + message: /spec3/, + spec: { url: specEdUrl } }); }); it('reports a discontinued reference without a replacement', () => { const crawlResult = toEdCrawlResults("discontinued"); - const report = studyReferences(crawlResult); + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'discontinuedReferences', + message: /no known replacement/, + spec: { url: specEdUrl } + }); + }); + + it('reports a missing reference', () => { + const spec = populateSpec(specEdUrl); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const crawlResult = [spec]; + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /no known replacement/ + name: 'missingReferences', + message: specEdUrl2, + spec: { url: specEdUrl } }); }); + it('reports an inconsistent reference', () => { + const spec = populateSpec(specEdUrl, toRefs('spec2', toTr(specEdUrl2))); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const spec2 = populateSpec(specEdUrl2); + spec2.versions = [toTr(specEdUrl2)]; + const crawlResult = [spec, spec2]; + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'inconsistentReferences', + message: `${specEdUrl2}, related reference "spec2" uses URL ${toTr(specEdUrl2)}`, + spec: { url: specEdUrl } + }); + }); }); diff --git a/test/study-webidl.js b/test/study-webidl.js index 4660b0e0..52e13a3e 100644 --- a/test/study-webidl.js +++ b/test/study-webidl.js @@ -4,7 +4,7 @@ */ /* global describe, it */ -import studyWebIdl from '../src/lib/study-webidl.js'; +import study from '../src/lib/study-webidl.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; describe('The Web IDL analyser', () => { @@ -21,7 +21,7 @@ describe('The Web IDL analyser', () => { function analyzeIdl (idl, idlSpec2) { const crawlResult = toCrawlResult(idl, idlSpec2); - return studyWebIdl(crawlResult); + return study(crawlResult); } it('reports no 
anomaly if IDL is valid', () => { @@ -86,12 +86,11 @@ interface Invalid; `); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'webidl', name: 'invalid', message: `Syntax error at line 3, since \`interface Invalid\`: interface Invalid; ^ Bodyless interface`, - specs: [{ url: specUrl }] + spec: { url: specUrl } }); }); @@ -105,11 +104,11 @@ interface Invalid; [Global=Window,Exposed=*] interface Valid: Invalid {}; `); - const curatedResult = toCrawlResult(` + const curatedResults = toCrawlResult(` [Global=Window,Exposed=*] interface Invalid{}; `); - const report = studyWebIdl(crawlResult, curatedResult); + const report = study(crawlResult, { curatedResults }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { name: 'invalid' }); }); diff --git a/test/study.js b/test/study.js new file mode 100644 index 00000000..30c09386 --- /dev/null +++ b/test/study.js @@ -0,0 +1,169 @@ +import study from '../src/lib/study.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +const specUrl = 'https://w3c.github.io/world/'; +const specUrl2 = 'https://w3c.github.io/universe/'; + +function toTr(url) { + return url.replace('https://w3c.github.io', 'https://www.w3.org/TR'); +} + +function populateSpec(url, crawl) { + const shortname = url.slice(0, -1).split('/').pop(); + const spec = Object.assign({ + shortname, + title: `Hello ${shortname} API`, + url: toTr(url), + nightly: { url }, + release: { url: toTr(url) }, + crawled: url + }, crawl); + return spec; +} + +describe('The main study function', function () { + this.slow(5000); + this.timeout(10000); + + it('reports no anomaly when spec is empty', async function() { + const crawlResult = [{ url: specUrl }]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 0); + }); + + it('reports anomalies per type and spec by default', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Crawl error in Hello world API', + content: +`While crawling [Hello world API](${specUrl}), the following crawl errors occurred: +* [ ] Boo` + }); + assertAnomaly(report.results, 1, { + title: 'Crawl error in Hello universe API', + content: +`While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred: +* [ ] Borked` + }); + }); + + it('reports anomalies per type when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'type/spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Crawl error', + content: +`The following crawl errors occurred: +* [Hello world API](https://w3c.github.io/world/) + * [ ] Boo +* [Hello universe API](https://w3c.github.io/universe/) + * [ ] Borked` + }); + }); + + it('reports anomalies per spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec/type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: +`While crawling [Hello world API](https://w3c.github.io/world/), 
the following anomalies were identified: +* Crawl error + * [ ] Boo` + }); + }); + + it('reports anomalies per spec and groups anomalies when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec/group/type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: +`While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: +* Generic + * Crawl error + * [ ] Boo` + }); + }); + + it('reports anomalies per group and spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group+spec/type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Generic in Hello world API', + content: +`While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed: +* Crawl error + * [ ] Boo` + }); + }); + + it('reports anomalies per group, with anomaly type as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group/type/spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: +`The following errors prevented the spec from being analyzed: +* Crawl error + * [Hello world API](https://w3c.github.io/world/) + * [ ] Boo + * [Hello universe API](https://w3c.github.io/universe/) + * [ ] Borked` + }); + }); + + it('reports anomalies per group, with spec as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group/spec/type', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: +`The following errors prevented the spec from being analyzed: +* [Hello world API](https://w3c.github.io/world/) + * Crawl error + * [ ] Boo +* [Hello universe API](https://w3c.github.io/universe/) + * Crawl error + * [ ] Borked` + }); + }); + + it('only reports anomalies for requested specs', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { specs: ['universe'], htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + }); +}); \ No newline at end of file
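
For reference, here is a minimal sketch of driving the reworked study() entry point programmatically, mirroring the shapes exercised in test/study.js and in the inspect command above. The crawl result is hand-built, the import path is the one the tests use, and the options shown (structure, htmlFragments) are only those visible in this diff; treat it as an illustration of the option surface, not a canonical invocation.

import study from '../src/lib/study.js';

// Hand-built crawl result: a single spec that failed to crawl, as in the
// tests above. A real caller would load and expand a webref crawl report.
const crawlResult = [
  {
    url: 'https://www.w3.org/TR/spec/',
    shortname: 'spec',
    title: 'Hello spec API',
    nightly: { url: 'https://w3c.github.io/spec/' },
    crawled: 'https://w3c.github.io/spec/',
    error: 'Boo'
  }
];

const report = await study(crawlResult, {
  // One entry per anomaly type, with specs nested underneath. A `what`
  // filter (e.g. ['brokenLinks', 'discontinuedReferences'], the two types
  // named in the tests) could narrow the analysis further.
  structure: 'type/spec',
  // Skip fetching HTML fragments, as the tests above do.
  htmlFragments: {}
});

// Each result entry carries a title and Markdown content, ready to be
// written to an issue file or printed to the console.
for (const entry of report.results) {
  console.log(`## ${entry.title}\n${entry.content}\n`);
}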
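
In the same spirit, the --issues branch of the inspect command boils down to a small gray-matter round-trip. The sketch below only replays the calls visible in the diff (matter(), the .data assignment, .stringify()); the file name and metadata values are made up for illustration.

import fs from 'node:fs/promises';
import path from 'node:path';
import matter from 'gray-matter';

const issuesFolder = 'issues';
const filename = path.join(issuesFolder, 'spec-brokenlinks.md');

// Preserve the "Tracked" URL of an existing issue file, if there is one.
let tracked = 'N/A';
try {
  const existing = matter(await fs.readFile(filename, 'utf-8'));
  tracked = existing.data.Tracked ?? 'N/A';
}
catch {
  // No existing file: the issue will be reported as untracked.
}

// Wrap the anomaly content and serialize the metadata as YAML front matter.
const issueReport = matter('While crawling the spec, one broken link was found.');
issueReport.data = {
  Title: 'Broken references in the spec',
  Tracked: tracked
};
await fs.writeFile(filename, issueReport.stringify(), 'utf-8');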